## Make sure the helper packages are present at a minimum version.
## A package is (re)installed when no entry with its name is found under any
## library path, or when its installed version is below the required one;
## otherwise a message reports that the installed version is acceptable.
Map(function(pkg, min_version) {
    found_in_lib <- pkg %in% list.files(.libPaths())
    ## `&&` short-circuits, so packageVersion() is only consulted when the
    ## package was found in a library path.
    if (found_in_lib && packageVersion(pkg) >= min_version) {
        message(sprintf("Version of %s is suitable for demonstration", pkg))
    } else {
        install.packages(pkg)
    }
}, c("qdapRegex", "qdapTools"), c("0.2.0", "1.1.0"))
 
## Attach every package the script relies on.
## library() is used instead of require(): require() only returns FALSE when a
## package is missing, which lets the script continue and fail later on an
## undefined function; library() stops here with an explicit error.
## invisible() keeps the list of lapply() results from being printed.
invisible(lapply(
    c("qdapRegex", "qdapTools", "ggplot2", "qdap"),
    library,
    character.only = TRUE
))


## Fetch the .docx
## (presumably saved into the working directory, since it is read back below
## by its bare file name -- confirm against url_dl() defaults)
url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx")

## Read the .docx and strip non ascii characters in one step
txt <- rm_non_ascii(read_docx("whole_language_timeline-updated.docx"))

## Break the text into body/references sections at "References"
parts <- split_vector(
    txt,
    regex = TRUE,
    include = TRUE,
    split = "References"
)

## Show the body
parts[[1]]

## Show the references
parts[[2]]

## Citations in order of appearance (body collapsed to one string first)
rm_citation(unbag(parts[[1]]), extract = TRUE)[[1]]

## Citations by section (one result per element of the body)
rm_citation(parts[[1]], extract = TRUE)

## Frequency of each citation, most frequent first.
## The table is converted to a data frame and its two columns are swapped so
## that `citation` comes first and `freq` second.
cites <- list2df(
    sort(
        table(rm_citation(unbag(parts[[1]]), extract = TRUE)),
        decreasing = TRUE
    ),
    "freq",
    "citation"
)[2:1]
left_just(cites)


## Distribution of citations (find locations)
## For every citation in the first column of `cites`, locate each literal
## occurrence in the collapsed body text and record a window that is padded
## by 5 characters on either side of the match. The result has one row per
## occurrence with columns `citation`, `start` and `end`.
cite_locs <- local({
    ## Collapse the body once instead of once per citation; local() keeps
    ## this helper value out of the global environment.
    body_text <- unbag(parts[[1]])

    do.call(rbind, lapply(cites[[1]], function(cite) {
        hits <- gregexpr(cite, body_text, fixed = TRUE)[[1]]

        ## gregexpr() reports "no match" as -1; skip such citations so they
        ## do not produce a bogus row with a negative start position.
        ## rbind() ignores the NULL entries.
        if (hits[1] == -1) {
            return(NULL)
        }

        data.frame(
            citation = cite,
            start = hits - 5,
            end = hits + 5 + attr(hits, "match.length", exact = TRUE)
        )
    }))
})
 
## Plot the distribution
## Each row of `cite_locs` is drawn as a yellow horizontal bar running from
## `start` to `end` on the row of its citation, on a black background.
## The x axis spans 0 to the body length in characters plus 25.
## NOTE(review): newer ggplot2 releases (3.4.0+) deprecate `size` for line
## widths in favour of `linewidth`; `size` is kept here so older versions
## keep working -- confirm the targeted ggplot2 version.
ggplot(cite_locs) +
    geom_segment(
        aes(y = citation, yend = citation, x = start, xend = end),
        colour = "yellow",
        size = 3
    ) +
    xlab("Duration") +
    scale_x_continuous(
        limits = c(0, nchar(unbag(parts[[1]])) + 25),
        expand = c(0, 0)
    ) +
    theme_grey() +
    theme(
        plot.background = element_rect(fill = "black"),
        panel.background = element_rect(fill = "black"),
        panel.border = element_rect(fill = NA, colour = "grey50", size = 1),
        panel.grid.major = element_line(colour = "grey20"),
        panel.grid.minor = element_line(colour = "grey20"),
        axis.title = element_text(colour = "grey50"),
        axis.text = element_text(colour = "grey50")
    )