library(ggplot2)
library(plyr)
library(gdata)
library(RColorBrewer)
library(splines)
library(MASS)
library(lme4)

####Helpers shared by the English and Icelandic sections.

##Read a colon-separated file of CorpusSearch codes into a data frame, name
##its columns, throw out the codes for tokens that are irrelevant for the
##study, and store Year and the 0/1 OV code as numbers.
##  path:      file to read (no header row).
##  col_names: column names to assign, in file order. OV, Clause, ObjType,
##             SbjType, Year and ID must be among them; when a Genre column
##             is present, rows with an empty Genre are dropped as well.
##Returns the cleaned data frame with unused factor levels removed.
read_codes <- function(path, col_names) {
  ##stringsAsFactors is explicit because relevel() further down needs
  ##factors, and read.delim() no longer creates them by default in R >= 4.0.
  codes <- read.delim(path, header = FALSE, sep = ":",
                      stringsAsFactors = TRUE)
  colnames(codes) <- col_names

  message("Got up to subsetting")
  keep <- codes$OV != "z" & codes$Clause != "z" &
    codes$Year != "0" & codes$Year != "" & codes$Year != "na" &
    codes$SbjType != "z" & codes$SbjType != "" &
    codes$ObjType != "z" & codes$ObjType != "" &
    codes$ID != ""
  if ("Genre" %in% col_names) {
    keep <- keep & codes$Genre != ""
  }
  ##which() drops rows whose test is NA, as subset() does.
  codes <- droplevels(codes[which(keep), ])
  message("finished droplevels")

  codes$Year <- as.numeric(as.character(codes$Year))
  codes$OV <- as.numeric(as.character(codes$OV))
  message("finished converting to numeric")

  codes
}

##Standard plot of OV against Year, coloured by subject type: counts at each
##(Year, OV) point drawn as sized dots, plus the default smoother.
##  data:   data frame with Year, OV, SbjType and the facetting columns.
##  facets: formula handed to facet_grid(), e.g. ObjType ~ Clause.
##Returns the ggplot object.
##(An optional linetype-by-ObjType variant was sketched in earlier versions:
## aes(linetype=ObjType) with
## scale_linetype_manual(values=c("solid","dotted","dashed")).)
ov_plot <- function(data, facets) {
  ggplot(data, aes(Year, OV, color = SbjType)) +
    labs(y = "Proportion of OV", x = "\nYear") +
    stat_sum(aes(size = ..n.., alpha = .1)) +
    scale_size_area(max_size = 12) +
    stat_smooth() +
    scale_alpha_continuous(guide = "none", limits = c(0, .7)) +
    scale_color_brewer(palette = "Set1") +
    ylim(0, 1) +
    facet_grid(facets) +
    theme_bw() +
    theme(panel.border = element_blank())
}

####For English data.

objsbj.data <- read_codes(
  "~/constantentropy/outputs/infoTheoryTest.ymeb.cod.ooo",
  c("OV", "Clause", "ObjType", "SbjType", "ID", "Year")
)

####Experimenting with cubic splines
#spline example
#p <- ggplot(objsbj.data, aes(Year, OV, color=Clause, group=Clause)) + labs(y = "Proportion of OV", x = "\nYear") + stat_sum(aes(size=..n.., alpha=.5)) + scale_size_area(max_size=12) + stat_smooth(method= "lm", formula = y ~ ns(x,3)) + scale_alpha_continuous(guide="none", limits = c(0,.7)) + scale_color_brewer(palette = "Set1") + ylim(0,1)

##Gotta do invq separately, without having a third variable, because there
##isn't enough data
objsbjNOinvq.data <- droplevels(subset(objsbj.data, Clause != "invq"))

p <- ov_plot(objsbjNOinvq.data, ObjType ~ Clause)
ggsave(filename = "infoTheory-objsbjmatsub-English.pdf", plot = p,
       width = 8, height = 5)

##Simpler plot for talks: nominal (posobj) objects only, no gap subjects,
##subordinate clauses only, no qobjs. The restricted data gets its own name
##so that the models below are still fitted on all non-invq clauses; once
##Clause and ObjType are reduced to a single level they cannot serve as
##predictors.
objsbjNOinvqSimple.data <- droplevels(subset(
  objsbjNOinvq.data,
  ObjType == "posobj" & SbjType != "gapsbj" &
    Clause == "sub" & ObjType != "qobj"
))

p <- ov_plot(objsbjNOinvqSimple.data, ~Clause)
ggsave(filename = "infoTheory-posObjsbjmatsub-English.pdf", plot = p,
       width = 8, height = 5)

####little tests

#Zing the year around the mean year
objsbjNOinvq.data$zYear <- scale(objsbjNOinvq.data$Year,
                                 center = TRUE, scale = TRUE)

#Make contrasts consistent
objsbjNOinvq.data$ObjTypeRelevel <- relevel(objsbjNOinvq.data$ObjType,
                                            ref = "posobj")
objsbjNOinvq.data$SbjTypeRelevel <- relevel(objsbjNOinvq.data$SbjType,
                                            ref = "nomsbj")

##summary()/anova() results are printed explicitly so they also show up when
##the script is run through source().
sbjobj.fit <- glmer(
  OV ~ (1 | ID) + Clause + zYear + SbjTypeRelevel + ObjTypeRelevel +
    SbjTypeRelevel:zYear,
  family = binomial, data = objsbjNOinvq.data
)
print(summary(sbjobj.fit))

sbjobjNoYear.fit <- glmer(
  OV ~ (1 | ID) + Clause + zYear + SbjTypeRelevel + ObjTypeRelevel,
  family = binomial, data = objsbjNOinvq.data
)
##Same model under its second name; no need to fit it twice.
sbj.obj.fit <- sbjobjNoYear.fit
basic.fit <- glmer(OV ~ (1 | ID) + Clause + zYear,
                   family = binomial, data = objsbjNOinvq.data)
print(summary(sbjobjNoYear.fit))
print(anova(sbj.obj.fit, basic.fit))

#More complex ones are not converging, so standard regressions:
sbjobjYear.fit <- glm(
  OV ~ Clause + zYear + SbjTypeRelevel + ObjTypeRelevel +
    zYear * SbjTypeRelevel * ObjTypeRelevel,
  family = binomial, data = objsbjNOinvq.data
)
print(summary(sbjobjYear.fit))
sbjobjNoYear.fit <- glm(
  OV ~ Clause + zYear + SbjTypeRelevel + ObjTypeRelevel +
    SbjTypeRelevel * ObjTypeRelevel,
  family = binomial, data = objsbjNOinvq.data
)
print(summary(sbjobjNoYear.fit))
print(anova(sbjobjNoYear.fit, sbjobjYear.fit, test = "Chisq"))

##invq data separately (this plot is built but never saved):
invq.data <- droplevels(subset(objsbj.data, Clause == "invq"))

p <- ggplot(invq.data, aes(Year, OV, color = SbjType)) +
  labs(y = "Proportion of OV", x = "\nYear") +
  stat_sum(aes(size = ..n.., alpha = .5)) +
  scale_size_area(max_size = 12) +
  stat_smooth() +
  scale_alpha_continuous(guide = "none", limits = c(0, .7)) +
  scale_color_brewer(palette = "Set1") +
  ylim(0, 1)

####Icelandic

objsbjice.data <- read_codes(
  "~/constantentropy/outputs/infoTheoryTest.ice.cod.ooo",
  c("OV", "Clause", "ObjType", "SbjType", "Year", "Genre", "ID")
)

##Gotta do invq separately, without having a third variable, because there
##isn't enough data
objsbjNOinvqIce.data <- droplevels(subset(objsbjice.data, Clause != "invq"))
##Two-way genre split (narrative vs. everything else). Added after the
##subset so that the column is not thrown away by it.
objsbjNOinvqIce.data$SimpleGenre <- ifelse(
  objsbjNOinvqIce.data$Genre == "nar", "nar", "other"
)

p <- ov_plot(objsbjNOinvqIce.data, ObjType ~ Clause)
ggsave(filename = "infoTheory-objsbjmatsub-Ice.pdf", plot = p,
       width = 8, height = 5)

###same for narrative texts only
objsbjNOinvqIceNar.data <- droplevels(subset(objsbjNOinvqIce.data,
                                             Genre == "nar"))

p <- ov_plot(objsbjNOinvqIceNar.data, ObjType ~ Clause)
ggsave(filename = "infoTheory-objsbjmatsubNar-Ice.pdf", plot = p,
       width = 8, height = 5)

##Simpler plot for talks: nominal (posobj) objects only, no gap subjects,
##subordinate clauses only, no qobjs. The narrative data frame is narrowed
##in place; no model below uses it.
objsbjNOinvqIceNar.data <- droplevels(subset(
  objsbjNOinvqIceNar.data,
  ObjType == "posobj" & SbjType != "gapsbj" &
    Clause == "sub" & ObjType != "qobj"
))

p <- ov_plot(objsbjNOinvqIceNar.data, ~Clause)
ggsave(filename = "infoTheory-posObjsbjmatsub-Ice.pdf", plot = p,
       width = 8, height = 5)

###same for religious texts only
objsbjNOinvqIceRel.data <- droplevels(subset(objsbjNOinvqIce.data,
                                             Genre == "rel"))

p <- ov_plot(objsbjNOinvqIceRel.data, ObjType ~ Clause)
ggsave(filename = "infoTheory.objsbjmatsubRel.Ice.pdf", plot = p,
       width = 8, height = 5)

####little tests, for Ice
##These need the Icelandic data and its SimpleGenre column, so they run only
##after both exist.
objsbjNOinvqIce.data$ObjTypeRelevel <- relevel(objsbjNOinvqIce.data$ObjType,
                                               ref = "pronobj")
objsbjNOinvqIce.data$SbjTypeRelevel <- relevel(objsbjNOinvqIce.data$SbjType,
                                               ref = "pronsbj")
objsbjNOinvqIce.data$zYear <- scale(objsbjNOinvqIce.data$Year,
                                    center = TRUE, scale = TRUE)

sbj.obj.fit <- glmer(
  OV ~ (1 | ID) + Clause + SimpleGenre + zYear + SbjTypeRelevel +
    ObjTypeRelevel,
  family = binomial, data = objsbjNOinvqIce.data
)
basic.fit <- glmer(OV ~ (1 | ID) + SimpleGenre + Clause + zYear,
                   family = binomial, data = objsbjNOinvqIce.data)
##Summarise the Icelandic model just fitted (not the English
##sbjobjNoYear.fit).
print(summary(sbj.obj.fit))
print(anova(sbj.obj.fit, basic.fit))

#More complex ones are not converging, so standard regressions:
sbjobjYear.fit <- glm(
  OV ~ Clause + SimpleGenre + zYear + SbjTypeRelevel + ObjTypeRelevel +
    zYear * SbjTypeRelevel * ObjTypeRelevel,
  family = binomial, data = objsbjNOinvqIce.data
)
print(summary(sbjobjYear.fit))
sbjobjNoYear.fit <- glm(
  OV ~ Clause + SimpleGenre + zYear + SbjTypeRelevel + ObjTypeRelevel +
    SbjTypeRelevel * ObjTypeRelevel,
  family = binomial, data = objsbjNOinvqIce.data
)
print(summary(sbjobjNoYear.fit))
print(anova(sbjobjNoYear.fit, sbjobjYear.fit, test = "Chisq"))