# load libraries library(ggplot2) library(catspec) library(gridExtra) library(lcmm) # look at our data str(dat) head(dat) ################### # start exploring # ################### setwd("graphs") # looking at everyones x plot # p1 <- ggplot(dat, aes(x, y, group=id)) + geom_line() + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="One line per person, all subjects") p2 <- ggplot(dat[dat$id %in% unique(dat$id)[1:300] & dat$totobs>5,], aes(x, y, group=id)) + geom_line() + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="Just 300 random subjects with >5 total obs") p3 <- ggplot(dat[dat$id %in% unique(dat$id)[1:300] & dat$totobs>5,], aes(x, y, group=id)) + geom_smooth(aes(group=id), method="lm", se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="Just 300 random subjects with >5 total obs\n straight line per person") p4 <- ggplot(dat[dat$id %in% unique(dat$id)[1:300] & dat$totobs>5,], aes(x, y, group=id)) + geom_smooth(aes(group=id), method="loess", se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="Just 300 random subjects with >5 total obs\n smoothed line per person") png(paste(Sys.Date(),"_4_bulk_a1c_plots.png",sep=""),width=1000,height=1000, res=90) grid.arrange(p1,p2,p3,p4, nrow=2) dev.off() # smoothing out the plots # # dat$id <- as.character(dat$id) p1 <- ggplot(dat[dat$totobs==58,], aes(x, y, group=id, colour=id)) + geom_line() + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="Raw") p2 <- ggplot(dat[dat$totobs==58,], aes(x, y, group=id, colour=id)) + geom_smooth(aes(group=id), method="loess", se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="Smoothed", legend.position="none") p3 <- ggplot(dat[dat$totobs==58,], aes(x, y, group=id, colour=id)) + geom_smooth(aes(group=id), method="loess", se=F, size=2) + geom_line(size=1) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y") + opts(title="Combined", legend.position="none") png(paste(Sys.Date(),"_3_smoothing_plots.png",sep=""), width=1000, height=500, res=90) grid.arrange(p1,p2,p3, ncol=3, main="Three subjects with 58 observations" ) dev.off() # latent class model dummy <- dat[dat$x0<(160.8) & dat$x0>(67) & dat$rf3 %in% c("95-97","98-00") & dat$rf4 > 9 & dat$totobs>4, ] # & dat$totobs>30 & dat$totobs<36 dummy <- dummy[!dummy$id %in% names(which(table(dummy$id)<4)),] length(unique(dummy$id)) summary(as.vector(table(dummy$id))) # trying with 2 groups dummy$id <- as.factor(dummy$id) d2 <-lcmm(y~x,random=~x,subject='id',mixture=~x,ng=2,idiag=TRUE,data=dummy,link="linear") summary(d2) postprob(d2) # look at the post probs closer round(summary(as.numeric(d2$pprob[d2$pprob[,"class"]==1,"prob1"])),2) round(summary(as.numeric(d2$pprob[d2$pprob[,"class"]==2,"prob2"])),2) # pull out who is in which class dummy$id <- as.character(dummy$id) people2 <- as.data.frame(d2$pprob[,1:2]) dummy$group2 <- factor(people2$class[sapply(dummy$id, function(x) which(people2$id==x))]) # plot check p1 <- ggplot(dummy, aes(x, y, group=id, colour=group2)) + geom_line() + geom_smooth(aes(group=group2), method="loess", size=2, se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Raw") p2 <- ggplot(dummy, aes(x, y, group=id, colour=group2)) + geom_smooth(aes(group=id, colour=group2),size=0.5, se=F) + geom_smooth(aes(group=group2), method="loess", size=2, se=T) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Smoothed", legend.position="none") png(paste(Sys.Date(),"_2_latent_class.png",sep=""), width=1000, height=550, res=90) grid.arrange(p1,p2, ncol=2, main="2 Latent Classes") dev.off() # trying with 3 groups # d3 <- lcmm(y~x,random=~x,subject='id',mixture=~x,ng=3,idiag=TRUE,data=dummy,link="linear") summary(d3) postprob(d3) summary(as.numeric(d3$pprob[d3$pprob[,"class"]==1,"prob1"])) summary(as.numeric(d3$pprob[d3$pprob[,"class"]==2,"prob2"])) summary(as.numeric(d3$pprob[d3$pprob[,"class"]==3,"prob3"])) people3 <- as.data.frame(d3$pprob[,1:2]) dummy$group3 <- factor(people3$class[sapply(as.numeric(dummy$id), function(x) which(people3$id==x))], ) # plot check p1 <- ggplot(dummy, aes(x, y, group=id, colour=group3)) + geom_line() + geom_smooth(aes(group=group3), method="loess", size=2, se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Raw") p2 <- ggplot(dummy, aes(x, y, group=id, colour=group3)) + geom_smooth(aes(group=id, colour=group3),size=0.5, se=F) + geom_smooth(aes(group=group3), method="loess", size=2, se=T) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Smoothed", legend.position="none") png(paste(Sys.Date(),"_3_latent_class.png",sep=""), width=1000, height=550, res=90) grid.arrange(p1,p2, ncol=2, main="3 Latent Classes") dev.off() # trying with 4 groups # d4 <-lcmm(y~x,random=~x,subject='id',mixture=~x,ng=4,idiag=TRUE,data=dummy,link="linear") summary(d4) postprob(d4) summary(as.numeric(d4$pprob[d4$pprob[,"class"]==1,"prob1"])) summary(as.numeric(d4$pprob[d4$pprob[,"class"]==2,"prob2"])) summary(as.numeric(d4$pprob[d4$pprob[,"class"]==3,"prob3"])) summary(as.numeric(d4$pprob[d4$pprob[,"class"]==4,"prob4"])) people4 <- as.data.frame(d4$pprob[,1:2]) dummy$group4 <- factor(people4$class[sapply(dummy$id, function(x) which(people4$id==x))]) # plot check p1 <- ggplot(dummy, aes(x, y, group=id, colour=group4)) + geom_line() + geom_smooth(aes(group=group4), method="loess", size=2, se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Raw") p2 <- ggplot(dummy, aes(x, y, group=id, colour=group4)) + geom_smooth(aes(group=id, colour=group4),size=0.5, se=F) + geom_smooth(aes(group=group4), method="loess", size=2, se=T) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Smoothed", legend.position="none") png(paste(Sys.Date(),"_4_latent_class.png",sep=""), width=1000, height=550, res=90) grid.arrange(p1,p2, ncol=2, main="4 Latent Classes") dev.off() # trying with 3 groups - higher term # dummy$x2 <- dummy$x^2 d32 <- lcmm(y~x+x2,random=~x+x2,subject='id',mixture=~x+x2,ng=3,idiag=TRUE,data=dummy,link="linear") summary(d32) postprob(d32) summary(as.numeric(d32$pprob[d32$pprob[,"class"]==1,"prob1"])) summary(as.numeric(d32$pprob[d32$pprob[,"class"]==2,"prob2"])) summary(as.numeric(d32$pprob[d32$pprob[,"class"]==3,"prob3"])) people32 <- as.data.frame(d32$pprob[,1:2]) dummy$group32 <- factor(people32$class[sapply(dummy$id, function(x) which(people32$id==x))], ) # dummy$id <- as.character(dummy$id) # dummy$id <- as.factor(dummy$id) # plot check p1 <- ggplot(dummy, aes(x, y, group=id, colour=group32)) + geom_line() + geom_smooth(aes(group=group32), method="loess", size=2, se=F) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class") + opts(title="Raw")# + scale_x_continuous(limits = c(80,250)) p2 <- ggplot(dummy, aes(x, y, group=id, colour=group32)) + geom_smooth(aes(group=id, colour=group32),size=0.5, se=F) + geom_smooth(aes(group=group32), method="loess", size=2, se=T) + scale_y_continuous(limits = c(13,37)) + labs(x="x",y="y",colour="Latent Class")# + opts(title="Smoothed", legend.position="none") + scale_x_continuous(limits = c(80,250)) png(paste(Sys.Date(),"_3_latent_class_higher.png",sep=""), width=1000, height=550, res=90) grid.arrange(p1,p2, ncol=2, main="3 Latent Classes - higher order term") dev.off()