---
title: "Assignment1"
author: "Stephen Jones"
date: "January 31, 2019"
output:
  html_document:
    highlight: pygments
    theme: cerulean
  pdf_document: default
---

# Choose from the tabs below to view code and links to files on github. {.tabset}

## Assignment 1

```{r message=FALSE, warning=FALSE}
# The workspace is intentionally not cleared: every object used below is
# created in this document, and rm(list = ls()) would destroy the caller's
# objects when the file is rendered from an interactive session.
getwd()

# data.table provides fread(), used by the one-off download chunk below.
# Warnings are already silenced by the chunk option, so no suppressWarnings().
library(data.table)
```

```{r eval=FALSE}
# One-off step, not evaluated when knitting: download the raw .data file,
# write it to the local drive, then upload the CSV to github by hand.
# write.csv() stores row names by default, which is where the extra `X`
# column dropped after loading comes from.
mushdata <- fread("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")
write.csv(mushdata, "agaricus-lepiota.csv")
```

```{r message=FALSE, warning=FALSE}
# Download the data from github.
library(RCurl)

csv_url <- "https://raw.githubusercontent.com/sigmasigmaiota/mushroom/master/agaricus-lepiota.csv"

# R >= 4.0 reads strings as character by default. Factors are requested
# explicitly so summary() reports per-level counts and so tables built later
# keep categories whose count is zero in one of the classes.
mushdata <- read.csv(text = getURL(csv_url), stringsAsFactors = TRUE)

# Examine the data.
summary(mushdata)
head(mushdata)

# Remove the first column, which serves as a redundant row counter.
mushdata$X <- NULL
```

##### Using the codebook as a reference, test columns for unique values to verify; do the variables match the codebook?

```{r message=FALSE, warning=FALSE}
# lapply() always returns a list, whatever the number of distinct values.
lapply(mushdata, unique)
```

##### I'd like to examine the data describing traits observable to pedestrians or hikers, renaming those variables which have been verified with the codebook.

```{r message=FALSE, warning=FALSE}
# New column name -> position of the column in the raw data.
verified_columns <- c(
  Edible = 1L,
  CapShape = 2L,
  CapSurface = 3L,
  CapColor = 4L,
  Odor = 6L,
  Population = 22L,
  Habitat = 23L
)
colnames(mushdata)[verified_columns] <- names(verified_columns)
```

##### Check missing values before recoding. There are 0.

```{r message=FALSE, warning=FALSE}
# Count of NA values per column.
vapply(mushdata, function(column) sum(is.na(column)), integer(1))
```

##### Recode using recode command from "car" package.
```{r message=FALSE, warning=FALSE}
library(car)

# Codebook mapping for every renamed column: single-letter code -> label.
# Each vector is collapsed into one car::recode() specification string of the
# form "'e'='edible'; 'p'='poisonous'".
recode_map <- list(
  Edible = c("'e'='edible'", "'p'='poisonous'"),
  CapShape = c(
    "'b'='bell'", "'c'='conical'", "'x'='convex'",
    "'f'='flat'", "'k'='knobbed'", "'s'='sunken'"
  ),
  CapSurface = c(
    "'f'='fibrous'", "'g'='grooves'", "'y'='scaly'", "'s'='smooth'"
  ),
  CapColor = c(
    "'n'='brown'", "'b'='buff'", "'c'='cinnamon'", "'g'='gray'",
    "'r'='green'", "'p'='pink'", "'u'='purple'", "'e'='red'",
    "'w'='white'", "'y'='yellow'"
  ),
  Odor = c(
    "'a'='almond'", "'l'='anise'", "'c'='creosote'", "'y'='fishy'",
    "'f'='foul'", "'m'='musty'", "'n'='none'", "'p'='pungent'",
    "'s'='spicy'"
  ),
  Population = c(
    "'a'='abundant'", "'c'='clustered'", "'n'='numerous'",
    "'s'='scattered'", "'v'='several'", "'y'='solitary'"
  ),
  Habitat = c(
    "'g'='grasses'", "'l'='leaves'", "'m'='meadows'", "'p'='paths'",
    "'u'='urban'", "'w'='waste'", "'d'='woods'"
  )
)

# car::recode is called with its namespace so that another attached package
# exporting a function named recode() cannot be picked up by mistake.
for (column in names(recode_map)) {
  spec <- paste(recode_map[[column]], collapse = "; ")
  mushdata[[column]] <- car::recode(mushdata[[column]], spec)
}
```

##### Check missing values after recoding.

```{r message=FALSE, warning=FALSE}
# Count of NA values per column.
vapply(mushdata, function(column) sum(is.na(column)), integer(1))
```

##### Create subset with selected variables.

```{r message=FALSE, warning=FALSE}
selected_columns <- c(
  "Edible", "CapShape", "CapSurface", "CapColor",
  "Odor", "Population", "Habitat"
)
mushdata.sub <- mushdata[, selected_columns]
```

##### Split the dataframe, in preparation to compare edible and poisonous plots.
```{r message=FALSE, warning = FALSE}
# Split the six trait columns (everything except Edible) by class.
Edible.Pois <- split(mushdata.sub[2:7], mushdata$Edible)
pois <- as.data.frame(Edible.Pois$poisonous)
edib <- as.data.frame(Edible.Pois$edible)

# Within-class share of each category of one trait.
#
# trait:     name of a column present in both `poisonous` and `edible`.
# poisonous: data frame holding the poisonous mushrooms.
# edible:    data frame holding the edible mushrooms.
#
# Returns a data frame with columns Quality (category label), Pois and Edib
# (share of that category within each class, between 0 and 1).
trait_proportions <- function(trait, poisonous = pois, edible = edib) {
  share <- function(values) {
    props <- prop.table(table(values))
    data.frame(
      Quality = as.character(names(props)),
      Share = as.vector(props),
      stringsAsFactors = FALSE
    )
  }

  # all = TRUE keeps categories that occur in only one class; an inner join
  # would silently drop them whenever the trait column is not a factor.
  merged <- merge(
    share(poisonous[[trait]]),
    share(edible[[trait]]),
    by = "Quality",
    all = TRUE,
    suffixes = c(".pois", ".edib")
  )
  names(merged) <- c("Quality", "Pois", "Edib")

  # A category absent from a class has a share of zero in that class.
  merged$Pois[is.na(merged$Pois)] <- 0
  merged$Edib[is.na(merged$Edib)] <- 0
  merged
}

Prop.CapShape <- trait_proportions("CapShape")
Prop.CapSurface <- trait_proportions("CapSurface")
Prop.CapColor <- trait_proportions("CapColor")
Prop.Odor <- trait_proportions("Odor")
Prop.Population <- trait_proportions("Population")
Prop.Habitat <- trait_proportions("Habitat")
```

```{r message=FALSE, warning = FALSE}
library(ggplot2)

data_source_url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Plot edible vs poisonous shares for one trait.
#
# props:       data frame with columns Quality, Pois and Edib.
# trait_label: human-readable trait name inserted into the plot title.
#
# Returns the ggplot object; at the top level of a chunk it is printed.
plot_trait_proportions <- function(props, trait_label) {
  ggplot(props) +
    # Shapes 69 and 80 are the character codes of "E" and "P", so each point
    # is drawn as the initial of its class. `fill` only affects shapes 21-25
    # and is kept unchanged from the original call.
    geom_point(
      aes(Quality, Edib),
      shape = 69, fill = "darkgreen", color = "green", size = 3, alpha = .8
    ) +
    geom_point(
      aes(Quality, Pois),
      shape = 80, fill = "darkred", color = "red", size = 3, alpha = .8
    ) +
    theme_bw() +
    theme(
      panel.border = element_blank(),
      axis.line = element_line(colour = "black")
    ) +
    ggtitle(
      paste0("Mushroom ", trait_label, ", Edible vs Poisonous"),
      subtitle = data_source_url
    ) +
    xlab("Quality") +
    # The plotted values come from prop.table(), i.e. they lie in [0, 1].
    ylab("Proportion")
}

plot_trait_proportions(Prop.CapShape, "Cap Shape")
```

```{r message=FALSE, warning = FALSE}
plot_trait_proportions(Prop.CapColor, "Cap Color")
```

```{r message=FALSE, warning = FALSE}
plot_trait_proportions(Prop.CapSurface, "Cap Surface")
```

```{r message=FALSE, warning = FALSE}
plot_trait_proportions(Prop.Habitat, "Habitat")
```

```{r message=FALSE, warning = FALSE}
plot_trait_proportions(Prop.Odor, "Odor")
```

```{r message=FALSE, warning = FALSE}
plot_trait_proportions(Prop.Population, "Population")
```

## Links

Link to repository .Rmd file: https://github.com/sigmasigmaiota/mushroom/blob/master/Assignment1_StephenJones.Rmd

Direct link to this .Rmd: https://raw.githubusercontent.com/sigmasigmaiota/mushroom/master/Assignment1_StephenJones.Rmd