# An Introduction to R
#
# Devan Allen McGranahan (devan.mcgranahan@gmail.com)
#
# YouTube lectures: https://www.youtube.com/playlist?list=PLKXOvaXmjIGcSHFMe2Wpsaw4yzvWR0AgQ
# github repo: https://github.com/devanmcg/IntroRangeR
#
# Lesson 0: Getting to know each other through data
#
# This script demonstrates several R capacities
# by chugging through the Get To Know You survey
#
# The script is organised into numbered chunks that are meant to be run
# in order: later chunks use objects (survey.d, world.md, us.gg, ...)
# created by earlier ones.
#
#
# START Code Chunk 1
#
# Install, load packages
# This script uses several external packages.
# To install them semi-automatically,
# first install the pacman PACkage MANagement package.
# requireNamespace() only checks that pacman is available; it does not
# need to be attached because it is always called as pacman::p_load().
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# Once installed, pacman will take care of the rest for you:
# NOTE(review): maptools does not appear to be used anywhere in this
# script -- confirm whether it is still needed (and still installable).
pacman::p_load(tidyverse, grid, gridExtra,
               maps, ggmap, maptools,
               tm, SnowballC, wordcloud)
#
# END Code Chunk 1
#
# START Code Chunk 2
#
# Fetch survey data
#
# Two options, selected with survey.scope:
#   "recent" = current (or most recent) term only
#   "all"    = data from all-time
survey.urls <- c(
  recent = "https://raw.githubusercontent.com/devanmcg/IntroRangeR/master/data/SurveyResponsesMostRecent.csv",
  all    = "https://raw.githubusercontent.com/devanmcg/IntroRangeR/master/data/SurveyResponsesAll.csv"
)
# "all" is the default so that running the whole script end-to-end
# produces the all-time data set.
survey.scope <- "all"
# read.csv() accepts a complete URL directly; [[ ]] errors on an
# unknown survey.scope instead of silently reading nothing.
survey.d <- read.csv(survey.urls[[survey.scope]])

# Clean up a bit: drop responses with no program recorded
survey.d <- filter(survey.d, !is.na(program))

# Helper for the bar graphs below: reorder the levels of a categorical
# vector so the most frequent level comes first (negative count = sort key).
by.frequency <- function(x) {
  reorder(x, x, function(v) -length(v))
}
#
# END Code Chunk 2
#
# START Code Chunk 3
#
# Bar graphs
# Number of students per degree type, most common first
degree.gg <-
  ggplot(survey.d, aes(x = by.frequency(degree))) +
  geom_bar() +
  labs(x = "Degree type",
       y = "Number of students") +
  theme_bw(16) +
  theme(axis.text = element_text(color = "black"),
        axis.title = element_text(face = "bold"),
        panel.grid.major.x = element_blank(),
        legend.position = "none")
degree.gg

# Number of students per program, most common first
program.gg <-
  ggplot(survey.d, aes(x = by.frequency(program))) +
  geom_bar() +
  labs(x = "Program",
       y = "Number of students") +
  theme_bw(16) +
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 33, hjust = 1),
        axis.title = element_text(face = "bold"),
        panel.grid.major.x = element_blank(),
        legend.position = "none")
program.gg

# Show both bar graphs side by side
grid.arrange(degree.gg, program.gg, ncol = 2)
#
# END Code Chunk 3
#
# START Code Chunk 4
#
# Program counts again, with bars filled by degree type
ggplot(survey.d, aes(x = by.frequency(program))) +
  geom_bar(aes(fill = degree)) +
  labs(x = "Program",
       y = "Number of students") +
  scale_fill_brewer(palette = "Set1", name = "Degree") +
  theme_bw(16) +
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 33, hjust = 1),
        axis.title = element_text(face = "bold"),
        legend.key.width = unit(1, "cm"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12, face = "bold"),
        panel.grid.major.x = element_blank(),
        legend.position = "top")
#
# END Code Chunk 4
#
# START Code Chunk 5
#
# Answers to the water-content question, with bars filled by program
ggplot(survey.d, aes(x = by.frequency(water),
                     fill = factor(program))) +
  geom_bar() +
  labs(x = "Which implies greater water content?",
       y = "Number of students") +
  scale_fill_brewer(palette = "Set1", name = "Program") +
  theme_bw(18) +
  theme(axis.text = element_text(color = "black"),
        axis.title = element_text(face = "bold"),
        legend.title = element_text(face = "bold"),
        panel.grid.major.x = element_blank(),
        legend.position = "top")
#
# END Code Chunk 5
#
# START Code Chunk 6
#
# Get some map data
world.md <- map_data("world") %>%
  filter(region != "Antarctica")
l48.md <- map_data("state")
#
# Map of the lower 48 states with one triangle per undergrad location;
# stat_sum() counts responses sharing a location and the count (n)
# sets the triangle size.
us.gg <-
  ggplot() +
  coord_map("polyconic") +
  theme_minimal(16) +
  # linewidth (not size) controls the outline width of polygons
  geom_polygon(data = l48.md,
               aes(x = long, y = lat, group = group),
               color = "white", fill = "grey90", linewidth = 0.25) +
  stat_sum(data = survey.d %>% filter(country == "US"),
           aes(x = long, y = lat,
               size = after_stat(factor(n)),
               fill = degree),
           geom = "point", shape = 24, colour = "black") +
  scale_size_discrete(range = c(2, 6), guide = "none") +
  theme(legend.position = "bottom") +
  labs(x = "longitude",
       y = "latitude",
       title = "Where we did our undergrad")
us.gg
#
# END Code Chunk 6
#
# START Code Chunk 7
#
# Can't leave Alaska out
ak.md <- world.md %>%
  filter(region == "USA",
         subregion == "Alaska",
         long <= -120,
         lat >= 50)

us.gg +
  geom_path(data = ak.md,
            aes(x = long, y = lat, group = group),
            color = "black", linewidth = 0.25)
#
# END Code Chunk 7
#
# START Code Chunk 8
#
# Intro R is worldwide! Must include our international colleagues:
ggplot() +
  coord_quickmap() +
  theme_minimal(16) +
  geom_polygon(data = world.md,
               aes(x = long, y = lat, group = group),
               color = "white", fill = "grey90", linewidth = 0.25) +
  stat_sum(data = survey.d,
           aes(x = long, y = lat,
               size = after_stat(factor(n)),
               fill = degree),
           geom = "point", shape = 24, colour = "black") +
  scale_size_discrete(range = c(2, 6), guide = "none") +
  theme(legend.position = "bottom") +
  labs(x = "longitude",
       y = "latitude",
       title = "Where we ALL did our undergrad")
#
# END Code Chunk 8
#
# START Code Chunk 9
#
# Make a word cloud of relationships with data
#
# Build a text corpus from the free-text 'relationship' responses and
# strip common English stop words before counting word frequencies.
datCorpus <- Corpus(VectorSource(survey.d$relationship))
datCorpus <- tm_map(datCorpus, removeWords, stopwords('english'))

wordcloud(datCorpus$content,
          scale = c(4, 0.5),
          min.freq = 1,
          max.words = Inf,
          random.order = FALSE,
          random.color = TRUE)
#
# END Code Chunk 9