##First thing's first--let's make sure we have our surveys object.

surveys <- read.csv("portal_clean.csv") #Only necessary if you restarted R or got rid of surveys somehow. Otherwise, you should have it already.

##Next, turn on the dplyr package using the Packages tab or by typing:
library(dplyr) #If this doesn't work, you may need to download dplyr or tidyverse using the Packages tab--ask for help!

#Let's start with some simpler manipulations.
#1st Problem: Getting back only some columns of a data sheet -- select.

select(surveys, month, hindfoot_length) #The first argument is the data sheet--only the columns put in subsequent argument slots are saved in the process and the rest are dropped.

#Challenge 1.
small_surveys = select(surveys, species_id, sex, weight) #The first argument is the data sheet.
small_surveys #Only those three columns saved.

#2nd Problem: We want to sort the data sheet -- arrange.

arrange(small_surveys, weight) #First argument is the data sheet again--the second is the column to sort by.
arrange(small_surveys, weight, sex) #You can sort by a second column by putting it next, and so on.
arrange(small_surveys, desc(weight)) #Use the desc() function to sort in descending order instead.

#Challenge 2.
sorted_surveys = arrange(small_surveys, species_id, desc(weight))
sorted_surveys #Successfully sorted!

#3rd Problem: We want to create a new column using a preexisting column -- mutate.

mutate(sorted_surveys, weight / 1000) #Creates a new column for the newly calculated weights.
mutate(sorted_surveys, kg_weight = weight / 1000) #Names the new column.

#Challenge 3. 
mutated_surveys = mutate(sorted_surveys, sqrt_weights = sqrt(weight)) #?sqrt() for help. 
mutated_surveys #Took the square root and named the column.

#4th problem: We want to get back only some rows from our data set -- filter.

filter(mutated_surveys, weight == 18) #Only gets back rows with a weight = 18. == is necessary because = already means something.
filter(mutated_surveys, weight < 25) #Less than and greater than. 
filter(mutated_surveys, weight >= 100) #Greater than or equal to looks like this.
filter(mutated_surveys, sex != "M") #!= means is not equal to. Remember, sex is text, so you need the quotes.
filter(mutated_surveys, sex != "M", weight == 45) #You can have two match rules, or more!

#Challenge 4. 
filtered_surveys = filter(mutated_surveys, sex == "F", weight <= 50) #Two rules successfully applied. 
filtered_surveys

#5th problem: We want to summarize our data -- group_by and summarize.

grouped_surveys = group_by(filtered_surveys, species_id) #This groups the data by species_id, albeit invisibly. 
grouped_surveys #We can't "see" that it's done anything, but it has.

summarized_surveys = summarize(grouped_surveys, mean(weight)) #The mean() function takes an average.
summarized_surveys #We have the mean weights of each unique species_id. 

summarized_surveys = summarize(grouped_surveys, mean = mean(weight)) #Names the new column.
summarized_surveys

grouped_surveys2 = group_by(mutated_surveys, species_id, sex) #You can group the data by multiple columns--each unique combo will get a summary.
summarize(grouped_surveys2, count = n()) #Counts the number of observations for each species and sex combo.

#The pipe operator %>%
filtered_surveys %>% group_by(species_id) %>% summarize(mean = mean(weight)) #Using pipes, we can do both steps in one line of code!

#Challenge 5. Combine all the steps so far into a single step!
surveys %>% select(species_id, sex, weight) %>% arrange(species_id, desc(weight)) %>% mutate(sqrt_weight = sqrt(weight)) %>% filter(sex == "F", weight <= 50) %>% group_by(species_id) %>%  summarize(mean = mean(weight))
#Huzzah!

##Making the data sets needed for the ggplot2 unit.
#First, let's make a data set with just the data for the species_id "DM"

just_dm <- surveys %>% filter(species_id == "DM")

#Second, get the means weights, hindfoot lengths, and counts for each species.

stat_summary <- surveys %>% group_by(species_id) %>% summarize(mean_weight = mean(weight), 
                                                               mean_length = mean(hindfoot_length), count = n())

#Lastly, let's get the yearly number of observations for each species and sex combination, as well as the average weights and hindfoot lengths.

year_summary <- surveys %>% group_by(year, sex, species_id) %>% summarize(count = n(), mean_weight = mean(weight), mean_length = mean(hindfoot_length))