## working directories in R

# getwd() returns the path of the current working directory.
getwd()

# setwd(dir) changes the working directory. It needs a path argument: calling
# setwd() with nothing inside the parentheses is an error. Put in the path to
# your own project folder and uncomment the line to use it.
# setwd("path/to/my_project")

## Data structures
## make data file called feline-data.csv

# make this in a text editor, or go to file -> new file -> text file
# enter data as comma separated, no spaces, make sure you hit return after last line

# coat,weight,likes_string
# calico,2.1,1
# black,5.0,0
# tabby,3.2,1
#

# save the file as feline-data.csv in the /data subfolder of your project


# use read.csv() to read in our data

# read.csv() loads a comma-separated file into a data frame.
# The path starts with the folder name because the working directory is
# /my_project, not /my_project/data.
# stringsAsFactors = TRUE keeps text columns such as coat as factors on every
# R version (the default changed to FALSE in R 4.0.0); the factor examples
# later in this lesson depend on coat being a factor.
cats <- read.csv(file = "data/feline-data.csv", stringsAsFactors = TRUE)
cats

# read.table() is similar to read.csv()

# use the $ operator to look at an individual column
cats$weight
cats$coat

cats$weight + 2 # do math with a column
cats$coat + 2 # tabby + 2 is nonsense! for a factor R warns and returns NA

## Data types in R - 5 data types
# all data is interpreted as one of these 5 types!
# typeof() reports which of the data types R uses to store a value
typeof(cats$weight)

typeof(3.14)     # "double"
typeof(1L)       # "integer": the L suffix marks an integer literal
typeof(1 + 1i)   # "complex"
typeof(TRUE)     # "logical"
typeof("banana") # "character"

# cats2 data file: make a new document with the rows below and save it as data/feline-data-2.csv

# coat,weight,likes_string
# calico,2.1,1
# black,5.0,0
# tabby,3.2,1
# tabby,2.3 or 2.4,1
#

# stringsAsFactors = TRUE makes the result below the same on every R version
# (since R 4.0.0 the default leaves text columns as character).
cats2 <- read.csv("data/feline-data-2.csv", stringsAsFactors = TRUE)

# With the "2.3 or 2.4" entry in the file, weight can no longer be read as a
# number: the column comes in as text and is stored as a factor, and typeof()
# of a factor is "integer"...what?
typeof(cats2$weight)
cats2 + 2 # math on the factor columns gives NA, with a warning
cats2

rm(cats2) # remove cats2 from the workspace

# Vectors
# every element in a vector must be of the SAME DATA TYPE

# vector() builds an empty vector; when no mode is given it is logical
my_vector <- vector(length = 3)

# pass a mode to get another type, for example character
another_vector <- vector("character", length = 3)
another_vector

## str() displays the structure of an object
str(another_vector)
str(cats$weight)

## each COLUMN of a data frame is a VECTOR,
## so all of the values within one column share the same data type

str(cats)

# c() combine function
combine_vector <- c(2, 6, 3)
combine_vector

# quiz: which type does this vector end up with?
quiz_vector <- c(2, 6, "3")
str(quiz_vector)

# type coercion:
# R converts every element of a vector to one common data type

coercion_vector <- c("a", TRUE)
coercion_vector
str(coercion_vector) # character

# coercion follows a fixed order ("->" means "is transformed into"):
# logical -> integer -> numeric -> complex -> character

character_vector <- c("0", "2", "4")
character_vector
# as.numeric() turns the character vector into a numeric one
character_coerced_to_numeric <- as.numeric(character_vector)
character_coerced_to_numeric
typeof(character_coerced_to_numeric)
str(character_coerced_to_numeric)

# different as functions to make different types
# as.logical, as.character, as.numeric, etc. 

# c() can add things to an existing vector

# start with a vector that holds only "a" and "b"
ab_vector <- c("a", "b")
ab_vector
# c() with an existing vector appends to it: here "c" is added at the end
combine_example <- c(ab_vector, "c")
combine_example

# series of numbers

# the colon operator produces the series 1 2 3 4 5 6 7 8 9 10
mySeries <- 1:10
mySeries
seq(10) # same output; seq() generates regular sequences

## asking questions about vectors
sequence_example <- seq(10)
head(sequence_example, n = 2) # head(): the first n elements
tail(sequence_example, n = 5) # tail(): the last n elements
length(sequence_example)      # length(): how many elements the object has

# elements of a vector can also be given names
names_example <- 5:8
# assigning to names() sets the name of each element
names(names_example) <- c("a", "b", "c", "d")
names_example
names(names_example)

str(names_example) # a named integer vector


# socrative question
x <- 1:26 # the numbers 1 through 26
x
x <- x * 2 # replace x with 2 * x
x
names(x) <- LETTERS # LETTERS is R's pre-defined vector of capital letters
x

# could also do names(x) <- c("A", "B", "C", ... "X", "Y", "Z") but this is a lot of typing!

## data frames
# look at structure of individual columns
str(cats$weight)
str(cats$coat)

# make a new character vector of coat colours
coats <- c("tabby", "tortoiseshell", "tortoiseshell", "black", "tabby")
coats
str(coats) # character

# factor() turns the character vector into a factor.
# factors are used for categorical data
# factors are actually INTEGERS under the hood! By default the levels are
# numbered in alphabetical order
CATegories <- factor(coats)
str(CATegories)

typeof(coats)      # "character"
typeof(CATegories) # "integer": a factor is stored as integer codes
class(CATegories)  # "factor": class(), not typeof(), identifies a factor

# maybe you don't want to order them alphabetically
mydata <- c("case", "control", "control", "case") # make character vector
# default ordering: case becomes 1L and control becomes 2L
factor_ordering_example <- factor(mydata)
# define the levels yourself to make control 1L and case 2L instead
factor_ordering_example <- factor(mydata, levels = c("control", "case"))
str(factor_ordering_example)

# factors are integers under the hood

# lists
# lists can contain different data types
# one list can hold a double, a character, a logical and a complex value
list_example <- list(1, "a", TRUE, 1 + 4i)
list_example

# the elements of a list may also differ in length (and can be named)
another_list <- list(
  title = "Research Bazaar",
  numbers = 1:10,
  data = TRUE
)
another_list

typeof(cats) # "list": data frames are lists!!!

## a data frame is a special list where all of the vectors are the same length

# aside: use square brackets to index into an object
ab_vector # print the vector that is indexed below
ab_vector[1] # looks at first element
ab_vector[2] # looks at second element

# data frame: two-dimensional
cats[1] # a single index picks columns: the first column, still a data frame

# for two-dimensional things the order is [rows, columns]
cats[1, 2] # first row, second column of cats
cats

# leave an index blank to select all rows or all columns
cats[, 3] # all rows, column 3
cats[3, ] # row 3, all columns
cats[c(1, 3), c(2, 3)] # c() works here too: rows 1 and 3, columns 2 and 3

# matrix
# 2 dimensional, all of the same type

# matrix() builds a matrix: here 3 rows by 6 columns, every cell set to 0
matrix_example <- matrix(0, nrow = 3, ncol = 6)
matrix_example

dim(matrix_example)  # number of rows, then number of columns
nrow(matrix_example) # number of rows only
ncol(matrix_example) # number of columns only

## Exploring Data Frames

## adding new columns or rows to your data frame

cats

# a new vector containing ages (on purpose one value too many)
age <- c(2, 3, 5, 12)

# cbind() binds columns together
cats <- cbind(cats, age) # oops, this fails!
# cats has only 3 observations but age has 4 values;
# remember that the columns of a data frame must have EQUAL LENGTH
nrow(cats)

age <- c(2, 3, 5) # exactly 3 elements this time
cats <- cbind(cats, age) # now age is attached as a new column
cats

## add another observation: another cat, as a new row!
# remember rows/observations are lists!

# rbind() binds rows together
# build the new row with list(); character values must be in quotes!
newRow <- list("tortoiseshell", 3.3, TRUE, 9)
cats <- rbind(cats, newRow) # try to add newRow to cats
# uh-oh, what happened!
cats # NA value for coat, but weight, likes_string, and age are ok

# when cats$coat is a factor, R only accepts values that are existing levels;
# tortoiseshell is a new level, so R doesn't recognize it!

levels(cats$coat) # levels() shows the current levels of the factor
# append tortoiseshell to the existing levels with c()
levels(cats$coat) <- c(levels(cats$coat), "tortoiseshell")
cats <- rbind(cats, newRow) # adding the same row should work now!
cats

cats[-4, ] # a minus sign before a row number drops that row

na.omit(cats) # na.omit() drops every observation that contains an NA

# the result has to be assigned back to cats, otherwise it is not saved!
cats <- na.omit(cats)
cats

cats <- rbind(cats, cats) # data frames can be appended to each other
cats # the row names no longer make sense

# assigning NULL to rownames() resets them to consecutive numbers
rownames(cats) <- NULL
cats

# Advanced Challenge!
# make a new data frame with your first name, last name, and lucky #
# add a new row with your partner's info
# add a new column with answer to the question "Is it time for a coffee break? / day 1 to be over?"

# example solution
# use data.frame() to make new data frame
# args column_name = "value", etc.
# stringsAsFactors = TRUE makes first and last factors on every R version
# (since R 4.0.0 data.frame() keeps strings as character by default), so the
# factor problem shown below appears as described. Without it, newer R
# versions add James on the first rbind() and again on the second one.
df <- data.frame(
  first = "Nora", last = "Mitchell", Number = 11,
  stringsAsFactors = TRUE
)
df # data frame with one row, three columns

# use list() to make a new row with James' info
newRow <- list(first = "James", last = "Mickley", Number = 12)

df <- rbind(df, newRow) # oops! the names are not existing levels, so we get NA
str(df) # first and last are factors, need to either add a factor level or...

# convert first and last to characters instead of factors using as.character()
df$first <- as.character(df$first)
df$last <- as.character(df$last)
str(df) # now they're characters, not factors

## the failed rbind() left a row with missing info; drop it
df <- na.omit(df)
df

df <- rbind(df, newRow) # now rbind() adds the new row/list without NAs
df

## read in the gapminder dataset
gapminder <- read.csv("data/gapminder-FiveYearData.csv")
gapminder # too big to show: the output runs off the end of the console!

length(gapminder) # for a data frame, length() is the number of columns
ncol(gapminder)   # ncol(): number of columns
nrow(gapminder)   # nrow(): number of rows
dim(gapminder)    # dim(): number of rows, then number of columns

colnames(gapminder) # names of the columns

# str() shows that gapminder is a data frame and summarises every column
str(gapminder)

## That's a wrap on day 1! Thanks!