# creating folders
# always in quotation mark - that indicates that it is 
# a text string.
dir.create("data")
dir.create("data_output")
dir.create("figure_output")

# to run a line, press ctrl+enter

# Downloaded file
download.file("https://raw.githubusercontent.com/KUBDatalab/beginning-R/main/data/SAFI_clean.csv",
              "data/SAFI_clean.csv", mode = "wb")

# do not worry about the scary red text

# Creating objects in R

3 +5

12/7

# assignment operater alt+-
area_hectares <- 1.0

# Parentheses shows the result of the calculation on the righthand side
# on the console - the assignment still happens.
(area_hectares <- 1.0)

2.47 * area_hectares

# Changing values of objects
area_hectares <- 2.5

# Functions

sqrt()

# Help tells us that the round() function takes an input x
# and an argument digits that allows us to specify how round the
# rounding should be.
round(3.14159)

# we can see all possible arguments of functions in this way:
args(round)


# rounding to two digits
round(3.14159, digits = 2)

# naming arguments is not necessary (but recommended!)
round(3.14159, 2)

# we can change order of arguments. In that case we have to 
# name them.
round(digits = 2, x = 3.14159)

# Vectors and datatypes

hh_members <- c(3, 7, 10, 6)

# character (or text) vectors:

respondent_wall_type <- c("muddaub", "burntbricks", "sunbricks")

# to see the content of the vector:
respondent_wall_type

# Inspecting vectors

length(hh_members)

length(respondent_wall_type)

class(hh_members)

class(respondent_wall_type)

str(hh_members)

str(respondent_wall_type)

# adding elements to a vector.

possessions <- c("bicycle", "radio", "television")

possessions <- c(possessions, "mobile_phone")

possessions <- c("car", possessions)

# Different kinds of vectors

num_char <- c(1, 2, 3, "a")
num_logical <- c(1, 2, 3, TRUE, FALSE)
char_logical <- c("a", "b", "c", TRUE)
tricky <- c(1, 2, 3, "4")

# installing packages

install.packages("tidyverse")
library(tidyverse)

# reading in a dataframe

read_csv("data/SAFI_clean.csv")

interviews <- read_csv("data/SAFI_clean.csv")

# use this to adress specifict columns instead of attach
interviews$no_meals

# selecting columns and filtering rows

select(interviews, village, no_membrs)

# it is not that bad here - there are only 131 rows. But 
# tibbles are preferred today rather than dataframes - because 
# if there was 1000 rows, we would see them all... Not nice


filter(interviews, village == "Chirodzo")

# We can filter on more than one thing:

filter(interviews,  village == "Chirodzo", 
       no_membrs > 4, no_meals > 2)

# filter by default filters with AND. All comparisons have to
# be true.

filter(interviews,  village == "Chirodzo" & 
       no_membrs > 4 & no_meals > 2)

# The OR operator. Returns rows where village is either Chirodzo or Ruaca

filter(interviews, village == "Chirodzo"| village == "Ruaca")

# Pipes

interviews_ch <- select(filter(interviews, village == "Chirodzo"), 
                        village:respondent_wall_type)

interviews_ch

# shortcut for the pipe
# ctrl+shift+m
# command+shift+m on mac

interviews %>% 
  filter(village == "Chirodzo") %>% 
  select(village:respondent_wall_type)

# Note interviews is not changed. We will need to save the output
# as an object

interviews_ch <- interviews %>% 
  filter(village == "Chirodzo") %>% 
  select(village:respondent_wall_type)

# mutate - creating new columns

interviews %>% 
  mutate(total_meals = no_membrs*no_meals)


# avoid use of attach! 
# Attaching a dataset, containg a column name called "score",
# allows us to work directly with the column just writing "score".
# 
# that feels nice. But is risky. What happens when we have to datasets
# both containing a column called "score" and we attach both?
# 
# Which version of "score" are we then referring to?
#
# Address your data directly, so there is no ambiquity. Using 
# attach() seriously increases the potential for confusion and
# makes debugging and finding errors more difficult.