---
title: "Working with Network Survey Data in R"
author: "Marcel Mallow"
format: html
---

## Getting our collected survey data into R

```{r}
# First-time setup: if you are fairly sure these packages were never installed
# in your R / RStudio environment, uncomment and run the install commands.
# Otherwise skip them and just run the library() calls below.
# install.packages("tidyverse")
# install.packages("dplyr")
# install.packages("tidyselect")
# install.packages("stringr")

# Attach the packages needed for the data import and the
# cleaning / wrangling steps in this session.
library(tidyverse)
library(tidyselect)
library(stringr)
library(dplyr)
```

```{r}
# Print the current working directory.
getwd()
# setwd() would let you switch to the folder where your .csv resides.
```

When you are loading the network survey .csv make sure it is in your current R working directory, otherwise you will have to specify the path.
```{r}
# Location of the downloaded Qualtrics .csv export.
# For a local copy, place the file in your working directory (or give its
# full path) and use the file name instead, e.g.:
# responsesFile <- "sampleNetworkSurvey.csv"
responsesFile <- "https://raw.github.com/MarMall/NetworkApproaches/main/Instructions/Labs/data/sampleNetworkSurvey.csv"
```

```{r}
# The export carries a row with the question text right below the header row.
# Reading that row together with the answers would turn every column into a
# character variable, so the variable names are read on their own first and
# the answers are then read separately, skipping the first two lines.
header_row <- read.csv(responsesFile, header = TRUE, nrows = 1)
variables <- colnames(header_row)

responses <- read.csv(responsesFile, header = FALSE, skip = 2)

# Guard against silently mislabelled columns if the two reads disagree.
stopifnot(length(variables) == ncol(responses))
colnames(responses) <- variables

# Here you have your data frame of all the responses -- have a look inside.
responses
```

## Isolating our network relations of interest

```{r QID149-Friendship}
#' Extract one network question as a binary table
#'
#' Selects every column whose name starts with `question_id`, drops the
#' `<question_id>_99` column(s) (the "Nobody" option), keeps the first
#' `n_alters` of the remaining columns, and recodes the answers so that
#' `NA` becomes 0 and every non-zero value becomes 1.
#'
#' @param data Data frame of survey responses.
#' @param question_id Question prefix as a string, e.g. "QID149".
#' @param n_alters Number of columns to keep. The sample survey has 14
#'   responses, so 14 columns are kept.
#' @return A data frame with `n_alters` columns holding only 0 and 1.
extract_relation <- function(data, question_id, n_alters = 14) {
  relation <- data %>%
    select(
      starts_with(question_id, ignore.case = TRUE),
      -starts_with(paste0(question_id, "_99"), ignore.case = TRUE) # NOBODY
    )

  # Fail with a clear message instead of "undefined columns selected".
  stopifnot(ncol(relation) >= n_alters)
  relation <- relation[seq_len(n_alters)]

  relation %>%
    mutate(across(everything(), function(x) {
      x[is.na(x)] <- 0 # no answer means no tie
      x[x != 0] <- 1   # any other answer counts as a tie
      x
    }))
}

# Friendship relation. It gets the same 0/1 recoding as the influence
# relation below, so the resulting matrix contains no NAs.
QID149 <- extract_relation(responses, "QID149")
dim(QID149)
```

```{r QID128-Influence(Top10-Influencers)}
# Influence relation, again without the "Nobody" option (99).
QID128 <- extract_relation(responses, "QID128")
dim(QID128) # nice, this is square: 14 x 14
```

Now we have to turn it all into matrix format, but that is easy with R...

```{r turningIntoMatrixFormat}
# QID149: friendship relation
# QID128: influence relation

QID149_mat <- as.matrix(QID149)
is.matrix(QID149_mat)
# dim(QID149_mat) # should be square

## same for the influence relation
QID128_mat <- as.matrix(QID128)
is.matrix(QID128_mat)
# dim(QID128_mat) # should be square
```

OK, we now have two network relations in matrix format, and you can do the procedure for as many network relations as you like. However, this might be a good moment to save your matrices in R, so that you can access them in later R-Sessions.

## Saving Network relation matrices

```{r SavingNetworkRelation}
# Each matrix is written to its own .Rdata file in your working directory
# (see getwd()); load() restores it in a later session.
save(QID149_mat, file = "Friendship_q149.Rdata")
save(QID128_mat, file = "Influence_q128.Rdata")
```