---
title: "2_4_merging_temperature_data"
author: "Aspen Coyle"
date: "7/25/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Introduction

In this script, we'll fix some of the errors within specific Tidbit files. This will only address the ones with unique issues that make using the function we created (and use in the following script) extremely difficult.

#### Load libraries (and install if necessary), and load packages

```{r libraries, message=FALSE, warning=FALSE}
# Add all required libraries here
list.of.packages <- c("tidyverse", "readxl", "writexl", "lubridate")

# Get names of all required packages that aren't installed
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]

# Install all new packages
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Load all required libraries.
# library() stops with an error when a package cannot be attached, so a
# failed install is caught here instead of surfacing later as a
# "could not find function" error. invisible() keeps the list returned by
# lapply() out of the rendered document.
invisible(lapply(list.of.packages, library, character.only = TRUE))

# Load custom functions
source("hemat_modeling_functions.R")
```

#### 2007

```{r}
## RKC Survey, Leg 3, Tidbit #7
# Issue: for half, the columns are separated by tabs. At row 128 and below,
# the first two columns (Date and Time) are separated by spaces. This messes
# with how the file reads in
issue <- read.delim(
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_3/7.TXT",
  row.names = NULL
)

# Rename columns
names(issue) <- c("Date", "Time", "Temperature")

# Negative index: selects every row except the first 127, i.e. rows 128+
shifted_rows <- -(1:127)

# The three steps below must run in this order, since each one reads a
# column that the next one overwrites.
# Move Time column at rows 128+ to Temperature
issue$Temperature[shifted_rows] <- issue$Time[shifted_rows]
# Split Date column at rows 128+, move latter half to Time
issue$Time[shifted_rows] <- gsub("^.*? ", "", issue$Date[shifted_rows])
# Remove second half of Date column at rows 128+
issue$Date[shifted_rows] <- gsub(" .*$", "", issue$Date[shifted_rows])

# Done!
# Now we'll just write this out as a text file
write.table(
  issue,
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_3/7.TXT",
  sep = "\t",
  row.names = FALSE
)

## RKC Survey, Leg 3, Tidbit #13
# Same issue as RKC Leg 3, Tidbit #7 above
# Only difference: it's reading in as two columns the whole way down
issue <- read.delim(
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_3/13.TXT",
  row.names = NULL
)

# Split first column into two at the space between date and time
issue <- separate(issue, Date.Time, into = c("Date", "Time"), sep = " ")

# Done! Now we'll just write this out as a text file
write.table(
  issue,
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_3/13.TXT",
  sep = "\t",
  row.names = FALSE
)

## RKC Survey, Leg 1, Tidbit 21
# Exact same issue as RKC Leg 3, Tidbit #13 above
issue <- read.delim(
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_1/21.TXT",
  row.names = NULL
)

# Split first column into two at the space between date and time
issue <- separate(issue, Date.Time, into = c("Date", "Time"), sep = " ")

# Done!
# Now we'll just write this out as a text file
write.table(
  issue,
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_1/21.TXT",
  sep = "\t",
  row.names = FALSE
)
```

#### 2008

The dates are messed up unless we fix the formatting, so we'll solve this

```{r}
# Folder holding the three legs of the 2008 RKC survey
rkc_2008_dir <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2008/RKC_survey"

# Every leg gets the same treatment, so loop over the legs rather than
# repeating the block once per leg
for (leg in c("Leg_1", "Leg_2", "Leg_3")) {
  leg_files <- list.files(path = file.path(rkc_2008_dir, leg), full.names = TRUE)

  # seq_along() yields an empty sequence for an empty folder, whereas
  # 1:length() would yield c(1, 0) and try to read a file named NA
  for (i in seq_along(leg_files)) {
    print(leg_files[i])
    print(i)

    # Read in file
    issue <- read.delim(file = leg_files[i], header = TRUE, sep = "\t")

    # Standardize table size
    # If 2 columns, they're datetime and temp
    # If 3, they're date, time, and temp
    if ("Date.Time" %in% names(issue)) {
      issue <- issue %>%
        separate(Date.Time, into = c("Date", "Time"), sep = " ")
    }

    # Rename columns
    names(issue) <- c("date", "time", "temp")

    # Write out file (overwrites the file that was just read)
    write_delim(issue, file = leg_files[i], delim = "\t")
  }
}
```

Most (if not all) of the 2008 Tanner survey on Leg 1 has a messed-up header. We'll change these by using the shell! We'll create a for loop that just replaces the header line of each file.

```{bash}
# Header fields are written with explicit tab escapes so the delimiter is
# visible in the source.
# NOTE(review): assumes the data rows of these files are tab-delimited, like
# the other temperature files this script reads with sep = "\t" - confirm
header=$'Date\tTime\tTemp'

# Replace line 1 of every file in the folder with the standard header
for FILE in ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2008/Tanner_survey/Leg_1/*; do
  sed -i "1s/.*/$header/" "$FILE"
done
```

#### 2009

Same deal as 2008, some of the headers for Leg 2 of the Tanner survey are messed up. We'll just standardize them all

```{bash}
# Header fields are written with explicit tab escapes so the delimiter is
# visible in the source.
# NOTE(review): assumes the data rows of these files are tab-delimited - confirm
header=$'Date\tTime\tTemp'

# Replace line 1 of every file in the folder with the standard header
for FILE in ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/*; do
  sed -i "1s/.*/$header/" "$FILE"
done

# For a few more, like Tidbit #14, the header takes up the first two lines
# We've already changed the first header, so we'll simply remove the second
# We'll remove the fourth too (now the third) as it's got an extra column
tidbit14="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/14.txt"
sed -i 2d "$tidbit14"
sed -i 3d "$tidbit14"

# Tidbit #14 is really messed up, the tails need to be fixed too.
# Remove the last 3 lines
tidbit14="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/14.txt"
trimmed="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/test.txt"
# Only move the trimmed copy over the original if head succeeded; otherwise
# a failed head would replace the data file with an empty one
head -n -3 "$tidbit14" > "$trimmed" && mv "$trimmed" "$tidbit14"

# Tidbit 14 also has several extra columns, which we'll name now, so we can read them into R and then remove them
# Fields are written with explicit tab escapes so the delimiter is visible
# in the source.
# NOTE(review): assumes this file is tab-delimited (it is read with
# read.delim in the next chunk) - confirm
fourteen_head=$'Row\tDate\tTime\tTemp\tUnknown\tUnknown'
sed -i "1s/.*/$fourteen_head/" "$tidbit14"
```

```{r}
# Read in Tidbit 14
filepath <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/14.txt"

# The header line is skipped and replaced by these nine names; the second
# field holds date and time together, so it is split further down
issue <- read.delim(
  file = filepath,
  col.names = c("Row", "Date", "Temp", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown")
)

issue <- issue %>%
  # Select only columns with useful info
  select(c("Date", "Temp")) %>%
  # We now just need to split the Date column into Date and Time
  separate(Date, c("Date", "Time"), sep = " ")

# Done! Write it out
write.table(issue, file = filepath, sep = "\t", row.names = FALSE)
```

Back to Bash.
Tidbit #20 from Tanner crab leg 2 has no data at all, so we'll remove it

```{bash}
# -f keeps this chunk from failing when the document is knit again after the
# file has already been removed
rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/20.txt
```

### 2012

```{r}
filepath <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2012/RKC_survey/Leg_1/21.TXT"

# Read the file as two columns: a combined date-time and the temperature
issue <- read.delim(file = filepath, col.names = c("DateTime", "Temp"))

# We now just need to split the DateTime column into Date and Time
issue <- issue %>%
  separate(DateTime, c("Date", "Time"), sep = " ")

# Perfect, now just write it out
write.table(issue, file = filepath, sep = "\t", row.names = FALSE)
```

### 2013

In 2013, we have a periodic issue where the file will be mostly comma-separated, but with some spaces and an unneeded column (specifically one of Fahrenheit temp values)

```{r}
source("hemat_modeling_functions.R")

# Folder holding Leg 2 of the 2013 RKC survey
rkc_2013_leg2 <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_2"

# Fix 2013 RKC, Leg 2, Tidbit #1
fix_long_csvs(file.path(rkc_2013_leg2, "1.txt"))

# Same for RKC, Leg 2, Tidbit 15
fix_long_csvs(file.path(rkc_2013_leg2, "15.txt"))

# Also RKC, Leg 2, Tidbit 26
fix_long_csvs(file.path(rkc_2013_leg2, "26.txt"))

# RKC, Leg 2, Tidbit 27
fix_long_csvs(file.path(rkc_2013_leg2, "27.txt"))

# RKC, Leg 2, Tidbit 9
fix_long_csvs(file.path(rkc_2013_leg2, "9.txt"))

### We also need to fix some tables in which the first column is date while the second is time,temp (connected by a comma).
# Let's fix those:
rkc_2013_leg3 <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3"

fix_timetemp_comma(filepath = file.path(rkc_2013_leg3, "26.txt"))
fix_timetemp_comma(filepath = file.path(rkc_2013_leg3, "1.txt"))
fix_timetemp_comma(filepath = file.path(rkc_2013_leg3, "15.txt"))
fix_timetemp_comma(filepath = file.path(rkc_2013_leg3, "9.txt"))

# We also have some longer, more elaborate fixes for 2013. Let's address them below:

# RKC, Leg 3, Tidbit 27
# Issue: all variables are together in a single column
filepath <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3/27.txt"
issue <- read.delim(file = filepath)

issue <- issue %>%
  # Split the combined column apart using the space
  separate(Date.Time.Temperature....C., c("Date", "TimeTemp"), sep = " ") %>%
  # Split the time/temp column apart using the comma
  separate(TimeTemp, c("Time", "Temp"), sep = ",")

# Done! Write it out
write.table(issue, file = filepath, sep = "\t", row.names = FALSE)
```

### 2016

For the RKC survey, Legs 1 and 3 have their temperatures entered in Fahrenheit. Leg 2 is in Celsius. We also have some weird issues with Tidbit 17 in Legs 2 and 3 - the times aren't reading in correctly.
We'll remedy that first, then move on to fixing temperature readings

```{r}
# Root folder of the 2016 RKC survey temperature files
rkc_2016_dir <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey"

#### Tidbit 17
# Leg 2 (already in Celsius, so no temperature conversion)
issue <- read.csv(file = file.path(rkc_2016_dir, "Leg_2", "17.csv"), skip = 2, header = FALSE)
# Select columns 2 and 3 (datetime and temp)
issue <- issue[, 2:3]
# Rename columns
names(issue) <- c("datetime", "temp")
# Split datetime column. The separator "16 " is consumed by the split, so the
# "16" is pasted back onto the date right after.
issue <- issue %>%
  separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
# Paste 16 onto the end of the date column
issue$date <- paste0(issue$date, "16")
# Eliminate rows with NAs
issue <- na.omit(issue)
# Write out file as .txt
write_delim(issue, file = file.path(rkc_2016_dir, "Leg_2", "17.txt"), delim = "\t")

# Leg 3 (recorded in Fahrenheit)
issue <- read.csv(file = file.path(rkc_2016_dir, "Leg_3", "17.csv"), skip = 2, header = FALSE)
# Select columns 2 and 3 (datetime and temp)
issue <- issue[, 2:3]
# Rename columns
names(issue) <- c("datetime", "temp")
# Split datetime column (the "16" consumed by the split is pasted back below)
issue <- issue %>%
  separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
# Paste 16 onto the end of the date column
issue$date <- paste0(issue$date, "16")
# Convert temperature from F to C
issue$temp <- (as.numeric(issue$temp) - 32) * (5 / 9)
# Eliminate rows with NAs
issue <- na.omit(issue)
# Write out file
write_delim(issue, file = file.path(rkc_2016_dir, "Leg_3", "17.txt"), delim = "\t")

#### Temperature Conversions
# Leg 1
leg_files <- list.files(path = file.path(rkc_2016_dir, "Leg_1"), full.names = TRUE, recursive = TRUE)

# seq_along() is safe when the folder is empty; 1:length() would iterate
# over c(1, 0)
for (i in seq_along(leg_files)) {
  print(leg_files[i])
  print(i)

  # Read in file
  issue <- read_excel(path = leg_files[i], col_names = FALSE, skip = 2)
  # Give column names
  suppressWarnings(names(issue) <- c("rows", "datetime", "temp_f"))
  # Select only those columns
  issue <- issue %>%
    select(datetime, temp_f)
  # Convert temperature from F to C
  issue$temp_f <- (as.numeric(issue$temp_f) - 32) * (5 / 9)
  # Split datetime column (the "16" consumed by the split is pasted back below)
  issue <- issue %>%
    separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
  # Rename column to signify the change to C
  names(issue) <- c("date", "time", "temp")
  # Paste 16 onto the end of the date column
  issue$date <- paste0(issue$date, "16")
  # Eliminate rows with NAs
  issue <- na.omit(issue)

  # Remove extension from file, replace with .txt.
  # tools:: is spelled out because this script never attaches the tools
  # package, and only the trailing extension is swapped (a pattern replace
  # would hit the first match anywhere in the path).
  leg_files[i] <- paste0(tools::file_path_sans_ext(leg_files[i]), ".txt")

  # Write out file
  write_delim(issue, file = leg_files[i], delim = "\t")
}

# Leg 3
leg_files <- list.files(path = file.path(rkc_2016_dir, "Leg_3"), full.names = TRUE, recursive = TRUE)

for (i in seq_along(leg_files)) {
  print(leg_files[i])

  # Skip if it's Tidbit 17, we already fixed it.
  # Matching on the file name keeps the skip working no matter how
  # list.files() joins the folder and file name.
  if (basename(leg_files[i]) %in% c("17.txt", "17.csv")) {
    next
  }

  print(i)

  # Read in file
  issue <- read.csv(file = leg_files[i], header = FALSE, skip = 2)
  # Give column names
  suppressWarnings(names(issue) <- c("row", "datetime", "temp_f"))
  # Select only those columns
  issue <- issue %>%
    select(datetime, temp_f)
  # Convert temperature from F to C
  issue$temp_f <- (as.numeric(issue$temp_f) - 32) * (5 / 9)
  # Split the datetime column (the "16" consumed by the split is pasted back below)
  issue <- issue %>%
    separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
  # Rename column to signify the change to C
  names(issue) <- c("date", "time", "temp")
  # Paste 16 onto the end of the date column
  issue$date <- paste0(issue$date, "16")
  # Eliminate rows with NAs
  issue <- na.omit(issue)

  # Change to a .txt file (only the trailing extension is swapped)
  leg_files[i] <- paste0(tools::file_path_sans_ext(leg_files[i]), ".txt")

  # Write out file
  write_delim(issue, file = leg_files[i], delim = "\t")
}
```

Now that we've
changed all .xls files to .txt, we'll eliminate the .xls files in Leg 1 of the RKC survey and the .csv files in Leg 3 of the RKC survey. We'll also remove the .csv files in Leg 2 for Tidbit 17.

```{bash}
# -f keeps this chunk from failing when the document is knit again after the
# files have already been removed
rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_1/*.xls
rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/*.csv
rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_2/17.csv
```

Tanner surveys have messed-up headers, we'll remove them. We've gotta add in two extra because the .csv files accidentally made 5 rows in some.

The bottom 2-3 rows are also junk (just say "Logged") and are throwing off our data file, so we'll remove all lines saying "Logged"

```{bash}
# Header fields are written with explicit tab escapes: these files are read
# back with read.delim(header = TRUE) in the next chunk, which only sees
# separate columns if the header is tab-delimited
header=$'DateTime\tTemp\tJunk\tJunk\tJunk\tJunk\tJunk'
base="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey"

# Fix headers in Leg 1 and Leg 2, then drop every line containing "Logged".
# sed applies the two expressions in order to each line, so line 1 is
# replaced by the header before the "Logged" filter looks at it.
for LEG in Leg_1 Leg_2; do
  for FILE in "$base/$LEG"/*; do
    sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
  done
done
```

Tanner surveys still have some issues, we need to standardize column size.

Built a custom function to fix it up

```{r}
source("hemat_modeling_functions.R")

# Root folder of the 2016 Tanner survey temperature files
tanner_2016_dir <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey"

# Fix errors in overly long column names
# Leg 1
fix_longhead_txt(filepath = file.path(tanner_2016_dir, "Leg_1", "18.txt"))
fix_longhead_txt(filepath = file.path(tanner_2016_dir, "Leg_1", "19.txt"))
fix_longhead_txt(filepath = file.path(tanner_2016_dir, "Leg_1", "26.txt"))

# Leg 2
fix_longhead_txt(filepath = file.path(tanner_2016_dir, "Leg_2", "18.txt"))

# Standardize columns so our dates read in correctly.
# Both legs get the same treatment, so loop over them.
for (leg in c("Leg_1", "Leg_2")) {
  leg_files <- list.files(path = file.path(tanner_2016_dir, leg), full.names = TRUE)

  # seq_along() is safe when the folder is empty; 1:length() would iterate
  # over c(1, 0)
  for (i in seq_along(leg_files)) {
    print(leg_files[i])
    print(i)

    # Read in file
    issue <- read.delim(file = leg_files[i], header = TRUE)

    # Remove all columns with only NAs
    issue <- issue[, colSums(is.na(issue)) < nrow(issue)]

    # If 2 columns, split. Otherwise, leave.
    if (length(names(issue)) == 2) {
      # Name columns
      names(issue) <- c("datetime", "temp")
      # Split datetime column (the "16" consumed by the split is pasted back below)
      issue <- issue %>%
        separate(datetime, c("date", "time"), sep = "16 ")
      # Paste 16 onto the end of the date column
      issue$date <- paste0(issue$date, "16")
    } else {
      # Give column names
      suppressWarnings(names(issue) <- c("date", "time", "temp"))
    }

    # Remove rows with NAs (done for both layouts)
    issue <- na.omit(issue)

    # Write out file
    write_delim(issue, file = leg_files[i], delim = "\t")
  }
}
```

### 2017

Again, surveys have messed-up headers. This time, it's all surveys. Let's fix them!

Like before, we have some lines that say "Logged" that are throwing off our data, so we'll remove all lines with "Logged"

```{bash}
# Header fields are written with explicit tab escapes so the delimiter is
# visible in the source (same header as the 2016 Tanner files)
header=$'DateTime\tTemp\tJunk\tJunk\tJunk\tJunk\tJunk'
base="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017"

# Fix headers and drop "Logged" lines in RKC survey Legs 1-3 and in
# Tanner survey Leg 1
for DIR in RKC_survey/Leg_1 RKC_survey/Leg_2 RKC_survey/Leg_3 Tanner_survey/Leg_1; do
  for FILE in "$base/$DIR"/*; do
    sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
  done
done
```

```{bash}
# The header is defined again here, as in every other bash chunk of this
# document; without it "$header" is empty and the sed command below would
# blank out line 1 of each file
header=$'DateTime\tTemp\tJunk\tJunk\tJunk\tJunk\tJunk'

# Fix headers in Leg 3
# NOTE(review): this loop previously pointed at Tanner_survey/Leg_1 even
# though it is labelled Leg 3; it now targets Leg_3 - confirm that folder is
# the intended one
for FILE in ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/Tanner_survey/Leg_3/*; do
  # If the folder is missing or empty the pattern stays unexpanded; skip it
  [ -e "$FILE" ] || continue
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done
```

We also continue to have issues with Tidbit #18. Looks like it just records data differently than the others. Additionally, we'll fix a few others

```{r}
# Root folder of the 2017 RKC survey temperature files
rkc_2017_dir <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey"

# Files needing the fix, in processing order:
# RKC Leg 1: 18 / RKC Leg 2: 18 and 22 / RKC Leg 3: 12 and 18
tidbit_files <- file.path(
  rkc_2017_dir,
  c("Leg_1/18.txt", "Leg_2/18.txt", "Leg_2/22.txt", "Leg_3/12.txt", "Leg_3/18.txt")
)

for (filepath in tidbit_files) {
  # Show the number of tab-separated fields per line as a diagnostic
  # (print() is needed because values are not auto-printed inside a loop)
  print(count.fields(filepath, sep = "\t"))

  # Read as ten columns: the three we want plus seven throwaway ones
  issue <- read.delim(
    file = filepath,
    col.names = c("Date", "Time", "Temp", rep("Trash", times = 7))
  )

  # Keep only the useful columns
  issue <- issue %>%
    select(Date, Time, Temp)

  # Overwrite the original file
  write.table(issue, file = filepath, sep = "\t", row.names = FALSE)
}
```

### 2018

Same issue as 2016 and 2017, need to edit headers and remove lines with "Logged"

```{bash}
# Header fields are written with explicit tab escapes so the delimiter is
# visible in the source (same header as the 2016 Tanner files)
header=$'DateTime\tTemp\tJunk\tJunk\tJunk\tJunk\tJunk'
base="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018"

# Fix headers and drop "Logged" lines in every leg of the RKC survey, then
# of the Tanner survey
for FILE in "$base"/RKC_survey/Leg_*/* "$base"/Tanner_survey/Leg_*/*; do
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done
```

Again, we have a few Tidbits that we need to individually fix

```{r}
# Root folder of the 2018 RKC survey temperature files
rkc_2018_dir <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/RKC_survey"

# RKC Leg 1
# 26
fix_txt_headers(filepath = file.path(rkc_2018_dir, "Leg_1", "26.txt"))

# RKC Leg 2
# 26
fix_txt_headers(filepath = file.path(rkc_2018_dir, "Leg_2", "26.txt"))

# RKC Leg 3
# 13
fix_txt_headers(filepath = file.path(rkc_2018_dir, "Leg_3", "13.txt"))
# 26
fix_txt_headers(filepath = file.path(rkc_2018_dir, "Leg_3", "26.txt"))
```

### 2019

Same issue as 2017 and 2018, need to edit headers and remove lines with "Logged". Hey, looks like they finally standardized things (mostly)!
```{bash}
# Header fields are written with explicit tab escapes so the delimiter is
# visible in the source.
# NOTE(review): assumes the data rows of these files are tab-delimited - confirm
header=$'DateTime\tTemp\tJunk\tJunk\tJunk\tJunk\tJunk'
base="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey"

### RKC Survey
# Note: Just need to fix 1 and 3 using this script. We'll fix all .csv files (which is Leg 2) below.

# Fix headers, then drop every line containing "Logged".
# sed applies the two expressions in order to each line, so line 1 is
# replaced by the header before the "Logged" filter looks at it.
for LEG in Leg_1 Leg_3; do
  for FILE in "$base/$LEG"/*; do
    sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
  done
done
```

We also have a few files we need to fix up for Legs 1 and 3 while we're at it (non-csv files are being grouped together here)

```{r}
#### Leg 1
# 26
fix_txt_headers(filepath = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_1/26.txt")

#### Leg 3
# 22
fix_txt_headers(filepath = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_3/22.txt")
```

#### Fix Leg 2 files for 2019

```{r}
leg2_files <- list.files(
  path = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_2/",
  full.names = TRUE,
  recursive = TRUE
)

# seq_along() is safe when the folder is empty; 1:length() would iterate
# over c(1, 0) and try to read a file named NA
for (i in seq_along(leg2_files)) {
  print(leg2_files[i])
  print(i)

  # Read in file, skipping the first two lines
  issue <- read.csv(file = leg2_files[i], header = FALSE, skip = 2)

  # Give column names
  names(issue) <- c("row", "datetime", "temp_f")

  issue <- issue %>%
    # Select only those columns
    select(datetime, temp_f) %>%
    # Split datetime column using the space
    separate(datetime, c("date", "time"), sep = " ", extra = "merge")

  # Convert temperature from F to C
  issue$temp_f <- (issue$temp_f - 32) * (5 / 9)

  # Rename column to signify the change to C
  names(issue) <- c("date", "time", "temp")

  # Write out file (overwrites the .csv that was just read)
  write.csv(issue, file = leg2_files[i], row.names = FALSE)
}
```