---
title: "2_4_merging_temperature_data"
author: "Aspen Coyle"
date: "7/25/2022"
output: html_document
---

```{r setup, include=FALSE}
# Set the default for every chunk: show (echo) the chunk's source code in the knitted document
knitr::opts_chunk$set(echo = TRUE)
```

## Introduction

In this script, we'll fix some of the errors within specific Tidbit files. This will only address the ones with unique issues that make using the function we created (and use in the following script) extremely difficult.

#### Load libraries (and install if necessary), and load packages

```{r libraries, message=FALSE, warning=FALSE}
# Add all required libraries here
list.of.packages <- c("tidyverse", "readxl", "writexl", "lubridate")
# Get names of all required packages that aren't installed
new.packages <- setdiff(list.of.packages, rownames(installed.packages()))
# Install all new packages
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Load all required libraries.
# library() stops with an error when a package cannot be loaded, whereas
# require() only returns FALSE and would let the script carry on without it.
# invisible() keeps the list returned by lapply() out of the knitted output.
invisible(lapply(list.of.packages, library, character.only = TRUE))

# Load custom functions
source("hemat_modeling_functions.R")
```

#### 2007

```{r}
## 2007 RKC survey: three Tidbit files whose Date and Time fields are not
## tab-separated like the rest of the data. Each file is repaired and then
## written back over itself as a tab-delimited table.

## Leg 3, Tidbit #7
# The first 127 rows are tab-separated. From row 128 down, Date and Time are
# separated by a space, so those rows read in shifted one column to the left.
tidbit_7 <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_3/7.TXT"

issue <- read.delim(file = tidbit_7, row.names = NULL)
names(issue) <- c("Date", "Time", "Temperature")

# Negative index: every row except the first 127
shifted_rows <- -(1:127)

# In the shifted rows, what landed in Time belongs in Temperature
issue[shifted_rows, ]$Temperature <- issue$Time[shifted_rows]

# The text after the first space in Date is the time
issue[shifted_rows, ]$Time <- gsub("^.*? ", "", issue$Date[shifted_rows])

# The text before the first space in Date is the date itself
issue[shifted_rows, ]$Date <- gsub(" .*$", "", issue$Date[shifted_rows])

write.table(issue, file = tidbit_7, sep = "\t", row.names = FALSE)

## Leg 3, Tidbit #13 and Leg 1, Tidbit #21
# Same underlying problem, but these read in as two columns all the way down:
# a combined Date.Time column plus the temperature. Split the first column at
# the space and write the result back out.
combined_datetime_files <- c(
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_3/13.TXT",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2007/RKC_survey/Leg_1/21.TXT"
)

for (tidbit_path in combined_datetime_files) {
  issue <- read.delim(file = tidbit_path, row.names = NULL) %>%
    separate(Date.Time, c("Date", "Time"), sep = " ")

  write.table(issue, file = tidbit_path, sep = "\t", row.names = FALSE)
}


```

#### 2008

The dates are messed up unless we fix the formatting, so we'll solve this


```{r}
# Standardize every Tidbit file in the three legs of the 2008 RKC survey so
# that each one ends up as a tab-delimited table with columns date, time, temp.
rkc_2008_dir <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2008/RKC_survey"

for (leg in c("Leg_1", "Leg_2", "Leg_3")) {
  leg_files <- list.files(path = file.path(rkc_2008_dir, leg), full.names = TRUE)

  # seq_along() yields an empty sequence for an empty directory, whereas
  # 1:length() would iterate over c(1, 0) and try to read a non-existent file
  for (i in seq_along(leg_files)) {
    print(leg_files[i])
    print(i)
    # Read in file
    issue <- read.delim(file = leg_files[i], header = TRUE, sep = "\t")
    # Standardize table size
    # If 2 columns, they're datetime and temp
    # If 3, they're date, time, and temp
    if ("Date.Time" %in% names(issue)) {
      issue <- issue %>%
        separate(Date.Time, into = c("Date", "Time"), sep = " ")
    }
    # Rename columns
    names(issue) <- c("date", "time", "temp")

    # Write out file (overwrites the file that was just read)
    write_delim(issue, file = leg_files[i], delim = "\t")
  }
}



```

Most (if not all) of the 2008 Tanner survey on Leg 1 has a messed-up header. We'll change these by using the shell! We'll create a for loop that just replaces the header line (the first line) of each file.

```{bash}
# Replacement header: three column names, each followed by a space and a tab.
# $'...' quoting spells the tabs out as \t so they cannot be lost or converted
# to spaces by an editor.
header=$'Date \tTime \tTemp'

# Overwrite the first line of every file in the directory with the header.
# "$FILE" is quoted so that a file name containing spaces reaches sed intact.
for FILE in ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2008/Tanner_survey/Leg_1/*; do
  sed -i "1s/.*/$header/" "$FILE"
done

```


#### 2009

Same deal as 2008, some of the headers for Leg 2 of the Tanner survey are messed up. We'll just standardize them all

```{bash}
# Replacement header: three column names, each followed by a space and a tab
# (tabs written as \t via $'...' quoting so they stay visible)
header=$'Date \tTime \tTemp'

leg2_dir="../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2"
tidbit14="$leg2_dir/14.txt"

# Overwrite the first line of every file in Leg 2 with the standard header.
# "$FILE" is quoted so that a file name containing spaces reaches sed intact.
for FILE in "$leg2_dir"/*; do
  sed -i "1s/.*/$header/" "$FILE"
done

# For a few more, like Tidbit #14, the header takes up the first two lines
# We've already changed the first header, so we'll simply remove the second
# We'll remove the fourth too (now the third) as it's got an extra column
sed -i 2d "$tidbit14"
sed -i 3d "$tidbit14"

# Tidbit #14 is really messed up, the tails need to be fixed too.
# Remove the last 3 lines: write everything but the last 3 lines to a scratch
# file, then move the scratch file over the original
head -n -3 "$tidbit14" > "$leg2_dir/test.txt"
mv "$leg2_dir/test.txt" "$tidbit14"

# Tidbit 14 also has several extra columns, which we'll name now, so we can read them into R and then remove them
fourteen_head=$'Row \tDate \tTime \tTemp \tUnknown \tUnknown'
sed -i "1s/.*/$fourteen_head/" "$tidbit14"
```

```{r}
# 2009 Tanner survey, Leg 2, Tidbit 14
tidbit_14 <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/14.txt"

# The file is read with nine column names: a row counter, the combined
# date/time, the temperature, and six columns that are not used
tidbit_14_cols <- c("Row", "Date", "Temp", rep("Unknown", times = 6))

issue <- read.delim(file = tidbit_14, col.names = tidbit_14_cols) %>%
  # Keep only the two columns with useful info
  select(Date, Temp) %>%
  # Date holds both date and time; split it at the space
  separate(Date, c("Date", "Time"), sep = " ")

# Overwrite the original with the cleaned, tab-delimited table
write.table(issue, file = tidbit_14, sep = "\t", row.names = FALSE)
```

Back to Bash. Tidbit #20 from Tanner crab Leg 2 has no data at all, so we'll remove it.

```{bash}
# Delete the empty Tidbit #20 file. -f makes this a no-op (rather than an error
# that aborts knitting) when the file has already been removed by an earlier run.
rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2009/Tanner_survey/Leg_2/20.txt
```

### 2012
```{r}
# 2012 RKC survey, Leg 1, Tidbit 21
# The file is read as two columns: a combined date/time and the temperature
tidbit_21 <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2012/RKC_survey/Leg_1/21.TXT"

issue <- read.delim(file = tidbit_21, col.names = c("DateTime", "Temp")) %>%
  # Split the combined column at the space into Date and Time
  separate(DateTime, c("Date", "Time"), sep = " ")

# Overwrite the original with the three-column, tab-delimited table
write.table(issue, file = tidbit_21, sep = "\t", row.names = FALSE)

```




### 2013

In 2013, we have a recurring issue where a file is mostly comma-separated, but also contains some spaces and an unneeded column (specifically, one of Fahrenheit temperature values).

```{r}
source("hemat_modeling_functions.R")

# 2013 RKC survey, Leg 2: Tidbits 1, 15, 26, 27 and 9 all go through the
# custom fix_long_csvs() helper
long_csv_files <- c(
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_2/1.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_2/15.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_2/26.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_2/27.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_2/9.txt"
)

for (long_csv in long_csv_files) {
  fix_long_csvs(long_csv)
}

# 2013 RKC survey, Leg 3: Tidbits 26, 1, 15 and 9 are tables in which the first
# column is the date and the second is "time,temp" joined by a comma. These go
# through the custom fix_timetemp_comma() helper
timetemp_files <- c(
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3/26.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3/1.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3/15.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3/9.txt"
)

for (timetemp_file in timetemp_files) {
  fix_timetemp_comma(filepath = timetemp_file)
}

# A longer, one-off fix for 2013:
# RKC, Leg 3, Tidbit 27 - all variables sit together in a single column
filepath <- "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2013/RKC_survey/Leg_3/27.txt"

issue <- read.delim(file = filepath) %>%
  # First split at the space: date on the left, "time,temp" on the right
  separate(Date.Time.Temperature....C., c("Date", "TimeTemp"), sep = " ") %>%
  # Then split the right-hand piece at the comma
  separate(TimeTemp, c("Time", "Temp"), sep = ",")

# Overwrite the original with the three-column, tab-delimited table
write.table(issue, file = filepath, sep = "\t", row.names = FALSE)

```


### 2016

For the RKC survey, Legs 1 and 3 have their temperatures entered in Fahrenheit. Leg 2 is in Celsius. We also have some weird issues with Tidbit 17 in Legs 2 and 3 - the times aren't reading in correctly. We'll remedy that first, then move on to fixing temperature readings

```{r}
#### Tidbit 17
# The Tidbit 17 .csv files in Legs 2 and 3 are handled individually. Each is
# read without its first two lines, reduced to datetime + temp, split into
# date / time / temp and written out as a tab-delimited .txt next to the .csv.

# Leg 2 (no temperature conversion is applied to this leg)
issue <- read.csv(file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_2/17.csv",
                  skip = 2, header = FALSE)
# Select columns 2 and 3 (datetime and temp)
issue <- issue[, 2:3]
# Rename columns
names(issue) <- c("datetime", "temp")
# Split datetime column at the two-digit year followed by a space.
# separate() discards the matched "16 ", so the year is pasted back on below.
issue <- issue %>%
  separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
# Paste 16 onto the end of the date column
issue$date <- paste0(issue$date, "16")
# Eliminate rows with NAs
issue <- na.omit(issue)
# Write out file as .txt
write_delim(issue, file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_2/17.txt", delim = "\t")

# Leg 3
issue <- read.csv(file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/17.csv",
                  skip = 2, header = FALSE)
# Select columns 2 and 3 (datetime and temp)
issue <- issue[, 2:3]
# Rename columns
names(issue) <- c("datetime", "temp")
# Split datetime column
issue <- issue %>%
  separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
# Paste 16 onto the end of the date column
issue$date <- paste0(issue$date, "16")
# Convert temperature from F to C
issue$temp <- (as.numeric(issue$temp) - 32) * (5 / 9)
# Eliminate rows with NAs
issue <- na.omit(issue)
# Write out file
write_delim(issue, file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/17.txt", delim = "\t")



#### Temperature Conversions
# Leg 1
leg_files <- list.files(path = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_1/", full.names = TRUE, recursive = TRUE)

# seq_along() yields an empty sequence when no files are found, whereas
# 1:length() would iterate over c(1, 0)
for (i in seq_along(leg_files)) {
  print(leg_files[i])
  print(i)
  # Read in file
  issue <- read_excel(path = leg_files[i], col_names = FALSE, skip = 2)
  # Give column names
  suppressWarnings(names(issue) <- c("rows", "datetime", "temp_f"))
  # Select only those columns
  issue <- issue %>%
    select(datetime, temp_f)
  # Convert temperature from F to C
  issue$temp_f <- (as.numeric(issue$temp_f) - 32) * (5 / 9)
  # Split datetime column
  issue <- issue %>%
    separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
  # Rename column to signify the change to C
  names(issue) <- c("date", "time", "temp")
  # Paste 16 onto the end of the date column
  issue$date <- paste0(issue$date, "16")
  # Eliminate rows with NAs
  issue <- na.omit(issue)
  # Swap the file extension for .txt. Only the trailing extension is touched;
  # tools:: is spelled out because the tools package is not attached here.
  leg_files[i] <- paste0(tools::file_path_sans_ext(leg_files[i]), ".txt")
  # Write out file
  write_delim(issue, file = leg_files[i], delim = "\t")
}

# Leg 3
leg_files <- list.files(path = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/", full.names = TRUE, recursive = TRUE)

# Tidbit 17 was already fixed above, so both its .csv and its new .txt are skipped
tidbit17_done <- c(
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/17.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/17.csv"
)

for (i in seq_along(leg_files)) {
  print(leg_files[i])
  # list.files() is given a directory ending in "/" and can return paths of the
  # form "dir//file"; collapse repeated slashes so the comparison still matches
  if (gsub("/+", "/", leg_files[i]) %in% tidbit17_done) {
    next
  }
  print(i)
  # Read in file
  issue <- read.csv(file = leg_files[i], header = FALSE, skip = 2)
  # Give column names
  suppressWarnings(names(issue) <- c("row", "datetime", "temp_f"))
  # Select only those columns
  issue <- issue %>%
    select(datetime, temp_f)
  # Convert temperature from F to C
  issue$temp_f <- (as.numeric(issue$temp_f) - 32) * (5 / 9)
  # Split the datetime column
  issue <- issue %>%
    separate(datetime, c("date", "time"), sep = "16 ", extra = "merge")
  # Rename column to signify the change to C
  names(issue) <- c("date", "time", "temp")
  # Paste 16 onto the end of the date column
  issue$date <- paste0(issue$date, "16")
  # Eliminate rows with NAs
  issue <- na.omit(issue)
  # Change to a .txt file (only the trailing extension is replaced)
  leg_files[i] <- paste0(tools::file_path_sans_ext(leg_files[i]), ".txt")
  # Write out file
  write_delim(issue, file = leg_files[i], delim = "\t")
}
```

Now that we've changed all .xls files to .txt, we'll eliminate the .xls files in Leg 1 of the RKC survey and the .csv files in Leg 3 of the RKC survey. We'll also remove the .csv files in Leg 2 for Tidbit 17.

```{bash}
# Remove the original spreadsheets / .csv files now that .txt versions exist.
# -f keeps the chunk from failing when the files have already been removed by
# an earlier run.
rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_1/*.xls

rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_3/*.csv

rm -f ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/RKC_survey/Leg_2/17.csv

```




Tanner surveys have messed-up headers, we'll remove them. We've gotta add in two extra because the .csv files accidentally made 5 rows in some

The bottom 2-3 rows are also junk (just say "Logged") and are throwing off our data file, so we'll remove all lines saying "Logged"


```{bash}
# Replacement header: seven column names, each followed by a space and a tab
# (tabs written as \t via $'...' quoting so they stay visible)
header=$'DateTime \tTemp \tJunk \tJunk \tJunk \tJunk \tJunk'

# For every file in Legs 1 and 2: overwrite the first line with the header,
# then delete every line containing "Logged". Both steps run in one sed call;
# line 1 is rewritten before the "Logged" test, as in a two-pass approach.
# "$FILE" is quoted so that a file name containing spaces reaches sed intact.
for FILE in \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_1/* \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_2/*; do
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done
```

Tanner surveys still have some issues, we need to standardize column size

Built a custom function to fix it up

```{r}
source("hemat_modeling_functions.R")

# Fix errors in overly long column names

# Leg 1
fix_longhead_txt(filepath = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_1/18.txt")

fix_longhead_txt(filepath = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_1/19.txt")

fix_longhead_txt(filepath = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_1/26.txt")

# Leg 2
fix_longhead_txt(filepath = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_2/18.txt")

# Standardize columns so our dates read in correctly.
# Every file in both legs is rewritten in place as a tab-delimited table with
# columns date, time, temp.
tanner_2016_legs <- c(
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_1/",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2016/Tanner_survey/Leg_2/"
)

for (leg_dir in tanner_2016_legs) {
  leg_files <- list.files(path = leg_dir, full.names = TRUE)

  # seq_along() yields an empty sequence for an empty directory, whereas
  # 1:length() would iterate over c(1, 0)
  for (i in seq_along(leg_files)) {
    print(leg_files[i])
    print(i)
    # Read in file
    issue <- read.delim(file = leg_files[i], header = TRUE)
    # Remove all columns with only NAs. drop = FALSE keeps the result a data
    # frame even if a single column survives.
    issue <- issue[, colSums(is.na(issue)) < nrow(issue), drop = FALSE]

    # If 2 columns, split. Otherwise, leave.
    if (ncol(issue) == 2) {
      # Name columns
      names(issue) <- c("datetime", "temp")
      # Split datetime column at the two-digit year followed by a space.
      # separate() discards the matched "16 ", so the year is pasted back on.
      issue <- issue %>%
        separate(datetime, c("date", "time"), sep = "16 ")
      # Paste 16 onto the end of the date column
      issue$date <- paste0(issue$date, "16")
    } else {
      # Give column names
      suppressWarnings(names(issue) <- c("date", "time", "temp"))
    }

    # Remove rows containing NAs (needed in both cases above)
    issue <- na.omit(issue)

    # Write out file
    write_delim(issue, file = leg_files[i], delim = "\t")
  }
}
  
  
  
```


### 2017

Again, surveys have messed-up headers. This time, it's all surveys. Let's fix them!

Like before, we have some lines that say "Logged" that are throwing off our data, so we'll remove all lines with "Logged"

```{bash}
# Replacement header: seven column names, each followed by a space and a tab
# (tabs written as \t via $'...' quoting so they stay visible)
header=$'DateTime \tTemp \tJunk \tJunk \tJunk \tJunk \tJunk'

# RKC survey Legs 1-3 and Tanner survey Leg 1.
# For every file: overwrite the first line with the header, then delete every
# line containing "Logged". Both steps run in one sed call; line 1 is rewritten
# before the "Logged" test, as in a two-pass approach.
# "$FILE" is quoted so that a file name containing spaces reaches sed intact.
for FILE in \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_1/* \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_2/* \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_3/* \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/Tanner_survey/Leg_1/*; do
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done

```

```{bash}
# Each bash chunk runs in its own shell, so the header has to be defined again
# here. Without it, "$header" expands to nothing and the first line of every
# file would be replaced with an empty line.
header=$'DateTime \tTemp \tJunk \tJunk \tJunk \tJunk \tJunk'

# Fix headers in Leg 3
# NOTE(review): this loop previously pointed at Tanner_survey/Leg_1, which is
# handled elsewhere in this notebook; the path now follows the "Leg 3" comment.
# Confirm that Tanner_survey/Leg_3 is the intended directory.
for FILE in ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/Tanner_survey/Leg_3/*; do
  # Skip the unexpanded glob pattern when the directory is missing or empty
  [ -f "$FILE" ] || continue
  # Overwrite line 1 with the header, then drop every line containing "Logged"
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done
```


We also continue to have issues with Tidbit #18. Looks like it just records data differently than the others. 

Additionally, we'll fix a few others

```{r}
# 2017 RKC survey: Tidbit files that get the same individual fix.
# Each file is read with ten column names (Date, Time, Temp plus seven
# throwaway columns), cut down to Date / Time / Temp, and written back over
# itself as a tab-delimited table.
rkc_2017_fixes <- c(
  # RKC Leg 1: Tidbit 18
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_1/18.txt",
  # RKC Leg 2: Tidbits 18 and 22
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_2/18.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_2/22.txt",
  # RKC Leg 3: Tidbits 12 and 18
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_3/12.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2017/RKC_survey/Leg_3/18.txt"
)

tidbit_col_names <- c("Date", "Time", "Temp", rep("Trash", times = 7))

for (filepath in rkc_2017_fixes) {
  # Diagnostic only: show how many tab-separated fields each line has
  print(count.fields(filepath, sep = "\t"))

  issue <- read.delim(file = filepath, col.names = tidbit_col_names) %>%
    select(Date, Time, Temp)

  write.table(issue, file = filepath, sep = "\t", row.names = FALSE)
}

```

### 2018

Same issue as 2016 and 2017, need to edit headers and remove lines with "Logged"


```{bash}
# Replacement header: seven column names, each followed by a space and a tab
# (tabs written as \t via $'...' quoting so they stay visible)
header=$'DateTime \tTemp \tJunk \tJunk \tJunk \tJunk \tJunk'

# Every leg of the 2018 RKC and Tanner surveys.
# For every file: overwrite the first line with the header, then delete every
# line containing "Logged". Both steps run in one sed call; line 1 is rewritten
# before the "Logged" test, as in a two-pass approach.
# "$FILE" is quoted so that a file name containing spaces reaches sed intact.
for FILE in \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/RKC_survey/Leg_*/* \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/Tanner_survey/Leg_*/*; do
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done

```


Again, we have a few Tidbits that we need to individually fix

```{r}
# 2018 RKC survey: Tidbit files that each need an individual pass through the
# custom fix_txt_headers() helper
rkc_2018_fixes <- c(
  # RKC Leg 1: Tidbit 26
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/RKC_survey/Leg_1/26.txt",
  # RKC Leg 2: Tidbit 26
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/RKC_survey/Leg_2/26.txt",
  # RKC Leg 3: Tidbits 13 and 26
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/RKC_survey/Leg_3/13.txt",
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2018/RKC_survey/Leg_3/26.txt"
)

for (tidbit_path in rkc_2018_fixes) {
  fix_txt_headers(filepath = tidbit_path)
}
```


### 2019

Same issue as 2017 and 2018, need to edit headers and remove lines with "Logged". Hey, looks like they finally standardized things (mostly)!


```{bash}
# Replacement header: seven column names, each followed by a space and a tab
# (tabs written as \t via $'...' quoting so they stay visible)
header=$'DateTime \tTemp \tJunk \tJunk \tJunk \tJunk \tJunk'

### RKC Survey
# Note: Just need to fix 1 and 3 using this script. We'll fix all .csv files (which is Leg 2) below.
# For every file: overwrite the first line with the header, then delete every
# line containing "Logged". Both steps run in one sed call; line 1 is rewritten
# before the "Logged" test, as in a two-pass approach.
# "$FILE" is quoted so that a file name containing spaces reaches sed intact.
for FILE in \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_1/* \
  ../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_3/*; do
  sed -i -e "1s/.*/$header/" -e '/Logged/d' "$FILE"
done

```

We also have a few files we need to fix up for Legs 1 and 3 while we're at it (non-csv files are being grouped together here)

```{r}

# 2019 RKC survey: Tidbit files that each need an individual pass through the
# custom fix_txt_headers() helper
rkc_2019_fixes <- c(
  # Leg 1: Tidbit 26
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_1/26.txt",
  # Leg 3: Tidbit 22
  "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_3/22.txt"
)

for (tidbit_path in rkc_2019_fixes) {
  fix_txt_headers(filepath = tidbit_path)
}
```


#### Fix Leg 2 files for 2019

```{r}
leg2_files <- list.files(path = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/temperature_data/2019/RKC_survey/Leg_2/", full.names = TRUE, recursive = TRUE)

# NOTE: each file is overwritten in place with a different layout (one header
# line; date, time, temp in Celsius) than the one read here (two leading lines
# skipped; row, datetime, temp in Fahrenheit). Run this loop only once per set
# of raw files - a second pass would misread the already-converted output.

# seq_along() yields an empty sequence when no files are found, whereas
# 1:length() would iterate over c(1, 0)
for (i in seq_along(leg2_files)) {
  print(leg2_files[i])
  print(i)
  # Read in file
  issue <- read.csv(file = leg2_files[i], header = FALSE, skip = 2)
  # Give column names
  names(issue) <- c("row", "datetime", "temp_f")
  # Select only those columns
  issue <- issue %>%
    select(datetime, temp_f)
  # Split datetime column using the space
  issue <- issue %>%
    separate(datetime, c("date", "time"), sep = " ", extra = "merge")
  # Convert temperature from F to C
  issue$temp_f <- (issue$temp_f - 32) * (5 / 9)
  # Rename column to signify the change to C
  names(issue) <- c("date", "time", "temp")
  # Write out file
  write.csv(issue, file = leg2_files[i],
            row.names = FALSE)
}
```