---
title: "Daily Update Arkansas Covid"
author: "Rob Wells, Katy Seiter and Mary Hennigan"
date: "10/27/2021"
output: pdf_document
---

#Daily Update Arkansascovid.com Calculations

- **This notebook retrieves data the ADH API and produces tables for upload to Arkansascovid.com GitHub.**
#  https://github.com/Arkansascovid/Main

#Fixes, refines vaccine data

- **Separately, run Hospital Data_April 29 and Demographic Update **

# Part 1: Import State Data, Clean It

```{r include=FALSE}
# install.packages("slider")
# install.packages("zoo")
# install.packages("gtools")
# install.packages("kableExtra")
# install.packages("formattable")
#install.packages("janitor")
#install.packages("tidyverse")
#install.packages("reshape2")
library(tidyverse)
library(janitor)
library(lubridate)
library(jsonlite)
library(gtools)
library(zoo)  
library(reshape2)
library(slider)
library(formattable)

```

## County Level Arkansas Covid Data from ADH FEED
```{r}
#New County json feed
#38 Variables
qq <- fromJSON('https://services5.arcgis.com/vlhGQkz6fSnofVMD/ArcGIS/rest/services/ADH_COVID19_STATS_BY_COUNTY/FeatureServer/0/query?where=0%3D0&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=')

df33 <- qq[["features"]][["attributes"]]
#print's today's date as a date field
df33$mydate <- Sys.Date()
#file.exists(filename)
#austin help on that
#if file exists = check if current data is different and if that is true then append today's data set to master dataset and resave
#today_county_new <- df33
today_county <- df33

today_county <- today_county %>% 
  select(county_nam, Pop_5YR_EST_2018,positive,negative,pending,lab_prvt,
lab_pub,deaths,total_tests,pos_range,
neg_range,active_cases,confirmed_pos,probable_pos,
confirmed_recov,probable_recov,confirmed_death,probable_death,
confirmed_active,probable_active,pcr_pos_test,pcr_neg_test,
antigen_pos_test,antigen_neg_test,total_pos_test,total_neg_test,
POP_5YR_EST_2018_2019, antigen_test,pcr_test,recoveries,
Shape__Area,Shape__Length,mydate)
```

```{r}
#If this fails, check the backup from Austin
#https://docs.google.com/spreadsheets/d/1WRITgZZdI5PwiJiJeutBjJigSifOaNPDyTW_jHizfGQ/edit?usp=sharing
#slice of latest data, rerun from line 49 forward
#df3 <- rio::import("/Users/rswells/Downloads/may 15 data.xlsx")

```

```{r}
today_county <- janitor::clean_names(today_county)
#write.csv(today_county, "today_county_july5.csv")
```


#Set Dates
```{r}
#today's date
today <- Sys.Date()

#NOTE: IF YOU ARE RUNNING THIS A DAY LATE, USE THIS CODE TO WORK PROPERLY
#today <- Sys.Date()-1
#today_county$mydate <-"2020-09-22- THE OLD DATE...."

#yesterday's date
yesterday <- (today-1)

```

#Quick Calculation of State Totals 
```{r}
#today_county <- today_county [ -c(1) ]

today_county [2:33] <- lapply(today_county [2:33], as.numeric)
#Calculate statewide totals

today_county <- today_county %>%
  janitor::adorn_totals("row") %>% 
  mutate(mydate=(today))

today_county$county_nam <- str_replace_all(today_county$county_nam, pattern=fixed('Arkansas'), replacement=fixed('Arkansas_county') )
today_county$county_nam <- str_replace_all(today_county$county_nam, pattern=fixed('Total'), replacement=fixed('Arkansas_all_counties') )
today_county$county_nam  <- gsub("[[:space:]]", "", today_county$county_nam)
today_county$mydate <- as.Date(today_county$mydate)
```

-**Import and Clean Yesterday's Data**
```{r}
yesterday_county <- rio::import("https://raw.githubusercontent.com/Arkansascovid/Main/master/MasterData/master_file.csv")
#yesterday_county <- yesterday_county [ -c(1) ]
yesterday_county$mydate <- as.Date(yesterday_county$mydate)
#ran this to test it out on today's data
 yesterday_county <- yesterday_county %>% 
  filter(mydate<=(yesterday))
yesterday_county[4:45] <- lapply(yesterday_county[4:45], as.numeric)
```
# .
# Previous Date Calculations
#  .
-**This section performs all of the county calculations**   
-**It updates master_file.csv**  
-**Create Temporary Table Two Days' Worth of Data**   
-**Sort Alphbetically and Run Calculations**   

```{r}
countytemp <- yesterday_county %>% 
  filter(mydate>=(yesterday)) %>%  
  arrange(county_nam) 


```

You should get a df with 154 observations (two days' worth a data)
```{r}
df_master <- smartbind(today_county, countytemp)

twodays <- df_master %>% 
  arrange((county_nam))
```


-**The Today-Yesterday Calculations**

```{r}
twodays <- twodays %>%
  mutate(Number_Tested=(positive+negative)) %>% 
  mutate(New_Cases_Today = (positive-lead(positive))) %>%
  mutate(Recovered_Since_Yesterday = (recoveries-lead(recoveries))) %>%
  mutate(New_Deaths_Today = (deaths-lead(deaths))) %>%
  mutate(New_Tests_Dashboard = (Number_Tested-lead(Number_Tested)))
```

- **IMPORTANT: FILTER TABLE TO TODAY'S RESULTS**
```{r}
twodays <- twodays %>% filter(mydate > yesterday) #SHOULD GET 77 ROWS IN twodays
```


```{r}
glimpse(twodays)
```

-**The Percentage Calculations**

```{r}
twodays <- twodays %>%
  mutate(Cases_Population = (positive / pop_5yr_est_2018)*100) %>% 
  mutate(Tested_Population = (Number_Tested / pop_5yr_est_2018)*100) %>% 
  mutate(New_Cases_Today_10k_Pop = (New_Cases_Today/pop_5yr_est_2018)*10000) %>% 
  mutate(Active_Cases_10k_Pop = (active_cases/pop_5yr_est_2018)*10000) %>% 
  mutate(Confirmed_Active_Cases_10k_Pop = (confirmed_active/pop_5yr_est_2018)*10000) %>% 
  #legacy calculation - revised 5/17/21 mutate(Active_Cases_10k_Pop = (confirmed_active/pop_5yr_est_2018)*10000) %>% 
  mutate(Pct_Positive_Cumulative = (positive/Number_Tested)*100) %>% 
  mutate(Pct_Positive_New_to_Dashboard = (New_Cases_Today/New_Tests_Dashboard)*100) %>% 
  mutate(Closed = (recoveries + deaths)) %>% 
  mutate(Pct_Deaths_vs_Recoveries = (deaths/Closed)*100) %>% 
  mutate(Pct_Recoveries_vs_Deaths = (recoveries/Closed)*100)
  
twodays
```

#On May 17, I updated the new active cases per 10k calculation to use total active cases. The legacy calculation of confirmed active / population is now Confirmed Active Cases Per 10k. We had used confirmed active / population since the Fall 2020 to have a consistent time series after the ADH in September 2020 provided confirmed and probable cases. That distinction with probable cases isn't as important now, so we will use the total active cases and retain the legacy calculation as Confirmed Active Cases Per 10k.

#Merges the Today's Arkanas_All_Counties line with yesterday's data. 
```{r}
master2 <- smartbind(twodays, yesterday_county, fill=0)
```

```{r}

master2 <- master2 %>% 
  select("county_nam","mydate","deaths","fhwa_numbe","fips","positive","negative","recoveries","total_tests","active_cases","pop_5yr_est_2018", "pop_5yr_est_2018_2019","confirmed_pos","probable_pos","confirmed_neg","probable_neg","confirmed_recov","probable_recov","confirmed_death","probable_death", "confirmed_active","probable_active","pcr_test","antigen_test","pcr_pos_test", "pcr_neg_test","antigen_pos_test","antigen_neg_test","total_pos_test","total_neg_test",
"New_Cases_Today","Recovered_Since_Yesterday","New_Deaths_Today","New_Tests_Dashboard","Cases_Population","Tested_Population","New_Cases_Today_10k_Pop","Active_Cases_10k_Pop","Confirmed_Active_Cases_10k_Pop", "Pct_Positive_Cumulative","Pct_Positive_New_to_Dashboard","Closed","Pct_Deaths_vs_Recoveries","Pct_Recoveries_vs_Deaths","lab_prvt","lab_pub")
master2$mydate <- as.Date(master2$mydate)
glimpse(master2)
```


#Save master file
```{r}
master2$Cases_Population <- round(master2$Cases_Population, 2)
master2$Tested_Population <- round(master2$Tested_Population, 2)
master2$New_Cases_Today_10k_Pop <- round(master2$New_Cases_Today_10k_Pop, 2)
master2$Active_Cases_10k_Pop <- round(master2$Active_Cases_10k_Pop, 2)
master2$Pct_Positive_Cumulative <- round(master2$Pct_Positive_Cumulative, 2)
master2$Pct_Positive_New_to_Dashboard <- round(master2$Pct_Positive_New_to_Dashboard, 2)
master2$Pct_Deaths_vs_Recoveries <- round(master2$Pct_Deaths_vs_Recoveries, 2)
master2$Pct_Recoveries_vs_Deaths <- round(master2$Pct_Recoveries_vs_Deaths, 2)

write.csv(master2, file = "MasterData/master_file.csv")
```


#Daily Pretty Slide
```{r}
library(formattable)
instagram <- master2 %>% 
  select(county_nam, mydate, New_Cases_Today, New_Deaths_Today, deaths, active_cases, positive, deaths) %>% 
  filter(county_nam == "Arkansas_all_counties") %>% 
  filter(mydate==today)
instagram
```

```{r}
instagram <- instagram 

instagram$mydate <- as.character((instagram$mydate), format='%m%-%d%-%Y')

instagram$active_cases <- scales::comma(instagram$active_cases)
instagram$deaths <- scales::comma(instagram$deaths)
instagram$New_Cases_Today <- scales::comma(instagram$New_Cases_Today)
instagram$New_Deaths_Today <- scales::comma(instagram$New_Deaths_Today)
instagram$positive <- scales::comma(instagram$positive)
instagram
```


```{r}
daily <- instagram %>% 
  select(mydate, New_Cases_Today,New_Deaths_Today, active_cases, positive, deaths) %>% 

  
  rename('New Cases Today' =  New_Cases_Today, 'New Deaths Today' = New_Deaths_Today, 'Active Cases' = active_cases, 'Deaths' = deaths, 'Positive' = positive, 'Date' = mydate)

```


```{r}
#The "PRETTY SLIDE"

dailyT<- as.data.frame(t(daily))

write.csv(dailyT, "dailyT.csv")
daily2 <- rio::import("dailyT.csv")
colnames(daily2) <- c("Item", "Amount")
daily2 <- daily2[-c(1), ] 
daily2

write.csv(daily2, file = "HomePageData/daily.csv")
#It feeds this Flourish graphic: https://app.flourish.studio/visualisation/4947109/edit
```


# AVERAGE CALCULATIONS 
```{r}
test <- master2 %>% 
  group_by(county_nam) %>%
  arrange(county_nam, mydate) %>%
  mutate(Positive_7_Day_Avg = slider::slide_dbl(positive, mean, .before = 6, .after = 0)) %>%
  mutate(New_Cases_7_Day_Avg = slider::slide_dbl(New_Cases_Today, mean, .before = 6, .after = 0)) %>%
  mutate(New_Deaths_7_Day_Avg = slider::slide_dbl(New_Deaths_Today, mean, .before = 6, .after = 0)) %>%
ungroup()

#Run this before writing script to get back in descending order by date

test <- test %>% 
  arrange(desc(mydate))

test2 <- test %>% 
  select(mydate, county_nam, positive, Positive_7_Day_Avg, New_Cases_Today, New_Cases_7_Day_Avg, New_Deaths_Today, New_Deaths_7_Day_Avg, Recovered_Since_Yesterday, active_cases) %>% 
  filter(county_nam=="Arkansas_all_counties") %>% 
  filter(mydate>=(yesterday))
  #filter(mydate  >="2021-07-02")

#Alex Nichol decimal rounding:
test2$Positive_7_Day_Avg <-round(test2$Positive_7_Day_Avg, 1)
test2$New_Cases_7_Day_Avg <-round(test2$New_Cases_7_Day_Avg, 1)
test2$New_Deaths_7_Day_Avg <-round(test2$New_Deaths_7_Day_Avg, 1)

#write.csv(test2, "test2.csv")
```


# CREATE NEW CASES AND RECOVERIES SLIDE - 2 DAYS
```{r}

df4 <- melt(test2[,c("mydate", "New_Cases_Today","New_Cases_7_Day_Avg", "Recovered_Since_Yesterday")], id.vars = 1)

df4 <- df4 %>% 
  rename(Detail = variable, Amount = value)

df4$Amount <- round(df4$Amount,0)

ggplot(df4,aes(x = mydate, y = Amount, label = Amount, fill= Detail)) + 
geom_bar(stat="identity", position="dodge", color="white")+
scale_fill_manual(values=c("#B3BF08", "#08B3BF", "#D68037"))+
scale_y_continuous(limits=c(0, 1000))  +
  theme_bw() + 
  theme(
    plot.title = element_text(face = "bold", size = 16),
    legend.background = element_rect(fill = "white", size = .01, colour = "white"),
    legend.justification = c(0, 1),
    #adjust the box position. First number horizontal, second, vertical 
    legend.position = c(.617, .96),
    #legend.position = c(.617, .38),
    #legend.position = c(.1, .38),
    axis.ticks = element_line(colour = "grey70", size = 0.2),
    panel.grid.major = element_line(colour = "grey70", size = 0.2),
    panel.grid.minor = element_blank()
  )+
   geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  geom_text(position = position_dodge2(width = 0.9, preserve = "single"), vjust=-0.5, hjust=+0.5) +
  labs(title = "New Cases & Recoveries in Arkansas Announced Today", 
       subtitle = "Confirmed + Probable,  ADH Data for Oct. 27, 2021",
       caption = "Graphic by ArkansasCovid.com",
       y="Amount",
       x="Date")
ggsave("Changes_2_Day_avg.png",device = "png",width=9,height=6, dpi=400)
```


# CREATE DEATHS SLIDE - 2 DAYS
```{r}
df5 <- melt(test2[,c("mydate", "New_Deaths_Today","New_Deaths_7_Day_Avg")], id.vars = 1)

df5 <- df5 %>% 
  rename(Detail = variable, Amount = value)

df5$Amount <- round(df5$Amount,0)

#deaths 2-day snapshot
ggplot(df5,aes(x = mydate, y = Amount, label = Amount, fill= Detail)) + 
geom_bar(stat="identity", position="dodge", color="white")+
scale_fill_manual(values=c("#08B3BF", "#D68037"))+
  theme(legend.position = "bottom") +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single"), show.legend = T) +
  geom_text(position = position_dodge2(width = 0.9, preserve = "single"), vjust=-0.5, hjust=+0.5) +
  scale_y_continuous(limits=c(0, 30)) + 
  labs(title = "New Deaths in Arkansas Announced Today", 
       subtitle = "Confirmed + Probable,  ADH Data for Oct. 27, 2021",
       caption = "Graphic by ArkansasCovid.com",
       y="Amount",
       x="Date")
ggsave("Deaths_2_Day_avg.png",device = "png",width=9,height=6, dpi=400)

```


#-------------------------------------------
#-------------------------------------------
#Vaccine Data - New Section May 21, 2021
#County vaccination data here
#-------------------------------------------
#-------------------------------------------

## Retrieve County Vaccine Data from ADH FEED
```{r}
#County vaccination json feed
#35 Variables

qq <- fromJSON('https://services5.arcgis.com/vlhGQkz6fSnofVMD/ArcGIS/rest/services/ADH_COVID19_VACCINATIONS_BY_COUNTY/FeatureServer/0/query?where=0%3D0&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=')


#Pulls in the data that matters to 'q'
vax <- qq[["features"]][["attributes"]]

# vax1 <- as.data.frame(t(vax))
# vax1 <- vax1[-c(1), ] 
# names(vax1) <- lapply(vax1[1, ], as.character)
# vax1 <- vax1[-1,] 
vax$date <- today

#Change this date if a day old
#vax1$date <- Sys.Date()-1

vax <- clean_names(vax)

vax <- vax %>% 
rename(Date = "date",
       County_Name = "adh_covid19_vac_join_county_nam", 
       County_Vax_Total =  "vac_demo_county_aggregate_count", 
       White_Total = "vac_demo_county_race0_total", 
       Black_Total= "vac_demo_county_race1_total",
       Native_American_Total = "vac_demo_county_race2_total", 
       Asian_Total = "vac_demo_county_race3_total", 
       Pacific_Islander_Total = "vac_demo_county_race4_total",
       Unk_Race_Total = "vac_demo_county_race5_total", 
       Other_Race_Total= "vac_demo_county_race6_total",
       Hispanic_Total= "vac_demo_county_ethnicity_hispa", 
       Dose_1 = "vac_demo_county_dose_one",
       Dose_2 = "vac_demo_county_dose_two", 
       Unk_Dose= "vac_demo_county_unk_dose",
       Partial_Vax= "vac_demo_county_partially_immun", 
       Fully_Vax = "vac_demo_county_fully_immunized", 
       Hispanic_Partial_Vax = "vac_demo_county_partially_hispa",
       Hispanic_Full_Vax = "vac_demo_county_fully_hispanic", 
       Population = "vac_demo_county_population", 
       County_Pct_Dose_1 = "vac_demo_county_percent_dose_1",
       County_Pct_Dose_2 = "vac_demo_county_percent_dose_2",
       County_Pct_Population = "vac_demo_county_percent_populat",
       White_Partial_Pct= "vac_demo_county_partially_white",
       Black_Partial_Pct= "vac_demo_county_partially_black",
       Asian_Partial_Pct = "vac_demo_county_partially_aian",
       Non_Hispanic_Partial_Pct= "vac_demo_county_partially_nhopi",
       White_Full_Pct = "vac_demo_county_fully_white", 
       Black_Full_Pct = "vac_demo_county_fully_black", 
       Asian_Full_Pct= "vac_demo_county_fully_aian", 
       Non_Hispanic_Full_Pct = "vac_demo_county_fully_nhopi")

vax1 <- vax %>% 
  select(Date, County_Name, County_Vax_Total,  Dose_1, Dose_2, Unk_Dose, Partial_Vax,
         Fully_Vax, White_Total, White_Full_Pct, Black_Total, Black_Full_Pct,
         Native_American_Total, Asian_Total, Asian_Full_Pct, Pacific_Islander_Total,
         Unk_Race_Total, Other_Race_Total, Hispanic_Total, Hispanic_Partial_Vax,
         Hispanic_Full_Vax, Population, County_Pct_Dose_1, County_Pct_Dose_2,
         White_Partial_Pct, Black_Partial_Pct, Asian_Partial_Pct,
         Non_Hispanic_Partial_Pct, Non_Hispanic_Full_Pct) %>% 
  mutate(Full_Pct =(Fully_Vax/Population)) 
  
vax1$Full_Pct <- formattable::percent(vax1$Full_Pct, 1)


write.csv(vax1, "HomePageData/vaccine_county_full.csv")

vaccine_county <- vax1 %>% 
  select(County_Name, Date, County_Vax_Total, Partial_Vax, Fully_Vax,Unk_Dose, Population, Full_Pct) 

write.csv(vaccine_county, "HomePageData/vaccine_county.csv")
write.csv(vaccine_county, "MasterData/vaccine_county.csv")
date <- today
vaxname <- paste("vaccine_county_",date,".csv",sep="")
write.csv(vax1, file=vaxname) 
write.csv(vaccine_county, file=vaxname) 

#Feeds to homepage map
#https://www.datawrapper.de/_/HZNsT/
#This is autoloaded to Github via the vaccine_county.csv

#Feeds to vaccine chart
#https://www.datawrapper.de/_/dDKzc/

#Documentation for live updated charts
#https://academy.datawrapper.de/article/60-external-data-sources
#https://academy.datawrapper.de/article/236-how-to-create-a-live-updating-symbol-map-or-choropleth-map

#Definitions for the data feed
#https://docs.google.com/spreadsheets/d/1gJ8aEnX5zmT5ru-WPJjBQ2gEEaGZ4vc_A8hgWK69IdE/edit?usp=sharing

```

#Feed to vaccine chart
```{r}

vaccinecounty1 <- vaccine_county %>% 
  select(County_Name, Partial_Vax, Fully_Vax, Full_Pct, Unk_Dose, Population, Date)

head(vaccinecounty1)

write.csv(vaccinecounty1, file = "HomePageData/vaccinecounty.csv")

#Write a copy with a date for archiving
#Put this file into the Vaccine History folder in GitHub #https://github.com/Arkansascovid/Main/tree/master/Vaccine_History 
date <- today
vaccinecounty2 <- paste("vaccinecounty",date,".csv",sep="")
write.csv(vaccinecounty1, file=vaccinecounty2) 


```


#Add to master file
```{r}
vaxmaster <- rio::import("https://raw.githubusercontent.com/Arkansascovid/Main/master/HomePageData/vaccine_master.csv")

# vaxmaster <- vaxmaster [ -c(31) ]
# 
# vaxmaster <- vaxmaster %>% 
#    filter(Date < "2021-08-23")

#vaxmaster <- vaxmaster [ -c(30) ]
# vaxmaster <- vaxmaster %>% 
#   rename(Date = "date")

vaccine_master <- smartbind(vax1, vaxmaster)
# vaccine_master <- vaccine_master [ -c(30) ]

write.csv(vaccine_master, "HomePageData/vaccine_master.csv")

```


#fix All_Doses_Given
```{r}
All_Doses_Given <- sum(vax1$County_Vax_Total)
```

## Retrieve Old Vaccine Data from ADH FEED
```{r}
#Simple statewide json feed
#6 Variables


#This feed has not updated since August 26, 2021

qqq <- fromJSON('https://services5.arcgis.com/vlhGQkz6fSnofVMD/ArcGIS/rest/services/ADH_COVID19_VACCINATIONS_ADMIN_DIST/FeatureServer/0/query?where=0%3D0&objectIds=&time=&resultType=none&outFields=*&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&sqlFormat=none&f=pjson&token=')

#Pulls in the data that matters to 'q'
vax_old <- qqq[["features"]][["attributes"]]

vax1_old <- as.data.frame(t(vax_old))
vax1_old <- vax1_old[-c(1), ] 
names(vax1_old) <- lapply(vax1_old[1, ], as.character)
vax1_old <- vax1_old[-1,] 
vax1_old$date <- today

#Change this date if a day old
#vax1$date <- Sys.Date()-1

vax1_old <- clean_names(vax1_old)

colnames(vax1_old)[1:7] <- c("All_Doses_Given", "All_Doses_Received", "Long_Term_Care_Doses", "Doses_Received", "Long_Term_Care_Given", "Doses_Given", "Date")
vax1_old [1:6] <- lapply(vax1_old [1:6], as.character)
vax1_old [1:6] <- lapply(vax1_old [1:6], as.numeric)

#fix All_Doses_Given
vax1_old$All_Doses_Given <- All_Doses_Given


#makes a spreadsheet
#write.csv(vax1, "vaccine_api.csv")

```


# IMPORT THE Vaccine Data GitHub

```{r include=FALSE}

vax2 <- rio::import("https://raw.githubusercontent.com/Arkansascovid/Main/master/HomePageData/vaccine.csv")
vax2 [3:9] <- lapply(vax2 [3:9], as.numeric)
vax2$Date <- as.Date(vax2$Date)
#vax2$Date <- mdy(vax2$Date)

#If this is a day old, run this
# vax2 <- vax2 %>%
#     filter(Date <= (yesterday))
```


```{r}

#one time combo
vaccine <- smartbind(vax1_old, vax2)

vaccine <- vaccine %>% 
  select(Date, Doses_Given, Doses_Received, Long_Term_Care_Doses, Long_Term_Care_Given, All_Doses_Received, All_Doses_Given, New_Doses_Today, Pct_Doses_Total) %>% 
  arrange(Date)

glimpse(vaccine)
```


#math
```{r}
vaccine <- vaccine %>% 
  mutate(New_Doses_Today = (All_Doses_Given-lag(All_Doses_Given))) %>% 
  mutate(Pct_Doses_Total = (All_Doses_Given/All_Doses_Received)) 


vaccine$Pct_Doses_Total <- formattable::percent(vaccine$Pct_Doses_Total, 1)
glimpse(vaccine)

write.csv(vaccine, file = "HomePageData/vaccine.csv")
write.csv(vaccine, file = "MasterData/vaccine.csv")
#Feeds to Flourish daily chart
#https://app.flourish.studio/story/950991/edit

```


```{r}
vaccinemarshallese <- vaccine %>%
  rename('Aolepan oran uno ko kar bōki' = All_Doses_Received, 'Aolepan oran uno ko emōj kōjerbali' = All_Doses_Given, 'Uno ko rōkāāl an rainin' = New_Doses_Today)

write.csv(vaccinemarshallese, "Marshallese/vaccine_marshallese.csv")


```


#-----------------------------------
#Flourish Chart Formatting Section
#-----------------------------------


#Rename columns
```{r}
master <- master2 %>%
  rename(County_Name = county_nam, Date = mydate, Deaths = deaths, Positive = positive, Negative = negative, Recoveries = recoveries, Total_Tests = total_tests, Active_Cases = active_cases, Confirmed_Positive = confirmed_pos, Probable_Positive =probable_pos, Confirmed_Negative = confirmed_neg, Probable_Negative = probable_neg, Confirmed_Recoveries = confirmed_recov, Probable_Recoveries = probable_recov, PCR_Positive_Test = pcr_pos_test, PCR_Negative_Tests = pcr_neg_test, Total_Positive_Test = total_pos_test, Total_Negative_Test = total_neg_test)

names(master)
#master <- master[ -c(1) ]
```

#Homepage Numbers
```{r}
library(formattable)
homepage <- master %>% 
  select(County_Name, Date, New_Cases_Today, New_Deaths_Today, Deaths, Active_Cases) %>% 
 filter(County_Name=="Arkansas_all_counties") %>% 
  filter(Date==today)

# homepage$New_Cases_Today <- accounting(homepage$New_Cases_Today, digits = 0)
# homepage$New_Deaths_Today <- accounting(homepage$New_Deaths_Today, digits = 0)
# homepage$Deaths <- accounting(homepage$Deaths, digits = 0)
# homepage$Active_Cases <- accounting(homepage$Active_Cases, digits = 0)

write.csv(homepage, file = "HomePageData/homepage.csv")
```
#Rename for Spanish Homepage
```{r}
homepage_spanish <- homepage %>% 
  rename(Candado = County_Name, Nuevos_Cases_Hoy = New_Cases_Today, Nuevas_Muertes_Hoy = New_Deaths_Today, Muertes = Deaths, Casos_Activos = Active_Cases) %>% 
  filter(Candado == "Arkansas_all_counties") %>% 
  filter(Date==today)
homepage_spanish

write.csv(homepage_spanish, file = "SpanishData/homepage_spanish.csv")
```

#Rename for Marshallese Homepage
```{r}
homepage_marshallese <- homepage %>% 
  rename(Keej_ko_rōkāāl_ilo_rainin  = New_Cases_Today, Armej_ro_rej_kab_mej_rainin = New_Deaths_Today, Rimej_ro = Deaths, Keej_ko_rej_active = Active_Cases) %>% 
  filter(County_Name == "Arkansas_all_counties") %>% 
  filter(Date==today)
homepage_marshallese

write.csv(homepage_marshallese, file = "Marshallese/homepage_marshallese.csv")
```


#Only counties, one day calculations - County Changes Story: Active and New cases map, Deaths today by county, Top counties with new cases, Total tests by county, Cases and tests from dashboard, New Cases per 10k pop Slides; - Deaths Story: Total deaths by County Slide
```{r}
countyonlytoday <- master %>% 
  filter(Date==today) %>% 
  filter(County_Name!="Arkansas_all_counties") %>% 
  filter(County_Name!="MissingCountyInfo")

#CHECK CHECK - YOU MAY NEED TO SLICE OFF THE FIRST COLUMN
#countyonlytoday <- countyonlytoday [ -c(1) ]

countyonlytoday$County_Name <- str_replace_all(countyonlytoday$County_Name, pattern=fixed('Arkansas_county'), replacement=fixed('Arkansas') )
countyonlytoday$County_Name <- str_replace_all(countyonlytoday$County_Name, pattern=fixed('LittleRiver'), replacement=fixed('Little River') )
countyonlytoday$County_Name <- str_replace_all(countyonlytoday$County_Name, pattern=fixed('HotSpring'), replacement=fixed('Hot Spring') )
countyonlytoday$County_Name <- str_replace_all(countyonlytoday$County_Name, pattern=fixed('VanBuren'), replacement=fixed('Van Buren') )
countyonlytoday$County_Name <- str_replace_all(countyonlytoday$County_Name, pattern=fixed('SaintFrancis'), replacement=fixed('St. Francis') )

write.csv(countyonlytoday, file = "HomePageData/countyonlytoday.csv")
```

#Rename for Spanish Flourish, countyonlytoday
```{r}
countyonlytoday_spanish <- countyonlytoday %>% 
  rename(Nuevos_Casos_Hoy = New_Cases_Today, Nuevas_Muertes_Hoy = New_Deaths_Today, Casos_Activos = Active_Cases,Casos_Activos_por_cada_10k_Residentes = Active_Cases_10k_Pop, Confirmado_Activo = confirmed_active, Probable_Activo = probable_active, Porcentaje_Positivo_de_Novedades_en_el_Tablero = Pct_Positive_New_to_Dashboard, Nuevos_Casos_Hoy_por_cada_10k_Residentes = New_Cases_Today_10k_Pop, Recuperaciones = Recoveries, Población_Probada = Tested_Population)
#countyonlytoday_spanish <- countyonlytoday_spanish [ -c(1) ]

write.csv(countyonlytoday_spanish, file = "SpanishData/countyonlytoday_spanish.csv")

```

#Rename for Marshallese Flourish, countyonlytoday
#Needs translations Tested_Population, New_Cases_Today_10k_Pop, Counties with top new cases
```{r}
countyonlytoday_marshallese <- countyonlytoday %>% 
  rename(Case_ko_rōkāāl_rainin = New_Cases_Today, Armej_ro_rej_kab_mej_rainin = New_Deaths_Today, Oran_case_ko_rej_active  = Active_Cases, Keej_ko_rej_Active_iaan_10K_armej  = Active_Cases_10k_Pop)
#countyonlytoday_marshallese <- countyonlytoday_marshallese [ -c(1) ]  

write.csv(countyonlytoday_marshallese, file = "Marshallese/countyonlytoday_marshallese.csv")

```

#Top counties with new cases and Spanish version
```{r}
Top_counties_new_cases <-countyonlytoday %>% 
  top_n(10, New_Cases_Today) %>% 
  select(County_Name, New_Cases_Today, New_Cases_Today_10k_Pop) %>% 
  arrange(desc(New_Cases_Today))
write.csv(Top_counties_new_cases, file = "HomePageData/Top_counties_new_cases.csv")

Top_counties_new_cases_spanish <- Top_counties_new_cases %>% 
  rename(Condado = County_Name, Nuevos_Casos_Hoy = New_Cases_Today, Nuevos_Casos_por_cada_10k_Residentes = New_Cases_Today_10k_Pop)


write.csv(Top_counties_new_cases_spanish, file = "SpanishData/Top_counties_new_cases_spanish.csv")

```

#Top_new_cases_10k_pop and Spanish version
```{r}
Top_new_cases_10k_pop <-countyonlytoday %>% 
  top_n(10, New_Cases_Today_10k_Pop) %>% 
  select(County_Name, New_Cases_Today, New_Cases_Today_10k_Pop) %>% 
  arrange(desc(New_Cases_Today_10k_Pop))
#write.csv(Top_new_cases_10k_pop, file = "HomePageData/Top_new_cases_10k_pop.csv")

Top_new_cases_10k_pop_spanish <- Top_new_cases_10k_pop %>% 
  rename(Condado = County_Name, Nuevos_Casos_Hoy = New_Cases_Today, Nuevos_Casos_por_cada_10k_Residentes = New_Cases_Today_10k_Pop)


#write.csv(Top_new_cases_10k_pop_spanish, file = "SpanishData/Top_new_cases_10k_pop_spanish.csv")
```

#Top counties with new cases and Marshallese version
#Needs New_Cases_Today_10k_Pop translation
```{r}
Top_counties_new_cases <-countyonlytoday %>% 
  top_n(10, New_Cases_Today) %>% 
  select(County_Name, New_Cases_Today, New_Cases_Today_10k_Pop) %>% 
  arrange(desc(New_Cases_Today))
write.csv(Top_counties_new_cases, file = "HomePageData/Top_counties_new_cases.csv")

Top_counties_new_cases_marshallese <- Top_counties_new_cases %>% 
  rename( Case_ko_rōkāāl_rainin  = New_Cases_Today)


write.csv(Top_counties_new_cases_marshallese, file = "Marshallese/Top_counties_new_cases_marshallese.csv")

```


#Deaths by day, minimum 1, and Spanish version and Marshallese version
```{r}
deaths_daily <- countyonlytoday %>% 
  filter(New_Deaths_Today >=1) %>% 
  select(County_Name, New_Deaths_Today) %>% 
  arrange(desc(New_Deaths_Today))
write.csv(deaths_daily, file = "HomePageData/deaths_daily.csv")

deaths_daily_spanish <- deaths_daily %>% 
  rename(Nuevas_Muertes_Hoy = New_Deaths_Today, Condado = County_Name)

deaths_daily_marshallese <- deaths_daily %>% 
  rename(Armej_ro_rej_kab_mej_rainin = New_Deaths_Today)

write.csv(deaths_daily_marshallese, file = "Marshallese/deaths_daily_marshallese.csv")
```

#--------------------------------------------------------------------------------#
#Weekly Change in Cases slide 
#--------------------------------------------------------------------------------#


```{r}
df1 <- master %>% 
  filter(County_Name=="Arkansas_all_counties")

df1 <- df1 %>%
  mutate(DATE = ymd(Date))
```

#date formatting
```{r}
df1$week <- isoweek(df1$DATE)
df1$month <- month(df1$DATE)
```

#calculations by week
```{r}
newcases <- df1 %>%
  group_by(week) %>%
  summarise(week_newcases = sum(New_Cases_Today, na.rm = TRUE))
```

#calculations by 2021 week
```{r}
newcases_2021 <- df1 %>%
  filter(Date >= "2021-01-04") %>% 
  group_by(week) %>%
  summarise(week_newcases = sum(New_Cases_Today, na.rm = TRUE)) %>% 
  mutate(Year=("2021"))
  
```


```{r}
newcases_2020 <- df1 %>%
  filter(Date < "2021-01-04") %>% 
  group_by(week) %>%
  summarise(week_newcases = sum(New_Cases_Today, na.rm = TRUE)) %>% 
  mutate(Year=("2020"))
```

#Table with 2021, 2020 years separated
```{r}
newcases <- rbind(newcases_2021, newcases_2020) 
```  


#import table to name the weeks beginning Monday
```{r}
weeks_numbers <- rio::import("https://raw.githubusercontent.com/profrobwells/CovidFall2020/master/weeks_numbers.csv")

weeks_numbers$Date <- lubridate::mdy(weeks_numbers$Date)
```

#Join with cases; manually change date filter after a week
```{r}
weeklycases_2020 <- newcases_2020 %>% 
  right_join(weeks_numbers, by=c("week"="Number")) %>% 
  filter(Date < "2021-01-04") %>% 
  distinct() %>% 
  arrange(desc(Date))

weeklycases_2021 <- newcases_2021 %>% 
  right_join(weeks_numbers, by=c("week"="Number")) %>% 
  filter(Date<today) %>% 
  distinct() %>% 
  arrange(desc(Date))

weeklycases_2021 <- weeklycases_2021 %>% 
  filter(Date > "2021-01-04") 

weeklycases <- rbind(weeklycases_2021, weeklycases_2020 )

weeklycases <- weeklycases %>%
  rename(Weekly_Total_New_Cases = week_newcases, Week_Beginning = Date)

write.csv(weeklycases, file = "HomePageData/weeklycases.csv")

#Spanish translation
weeklycases_spanish <- weeklycases %>% 
  rename(Semana = week, Casos_Todos_Semana = Weekly_Total_New_Cases, Comienzo_de_la_Semana = Week_Beginning)

write.csv(weeklycases_spanish, file = "SpanishData/weeklycases_spanish.csv")

```

#--------------------------------------------------------------------------------#

#Calculate time series totals and averages; used in:

#--------------------------------------------------------------------------------#
#Statewide story slides: Cases context, Confirmed vs probable slides
#Deaths story slides: Cases per death, New deaths - 7 day avg, Statewide Total Deaths, Pct Inactive/Closed cases resulting in death
#County Changes Today story slides: County Snapshots
```{r}
test <- master %>% 
  group_by(County_Name) %>%
  arrange(County_Name, Date) %>% mutate(New_Cases_7_Day_Avg = slider::slide_dbl(New_Cases_Today, mean, .before =
  6, .after = 0)) %>% 
  mutate(New_Deaths_7_Day_Avg = slider::slide_dbl(New_Deaths_Today, mean, .before =
  6, .after = 0)) %>% 
  mutate(Total_Positives = (Confirmed_Positive + Probable_Positive)) %>% 
  mutate(Active_Cases_Total = (confirmed_active + probable_active)) %>% 
  mutate(Total_Deaths = (confirmed_death + probable_death)) %>% 
  mutate(Cases_Per_Death = Total_Positives / Total_Deaths) %>% 
  ungroup()

test <- test %>% 
  arrange(Date)

test$New_Cases_7_Day_Avg <-round(test$New_Cases_7_Day_Avg, 1)
test$New_Deaths_7_Day_Avg <-round(test$New_Deaths_7_Day_Avg, 1)

#County changes today story: county snapshots 
countysnapshot <- test %>%
  select(County_Name, Date, Active_Cases_Total, Total_Positives, Total_Deaths, Pct_Positive_Cumulative, New_Cases_Today, Active_Cases_10k_Pop) %>% 
  filter(Date>"2020-08-31")
write.csv(countysnapshot, file = "HomePageData/countysnapshot.csv")

#Rename Spanish County Snapshots
countysnapshot_spanish <- countysnapshot %>% 
  rename(Candado = County_Name, Fecha = Date, Casos_Activos = Active_Cases_Total, Positivos_Todos = Total_Positives, Muertes_Todos = Total_Deaths, Nuevos_Casos_Hoy = New_Cases_Today, Casos_Activos_por_cada_10k_Residentes = Active_Cases_10k_Pop,  Porcentaje_Positivo_Accumulative = Pct_Positive_Cumulative )
write.csv(countysnapshot_spanish, file = "SpanishData/countysnapshot_spanish.csv")

#Rename Marshallese County Snapshots
countysnapshot_marshallese <- countysnapshot %>% 
  rename(Keej_ko_rej_active = Active_Cases_Total, Oran_Case_ko_rej_Positive = Total_Positives, Oran_rimej_ro  = Total_Deaths, Case_ko_rōkāāl_rainin  = New_Cases_Today, Keej_ko_rej_Active_iaan_10K_armej  = Active_Cases_10k_Pop,  Percent_in_Aolepen_Oran = Pct_Positive_Cumulative )
write.csv(countysnapshot_marshallese, file = "Marshallese/countysnapshot_marshallese.csv")


#Use this for Story: Cases Context - could combine with statewide2 if we don't go all the way back to Aug. 1
statewide <- test %>% 
  select(Date, County_Name, New_Cases_Today, New_Cases_7_Day_Avg, Total_Positives, Active_Cases_Total, Total_Deaths, Pct_Deaths_vs_Recoveries) %>% 
  filter(County_Name=="Arkansas_all_counties") %>% 
  filter(Date>="2020-08-01")
write.csv(statewide, file = "HomePageData/statewide.csv")

#Statewide Story - Confirmed vs Probable slide
statewide2 <- test %>% 
  select(Date, County_Name, confirmed_active, probable_active, confirmed_death, probable_death, Confirmed_Positive, Probable_Positive) %>% 
  filter(County_Name=="Arkansas_all_counties") %>% 
  filter(Date>="2020-09-13")

statewide2 <- statewide2 %>%
  rename(Confirmed_Active = confirmed_active, Probable_Active = probable_active, Confirmed_Death = confirmed_death, Probable_Death = probable_death)

write.csv(statewide2, file = "HomePageData/statewide2.csv")
```

#Use this for Deaths: Cases per death & 7day avg new deaths, statewide total deaths
```{r}
deaths <- test %>% 
  select(Date, County_Name, Total_Positives, Total_Deaths, New_Deaths_7_Day_Avg, New_Deaths_Today, Cases_Per_Death) %>% 
  filter(County_Name=="Arkansas_all_counties") %>% 
  filter(Date>="2020-04-08") 

#Filtering outliers from data cleanup days
deaths_clean <- deaths %>%
  mutate(Clean_New_Deaths_Today = New_Deaths_Today)

deaths_clean <- deaths_clean %>%
  select(Date, County_Name, Clean_New_Deaths_Today) %>%
  filter(Date != "2021-02-28") %>%
  filter(Date != "2021-10-10")

deaths_clean <- merge(deaths, deaths_clean, all.x = TRUE)
deaths_clean <- deaths_clean %>%
  unique()

write.csv(deaths_clean, file = "HomePageData/deaths.csv")

deaths_spanish <- deaths_clean %>% 
  rename(Fecha = Date, Condado = County_Name, Positivos_Todos = Total_Positives, Muertes_Todos = Total_Deaths, Nuevas_Muertes_Hoy = New_Deaths_Today, Casos_por_Muertes = Cases_Per_Death, Limpiar_nuevas_muertes_hoy = Clean_New_Deaths_Today )

write.csv(deaths_spanish, file = "SpanishData/deaths_spanish.csv")

#County Time Series

countytime <- master2 %>% 
  select(county_nam, mydate, deaths, positive, active_cases, Cases_Population) %>% 
  rename(Date = mydate, Deaths = deaths, Positive = positive) %>% 
  filter( Date>= "2020-09-13" )


write.csv(countytime, "HomepageData/countytime.csv")
```


#Statewide: Top dates for new cases & Spanish
```{r}
topcases <- master %>% 
  select(County_Name, Date, New_Cases_Today) %>% 
  filter(County_Name=="Arkansas_all_counties") %>% 
  top_n(10, New_Cases_Today) %>% 
  arrange(desc(New_Cases_Today))
 
write.csv(topcases, file = "HomePageData/topcases.csv")

#Spanish
topcases_spanish <- topcases %>% 
  rename(Condado = County_Name, Fecha = Date, Nuevos_Casos_Hoy = New_Cases_Today)

write.csv(topcases_spanish, file = "SpanishData/topcases_spanish.csv")
```


#Spanish New Cases Today by County 
```{r}
countycases_spanish <- master %>% 
  rename(Candado = County_Name, Nuevos_Cases_Hoy = New_Cases_Today, Nuevas_Muertes_Hoy = New_Deaths_Today, Muertes = Deaths, Casos_Activos = Active_Cases, Casos_Nuevos_por_cada_10k_Residentes = New_Cases_Today_10k_Pop, Casos_Activos_por_cada_10k_Residentes= Active_Cases_10k_Pop) %>% 
  filter(Date==today)


write.csv(countycases_spanish, file = "SpanishData/countycases_spanish.csv")
```

#Datawrapper Maps
```{r}
datawrapper <- countyonlytoday %>% 
  select(County_Name, Date, New_Cases_Today, Positive, Active_Cases, Deaths)
write.csv(datawrapper, file = "HomePageData/datawrapper.csv")

#Spanish
datawrapper_spanish <- countyonlytoday_spanish %>% 
  select(County_Name, Date, Nuevos_Casos_Hoy)
write.csv(datawrapper_spanish, file = "SpanishData/datawrapper_spanish.csv")

#Marshallese
datawrapper_marshallese <- countyonlytoday_marshallese %>% 
  select(County_Name, Date, Case_ko_rōkāāl_rainin)
write.csv(datawrapper_marshallese, file = "Marshallese/datawrapper_marshallese.csv")
```


-**30**