library(dplyr)
library(tidyr)
library(aws.s3)

Sys.setenv("AWS_DEFAULT_REGION" = "us-east-2")
s3bucket <- tryCatch(
  {
    get_bucket(bucket = "forecast-eval")
  },
  error = function(e) {
    e
  }
)

readbucket <- function(name) {
  tryCatch(
    {
      s3readRDS(object = name, bucket = s3bucket)
    },
    error = function(e) {
      e
    }
  )
}

# Cases, deaths, hosp scores: needed for "actual"s
cases <- bind_rows(
  readbucket("score_cards_nation_cases.rds"),
  readbucket("score_cards_state_cases.rds")
)
deaths <- bind_rows(
  readbucket("score_cards_nation_deaths.rds"),
  readbucket("score_cards_state_deaths.rds")
)
hosp <- bind_rows(
  readbucket("score_cards_nation_hospitalizations.rds"),
  readbucket("score_cards_state_hospitalizations.rds")
)

# The big one: predictions from all forecasters
pred <- readbucket("predictions_cards.rds")

# Cases
pred_cases <- pred %>%
  filter(signal == "confirmed_incidence_num") %>%
  mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>%
  pivot_wider(
    names_from = quantile,
    values_from = value,
    names_prefix = "forecast_"
  )

actual_cases <- cases %>%
  select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual)

joined_cases <- left_join(pred_cases, actual_cases)
sum(is.na(actual_cases$actual)) == sum(is.na(joined_cases$actual))
write.csv(joined_cases, "cases.csv")

# Deaths
pred_deaths <- pred %>%
  filter(signal == "deaths_incidence_num") %>%
  mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>%
  pivot_wider(
    names_from = quantile,
    values_from = value,
    names_prefix = "forecast_"
  )

actual_deaths <- deaths %>%
  select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual)

joined_deaths <- left_join(pred_deaths, actual_deaths)
sum(is.na(actual_deaths$actual)) == sum(is.na(joined_deaths$actual))
write.csv(joined_deaths, "deaths.csv")

# Hospitalizations: break up by weeks since we run into memory errors o/w!
pred_hosp <- actual_hosp <- joined_hosp <- vector(mode = "list", length = 4)
for (k in 1:4) {
  cat(k, "... ")
  days <- (k - 1) * 7 + 1:7
  pred_hosp[[k]] <- pred %>%
    filter(signal == "confirmed_admissions_covid_1d", ahead %in% days) %>%
    mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>%
    pivot_wider(
      names_from = quantile,
      values_from = value,
      names_prefix = "forecast_"
    )

  actual_hosp[[k]] <- hosp %>%
    filter(ahead %in% days) %>%
    select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual)

  joined_hosp[[k]] <- left_join(pred_hosp[[k]], actual_hosp[[k]])
  cat(sum(is.na(actual_hosp[[k]]$act)) == sum(is.na(joined_hosp[[k]]$act)))
  write.csv(joined_hosp[[k]], sprintf("hospitalizations_%iwk.csv", k))
}