--- title: "Query NHTS Datasets from R" output: rmarkdown::html_vignette vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Querying 2009} %\VignetteEncoding{UTF-8} ---

# Load NHTS tool ```{r warning=F, message=F} library(summarizeNHTS) ```
# Bring a copy or download dataset ```{r, eval=F} download_nhts_data("2009", "C:/NHTS") # > You are about to download large data files to the directory below. Continue? (y/n): # > C:/NHTS/csv/2009 ```
# Initialize dataset ```{r echo=F} # unwanted because of how noisy it appears in output options("datatable.showProgress"=FALSE) ``` ```{r warning=F} dataset <- read_data("2009", csv_path = "C:/NHTS") ```
--- ## Organize Questions * What does the sample look like? * How often are people travelling? How far? * When are people travelling? Where? Why? * Are there differences by categorical characteristics? * ... and on and on ## Review data documentation * Makes lists of variables to support analyses * Categorize and organize * ... and on and on ## Resources * [Data Documentation](data_elements.html) * [This File's Code](https://raw.githubusercontent.com/Westat-Transportation/summarizeNHTS/master/vignettes/summarizeNHTS-demo.Rmd) ---

# Start summarizing...
## Annual Vehicle Mileage Accrual ```{r} statistic <- summarize_data( data = dataset, agg = 'avg', agg_var = "ANNMILES", subset = "(VEHAGE > 0 & VEHAGE < 11) & (ANNMILES > 500 & ANNMILES < 200000)" ) statistic ```
## Annual Vehicle Mileage Accrual by Vehicle Age ```{r} statistic <- summarize_data( data = dataset, agg = 'avg', agg_var = "ANNMILES", by = c("VEHAGE"), label = TRUE, subset = "(VEHAGE > 0 & VEHAGE < 11) & (ANNMILES > 500 & ANNMILES < 200000)" ) make_table(statistic) make_chart(statistic) ```
## Household Size by Number of Vehicles ```{r echo=F,warning=F} dataset$data$household$HHSIZE <- ifelse(dataset$data$household$HHSIZE > 3, "4+", dataset$data$household$HHSIZE) dataset$data$household$HHVEHCNT <- ifelse(dataset$data$household$HHVEHCNT > 3, "4+", dataset$data$household$HHVEHCNT) ``` ```{r} statistic <- summarize_data( data = dataset, agg = "household_count", label = TRUE, by = c("HHSIZE","HHVEHCNT") ) make_table(statistic, digits = 0) ```

```{r} make_chart(statistic, digits = 0) ```

# Querying at the Trip Level
## Average Distance Travelled to Work
### Table ```{r echo=F,warning=F} stash_WHYTO <- dataset$data$trip$WHYTO dataset$data$trip$WHYTO <- ifelse(!dataset$data$trip$WHYTO %in% c("-1","-7","-8","-9","11","12"), "Not to Work", dataset$data$trip$WHYTO) dataset$data$trip$WHYTO <- ifelse(dataset$data$trip$WHYTO == "11", "To Work", dataset$data$trip$WHYTO) ``` ```{r} statistic <- summarize_data( data = dataset, agg = "avg", agg_var = c("TRPMILES"), by = c("WHYTO"), subset = "!WHYTO %in% c('-1','-7','-8','-9','12') & (TRPMILES > 0 & TRPMILES < 200)" ) make_table(statistic) ```
### Bar Chart ````{r} make_chart(statistic) ``` ```{r echo=F,warning=F} dataset$data$trip$WHYTO <- stash_WHYTO ```


## Average Dwell Time by Trip Purpose (TRIPPURP) ```{r} statistic <- summarize_data( data = dataset, agg = "avg", agg_var = c("DWELTIME"), by = c("TRIPPURP"), subset = "(!TRIPPURP %in% c('-9'))" ) make_chart(statistic, order=T, digits=0) ```
## Average Dwell Time by Trip Purpose (WHYTO) ```{r echo=F,warning=F} codebook_2009$values[codebook_2009$values$NAME=="WHYTO"]$LABEL <- substr(codebook_2009$values[codebook_2009$values$NAME=="WHYTO"]$LABEL, 0, 22) ``` ```{r} statistic <- summarize_data( data = dataset, agg = "avg", agg_var = c("DWELTIME"), by = c("WHYTO"), subset = "(!WHYTO %in% c('-9','-8','-7','-1','01','10','12'))" ) make_chart(statistic, order=T, digits=0) ``` ```{r echo=F, warning=F} library(data.table) dataset$data$trip[, STRTTIME := substring(sprintf("%s04d", STRTTIME), 0, 2)] ```

## Trip Frequency by Start Hour ```{r} statistic <- summarize_data( data = dataset, agg = "trip_count", by = c("STRTTIME"), subset = "STRTTIME != '00'", exclude_missing = TRUE ) make_chart(statistic, digits=0) ```
### Big numbers! We can also specify that we want to compute as proportion. ### Trip Frequency over Start Hour ```{r} statistic <- summarize_data( data = dataset, agg = "trip_count", by = c("STRTTIME"), prop = TRUE, subset = "STRTTIME != '00'", exclude_missing = TRUE ) make_chart(statistic) ```
### Trip Frequency over Start Hour of Full Time Workers ```{r} statistic <- summarize_data( data = dataset, agg = "trip_count", by = c("STRTTIME"), prop = TRUE, subset = "STRTTIME != '00' & WORKER == '01' & WKFTPT == '01'", exclude_missing = TRUE ) make_chart(statistic, digits=0) ```

# Trip Rates!
## Daily Person Trips by Age ```{r echo=F,warning=F} library(data.table) stash_R_AGE <- dataset$data$person$R_AGE class(dataset$data$person$R_AGE) <- "character" dataset$data$person[R_AGE %in% as.character(5:14), R_AGE := "05 to 14"] dataset$data$person[R_AGE %in% as.character(15:19), R_AGE := "15 to 19"] dataset$data$person[R_AGE %in% as.character(20:24), R_AGE := "20 to 24"] dataset$data$person[R_AGE %in% as.character(25:29), R_AGE := "25 to 29"] dataset$data$person[R_AGE %in% as.character(30:34), R_AGE := "30 to 34"] dataset$data$person[R_AGE %in% as.character(35:39), R_AGE := "35 to 39"] dataset$data$person[R_AGE %in% as.character(40:44), R_AGE := "40 to 44"] dataset$data$person[R_AGE %in% as.character(45:49), R_AGE := "45 to 49"] dataset$data$person[R_AGE %in% as.character(50:54), R_AGE := "50 to 54"] dataset$data$person[R_AGE %in% as.character(55:59), R_AGE := "55 to 59"] dataset$data$person[R_AGE %in% as.character(60:64), R_AGE := "60 to 64"] dataset$data$person[R_AGE %in% as.character(65:69), R_AGE := "65 to 69"] dataset$data$person[R_AGE %in% as.character(70:74), R_AGE := "70 to 74"] dataset$data$person[R_AGE %in% as.character(75:79), R_AGE := "75 to 79"] dataset$data$person[R_AGE %in% as.character(80:84), R_AGE := "80 to 84"] dataset$data$person[R_AGE %in% as.character(85:99), R_AGE := "85 +"] ``` ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("R_AGE") ) make_table(statistic) make_chart(statistic) ``` ```{r echo=F,warning=F} dataset$data$person$R_AGE <- stash_R_AGE ```

## Daily Person Trips by Driver Status ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("DRIVER"), subset = "DRIVER %in% c('01','02') & R_AGE > 18" ) make_chart(statistic) ```
## And by Worker Status ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("DRIVER", "WORKER"), subset = "DRIVER %in% c('01','02') & WORKER %in% c('01','02') & R_AGE > 18" ) make_chart(statistic) ```

### Daily Person Trips by Travel Day Public Transit Usage ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("USEPUBTR"), subset = "DRIVER %in% c('01','02') & R_AGE > 18 & USEPUBTR %in% c('01','02')" ) make_chart(statistic) ```
## Daily Person Trips by Day of Week ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("TRAVDAY") ) make_chart(statistic) ```
## Daily Person Trips by Day of Week and Worker Status ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("TRAVDAY","WORKER"), subset = "WORKER %in% c('01','02') & R_AGE > 18" ) make_chart(statistic, x = "TRAVDAY") ```
## Daily Person Trips by Day of Week and Previous Week's Primary Activity ````{r echo=F,warning=F} # nhts_2009$labels$DESCRIPTION <- gsub("Temporarily absent from a job or business", "Temp w/o job", nhts_2009$labels$DESCRIPTION) # nhts_2009$labels$DESCRIPTION <- gsub("Doing something else", "Something else", nhts_2009$labels$DESCRIPTION) ```` ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("TRAVDAY","PRMACT"), subset = "PRMACT %in% c('01','04','06')" ) make_chart(statistic, x = "TRAVDAY") ```
# Household Trip Rates ```{r} statistic <- summarize_data( data = dataset, agg = "household_trip_rate" ) statistic ``` ---
## Daily Person Trips per Household by Household Size ```{r} statistic <- summarize_data( data = dataset, agg = "household_trip_rate", by = c("HHSIZE"), label = FALSE ) make_chart(statistic) ```

# Trip Rate Maps
## Daily Person Trips by State ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", label = FALSE, by = "HHSTFIPS" ) make_map(statistic) ```
## Tilemaps! ```{r} make_map(statistic, state_style="tile") ```
## Daily Person Trips by State Where Household Income < 25,000
```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("HHSTFIPS"), label = FALSE, subset = "HHFAMINC %in% c('01','02','03','04','05')" ) make_map(statistic) ```
```{r} make_map(statistic, state_style = "tile") ```


## Younger Age Group By State ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("HHSTFIPS"), label = FALSE, subset = "R_AGE >= 20 & R_AGE <= 40" ) make_map(statistic) ```
## Older Age Group By State ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("HHSTFIPS"), label = FALSE, subset = "R_AGE >= 50 & R_AGE <= 70" ) make_map(statistic) ```


## Metro Geographies Supported Too (where available)
## Daily Person Trip Rates by Core Based Statistical Area ```{r} statistic <- summarize_data( data = dataset, agg = "person_trip_rate", by = c("HH_CBSA"), label = FALSE ) make_map(statistic) ```