---
title: "Query NHTS Datasets from R"
output: rmarkdown::html_vignette
vignette: >
%\VignetteEngine{knitr::rmarkdown}
%\VignetteIndexEntry{Querying 2009}
%\VignetteEncoding{UTF-8}
---
# Load NHTS tool
```{r warning=F, message=F}
library(summarizeNHTS)
```
# Bring a copy or download dataset
```{r, eval=F}
download_nhts_data("2009", "C:/NHTS")
# > You are about to download large data files to the directory below. Continue? (y/n):
# > C:/NHTS/csv/2009
```
# Initialize dataset
```{r echo=F}
# unwanted because of how noisy it appears in output
options("datatable.showProgress"=FALSE)
```
```{r warning=F}
dataset <- read_data("2009", csv_path = "C:/NHTS")
```
---
## Organize Questions
* What does the sample look like?
* How often are people travelling? How far?
* When are people travelling? Where? Why?
* Are there differences by categorical characteristics?
* ... and on and on
## Review data documentation
* Makes lists of variables to support analyses
* Categorize and organize
* ... and on and on
## Resources
* [Data Documentation](data_elements.html)
* [This File's Code](https://raw.githubusercontent.com/Westat-Transportation/summarizeNHTS/master/vignettes/summarizeNHTS-demo.Rmd)
---
# Start summarizing...
## Annual Vehicle Mileage Accrual
```{r}
statistic <- summarize_data(
data = dataset,
agg = 'avg',
agg_var = "ANNMILES",
subset = "(VEHAGE > 0 & VEHAGE < 11) & (ANNMILES > 500 & ANNMILES < 200000)"
)
statistic
```
## Annual Vehicle Mileage Accrual by Vehicle Age
```{r}
statistic <- summarize_data(
data = dataset,
agg = 'avg',
agg_var = "ANNMILES",
by = c("VEHAGE"),
label = TRUE,
subset = "(VEHAGE > 0 & VEHAGE < 11) & (ANNMILES > 500 & ANNMILES < 200000)"
)
make_table(statistic)
make_chart(statistic)
```
## Household Size by Number of Vehicles
```{r echo=F,warning=F}
dataset$data$household$HHSIZE <- ifelse(dataset$data$household$HHSIZE > 3, "4+", dataset$data$household$HHSIZE)
dataset$data$household$HHVEHCNT <- ifelse(dataset$data$household$HHVEHCNT > 3, "4+", dataset$data$household$HHVEHCNT)
```
```{r}
statistic <- summarize_data(
data = dataset,
agg = "household_count",
label = TRUE,
by = c("HHSIZE","HHVEHCNT")
)
make_table(statistic, digits = 0)
```
```{r}
make_chart(statistic, digits = 0)
```
# Querying at the Trip Level
## Average Distance Travelled to Work
### Table
```{r echo=F,warning=F}
stash_WHYTO <- dataset$data$trip$WHYTO
dataset$data$trip$WHYTO <- ifelse(!dataset$data$trip$WHYTO %in% c("-1","-7","-8","-9","11","12"), "Not to Work", dataset$data$trip$WHYTO)
dataset$data$trip$WHYTO <- ifelse(dataset$data$trip$WHYTO == "11", "To Work", dataset$data$trip$WHYTO)
```
```{r}
statistic <- summarize_data(
data = dataset,
agg = "avg",
agg_var = c("TRPMILES"),
by = c("WHYTO"),
subset = "!WHYTO %in% c('-1','-7','-8','-9','12') & (TRPMILES > 0 & TRPMILES < 200)"
)
make_table(statistic)
```
### Bar Chart
````{r}
make_chart(statistic)
```
```{r echo=F,warning=F}
dataset$data$trip$WHYTO <- stash_WHYTO
```
## Average Dwell Time by Trip Purpose (TRIPPURP)
```{r}
statistic <- summarize_data(
data = dataset,
agg = "avg",
agg_var = c("DWELTIME"),
by = c("TRIPPURP"),
subset = "(!TRIPPURP %in% c('-9'))"
)
make_chart(statistic, order=T, digits=0)
```
## Average Dwell Time by Trip Purpose (WHYTO)
```{r echo=F,warning=F}
codebook_2009$values[codebook_2009$values$NAME=="WHYTO"]$LABEL <- substr(codebook_2009$values[codebook_2009$values$NAME=="WHYTO"]$LABEL, 0, 22)
```
```{r}
statistic <- summarize_data(
data = dataset,
agg = "avg",
agg_var = c("DWELTIME"),
by = c("WHYTO"),
subset = "(!WHYTO %in% c('-9','-8','-7','-1','01','10','12'))"
)
make_chart(statistic, order=T, digits=0)
```
```{r echo=F, warning=F}
library(data.table)
dataset$data$trip[, STRTTIME := substring(sprintf("%s04d", STRTTIME), 0, 2)]
```
## Trip Frequency by Start Hour
```{r}
statistic <- summarize_data(
data = dataset,
agg = "trip_count",
by = c("STRTTIME"),
subset = "STRTTIME != '00'",
exclude_missing = TRUE
)
make_chart(statistic, digits=0)
```
### Big numbers! We can also specify that we want to compute as proportion.
### Trip Frequency over Start Hour
```{r}
statistic <- summarize_data(
data = dataset,
agg = "trip_count",
by = c("STRTTIME"),
prop = TRUE,
subset = "STRTTIME != '00'",
exclude_missing = TRUE
)
make_chart(statistic)
```
### Trip Frequency over Start Hour of Full Time Workers
```{r}
statistic <- summarize_data(
data = dataset,
agg = "trip_count",
by = c("STRTTIME"),
prop = TRUE,
subset = "STRTTIME != '00' & WORKER == '01' & WKFTPT == '01'",
exclude_missing = TRUE
)
make_chart(statistic, digits=0)
```
# Trip Rates!
## Daily Person Trips by Age
```{r echo=F,warning=F}
library(data.table)
stash_R_AGE <- dataset$data$person$R_AGE
class(dataset$data$person$R_AGE) <- "character"
dataset$data$person[R_AGE %in% as.character(5:14), R_AGE := "05 to 14"]
dataset$data$person[R_AGE %in% as.character(15:19), R_AGE := "15 to 19"]
dataset$data$person[R_AGE %in% as.character(20:24), R_AGE := "20 to 24"]
dataset$data$person[R_AGE %in% as.character(25:29), R_AGE := "25 to 29"]
dataset$data$person[R_AGE %in% as.character(30:34), R_AGE := "30 to 34"]
dataset$data$person[R_AGE %in% as.character(35:39), R_AGE := "35 to 39"]
dataset$data$person[R_AGE %in% as.character(40:44), R_AGE := "40 to 44"]
dataset$data$person[R_AGE %in% as.character(45:49), R_AGE := "45 to 49"]
dataset$data$person[R_AGE %in% as.character(50:54), R_AGE := "50 to 54"]
dataset$data$person[R_AGE %in% as.character(55:59), R_AGE := "55 to 59"]
dataset$data$person[R_AGE %in% as.character(60:64), R_AGE := "60 to 64"]
dataset$data$person[R_AGE %in% as.character(65:69), R_AGE := "65 to 69"]
dataset$data$person[R_AGE %in% as.character(70:74), R_AGE := "70 to 74"]
dataset$data$person[R_AGE %in% as.character(75:79), R_AGE := "75 to 79"]
dataset$data$person[R_AGE %in% as.character(80:84), R_AGE := "80 to 84"]
dataset$data$person[R_AGE %in% as.character(85:99), R_AGE := "85 +"]
```
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("R_AGE")
)
make_table(statistic)
make_chart(statistic)
```
```{r echo=F,warning=F}
dataset$data$person$R_AGE <- stash_R_AGE
```
## Daily Person Trips by Driver Status
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("DRIVER"),
subset = "DRIVER %in% c('01','02') & R_AGE > 18"
)
make_chart(statistic)
```
## And by Worker Status
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("DRIVER", "WORKER"),
subset = "DRIVER %in% c('01','02') & WORKER %in% c('01','02') & R_AGE > 18"
)
make_chart(statistic)
```
### Daily Person Trips by Travel Day Public Transit Usage
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("USEPUBTR"),
subset = "DRIVER %in% c('01','02') & R_AGE > 18 & USEPUBTR %in% c('01','02')"
)
make_chart(statistic)
```
## Daily Person Trips by Day of Week
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("TRAVDAY")
)
make_chart(statistic)
```
## Daily Person Trips by Day of Week and Worker Status
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("TRAVDAY","WORKER"),
subset = "WORKER %in% c('01','02') & R_AGE > 18"
)
make_chart(statistic, x = "TRAVDAY")
```
## Daily Person Trips by Day of Week and Previous Week's Primary Activity
````{r echo=F,warning=F}
# nhts_2009$labels$DESCRIPTION <- gsub("Temporarily absent from a job or business", "Temp w/o job", nhts_2009$labels$DESCRIPTION)
# nhts_2009$labels$DESCRIPTION <- gsub("Doing something else", "Something else", nhts_2009$labels$DESCRIPTION)
````
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("TRAVDAY","PRMACT"),
subset = "PRMACT %in% c('01','04','06')"
)
make_chart(statistic, x = "TRAVDAY")
```
# Household Trip Rates
```{r}
statistic <- summarize_data(
data = dataset,
agg = "household_trip_rate"
)
statistic
```
---
## Daily Person Trips per Household by Household Size
```{r}
statistic <- summarize_data(
data = dataset,
agg = "household_trip_rate",
by = c("HHSIZE"),
label = FALSE
)
make_chart(statistic)
```
# Trip Rate Maps
## Daily Person Trips by State
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
label = FALSE,
by = "HHSTFIPS"
)
make_map(statistic)
```
## Tilemaps!
```{r}
make_map(statistic, state_style="tile")
```
## Daily Person Trips by State Where Household Income < 25,000
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("HHSTFIPS"),
label = FALSE,
subset = "HHFAMINC %in% c('01','02','03','04','05')"
)
make_map(statistic)
```
```{r}
make_map(statistic, state_style = "tile")
```
## Younger Age Group By State
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("HHSTFIPS"),
label = FALSE,
subset = "R_AGE >= 20 & R_AGE <= 40"
)
make_map(statistic)
```
## Older Age Group By State
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("HHSTFIPS"),
label = FALSE,
subset = "R_AGE >= 50 & R_AGE <= 70"
)
make_map(statistic)
```
## Metro Geographies Supported Too (where available)
## Daily Person Trip Rates by Core Based Statistical Area
```{r}
statistic <- summarize_data(
data = dataset,
agg = "person_trip_rate",
by = c("HH_CBSA"),
label = FALSE
)
make_map(statistic)
```