Geospatial visualization

MACS 30500 University of Chicago

Geospatial visualization

  • Oldest data visualization methods in human existence
  • Google Maps
  • Depict spatial features
    • Additional data overlaid
  • Maps defined by:
    • Scale
    • Projection
    • Symbols

Drawing maps in R

Types of layers

  • Points
  • Symbols
  • Fills

Storing map boundaries

  • Geographic information system
  • General-purpose vs. specialized software
  • ggplot2

Using maps boundaries

library(maps)

# map of the world
map()

# usa boundaries
map("usa")

map("state")

# county map of illinois
map("county", "illinois")

Using maps boundaries

# map of the world
map_data("world") %>%
  as_tibble
## # A tibble: 99,338 x 6
##     long   lat group order region subregion
##  * <dbl> <dbl> <dbl> <int>  <chr>     <chr>
##  1 -69.9  12.5     1     1  Aruba      <NA>
##  2 -69.9  12.4     1     2  Aruba      <NA>
##  3 -69.9  12.4     1     3  Aruba      <NA>
##  4 -70.0  12.5     1     4  Aruba      <NA>
##  5 -70.1  12.5     1     5  Aruba      <NA>
##  6 -70.1  12.6     1     6  Aruba      <NA>
##  7 -70.0  12.6     1     7  Aruba      <NA>
##  8 -70.0  12.6     1     8  Aruba      <NA>
##  9 -69.9  12.5     1     9  Aruba      <NA>
## 10 -69.9  12.5     1    10  Aruba      <NA>
## # ... with 99,328 more rows
# usa boundaries
map_data("usa") %>%
  as_tibble
## # A tibble: 7,243 x 6
##     long   lat group order region subregion
##  * <dbl> <dbl> <dbl> <int>  <chr>     <chr>
##  1  -101  29.7     1     1   main      <NA>
##  2  -101  29.7     1     2   main      <NA>
##  3  -101  29.7     1     3   main      <NA>
##  4  -101  29.6     1     4   main      <NA>
##  5  -101  29.6     1     5   main      <NA>
##  6  -101  29.6     1     6   main      <NA>
##  7  -101  29.6     1     7   main      <NA>
##  8  -101  29.6     1     8   main      <NA>
##  9  -101  29.6     1     9   main      <NA>
## 10  -101  29.6     1    10   main      <NA>
## # ... with 7,233 more rows
map_data("state") %>%
  as_tibble
## # A tibble: 15,537 x 6
##     long   lat group order  region subregion
##  * <dbl> <dbl> <dbl> <int>   <chr>     <chr>
##  1 -87.5  30.4     1     1 alabama      <NA>
##  2 -87.5  30.4     1     2 alabama      <NA>
##  3 -87.5  30.4     1     3 alabama      <NA>
##  4 -87.5  30.3     1     4 alabama      <NA>
##  5 -87.6  30.3     1     5 alabama      <NA>
##  6 -87.6  30.3     1     6 alabama      <NA>
##  7 -87.6  30.3     1     7 alabama      <NA>
##  8 -87.6  30.3     1     8 alabama      <NA>
##  9 -87.7  30.3     1     9 alabama      <NA>
## 10 -87.8  30.3     1    10 alabama      <NA>
## # ... with 15,527 more rows
# county map of illinois
map_data("county", "illinois") %>%
  as_tibble
## # A tibble: 1,697 x 6
##     long   lat group order   region subregion
##  * <dbl> <dbl> <dbl> <int>    <chr>     <chr>
##  1 -91.5  40.2     1     1 illinois     adams
##  2 -90.9  40.2     1     2 illinois     adams
##  3 -90.9  40.2     1     3 illinois     adams
##  4 -90.9  40.1     1     4 illinois     adams
##  5 -90.9  39.8     1     5 illinois     adams
##  6 -90.9  39.8     1     6 illinois     adams
##  7 -91.4  39.8     1     7 illinois     adams
##  8 -91.4  39.8     1     8 illinois     adams
##  9 -91.4  39.9     1     9 illinois     adams
## 10 -91.5  39.9     1    10 illinois     adams
## # ... with 1,687 more rows

Importance of group

library(gapminder)

# no group aesthetic
ggplot(gapminder, aes(year, lifeExp)) +
  geom_line()

# with grouping by country
ggplot(gapminder, aes(year, lifeExp, group = country)) +
  geom_line()

Importance of group

map("state", "michigan")

Drawing the United States

usa <- as_tibble(map_data("usa"))
usa
## # A tibble: 7,243 x 6
##     long   lat group order region subregion
##  * <dbl> <dbl> <dbl> <int>  <chr>     <chr>
##  1  -101  29.7     1     1   main      <NA>
##  2  -101  29.7     1     2   main      <NA>
##  3  -101  29.7     1     3   main      <NA>
##  4  -101  29.6     1     4   main      <NA>
##  5  -101  29.6     1     5   main      <NA>
##  6  -101  29.6     1     6   main      <NA>
##  7  -101  29.6     1     7   main      <NA>
##  8  -101  29.6     1     8   main      <NA>
##  9  -101  29.6     1     9   main      <NA>
## 10  -101  29.6     1    10   main      <NA>
## # ... with 7,233 more rows

Simple black map

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group))

Fixed aspect ratio

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group)) +
  coord_fixed()

Fixed aspect ratio

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3)

Change the colors

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group),
               fill = NA, color = "red") + 
  coord_fixed(1.3)

gg1 <- ggplot() + 
  geom_polygon(data = usa, aes(x = long, y = lat, group = group),
               fill = "violet", color = "blue") + 
  coord_fixed(1.3)
gg1

Always use the group aesthetic

ggplot() + 
  geom_polygon(data = usa, aes(x = long, y = lat),
               fill = "violet", color = "blue") + 
  coord_fixed(1.3)

State maps

states <- as_tibble(map_data("state"))
states
## # A tibble: 15,537 x 6
##     long   lat group order  region subregion
##  * <dbl> <dbl> <dbl> <int>   <chr>     <chr>
##  1 -87.5  30.4     1     1 alabama      <NA>
##  2 -87.5  30.4     1     2 alabama      <NA>
##  3 -87.5  30.4     1     3 alabama      <NA>
##  4 -87.5  30.3     1     4 alabama      <NA>
##  5 -87.6  30.3     1     5 alabama      <NA>
##  6 -87.6  30.3     1     6 alabama      <NA>
##  7 -87.6  30.3     1     7 alabama      <NA>
##  8 -87.6  30.3     1     8 alabama      <NA>
##  9 -87.7  30.3     1     9 alabama      <NA>
## 10 -87.8  30.3     1    10 alabama      <NA>
## # ... with 15,527 more rows

State maps

ggplot(data = states) + 
  geom_polygon(aes(x = long, y = lat, group = group), color = "white") + 
  coord_fixed(1.3)

State maps

ggplot(data = states) + 
  geom_polygon(aes(x = long, y = lat, fill = region, group = group), color = "white") + 
  coord_fixed(1.3) +
  # turn off color legend
  theme(legend.position = "none")

Plot a subset of states

midwest <- filter(states,
                  region %in% c("illinois", "indiana", "iowa",
                                "kansas", "michigan", "minnesota",
                                "missouri", "nebraska", "north dakota",
                                "ohio", "south dakota", "wisconsin"))

ggplot(data = midwest) + 
  geom_polygon(aes(x = long, y = lat, group = group),
               fill = "palegreen", color = "black") + 
  coord_fixed(1.3)

Zoom in on Illinois and its counties

(il_df <- filter(states, region == "illinois"))
## # A tibble: 329 x 6
##     long   lat group order   region subregion
##    <dbl> <dbl> <dbl> <int>    <chr>     <chr>
##  1 -90.6  42.5    12  2951 illinois      <NA>
##  2 -90.4  42.5    12  2952 illinois      <NA>
##  3 -89.9  42.5    12  2953 illinois      <NA>
##  4 -89.8  42.5    12  2954 illinois      <NA>
##  5 -89.4  42.5    12  2955 illinois      <NA>
##  6 -89.4  42.5    12  2956 illinois      <NA>
##  7 -88.9  42.5    12  2957 illinois      <NA>
##  8 -88.8  42.5    12  2958 illinois      <NA>
##  9 -88.7  42.5    12  2959 illinois      <NA>
## 10 -88.3  42.5    12  2960 illinois      <NA>
## # ... with 319 more rows
counties <- as_tibble(map_data("county"))
(il_county <- filter(counties, region == "illinois"))
## # A tibble: 1,696 x 6
##     long   lat group order   region subregion
##    <dbl> <dbl> <dbl> <int>    <chr>     <chr>
##  1 -91.5  40.2   561 21883 illinois     adams
##  2 -90.9  40.2   561 21884 illinois     adams
##  3 -90.9  40.1   561 21885 illinois     adams
##  4 -90.9  39.8   561 21886 illinois     adams
##  5 -90.9  39.8   561 21887 illinois     adams
##  6 -91.4  39.8   561 21888 illinois     adams
##  7 -91.4  39.8   561 21889 illinois     adams
##  8 -91.4  39.9   561 21890 illinois     adams
##  9 -91.5  39.9   561 21891 illinois     adams
## 10 -91.4  39.9   561 21892 illinois     adams
## # ... with 1,686 more rows

Zoom in on Illinois and its counties

il_base <- ggplot(data = il_df,
                  mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

il_base +
  theme_void()

Zoom in on Illinois and its counties

il_base +
  theme_void() + 
  geom_polygon(data = il_county, fill = NA, color = "white") +
  # get the state border back on top
  geom_polygon(color = "black", fill = NA)

(Contiguous) United States

fiftystater

library(fiftystater)

data("fifty_states")
as_tibble(fifty_states)
## # A tibble: 13,694 x 7
##     long   lat order  hole  piece      id     group
##    <dbl> <dbl> <int> <lgl> <fctr>   <chr>    <fctr>
##  1 -85.1  32.0     1 FALSE      1 alabama Alabama.1
##  2 -85.1  31.9     2 FALSE      1 alabama Alabama.1
##  3 -85.1  31.9     3 FALSE      1 alabama Alabama.1
##  4 -85.1  31.8     4 FALSE      1 alabama Alabama.1
##  5 -85.1  31.8     5 FALSE      1 alabama Alabama.1
##  6 -85.1  31.7     6 FALSE      1 alabama Alabama.1
##  7 -85.1  31.7     7 FALSE      1 alabama Alabama.1
##  8 -85.1  31.7     8 FALSE      1 alabama Alabama.1
##  9 -85.1  31.6     9 FALSE      1 alabama Alabama.1
## 10 -85.0  31.6    10 FALSE      1 alabama Alabama.1
## # ... with 13,684 more rows
ggplot(data = fifty_states, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

Using shapefiles

  • .shp - geographic coordinates of the geographic features
  • .dbf - data associated with the geographic features
  • .prj - information about the projection of the coordinates in the shapefile

Importing shapefiles in R

library(rgdal)

usa <- readOGR("../data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "../data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp", layer: "cb_2013_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
str(usa, max.level = 2) # view structure of object
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  52 obs. of  9 variables:
##   ..@ polygons   :List of 52
##   ..@ plotOrder  : int [1:52] 32 48 29 3 18 46 33 22 34 51 ...
##   ..@ bbox       : num [1:2, 1:2] -179.1 17.9 179.8 71.4
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot

Importing shapefiles in R

fortify(usa) %>%
  head()
##    long  lat order  hole piece id group
## 1 -88.5 31.9     1 FALSE     1  0   0.1
## 2 -88.5 31.9     2 FALSE     1  0   0.1
## 3 -88.5 31.9     3 FALSE     1  0   0.1
## 4 -88.5 31.9     4 FALSE     1  0   0.1
## 5 -88.5 32.0     5 FALSE     1  0   0.1
## 6 -88.5 32.0     6 FALSE     1  0   0.1

Importing shapefiles in R

as_tibble(usa@data)
## # A tibble: 52 x 9
##    STATEFP  STATENS    AFFGEOID  GEOID STUSPS        NAME   LSAD
##  *  <fctr>   <fctr>      <fctr> <fctr> <fctr>      <fctr> <fctr>
##  1      01 01779775 0400000US01     01     AL     Alabama     00
##  2      05 00068085 0400000US05     05     AR    Arkansas     00
##  3      06 01779778 0400000US06     06     CA  California     00
##  4      09 01779780 0400000US09     09     CT Connecticut     00
##  5      12 00294478 0400000US12     12     FL     Florida     00
##  6      13 01705317 0400000US13     13     GA     Georgia     00
##  7      16 01779783 0400000US16     16     ID       Idaho     00
##  8      17 01779784 0400000US17     17     IL    Illinois     00
##  9      18 00448508 0400000US18     18     IN     Indiana     00
## 10      20 00481813 0400000US20     20     KS      Kansas     00
## # ... with 42 more rows, and 2 more variables: ALAND <fctr>, AWATER <fctr>

Importing shapefiles in R

# state name
usa %>%
  fortify(region = "NAME") %>%
  head()
##    long  lat order  hole piece      id     group
## 1 -88.5 31.9     1 FALSE     1 Alabama Alabama.1
## 2 -88.5 31.9     2 FALSE     1 Alabama Alabama.1
## 3 -88.5 31.9     3 FALSE     1 Alabama Alabama.1
## 4 -88.5 31.9     4 FALSE     1 Alabama Alabama.1
## 5 -88.5 32.0     5 FALSE     1 Alabama Alabama.1
## 6 -88.5 32.0     6 FALSE     1 Alabama Alabama.1
# FIPS code
usa %>%
  fortify(region = "STATEFP") %>%
  head()
##    long  lat order  hole piece id group
## 1 -88.5 31.9     1 FALSE     1 01  01.1
## 2 -88.5 31.9     2 FALSE     1 01  01.1
## 3 -88.5 31.9     3 FALSE     1 01  01.1
## 4 -88.5 31.9     4 FALSE     1 01  01.1
## 5 -88.5 32.0     5 FALSE     1 01  01.1
## 6 -88.5 32.0     6 FALSE     1 01  01.1
# keep it all
(usa2 <- usa %>%
  fortify(region = "NAME") %>%
  as_tibble() %>%
  left_join(usa@data, by = c("id" = "NAME")))
## # A tibble: 46,174 x 15
##     long   lat order  hole  piece      id     group STATEFP  STATENS
##    <dbl> <dbl> <int> <lgl> <fctr>   <chr>    <fctr>  <fctr>   <fctr>
##  1 -88.5  31.9     1 FALSE      1 Alabama Alabama.1      01 01779775
##  2 -88.5  31.9     2 FALSE      1 Alabama Alabama.1      01 01779775
##  3 -88.5  31.9     3 FALSE      1 Alabama Alabama.1      01 01779775
##  4 -88.5  31.9     4 FALSE      1 Alabama Alabama.1      01 01779775
##  5 -88.5  32.0     5 FALSE      1 Alabama Alabama.1      01 01779775
##  6 -88.5  32.0     6 FALSE      1 Alabama Alabama.1      01 01779775
##  7 -88.5  32.1     7 FALSE      1 Alabama Alabama.1      01 01779775
##  8 -88.4  32.2     8 FALSE      1 Alabama Alabama.1      01 01779775
##  9 -88.4  32.2     9 FALSE      1 Alabama Alabama.1      01 01779775
## 10 -88.4  32.2    10 FALSE      1 Alabama Alabama.1      01 01779775
## # ... with 46,164 more rows, and 6 more variables: AFFGEOID <fctr>,
## #   GEOID <fctr>, STUSPS <fctr>, LSAD <fctr>, ALAND <fctr>, AWATER <fctr>

Plotting shapefiles

ggplot(data = usa2, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

Plotting shapefiles

usa2 <- usa2 %>%
  filter(id != "Alaska", id != "Hawaii", id != "Puerto Rico")

ggplot(data = usa2, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

ggmap

library(ggmap)
get_googlemap("university of chicago", zoom = 12) %>%
  ggmap()

Changing map projections

Changing map projections

map_proj_base <- ggplot(data = usa2,
                        mapping = aes(x = long, y = lat, group = group)) + 
  geom_polygon(color = "black", fill = "gray")

map_proj_base +
  coord_map() +
  ggtitle("Mercator projection (default)")

map_proj_base +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50) +
  ggtitle("Albers equal-area projection")

Adding data to the map

  • Points
  • Symbols
  • Fills
  • Overlay vs. incorporate

airports

library(nycflights13)
airports
## # A tibble: 1,458 x 8
##      faa                           name   lat    lon   alt    tz   dst
##    <chr>                          <chr> <dbl>  <dbl> <int> <dbl> <chr>
##  1   04G              Lansdowne Airport  41.1  -80.6  1044    -5     A
##  2   06A  Moton Field Municipal Airport  32.5  -85.7   264    -6     A
##  3   06C            Schaumburg Regional  42.0  -88.1   801    -6     A
##  4   06N                Randall Airport  41.4  -74.4   523    -5     A
##  5   09J          Jekyll Island Airport  31.1  -81.4    11    -5     A
##  6   0A9 Elizabethton Municipal Airport  36.4  -82.2  1593    -5     A
##  7   0G6        Williams County Airport  41.5  -84.5   730    -5     A
##  8   0G7  Finger Lakes Regional Airport  42.9  -76.8   492    -5     A
##  9   0P2   Shoestring Aviation Airfield  39.8  -76.6  1000    -5     U
## 10   0S9          Jefferson County Intl  48.1 -122.8   108    -8     A
## # ... with 1,448 more rows, and 1 more variables: tzone <chr>

airports

ggplot(airports, aes(lon, lat)) +
  geom_point()

airports + map

ggplot() + 
  coord_map() + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "gray") +
  geom_point(data = airports, aes(x = lon, y = lat), shape = 1)

Crop a map’s limits

ggplot() + 
  coord_map(xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "gray") +
  geom_point(data = airports, aes(x = lon, y = lat), shape = 1)

Change the projection method

ggplot() + 
  coord_map(projection = "albers", lat0 = 25, lat1 = 50,
            xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "gray") +
  geom_point(data = airports, aes(x = lon, y = lat), shape = 1)

Bubble plot

ggplot() + 
  coord_map(xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "white") +
  geom_point(data = airports, aes(x = lon, y = lat, size = alt),
             fill = "grey", color = "black", alpha = .2) +
  # strip out background junk and remove legend
  theme_void() +
  theme(legend.position = "none")

Bubble plot

(airports_n <- flights %>%
  count(dest) %>%
  left_join(airports, by = c("dest" = "faa")))
## # A tibble: 105 x 9
##     dest     n                              name   lat    lon   alt    tz
##    <chr> <int>                             <chr> <dbl>  <dbl> <int> <dbl>
##  1   ABQ   254 Albuquerque International Sunport  35.0 -106.6  5355    -7
##  2   ACK   265                     Nantucket Mem  41.3  -70.1    48    -5
##  3   ALB   439                       Albany Intl  42.7  -73.8   285    -5
##  4   ANC     8        Ted Stevens Anchorage Intl  61.2 -150.0   152    -9
##  5   ATL 17215   Hartsfield Jackson Atlanta Intl  33.6  -84.4  1026    -5
##  6   AUS  2439             Austin Bergstrom Intl  30.2  -97.7   542    -6
##  7   AVL   275        Asheville Regional Airport  35.4  -82.5  2165    -5
##  8   BDL   443                      Bradley Intl  41.9  -72.7   173    -5
##  9   BGR   375                       Bangor Intl  44.8  -68.8   192    -5
## 10   BHM   297                   Birmingham Intl  33.6  -86.8   644    -6
## # ... with 95 more rows, and 2 more variables: dst <chr>, tzone <chr>
ggplot() + 
  coord_map(xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "white") +
  geom_point(data = airports_n, aes(x = lon, y = lat, size = n),
             fill = "grey", color = "black", alpha = .2) +
  theme_void() +
  theme(legend.position = "none")

Choropleth maps

Loading the data

## OGR data source with driver: ESRI Shapefile 
## Source: "../data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp", layer: "cb_2013_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
## OGR data source with driver: ESRI Shapefile 
## Source: "../data/census_bureau/cb_2013_us_county_20m/cb_2013_us_county_20m.shp", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER

Number of foreign-born individuals in each region

(fb_state <- read_csv("../data/census_bureau/ACS_13_5YR_B05012_state/ACS_13_5YR_B05012.csv") %>%
  mutate(rate = HD01_VD03 / HD01_VD01))
## # A tibble: 51 x 10
##         GEO.id GEO.id2  `GEO.display-label` HD01_VD01 HD02_VD01 HD01_VD02
##          <chr>   <chr>                <chr>     <int>     <chr>     <int>
##  1 0400000US01      01              Alabama   4799277      <NA>   4631045
##  2 0400000US02      02               Alaska    720316      <NA>    669941
##  3 0400000US04      04              Arizona   6479703      <NA>   5609835
##  4 0400000US05      05             Arkansas   2933369      <NA>   2799972
##  5 0400000US06      06           California  37659181      <NA>  27483342
##  6 0400000US08      08             Colorado   5119329      <NA>   4623809
##  7 0400000US09      09          Connecticut   3583561      <NA>   3096374
##  8 0400000US10      10             Delaware    908446      <NA>    831683
##  9 0400000US11      11 District of Columbia    619371      <NA>    534142
## 10 0400000US12      12              Florida  19091156      <NA>  15392410
## # ... with 41 more rows, and 4 more variables: HD02_VD02 <int>,
## #   HD01_VD03 <int>, HD02_VD03 <int>, rate <dbl>
(fb_county <- read_csv("../data/census_bureau/ACS_13_5YR_B05012_county/ACS_13_5YR_B05012.csv") %>%
  mutate(rate = HD01_VD03 / HD01_VD01))
## # A tibble: 3,143 x 10
##            GEO.id GEO.id2      `GEO.display-label` HD01_VD01 HD02_VD01
##             <chr>   <chr>                    <chr>     <int>     <int>
##  1 0500000US01001   01001  Autauga County, Alabama     54907        NA
##  2 0500000US01003   01003  Baldwin County, Alabama    187114        NA
##  3 0500000US01005   01005  Barbour County, Alabama     27321        NA
##  4 0500000US01007   01007     Bibb County, Alabama     22754        NA
##  5 0500000US01009   01009   Blount County, Alabama     57623        NA
##  6 0500000US01011   01011  Bullock County, Alabama     10746        NA
##  7 0500000US01013   01013   Butler County, Alabama     20624        NA
##  8 0500000US01015   01015  Calhoun County, Alabama    117714        NA
##  9 0500000US01017   01017 Chambers County, Alabama     34145        NA
## 10 0500000US01019   01019 Cherokee County, Alabama     26034        NA
## # ... with 3,133 more rows, and 5 more variables: HD01_VD02 <int>,
## #   HD02_VD02 <int>, HD01_VD03 <int>, HD02_VD03 <int>, rate <dbl>

State-level choropleth

ggplot(fb_state, aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat)

State-level choropleth

ggplot(fb_state, aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat) +
  scale_fill_continuous(labels = scales::percent) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

County-level choropleth

ggplot(fb_county, aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  scale_fill_continuous(labels = scales::percent) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

cut_interval()

fb_county %>%
  mutate(rate_cut = cut_interval(rate, 6)) %>%
  ggplot(aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate_cut), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

cut_number()

fb_county %>%
  mutate(rate_cut = cut_number(rate, 6)) %>%
  ggplot(aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate_cut), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

Choosing a color palette

RColorBrewer

Sequential

# define ggplot object
seq_plot <- fb_county %>%
  mutate(rate_cut = cut_number(rate, 6)) %>%
  ggplot(aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate_cut), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

seq_plot +
  scale_fill_brewer(palette = "BuGn")

seq_plot +
  scale_fill_brewer(palette = "YlGn")

seq_plot +
  scale_fill_brewer(palette = "Blues")

Qualitative

state_data <- data_frame(name = state.name,
                         region = state.region,
                         subregion = state.division,
                         abb = state.abb) %>%
  bind_cols(as_tibble(state.x77)) %>%
  # get id variable into data frame
  left_join(usa2 %>%
              select(id, NAME) %>%
              distinct(),
            by = c("name" = "NAME")) %>%
  # remove Alaska and Hawaii
  na.omit()

# set region base plot
region_p <- ggplot(state_data, aes(map_id = id)) +
  geom_map(aes(fill = region), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat) +
  labs(fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)
region_p

# try different color brewers
region_p +
  scale_fill_brewer(palette = "Paired")

region_p +
  scale_fill_brewer(palette = "Dark2")

region_p +
  scale_fill_brewer(palette = "Pastel2")

# set subregion base plot
subregion_p <- ggplot(state_data, aes(map_id = id)) +
  geom_map(aes(fill = subregion), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat) +
  labs(fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)
subregion_p

subregion_p +
  scale_fill_brewer(palette = "Paired")

subregion_p +
  scale_fill_brewer(palette = "Set1")

subregion_p +
  scale_fill_brewer(palette = "Pastel1")

Get world fertility rate data

## OGR data source with driver: ESRI Shapefile 
## Source: "../data/nautral_earth/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp", layer: "ne_110m_admin_0_countries"
## with 177 features
## It has 63 fields
## # A tibble: 14,520 x 7
##          `Country Name` `Country Code`
##                   <chr>          <chr>
##  1                Aruba            ABW
##  2              Andorra            AND
##  3          Afghanistan            AFG
##  4               Angola            AGO
##  5              Albania            ALB
##  6           Arab World            ARB
##  7 United Arab Emirates            ARE
##  8            Argentina            ARG
##  9              Armenia            ARM
## 10       American Samoa            ASM
## # ... with 14,510 more rows, and 5 more variables: `Indicator Name` <chr>,
## #   `Indicator Code` <chr>, year <int>, fertility <dbl>,
## #   fertility_rate <fctr>

For loop

for(year in seq(from = 1970, to = 2010, by = 5)){
  p <- fertility %>%
    filter(year == year) %>%
    ggplot(aes(map_id = `Country Code`)) +
    geom_map(aes(fill = fertility_rate), map = world2) +
    expand_limits(x = world2$long, y = world2$lat) +
    scale_fill_brewer(palette = "BuGn") +
    labs(title = "Fertility rate",
         subtitle = year,
         fill = NULL) +
    ggthemes::theme_map() +
    coord_map(projection = "mollweide", xlim = c(-180, 180))
  print(p)
}

purrr::map()

seq(from = 1970, to = 2010, by = 5) %>%
  purrr::map(~ fertility %>%
    filter(year == .x) %>%
    ggplot(aes(map_id = `Country Code`)) +
    geom_map(aes(fill = fertility_rate), map = world2) +
    expand_limits(x = world2$long, y = world2$lat) +
    scale_fill_brewer(palette = "BuGn") +
    labs(title = "Fertility rate",
         subtitle = .x,
         fill = NULL) +
    ggthemes::theme_map() +
    coord_map(projection = "mollweide", xlim = c(-180, 180)))
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

facet_wrap()

ggplot(fertility, aes(map_id = `Country Code`)) +
  facet_wrap(~ year) +
  geom_map(aes(fill = fertility_rate), map = world2) +
  expand_limits(x = world2$long, y = world2$lat) +
  scale_fill_brewer(palette = "BuGn") +
  labs(title = "Fertility rate",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "mollweide", xlim = c(-180, 180)) +
  theme(legend.position = "none")

facet_wrap()

south_africa <- world2 %>%
  filter(id == "ZAF")

fertility %>%
  filter(`Country Name` == "South Africa") %>%
  ggplot(aes(map_id = `Country Code`)) +
  facet_wrap(~ year) +
  geom_map(aes(fill = fertility_rate), map = south_africa) +
  expand_limits(x = south_africa$long, y = south_africa$lat) +
  scale_fill_brewer(palette = "BuGn") +
  labs(title = "Fertility rate",
       subtitle = "South Africa",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map() +
  theme(legend.position = "none")

gganimate

library(gganimate)

p <- ggplot(fertility, aes(map_id = `Country Code`, frame = year)) +
  geom_map(aes(fill = fertility_rate), map = world2) +
  expand_limits(x = world2$long, y = world2$lat) +
  scale_fill_brewer(palette = "BuGn") +
  labs(title = "Fertility rate",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "mollweide", xlim = c(-180, 180)) +
  theme(legend.position = "none")
gganimate(p)