## Table joins # Prevalence of low birth weight among new-borns # Originally sourced from "https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/UNICEF,NUTRITION,1.0/.NT_BW_LBW......?format=csv" LBW <-read.csv("https://github.com/HughSt/HughSt.github.io/raw/master/course_materials/week2/LBW_global.csv") # Filter for 2015 LBW_2015 <- subset(LBW, TIME_PERIOD==2015) # Get countries spatial data library(rworldmap) countries <- rworldmap::countriesCoarse # To join data, you need a common field. In this case, we are going to use country name. # Its always preferable to use codes instead of names/characters as they are less ambiguous. # But in this case, we are forced to used names. # To see whether the country names in your data exist in your spatial # data, you can use the %in% commant LBW_2015$Country %in% countries$NAME # To see which are not LBW_2015$Country[!LBW_2015$Country %in% countries$NAME] # You might have to manually change these to match as the reason # they are not matching is likely a minor spelling issue. For example countries$NAME[27] LBW_2015$Country[27] # In this case, we actually have the 3 letter ISO country codes in both # datasets head(LBW_2015$REF_AREA) head(countries$ISO_A3) LBW_2015$REF_AREA %in% countries$ISO_A3 # If the matching field in both objects has the same name, # you can just use the `by` argument. If they are different # use `by.x` and `by.y` countries_merged <- merge(countries, LBW_2015, by.x = "ISO_A3", by.y = "REF_AREA", all.x = T) dim(countries) dim(countries_merged) head(countries_merged) # And map col_pal <- colorNumeric("Reds", countries_merged$OBS_VALUE) leaflet() %>% addProviderTiles("CartoDB.DarkMatter") %>% addPolygons(data = countries_merged, col = col_pal(countries_merged$OBS_VALUE), fillOpacity = 0.8, weight=2) %>% addLegend(pal = col_pal, values = countries_merged$OBS_VALUE, title = "Prevalence of LBW") # There are some useful functions to tweak character strings # to make them match. For example, let's imagine our LBW_2015 # dataset doesn't have the `REF_AREA` field and instead has # something that looks like this LBW_2015$REF_AREA_2 <- paste0(tolower(LBW_2015$REF_AREA), ".") head(LBW_2015$REF_AREA_2) # In this case, we need to remove the "." and make # the characters uppercase in order to match with # the ISO3 field in the `countries` object # First remove the ".". In this case, as each # entry is 3 characters, we can just select the # first 3 LBW_2015$REF_AREA_2_custom <- substr(LBW_2015$REF_AREA_2, 1, 3) head(LBW_2015$REF_AREA_2_custom ) # Now we need to make it uppercase LBW_2015$REF_AREA_2_custom <- toupper(LBW_2015$REF_AREA_2_custom ) head(LBW_2015$REF_AREA_2_custom ) # Example of using gsub to substitute characters with other characters (or whitespace) # replace all instances of "a" with "-" gsub("a", "-", LBW_2015$REF_AREA_2) # IF you are working with "." this is a special character and # you need to wrap it gsub("\\Q.\\E", "-", LBW_2015$REF_AREA_2) # The grep function is also useful for # doing more of a fuzzy match. e.g. find # which elements of LBW_2015$REF_AREA_2 # have an "a" in them grep("a", LBW_2015$REF_AREA_2)