# title: "Raster resampling" # This is a deep dive on raster resampling - an important aspect of spatial analyses. # There are a number of ways that rasters can be resampled and the method you use is dependent on the data. # Why do resampling - because you might need to use two or more rasters in the same analysis # and they probably won't be the same resolution library(raster) library(rgeos) #### Load and check data #### # Let's load up the data we need. For this example, we are going to load up 3 different rasters of data from Burkina Faso. Elevation (continuous outcome), land use (categorical) and population (count - from the [WorldPop project](https://www.worldpop.org/)) BF_elev <- raster::getData("alt", country="BF") BF_land_use <- raster("https://github.com/phw272c/phw272c.github.io/raw/master/data/BF_land_use.tif") pop <- raster("https://github.com/phw272c/phw272c.github.io/raw/master/data/BF_pop.tif") # let's check out what's in these "things"called rasters #dimensions, resolution (in degrees), extent, projection, values BF_elev BF_land_use pop # First thing we notice is that, in this case, `BF_land_use` isn't in the same projection as `BF_elev` and `pop`, so let's reproject. # we also need to know something about the data we are importing # Elevation is obviously continuous # let's take a look at summary of the elevation values # we can do a frequency table of values freq(BF_elev) # maybe just look at a fraction of the cell values to get an idea summary(sampleRegular(BF_elev, size=10000)) # or alternatively cellStats(BF_elev, quantile) # Land use is categorical but let's inspect to make sure freq(BF_land_use) table(sampleRegular(BF_land_use, size=10000)) # population is also continuous but let's check what we have freq(pop) cellStats(pop, quantile) # oops, that negative value doesn't look good # let's take a closer look table(values(pop)<0) #alternatively table(pop[]<0) # 3 cells, let's ignore that in our toy example # remember from before that the projections were not the same so we need to reproject crs(BF_land_use) crs(BF_elev) # reproject - can time a little time, ~16 second on my machine system.time( BF_land_use <- projectRaster(BF_land_use, crs=crs(BF_elev), method="ngb") ) # Remember that as land use is categorical, we have to specify `method = "ngb"` so that nearest neighbor interpolation is used during reprojection, otherwise it will try to use binlinear interpolation which will result in meaningless values. # !unfortunately, you have to be careful, because projectRaster has bilinear as a default and will use it with no error if you don't specify method #### resample elevation raster #### #For demo purposes, we are going to crop the rasters across an arbitrary bounding box. # notice that the full extent of BF_elev is available in the raster summary BF_elev # one way to use the function extent is to give it column and row numbers # it will return the extent extent(BF_elev,499,507,301,309) #now we are going to crop the raster this extent using crop BF_elev_crop <- crop(BF_elev, extent(BF_elev,499,507, 301,309)) #and crop the other rasters to the same extents pop_crop <- crop(pop, extent(BF_elev_crop)) BF_land_use_crop <- crop(BF_land_use, extent(BF_elev_crop)) # Now let's create a dummy lower resolution raster we are going to resample to # this creates a lower resolution raster aggregating a number of cells horizontally and vertically using a specified function # in this case, the default is mean which is fine for elevation new_raster <- aggregate(BF_elev_crop, fact=3) # as you might expect this function shifts the raster new_raster <- raster::shift(new_raster, dx=0.004, dy=0.002) #and this creates lines out of this lower resolution dummy raster new_raster_outline <- rasterToPolygons(new_raster, dissolve=TRUE) #Let's plot the cropped elevation raster and overlay the grid of the new raster we are resample to plot(BF_elev_crop) # plot cell outlines lines(new_raster_outline) #Now let's resample elevation to the new raster. Here we can specify `method = bilinear` because this type of interpolation makes sense for a continuous outcome like elevation. In fact `method = binlinear` is the default so we don't really need to specify. # bilinear is linear interpolation in two dimensions, # there's a good visual explainer on the wiki https://en.wikipedia.org/wiki/Bilinear_interpolation BF_elev_crop_resampled_bilin <- resample(BF_elev_crop, new_raster, method="bilinear") plot(BF_elev_crop_resampled_bilin) lines(new_raster_outline) BF_elev_crop BF_elev_crop_resampled_bilin # now display these side by side with the same legend brks <- seq(270, 350, by=5) nb <- length(brks)-1 cols <- rev(terrain.colors(nb)) par(mfrow=c(1,2)) plot(BF_elev_crop, breaks=brks, col=cols, lab.breaks=brks, zlim=c(270,350), main='Original') plot(BF_elev_crop_resampled_bilin, breaks=brks, col=cols, lab.breaks=brks, zlim=c(270,350), main='Resampled') # reset to single graph plotting par(mfrow=c(1,1)) #### Resampling land use raster #### # Now let's repeat the resampling process with the land use raster. First let's plot and take a look at the frequency of different land use categories. plot(BF_land_use_crop) BF_land_use_crop table(BF_land_use_crop[]) #As this is categorical, and given that new raster is coarser (lower resolution), first aggregate using the mode (i.e. most frequent) class. # Here we aggregate by a factor of 9 because the resolution of `the`new_raster` is 9 times coarser than BF_land_use_crop BF_land_use_crop_aggregated <- aggregate(BF_land_use_crop, fun='modal', fact = 9) plot(BF_land_use_crop_aggregated) lines(new_raster_outline) #Now that the raster is at the right resolution, let's resample to the same grid BF_land_use_crop_aggregated_resamp <- resample(BF_land_use_crop_aggregated, new_raster, method = "ngb") plot(BF_land_use_crop_aggregated_resamp) lines(new_raster_outline) #### Resampling population raster #### #Now we have resampled a continuous and categorical raster, let's repeat on population. # The raw data in the `pop_crop` shows the number of individuals per cell. If we want to resample to a coarser resolution, we are probably most interested in maintaining these as counts and # therefore we want to sum the numbers as opposed to use interpolation or nearest neighbour. #Let's have a look at the population raster plot(pop_crop, col=topo.colors(64)) cellStats(pop_crop, sum) # As for the land use example, the new raster is at lower resolution. Therefore have to aggregate first. # As we want total population to remain the same, aggregate by summing population pop_crop_aggregated <- aggregate(pop_crop, fact = 3, fun = sum) #3.012048 # Check the total population is the same as the original raster cellStats(pop_crop_aggregated, sum) # Plot with new raster cell outlines plot(pop_crop_aggregated, col=topo.colors(64)) lines(new_raster_outline) #Now that the raster is at the right resolution, we can use the resample function to estimate the counts in the new grid pop_crop_aggregated_resamp <- resample(pop_crop_aggregated, new_raster) plot(pop_crop_aggregated_resamp, col=topo.colors(64)) lines(new_raster_outline) #The total should more or less be equal to the original - its not exact as we have to use interpolation to predict populations in the new cell. cellStats(pop_crop_aggregated_resamp, sum) # Not bad..