In this exercise we will apply a general form GWR to predict Soil Organic C. We will use two covariates, elevation and NDVI to fit GWR OLS model. The soil organic carbon data (train and test data set) could be found here.
library(GWmodel) ## GW models
library(spgwr) ## GW models
library(spdep) ## Spatial autocorrelation
library(gstat) ## Geostatistics
library(RColorBrewer) ## Visualization
library(classInt) ## Class intervals
library(raster) ## spatial data
library(gridExtra) # Multiple plot
library(ggplot2) # plot
library(latticeExtra) # advance ploting function
library(RStoolbox) # advance raster ploting function
#library(SpatialML) # Geographicall Weigted Random Forest
# Define data folder
dataFolder<-"D:\\Dropbox\\WebSite_Data\\R_Data\\Data_OLS\\"
train<-read.csv(paste0(dataFolder,"train_data.csv"), header= TRUE)
state<-shapefile(paste0(dataFolder,"GP_STATE.shp"))
grid<-read.csv(paste0(dataFolder, "GP_prediction_grid_data.csv"), header= TRUE)
xy <- train[,c(8:9)]
df<-train[,c(8:11,18)]
mf<-train[,c(10:11,18)]
SPDF<-SpatialPointsDataFrame(coords = xy, data = mf)
We will use gwr.sel() function in spgwr packageto find a bandwidth for a given geographically weighted regression by optimizing a selected function. For cross-validation, this scores the root mean square prediction error for the geographically weighted regressions, choosing the bandwidth minimizing this quantity
bwG <- gwr.sel(SOC ~ ELEV +NDVI,
data = SPDF,
gweight = gwr.Gauss,
verbose = FALSE)
bwG
## [1] 168261.2
Then, well use gwr() function to implement the basic geographically weighted regression approach to exploring spatial non-stationarity for given global bandwidth and chosen weighting scheme.
gwrG <- gwr(SOC ~ ELEV +NDVI,
data = SPDF,
bandwidth = bwG,
gweight = gwr.Gauss,
hatmatrix = TRUE)
gwrG
## Call:
## gwr(formula = SOC ~ ELEV + NDVI, data = SPDF, bandwidth = bwG,
## gweight = gwr.Gauss, hatmatrix = TRUE)
## Kernel function: gwr.Gauss
## Fixed bandwidth: 168261.2
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu. Max.
## X.Intercept. -1.5269e+01 -5.0132e+00 -3.3765e+00 -1.9694e+00 8.3425e+00
## ELEV -2.9072e-03 7.9294e-05 1.5306e-03 2.5182e-03 8.4633e-03
## NDVI -7.6028e-01 1.2401e+01 1.5900e+01 1.9333e+01 3.7073e+01
## Global
## X.Intercept. -4.0684
## ELEV 0.0013
## NDVI 18.7817
## Number of data points: 368
## Effective number of parameters (residual: 2traceS - traceS'S): 26.93114
## Effective degrees of freedom (residual: 2traceS - traceS'S): 341.0689
## Sigma (residual: 2traceS - traceS'S): 3.769681
## Effective number of parameters (model: traceS): 19.62378
## Effective degrees of freedom (model: traceS): 348.3762
## Sigma (model: traceS): 3.729936
## Sigma (ML): 3.629123
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 2036.859
## AIC (GWR p. 96, eq. 4.22): 2012.66
## Residual sum of squares: 4846.757
## Quasi-global R2: 0.4846599
df$NDVI_slope<-gwrG$SDF$NDVI # Slope NDVI
df$ELEV_slope<-gwrG$SDF$ELEV # Slope Elevation
df$R2<-gwrG$SDF$localR2 # Local R2
rgb.palette.col <- colorRampPalette(c("red","yellow", "green", "blue"),space = "rgb")
p1<-levelplot(NDVI_slope~x+y, df,cex=0.6,
aspect = "iso",main= "Local slope: NDVI",
xlab="", ylab="",
scales=list(y=list(draw=T,cex=0.5,rot=90, tck= 0.5),x=list(draw=T, cex=0.5,tck= 0.6)),
par.settings=list(axis.line=list(col="grey",lwd=0.5)),
col.regions=rgb.palette.col (20),
colorkey=list(space="right",width=1.2,labels=list(cex=1.2)),
panel = function(...) {
panel.levelplot.points(...)
sp.polygons(state,lty=1,lwd=0.5,col="grey30")
})
p2<-levelplot(ELEV_slope~x+y, df,cex=0.6,
aspect = "iso",main= "Local slope: Elevation",
xlab="", ylab="",
scales=list(y=list(draw=T,cex=0.5,rot=90, tck= 0.5),x=list(draw=T, cex=0.5,tck= 0.6)),
par.settings=list(axis.line=list(col="grey",lwd=0.5)),
col.regions=rgb.palette.col (20),
colorkey=list(space="right",width=1.2,labels=list(cex=1.2)),
panel = function(...) {
panel.levelplot.points(...)
sp.polygons(state,lty=1,lwd=0.5,col="grey30")
})
p3<-levelplot(R2~x+y, df,cex=0.6,
aspect = "iso",main= "Local R2",
xlab="", ylab="",
scales=list(y=list(draw=T,cex=0.5,rot=90, tck= 0.5),x=list(draw=T, cex=0.5,tck= 0.6)),
par.settings=list(axis.line=list(col="grey",lwd=0.5)),
col.regions=rgb.palette.col (20),
colorkey=list(space="right",width=1.2,labels=list(cex=1.2)),
panel = function(...) {
panel.levelplot.points(...)
sp.polygons(state,lty=1,lwd=0.5,col="grey30")
})
grid.arrange(p1, p2,p3, ncol=3)
We can use use fit.points as additional argument to predict SOC at grid locations.
coordinates(grid) = ~x+y
g.soc <- gwr(SOC ~ ELEV +NDVI,
data = SPDF,
bandwidth = bwG,
fit.points=grid[, 2:3])
g.soc
## Call:
## gwr(formula = SOC ~ ELEV + NDVI, data = SPDF, bandwidth = bwG,
## fit.points = grid[, 2:3])
## Kernel function: gwr.Gauss
## Fixed bandwidth: 168261.2
## Fit points: 10674
## Summary of GWR coefficient estimates at fit points:
## Min. 1st Qu. Median 3rd Qu. Max.
## X.Intercept. -1.5923e+01 -4.9354e+00 -3.3336e+00 -2.0286e+00 9.1539
## ELEV -3.6870e-03 2.0298e-04 1.6168e-03 2.4894e-03 0.0116
## NDVI -1.3422e+00 1.1993e+01 1.5231e+01 1.8892e+01 37.6824
grid$NDVI_slope<-g.soc$SDF$NDVI # Slope NDVI
grid$ELEV_slope<-g.soc$SDF$ELEV # Slope Elevation
grid$Intercept<-g.soc$SDF$'(Intercept)' # Local Intercept
NDVI.slope<-rasterFromXYZ(as.data.frame(grid)[, c("x", "y", "NDVI_slope")])
ELEV.slope<-rasterFromXYZ(as.data.frame(grid)[, c("x", "y", "ELEV_slope")])
Intercept<-rasterFromXYZ(as.data.frame(grid)[, c("x", "y", "Intercept")])
p1<-ggR(Intercept, geom_raster = TRUE) +
scale_fill_gradientn("", colours = c("orange", "yellow", "green", "sky blue","blue"))+
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+
ggtitle("Intercept")+
theme(plot.title = element_text(hjust = 0.5))
p2<-ggR(NDVI.slope, geom_raster = TRUE) +
scale_fill_gradientn("", colours = c("blue", "green","yellow", "orange"))+
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+
ggtitle("Slope - NDVI")+
theme(plot.title = element_text(hjust = 0.5))
p3<-ggR(ELEV.slope, geom_raster = TRUE) +
scale_fill_gradientn("", colours = c("blue", "green","yellow", "orange"))+
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+
ggtitle("Slope - Elevation")+
theme(plot.title = element_text(hjust = 0.5))
grid.arrange(p1,p2,p3, ncol = 3) # Multiplot
In GWModel package allow to select a subset of independent variable with a pseudo stepwise procedure in a forward direction.
xy <- train[,c(8:9)]
df.1<-train[,c(8:19)]
mf.1<-train[,c(10:19)]
SPDF.1<-SpatialPointsDataFrame(coords = xy, data = mf.1)
names(SPDF.1)
## [1] "SOC" "ELEV" "Aspect" "Slope" "TPI"
## [6] "K_Factor" "MAP" "MAT" "NDVI" "Slit_Clay"
The function model.selection.gwr(), whose AICc outputs are sorted using model.sort.gwr and then inputted to model.view.gwr to provide a useful vi-sualization of the AICc data.
RESPONSE <- "SOC"
PREDICTORS <- c("ELEV", "Aspect","Slope","TPI","K_Factor","MAP","MAT","NDVI", "Slit_Clay")
model.sel <- model.selection.gwr(RESPONSE, PREDICTORS,
data = SPDF.1,
kernel = "bisquare",
adaptive = TRUE,
bw = 231)
sorted.models <- model.sort.gwr(model.sel, numVars = length(PREDICTORS),ruler.vector = model.sel[[2]][,2])
model.list <- sorted.models[[1]]
model.view.gwr(RESPONSE, PREDICTORS, model.list = model.list)
Figure below presents a circle view of the 45 GW regressions (numbered 1 to 45) that result from stepwise procedure. Here the dependent variable is located in the centre of the chart and the independent variables are represented as nodes differentiated by shapes and colors.
plot(sorted.models[[2]][,2], col = "black", pch = 20, lty = 5,
main = "Alternative view of GWR model selection procedure",
ylab = "AICc", xlab = "Model number", type = "b")
Figure displays the corresponding AICc values from the same as of circle figure. The two graphs work together, explaining model performance when more and more variables are introduced. Clearly, AICc values continue to fall until all independent variables are included.
gwr.bw <- bw.gwr(SOC~ELEV+Aspect+Slope+TPI+K_Factor+MAP+MAT+NDVI+Slit_Clay,
data = SPDF.1,
approach = "AICc",
kernel = "bisquare",
adaptive = TRUE)
## Adaptive bandwidth (number of nearest neighbours): 235 AICc value: 2041.217
## Adaptive bandwidth (number of nearest neighbours): 153 AICc value: 2048.422
## Adaptive bandwidth (number of nearest neighbours): 286 AICc value: 2042.04
## Adaptive bandwidth (number of nearest neighbours): 203 AICc value: 2041.382
## Adaptive bandwidth (number of nearest neighbours): 254 AICc value: 2041.324
## Adaptive bandwidth (number of nearest neighbours): 222 AICc value: 2041.512
## Adaptive bandwidth (number of nearest neighbours): 241 AICc value: 2041.294
## Adaptive bandwidth (number of nearest neighbours): 229 AICc value: 2041.29
## Adaptive bandwidth (number of nearest neighbours): 236 AICc value: 2041.291
## Adaptive bandwidth (number of nearest neighbours): 231 AICc value: 2041.213
## Adaptive bandwidth (number of nearest neighbours): 232 AICc value: 2041.292
## Adaptive bandwidth (number of nearest neighbours): 234 AICc value: 2041.295
## Adaptive bandwidth (number of nearest neighbours): 232 AICc value: 2041.292
## Adaptive bandwidth (number of nearest neighbours): 233 AICc value: 2041.257
## Adaptive bandwidth (number of nearest neighbours): 232 AICc value: 2041.292
## Adaptive bandwidth (number of nearest neighbours): 232 AICc value: 2041.292
## Adaptive bandwidth (number of nearest neighbours): 231 AICc value: 2041.213
gwr.bw
## [1] 231
The optimal bandwidth is found at N = 231
gwr.res <- gwr.basic(SOC~ELEV+Aspect+Slope+TPI+K_Factor+MAP+MAT+NDVI+Slit_Clay,
data = SPDF.1,
bw = gwr.bw,
kernel = "bisquare",
adaptive = TRUE,
F123.test = TRUE)
## Warning in pf(F2, F2.DF[1], F2.DF[2], lower.tail = FALSE): NaNs produced
gwr.res
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2019-10-18 13:00:33
## Call:
## gwr.basic(formula = SOC ~ ELEV + Aspect + Slope + TPI + K_Factor +
## MAP + MAT + NDVI + Slit_Clay, data = SPDF.1, bw = gwr.bw,
## kernel = "bisquare", adaptive = TRUE, F123.test = TRUE)
##
## Dependent (y) variable: SOC
## Independent variables: ELEV Aspect Slope TPI K_Factor MAP MAT NDVI Slit_Clay
## Number of data points: 368
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.0688 -2.0901 -0.4861 1.4701 17.5191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9620569 2.9314299 0.328 0.742962
## ELEV -0.0001908 0.0006758 -0.282 0.777880
## Aspect 0.0015092 0.0090800 0.166 0.868083
## Slope 0.1405423 0.0729235 1.927 0.054739 .
## TPI 0.0250793 0.0561447 0.447 0.655368
## K_Factor -2.8115398 3.4926100 -0.805 0.421356
## MAP 0.0048705 0.0019738 2.468 0.014071 *
## MAT -0.3014147 0.0980634 -3.074 0.002277 **
## NDVI 8.9768177 2.4390791 3.680 0.000269 ***
## Slit_Clay 0.0324410 0.0167687 1.935 0.053824 .
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 3.847 on 358 degrees of freedom
## Multiple R-squared: 0.4366
## Adjusted R-squared: 0.4224
## F-statistic: 30.82 on 9 and 358 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 5298.888
## Sigma(hat): 3.804975
## AIC: 2047.857
## AICc: 2048.599
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: bisquare
## Adaptive bandwidth: 231 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -3.44262655 -0.60164873 0.97765472 2.01764374 5.2844
## ELEV -0.00298087 -0.00095504 0.00037826 0.00069260 0.0015
## Aspect -0.01742220 -0.00177237 0.00559962 0.01043209 0.0167
## Slope -0.12830677 -0.02740338 0.01353384 0.09438182 0.1526
## TPI -0.11229518 -0.00603647 0.03383873 0.18720680 0.2984
## K_Factor -8.35475064 -5.53848256 -3.76182795 0.99261921 10.2079
## MAP 0.00065770 0.00463274 0.00617414 0.01181778 0.0154
## MAT -0.69521018 -0.53935868 -0.45411911 -0.29010281 -0.0065
## NDVI 1.56293955 5.10399371 9.41043826 12.38995543 16.9790
## Slit_Clay -0.05878864 0.01178128 0.03674368 0.04637966 0.0617
## ************************Diagnostic information*************************
## Number of data points: 368
## Effective number of parameters (2trace(S) - trace(S'S)): 36.82948
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 331.1705
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 2041.213
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 2004.854
## Residual sum of squares: 4627.128
## R-square value: 0.5080123
## Adjusted R-square value: 0.4531327
## ******************F test results of GWR calibration********************
## ---F1 test (Leung et al. 2000)
## F1 statistic Numerator DF Denominator DF Pr(>)
## 0.94397 Inf 358 0.2108
## ---F2 test (Leung et al. 2000)
## F2 statistic Numerator DF Denominator DF Pr(>)
## 1.6916 -2.3652 358 NA
## ---F3 test (Leung et al. 2000)
## F3 statistic Numerator DF Denominator DF Pr(>)
## Intercept 0.14681 158.28842 Inf 1.0000000
## ELEV 1.12412 172.73077 Inf 0.1262193
## Aspect 0.59043 123.24461 Inf 0.9999146
## Slope 0.41990 96.88100 Inf 0.9999999
## TPI 2.85866 39.75585 Inf 4.851e-09 ***
## K_Factor 1.16978 109.24648 Inf 0.1084885
## MAP 2.09811 78.40929 Inf 4.626e-08 ***
## MAT 0.62273 158.20877 Inf 0.9999447
## NDVI 1.99276 60.04008 Inf 7.666e-06 ***
## Slit_Clay 1.48229 148.88508 Inf 0.0001206 ***
## ---F4 test (GWR book p92)
## F4 statistic Numerator DF Denominator DF Pr(>)
## 0.87323 331.17052 358 0.105
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ***********************************************************************
## Program stops at: 2019-10-18 13:00:45
df.1$NDVI_slope<-gwr.res$SDF$NDVI # Slope NDVI
df.1$MAT_slope<-gwr.res$SDF$MAT # Slope Elevation
df.1$R2<-gwr.res$SDF$Local_R2 # Local R2
rgb.palette.col <- colorRampPalette(c("red","yellow", "green", "blue"),space = "rgb")
p1.1<-levelplot(NDVI_slope~x+y, df.1,cex=0.6,
aspect = "iso",main= "Local slope: NDVI",
xlab="", ylab="",
scales=list(y=list(draw=T,cex=0.5,rot=90, tck= 0.5),x=list(draw=T, cex=0.5,tck= 0.6)),
par.settings=list(axis.line=list(col="grey",lwd=0.5)),
col.regions=rgb.palette.col (20),
colorkey=list(space="right",width=1.2,labels=list(cex=1.2)),
panel = function(...) {
panel.levelplot.points(...)
sp.polygons(state,lty=1,lwd=0.5,col="grey30")
})
p2.1<-levelplot(MAT_slope~x+y, df.1,cex=0.6,
aspect = "iso",main= "Local slope: Temperature",
xlab="", ylab="",
scales=list(y=list(draw=T,cex=0.5,rot=90, tck= 0.5),x=list(draw=T, cex=0.5,tck= 0.6)),
par.settings=list(axis.line=list(col="grey",lwd=0.5)),
col.regions=rgb.palette.col (20),
colorkey=list(space="right",width=1.2,labels=list(cex=1.2)),
panel = function(...) {
panel.levelplot.points(...)
sp.polygons(state,lty=1,lwd=0.5,col="grey30")
})
p3.1<-levelplot(R2~x+y, df.1,cex=0.6,
aspect = "iso",main= "Local R2",
xlab="", ylab="",
scales=list(y=list(draw=T,cex=0.5,rot=90, tck= 0.5),x=list(draw=T, cex=0.5,tck= 0.6)),
par.settings=list(axis.line=list(col="grey",lwd=0.5)),
col.regions=rgb.palette.col (20),
colorkey=list(space="right",width=1.2,labels=list(cex=1.2)),
panel = function(...) {
panel.levelplot.points(...)
sp.polygons(state,lty=1,lwd=0.5,col="grey30")
})
grid.arrange(p1.1, p2.1,p3.1, ncol=3)
rm(list = ls())