## Code for Chapter 2 of An Introduction to R for Spatial Analysis and Mapping # Lex Comber & Chris Brunsdon # examples of simple assignment x <- 5 y <- 4 # the variables can be used in other operations x+y # including defining new variables z <- x + y z # which can then be passed to other functions sqrt(z) # example of Vector assignment hospital.distances <- c(4.3, 7.1, 6.3, 5.2, 3.2, 2.1) hospital.distances # multiplied by 2 hospital.distances*2 # squared (to the power of 2) hospital.distances^2 sum(hospital.distances) mean(hospital.distances) max.dist <- max(hospital.distances) max.dist hospital.distances hospital.distances [1] # first element hospital.distances[1:3] # a subset of elements 1 to 3 sqrt(hospital.distances[1:3]) # square roots of the subset hospital.distances[c(5, 3, 2)] # elements 5,3,2 (note the ordering) # example of character assignment name <- "Lex Comber" name # multiple elements assigned to a vector of character variables cities <- c("Exeter","Leeds","Leicester","Liverpool","London","Newcastle") cities length(cities) # an example of a logical vector northern <- c(FALSE, TRUE, FALSE, TRUE, FALSE, TRUE) northern # this can be used to subset other variables cities[northern] # creates an empty character vector character(length = 8) # conversion as.character(hospital.distances) # tests is.character(8) is.character("8") # creates an empty character vector numeric(length = 8) # conversions as.numeric(c("1980","-8","Geography")) as.numeric(c(FALSE,TRUE)) # tests is.numeric(hospital.distances) is.numeric(c(8, 8, 8, "8")) logical(length = 7) # conversion as.logical(c(7, 5, 0, -4, 5, 0.5)) # TRUE and FALSE can be converted to 1 and 0 as.logical(c(7, 5, 0, -4, 5, 0.5)) * 1 as.logical(c(7, 5, 0, -4, 5, 0.5)) + 0 # different ways to declare TRUE and FALSE as.logical(c("True","T","FALSE","Raspberry","9","0", 0)) # data from before hospital.distances # a logical test index <- (hospital.distances > 5) index # used to subset data hospital.distances[index] # and to undertake operations mean(hospital.distances) mean(hospital.distances[index]) # defining vectors vector(mode = "numeric", length = 8) vector(length = 8) a = 1:6 a # testing and conversion is.vector(a) is.vector(cities) is.vector(hospital.distances) # combine equal length vectors tmp <- data.frame(ID = a, Place = cities, Dist = hospital.distances) tmp is.vector(tmp) as.vector(tmp) as.vector(hospital.distances) as.vector(cities) # defining matrices matrix(ncol = 2, nrow = 10) matrix(hospital.distances) matrix(cities, ncol = 2) matrix(cities, ncol = 2, byrow = TRUE) # conversion and test as.matrix(6:3) is.matrix(as.matrix(6:3)) flow <- matrix(c(2000, 1243, 543, 1243, 212, 545, 654, 168, 109), nrow = 3, ncol = 3, byrow=TRUE) # Rows and columns can have names, not just 1,2,3,... colnames(flow) <- c("Leeds", "Maynooth"," Elsewhere") rownames(flow) <- c("Leeds", "Maynooth", "Elsewhere") # examine the matrix flow # and functions exist to summarise outflows <- rowSums(flow) outflows z <- c(6,7,8) names(z) <- c("Newcastle","London","Manchester") z # a vector assignment house.type <- c("Bungalow", "Flat", "Flat", "Detached", "Flat", "Terrace", "Terrace") # a factor assignment house.type <- factor(c("Bungalow", "Flat", "Flat", "Detached", "Flat", "Terrace", "Terrace"), levels=c("Bungalow","Flat","Detached","Semi","Terrace")) house.type # table can be used to summarise table(house.type) # 'levels' control what can be assigned house.type <- factor(c("People Carrier", "Flat", "Flat", "Hatchback", "Flat", "Terrace", "Terrace"), levels=c("Bungalow","Flat","Detached","Semi","Terrace")) house.type income <-factor(c("High", "High", "Low", "Low", "Low", "Medium", "Low", "Medium"), levels=c("Low", "Medium", "High")) income > "Low" # 'levels' in 'ordered' defines a relative order income <-ordered(c("High", "High", "Low", "Low", "Low", "Medium", "Low", "Medium"), levels=c("Low", "Medium", "High")) income > "Low" sort(income) tmp.list <- list("Lex Comber",c(2015, 2024), "Lecturer", matrix(c(6,3,1,2), c(2,2))) tmp.list # elements of the list can be selected tmp.list[[4]] employee <- list(name="Lex Comber", start.year = 2015, position="Professor") employee append(tmp.list, list(c(7,6,9,1))) # lappy with different functions lapply(tmp.list[[2]], is.numeric) lapply(tmp.list, length) employee <- list(name="Lex Comber", start.year = 2015, position="Professor") class(employee) <- "staff" print.staff <- function(x) { cat("Name: ",x$name,"\n") cat("Start Year: ",x$start.year,"\n") cat("Job Title: ",x$position,"\n") } # an example of the print class print(employee) print(unclass(employee)) new.staff <- function(name,year,post) { result <- list(name=name, start.year=year, position=post) class(result) <- "staff" return(result)} leeds.uni <- vector(mode='list',3) # assign values to elements in the list leeds.uni[[1]] <- new.staff("Malleson, Nick", 2016,"Professor") leeds.uni[[2]] <- new.staff("Comber, Lex", 2015,"Professor") leeds.uni[[3]] <- new.staff("Langlands, Alan", 2014,"VC") leeds.uni df <- data.frame(dist = seq(0,400, 100), city = c("Leeds", "Nottingham", "Leicester", "Durham", "Newcastle")) str(df) # package installation checks if (!is.element("tidyverse", installed.packages())) install.packages("tidyverse", dep = T) library(tidyverse) tb <- tibble(dist = seq(0,400, 100), city = c("Leeds", "Nottingham", "Leicester", "Durham", "Newcastle")) df tb df$ci tb$ci # 1 column df[,2] tb[,2] class(df[,2]) class(tb[,2]) # 2 columns df[,1:2] tb[,1:2] class(df[,1:2]) class(tb[,1:2]) data.frame(tb) as_tibble(df) names() colnames() rownames() length() ncol() nrow() cbind(df, Pop = c(700,250,230,150,1200)) cbind(tb, Pop = c(700,250,230,150,1200)) vignette("tibble") access <- factor(c("very good","good","very good","average", "bad","very good","average","bad", "very good","very good","average","bad","bad"), levels=c("very good","good","average","bad","very bad")) # access[4] <- "poor" # access access <- factor(c("very good","good","very good","average", "bad","very good","average","bad", "very good","very good","average","bad","bad"), levels=c("very good","good","average","bad","very bad")) table(access) access2 <- c("very good","good","average","bad", "very good","average","bad","very good", "very good","average","bad") table(access2) house.type <- factor(c("owned","owned","rented","owned", "other","rented","other","owned", "rented","owned","owned","owned","rented"), levels=c("owned","rented","other")) table(house.type, access) crosstab <- table(house.type, access) bedrooms <- ordered(c("1 bedroom","2 bedrooms","1 bedroom","2 bedrooms", "3+ bedrooms","2 bedrooms","3+ bedrooms","1 bedroom", "2 bedrooms","1 bedroom", "1 bedroom","2 bedrooms","2 bedrooms"), levels=c("1 bedroom","2 bedrooms","3+ bedrooms")) bedrooms > "1 bedroom" dim(crosstab) # Matrix dimensions rowSums(crosstab) # Row sums colnames(crosstab) # Column names apply(crosstab, 1, max) apply(crosstab,2,max) example <- c(1.4,2.6,1.1,1.5,1.2) # created a data frame df <- data.frame(dist = seq(0,400, 100), city = c("Leeds", "Nottingham", "Leicester", "Durham", "Newcastle")) tmp <- data.frame(ID = a, Place = cities, Dist = hospital.distances) # used cbind to add a column cbind(df, Pop = c(700,250,230,150,1200)) cbind(tb, Pop = c(700,250,230,150,1200)) # create data frame df <- data.frame(ID = 1:length(access), access, bedrooms, house.type) # have a look df if (!is.element(c("cols4all"), installed.packages())) install.packages("cols4all", dep = T) library(cols4all) if (!is.element("devtools", installed.packages())) install.packages("devtools", dep = T) devtools::install_github("lexcomber/Intro2R4SpatAnal") library(Intro2R4SpatAnal) x1 <- rnorm(100) y1 <- rnorm(100) plot(x1,y1) plot(x1, y1, pch=16, col='red') x2 <- seq(0,2*pi,len=100) y2 <- sin(x2) plot(x2, y2, type='l') plot(x2, y2, type='l', lwd=3, col='darkgreen') plot(x2, y2, type='l', col='darkgreen', lwd=3, ylim=c(-1.2,1.2)) y2r <- y2 + rnorm(100,0,0.1) points(x2,y2r, pch=16, col='darkred') y4 <- cos(x2) plot(x2, y2, type='l', lwd=3, col='darkgreen') lines(x2, y4, lwd=3, lty=2, col='darkblue') x2 <- seq(0,2*pi,len=100) y2 <- sin(x2) y4 <- cos(x2) # specify the plot layout and order par(mfrow = c(1,2)) # plot #1 plot(y2, y4) polygon(y2, y4, col='lightgreen') # plot #2 plot(y2, y4, asp=1, type='n') polygon(y2, y4,col='lightgreen') # reset par par(mfrow = c(1,1)) # load data from Intro2R4SpatAnal data(lads.polys) # select the 22nd element leeds <- lads.polys[[22]] head(leeds) # set the plot extent plot(leeds, asp=1, type='n', xlab="Easting", ylab="Northing") # plot the selected features with hatching polygon(leeds, density=14, angle=135) colours() plot(leeds, asp=1, type='n', xlab="Easting", ylab="Northing") polygon(leeds, col=rgb(0,0.5,0.7)) plot(leeds, asp=1, type='n', xlab="Easting", ylab="Northing") polygon(leeds, col=rgb(0,0.5,0.7,0.4)) # set the plot extent plot(leeds, asp=1, type='n', xlab="Easting", ylab="Northing") # plot random points set.seed(123) points(x = runif(500,413216,446877), y = runif(500, 422593,450175), pch=16, col='red') # plot the polygon with a transparency factor polygon(leeds, col=rgb(0,0.5,0.7,0.4)) plot(leeds, asp=1, type='n', xlab="Easting", ylab="Northing") polygon(leeds, col="#B3B333") # add text, specifying its placement, colour and size text(431952,436635,"Leeds",cex=1.5) text(431952,432598,"in Yorkshire",col='darkred') plot(x = c(-1.5,1.5), y = c(-1.5,1.5), asp=1, type='n') # plot the green/blue rectangle rect(-0.5,-0.5,0.5,0.5, border=NA, col=rgb(0,0.5,0.5,0.7)) # then the second one rect(0,0,1,1, col=rgb(1,0.5,0.5,0.7)) # load some gridded data data(pm10.grid) # convert to a matrix mat <- xtabs(pm102021g ~ X + Y, pm10.grid) # set some plot parameters (1 row, 2 columns) par(mfrow = c(1,2)) # plot the points using the default shading image(mat, asp = 1) # select and examine a colour palette with 7 classes greenpal <- c4a("brewer.greens", 11) # and now use this to plot the data image(mat, col=greenpal, asp = 1) # reset par par(mfrow = c(1,1)) library(tidyverse) if (!is.element("cowplot", installed.packages())) install.packages("cowplot", dep = T) if (!is.element("sf", installed.packages())) install.packages("sf", dep = T) library(cowplot) library(sf) df <- data.frame(x2, y2r) ggplot(data = df, aes(x = x2, y = y2r)) + geom_point(col='darkred') + geom_smooth(col="darkgreen", lwd = 1.5, se = FALSE) # theme_bw() # theme_dark() leeds <- leeds |> data.frame() # create a data.frame to hold the points set.seed(123) df <- data.frame(x = runif(500,413216,446877), y = runif(500, 422593,450175)) # now use ggplot to construct the layers of the plot p1 <- ggplot(data = leeds, aes(x = X, y= Y)) + geom_polygon(fill = rgb(0,0.5,0.7), alpha = 0.4) + geom_point(data = df, aes(x = x, y = y),col='red') + coord_fixed() # plot the plot p1 # ggplot(data = , aes(x,y,colour)) + geom_XYZ() # load leeds LSOA data data(leeds_lsoa) # extract the data frame and convert to tibble tb <- leeds_lsoa |> st_drop_geometry() |> as_tibble() tb tb$Unemployed <- as.factor((tb$unemp < median(tb$unemp)) + 0) levels(tb$Unemployed) <- list("Unemployed" = 0, "Not Unemployed"=1) tb$DegClass <- rep("Average", nrow(tb)) tb$DegClass[tb$degree >= 43.00] = "More Educated" tb$DegClass[tb$degree <= 24.35] = "Less Educated" tb$DegClass = factor(tb$DegClass, levels = c("Less Educated", "Average", "More Educated")) table(tb$DegClass) ggplot(data = tb, mapping=aes(x=degree, y=manual)) + geom_point() ggplot(data = tb, mapping=aes(x=degree, y=manual, colour = Unemployed)) + geom_point() ggplot(data = tb, mapping=aes(x=degree, y=manual)) + geom_point() + geom_smooth(method = "lm") ggplot(data = tb, mapping = aes(x=degree, y=manual)) + geom_point() + geom_smooth(method = "lm", col = "red", fill = "lightsalmon") + theme_bw() + xlab("% with bachelor degree") + ylab("% in manual occupations") + ggtitle("A scatter plot") ggplot(tb, aes(x=degree)) + geom_histogram(, binwidth = 5, colour = "red", fill = "grey") ggplot(tb, aes(x=degree)) + geom_histogram(binwidth=5,colour="white", fill="darksalmon") + # Ignore NA values geom_vline(aes(xintercept=median(degree, na.rm=T)), color="orangered1", linetype="dashed", size=1) ggplot(tb, aes(x=manual, y = unemp)) + geom_point(color="grey30") + facet_wrap(~DegClass) + xlab("% Manual Occupations") + ylab("% Unemployed") ggplot(tb, aes(x = degree)) + geom_boxplot() ggplot(tb, aes(DegClass, degree, fill = factor(Unemployed))) + geom_boxplot() + scale_fill_manual(name = "", values = c("orange", "firebrick3"), labels = c("Non-Rural"="Not Rural","Rural"="Rural")) + xlab("Degree Class") + ylab("% Degree") # display the first six rows head(leeds) # display the variable dimensions dim(leeds) write.csv(leeds, file = "test.csv") write.csv(leeds, file = "test.csv", row.names = F) tmp.leeds <- read.csv(file = "test.csv") # this will save everything in the workspace save(list = ls(), file = "MyData.RData") # this will save just leeds save(list = "leeds", file = "MyData.RData") # this will save leeds and lads.polys save(list = c("leeds", "lads.polys"), file = "MyData.RData") load("MyData.RData") st_write(leeds, "leeds.gpkg") # GeoPackage st_write(leeds, "leeds.shp") # ESRI shapefile st_write(leeds, "leeds.geojson") # GeoJson st_write(leeds, "leeds.gpkg", delete_layer = TRUE) # GeoPackage new.leeds <- st_read("leeds.shp") vignette(topic = "sf2", package = "sf") # Self-Test Question 1 access[4] <- "poor" access # Self-Test Question 4 # Using the access, house.type and bedrooms variables, write expressions to give the following: # The access of all residents with more than 1 bedroom # The counts of house types of all residents with 1 or 2 bedrooms # The counts of access for rented with 3 or more bedrooms. # Undo the access[4] <- "poor" line used above access <- factor(c("very good","good","very good","average", "bad","very good","average","bad", "very good","very good","average","bad","bad"), levels=c("very good","good","average","bad","very bad")) house.type <- factor(c("owned","owned","rented","owned", "other","rented","other","owned", "rented","owned","owned","owned","rented"), levels=c("owned","rented","other")) bedrooms <- ordered(c("1 bedroom","2 bedrooms","1 bedroom","2 bedrooms", "3+ bedrooms","2 bedrooms","3+ bedrooms","1 bedroom", "2 bedrooms","1 bedroom", "1 bedroom","2 bedrooms","2 bedrooms"), levels=c("1 bedroom","2 bedrooms","3+ bedrooms")) # 1 The access of all residents with more than 1 bedroom access[bedrooms > "1 bedroom"] # 2 The counts of house types of all residents with 1 or 2 bedrooms table(house.type[bedrooms < "3+ bedrooms"]) # 3 The counts of access for rented with 3 or more bedrooms. table(access[(bedrooms < "3+ bedrooms") & (house.type == "rented")]) # Self-Test Question 6 apply(crosstab,1,which.max) # Self-Test Question 7 # create data frame df <- data.frame(ID = 1:length(access), access, bedrooms, house.type) df[, 3] df[5, ] df[c(3, 5), ] df[df$ID > 10, ] # perhaps easier to create an index object index <- (df$access == "bad" | df$access == "very bad") df[index, c(1,3)] head(leeds) tail(leeds) ggplot(data = leeds, aes(x = X, y = Y)) + geom_polygon() + # specify data and aesthetics for points geom_point(data = df, aes(x, y),col='red')