# Text after a # is a comment.
# Run the code line by line by putting the cursor on a line (or selecting the
# code you want to run) and hitting ctrl+enter in RStudio.

# Getting help ----

# Get help
?data.frame
# search in all help files
??data.frame

# Installing and loading packages ----

# install a package from CRAN
install.packages("dplyr")
# "openxlsx" (not "xlsx") is listed because it is the package that the Excel
# example of this script loads
install.packages(
  c("reshape2", "ggplot2", "maptools", "rgdal", "leaflet", "openxlsx")
)

# load a package
library(ggplot2)

# install a package from github, this requires the devtools package to be
# installed so we do that first
install.packages("devtools")
devtools::install_github("iobis/robis")

# browse package vignettes
browseVignettes(package = "ggplot2")
# Directly open a vignette
vignette("ggplot2-specs")

# Vectors: one-dimensional data structures ----
# single values are vectors of length 1

# numbers
a <- 1
a
# [1] 1
class(a)
# [1] "numeric"
length(a)
# [1] 1

# text
b <- "banana"
b
# [1] "banana"
class(b)
# [1] "character"

# TRUE/FALSE (= booleans)
d <- FALSE
d
# [1] FALSE
class(d)
# [1] "logical"

# vector with 2 numbers, all elements in a vector have the same class
a <- c(1, 2)
a
# [1] 1 2
class(a)
# [1] "numeric"
length(a)
# [1] 2

# 1 to 10
b <- 1:10
b
# [1]  1  2  3  4  5  6  7  8  9 10

# even numbers to 10
a <- seq(0, 10, by = 2)
a
# [1]  0  2  4  6  8 10
length(a)
# [1] 6

# empty vector is known as NULL
b <- c()
b
# [1] NULL

# Matrices ----
# Two-dimensional data structures. Again, all elements are of the same class.
matrix(1:6, nrow = 3, ncol = 2)
#      [,1] [,2]
# [1,]    1    4
# [2,]    2    5
# [3,]    3    6

# Data frames ----
# In data frames the columns can be of different classes.
d <- data.frame(a = c(5, 6, 7), b = c("x", "y", "z"))
d
#   a b
# 1 5 x
# 2 6 y
# 3 7 z

# select column "a", which is the first column, this returns a vector when
# possible
d$a
# [1] 5 6 7
d[, 1]
# [1] 5 6 7
d[, "a"]
# [1] 5 6 7
d[1] # data frame with only the first column
d[, 1, drop = FALSE] # data frame with only the first column
#   a
# 1 5
# 2 6
# 3 7
d[1, ] # data frame with only the first row
#   a b
# 1 5 x

# demonstration of the 'dplyr' data frame wrapper
library(dplyr) # install.packages("dplyr")
data(iris)
# as_tibble() replaces the deprecated tbl_df()
as_tibble(iris)

# Lists ----
# A collection of objects.
a <- data.frame(a = c(1, 2, 3), b = c("x", "y", "z"))
l <- list(a = a, b = 1)
l
# $a
#   a b
# 1 1 x
# 2 2 y
# 3 3 z
#
# $b
# [1] 1

# access the second element "b"
l$b
# [1] 1
l[[2]]
# [1] 1
l[["b"]]
# [1] 1

# Writing and reading data ----
data <- data.frame(x = 10:15, y = 40:45) # some data

# Delimited text files
write.table(data, "data.txt", sep = "\t", dec = ".", row.names = FALSE)
data <- read.table(
  "data.txt",
  header = TRUE, sep = "\t", dec = ".", stringsAsFactors = FALSE
)
# comma , separated
write.csv(data, "data.csv", row.names = FALSE)
data <- read.csv("data.csv", stringsAsFactors = FALSE)
# semicolon ; separated
write.csv2(data, "data2.csv", row.names = FALSE)
data <- read.csv2("data2.csv", stringsAsFactors = FALSE)
# remove created files
unlink(c("data.txt", "data.csv", "data2.csv"))

# Excel files
library(openxlsx) # install.packages("openxlsx")
data <- data.frame(x = 10:15, y = 40:45) # generate some data
# openxlsx spells this argument rowNames (row.names is the xlsx-package name)
write.xlsx(data, "data.xlsx", sheetName = "intro", rowNames = FALSE)
data2 <- read.xlsx("data.xlsx", 1)
data2 <- read.xlsx("data.xlsx", sheet = "intro")
# remove the created file, as done for the delimited text files above
unlink("data.xlsx")

# Reading from ZIP files
temp <- tempfile()
# mode = "wb": the archive is binary and the URL does not end in ".zip", so
# on Windows the default text-mode transfer would corrupt the download
download.file(
  "http://ipt.vliz.be/eurobis/archive.do?r=nsbs&v=1.1",
  temp,
  mode = "wb"
)
# unz() reads "occurrence.txt" straight from the archive; nothing is extracted
data <- read.table(
  unz(temp, "occurrence.txt"),
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
View(data)
# only the downloaded archive exists on disk, so only that is removed
unlink(temp)

# Reading shapefiles ----
# Example requires the 'maptools', 'rgdal' and 'ggplot2' packages for reading
# and visualizing
# NOTE(review): maptools and rgdal appear to have been retired from CRAN
# (2023); confirm, and consider porting this example to the 'sf' package.
library(maptools) # install.packages("maptools")
library(rgdal) # install.packages("rgdal")
library(ggplot2) # install.packages("ggplot2")
# mode = "wb" keeps the binary zip intact on Windows (URL has no .zip suffix)
download.file(
  "http://iobis.org/geoserver/OBIS/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=OBIS:summaries&outputFormat=SHAPE-ZIP",
  destfile = "summaries.zip",
  mode = "wb"
)
unzip("summaries.zip")
shape <- readOGR("summaries.shp", layer = "summaries")
# give every polygon an id so the fortified coordinates can be joined back to
# the attribute table
shape@data$id <- rownames(shape@data)
df <- fortify(shape, region = "id")
data <- merge(df, shape@data, by = "id")
# plot the number of species
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for polygon
# borders; `size` is kept here so older ggplot2 versions still work.
ggplot() +
  geom_polygon(
    data = data,
    aes(x = long, y = lat, group = group, fill = s),
    color = "gray", size = 0.2
  ) +
  scale_fill_distiller(palette = "Spectral")
# remove all summaries files related to the shapefile
unlink(list.files(".", "^summaries[.](zip|shp|shx|dbf|cst|prj)"))

# Inspecting data ----
library(robis) # devtools::install_github("iobis/robis")
library(dplyr) # install.packages("dplyr")
data <- occurrence("Sargassum")
# for this example, convert back from data frame tbl (dplyr) to standard data
# frame
data <- as.data.frame(data)
head(data) # first 6 rows
head(data, n = 100) # first 100 rows
dim(data) # dimensions
nrow(data) # number of rows
ncol(data) # number of columns
names(data) # column names
str(data) # structure of the data
summary(data) # summary of the data
View(data) # View the data
# now convert to data frame tbl (dplyr); as_tibble() replaces the deprecated
# tbl_df()
data <- as_tibble(data)
data
head(data)
print(data, n = 10)

# Filtering ----
library(robis) # devtools::install_github("iobis/robis")
library(dplyr) # install.packages("dplyr")
data <- occurrence("Sargassum")
View(
  data %>%
    filter(scientificName == "Sargassum muticum" & yearcollected > 2005)
)

# Reordering ----
View(data %>% arrange(datasetName, desc(eventDate)))

# Selecting and renaming columns ----
data %>%
  select(
    scientificName, eventDate,
    lon = decimalLongitude, lat = decimalLatitude
  )
# `select()` can be used with `distinct()` to find unique combinations of
# values:
data %>%
  select(scientificName, locality) %>%
  distinct()

# Adding columns ----
# .bincode() assigns depth to bin 1 (0-20] or bin 2 (20-100]; values outside
# the breaks become NA and are filtered out below
data %>%
  as_tibble() %>%
  mutate(zone = .bincode(minimumDepthInMeters, breaks = c(0, 20, 100))) %>%
  select(minimumDepthInMeters, zone) %>%
  filter(!is.na(zone)) %>%
  print(n = 100)

# Aggregation ----
data %>%
  summarise(
    lat_mean = mean(decimalLatitude),
    lat_sd = sd(decimalLatitude)
  )
data %>%
  group_by(scientificName) %>%
  summarise(records = n(), datasets = n_distinct(datasetName))

# Restructuring ----
# Convert a dataset from OBIS to a matrix format, which is more suitable for
# community analysis:
library(robis)
library(reshape2)
data <- occurrence(resourceid = 586)
wdata <- dcast(
  data,
  locality ~ scientificName,
  value.var = "individualCount",
  fun.aggregate = sum
)
# And the other way around, from wide format to long format. The id column is
# named explicitly instead of letting melt() guess it:
ldata <- melt(
  wdata,
  id.vars = "locality",
  variable.name = "scientificName",
  value.name = "individualCount"
)

# Plotting ----
# In this example, data for one species is extracted from an OBIS dataset.
# Density and depth are visualized using the `ggplot2` package:
library(robis)
library(dplyr)
library(reshape2)
library(ggplot2)
data <- occurrence(resourceid = 586)
afil <- data %>%
  filter(scientificName == "Amphiura filiformis") %>%
  group_by(locality) %>%
  summarise(
    n = mean(individualCount),
    lon = mean(decimalLongitude),
    lat = mean(decimalLatitude),
    depth = mean(minimumDepthInMeters)
  )
ggplot() +
  geom_point(data = afil, aes(lon, lat, size = n, colour = depth)) +
  scale_colour_distiller(palette = "Spectral") +
  theme(panel.background = element_blank()) +
  coord_fixed(ratio = 1) +
  scale_size(range = c(2, 12))

# Mapping ----
# The `leaflet` package can be used to create interactive web based maps.
# The example below shows the results of an outlier analysis of Verruca
# stroemia occurrences:
library(leaflet)
data <- occurrence("Verruca stroemia")
data$qcnum <- qcflags(data$qc, c(24, 28))
# qcnum + 1 is used as an index into the three-colour palette
colors <- c("red", "orange", "green")[data$qcnum + 1]
m <- leaflet()
m <- addProviderTiles(m, "CartoDB.Positron")
m <- addCircleMarkers(
  m,
  data = data.frame(lat = data$decimalLatitude, lng = data$decimalLongitude),
  radius = 3, weight = 0, fillColor = colors, fillOpacity = 0.5
)
m