setwd("/Users/macbookpro/home/elnino/data") # output is csv data printed to the console # functions by Graham Jones: lat.from.label <- function(label) { as.integer(substr(label,2,4)) } lon.from.label <- function(label) { as.integer(substr(label,6,8)) } lat.range.from.colnames <- function(cnames) { lat.from.label(cnames[1]) : lat.from.label(cnames[length(cnames)]) } lon.range.from.colnames <- function(cnames) { lon.from.label(cnames[1]) : lon.from.label(cnames[length(cnames)]) } textual.longitude.from.3D.lon <- function(lon, cnames) { lon.range <- lon.range.from.colnames(cnames) x <- lon.range[1] + lon - 2 x <- 2.5 * x if (x < 179.99) { txt <- paste0(x, "E") } else if (x > 180.01) { txt <- paste0(360-x, "W") } else { txt <- "180" } txt } textual.latitude.from.3D.lat <- function(lat, cnames) { lat.range <- lat.range.from.colnames(cnames) y <- lat.range[1] + lat - 2 y <- 2.5 * y if (y < 89.99) { txt <- paste0(90-y, "N") } else if (y > 90.01) { txt <- paste0(y-90, "S") } else { txt <- "0" } txt } data.to.3D <- function(vals) { lat.range <- lat.range.from.colnames(colnames(vals)) lon.range <- lon.range.from.colnames(colnames(vals)) n.times <- dim(vals)[1] n.lat <- length(lat.range) n.lon <- length(lon.range) vals3D <- array(0, dim=c(n.times, n.lat, n.lon)) for (tim in 1:n.times) { for (lat in 1:n.lat) { vals3D[tim, lat, ] <- vals[tim, ((lat-1) * n.lon) + (1:n.lon)] } } vals3D } seasonally.adjust <- function(vals) { stopifnot(dim(vals)[1] %% 365 == 0) n.years <- as.integer(dim(vals)[1]/365) offsets <- (0:(n.years-1))*365 y.means <- array(0, dim=c(365, dim(vals)[2], ncols=dim(vals)[3])) for (d in 1:365) { for (lat in 1:dim(vals)[2]) { for (lon in 1:dim(vals)[3]) { y.means[d, lat, lon] <- mean(vals[d + offsets, lat, lon]) } } } for (y in 1:n.years ) { for (d in 1:365) { tim <- (y-1)*365 + d vals[tim, , ] <- vals[tim, , ] - y.means[d, , ] } } vals } # untested. aimed at values calculated eg every 14 days. 14*26=364, miss one day seasonally.adjust.periods <- function(vals, nppy) { # nppy = number of periods per year stopifnot(dim(vals)[1] %% nppy == 0) n.years <- as.integer(dim(vals)[1]/nppy) offsets <- (0:(n.years-1))*nppy y.means <- array(0, dim=c(nppy, dim(vals)[2], ncols=dim(vals)[3])) for (p in 1:nppy) { for (lat in 1:dim(vals)[2]) { for (lon in 1:dim(vals)[3]) { y.means[p, lat, lon] <- mean(vals[p + offsets, lat, lon]) } } } for (y in 1:n.years ) { for (p in 1:nppy) { tim <- (y-1)*nppy + p vals[tim, , ] <- vals[tim, , ] - y.means[p, , ] } } vals } print("reading file...") #Kvals <- as.matrix(read.table(file="Scotland-1950-1952.txt", header=TRUE)) Kvals <- as.matrix(read.table(file="Pacific-1950-1979.txt", header=TRUE)) #Kvals <- as.matrix(read.table(file="Pacific-1950-1952.txt", header=TRUE)) Kvals.cnames <- colnames(Kvals) print("converting to 3D...") Kvals.3D <- data.to.3D(Kvals) print("seasonally adjusting...") SAvals.3D <- seasonally.adjust(Kvals.3D) # find covariances at time (day) p between each point x in region and a point # offset dS steps South, dE steps East, and with time lag dp, and # using a period length covp for the covariances. Positive # dp means the points in the region are at time p, and # the offset points are earlier, so is how the points # in the region are influenced by others. (Negative dp means # the offset point are at time p, not yet implemented.) covariances <- function(vals, region, p, dS, dE, dp, covp) { S0 <- region$S0 S1 <- region$S1 E0 <- region$E0 E1 <- region$E1 stopifnot(S1 + dS <= dim(vals)[2]) stopifnot(E1 + dE <= dim(vals)[3]) stopifnot(p - dp - covp >= 1) stopifnot(dp >= 0) covs <- matrix(0, nrow=S1-S0+1, ncol=E1-E0+1) hrange <- p + ((-covp+1):0) orange <- p - dp + ((-covp+1):0) for (lat in S0:S1) { for (lon in E0:E1) { covs[lat-S0, lon-E0] <- cov(vals[orange, lat+dS, lon+dE], vals[hrange, lat, lon]) } } covs } # functions by Dave Tanzer: compute.covariance <- function(vals, t, cov.days, lon1, lat1, lon2, lat2) { range <- t + (0:(cov.days-1)) cov(vals[range, lat1, lon1], vals[range, lat2, lon2]) } nlat <- dim(SAvals.3D)[2] nlon <- dim(SAvals.3D)[3] band.size <- 1 max.radius = min(nlat,nlon) - 1 numBands <- (max.radius + 1) / band.size print("numBands") print(numBands) observations.needed <- 10000 check.done <- function(all.bands) { for (i in 1:numBands) { if (length(all.bands[[i]]) < observations.needed) { return(FALSE) } } TRUE } compute.bands <- function(start.day, cov.days) { all.bands = list() for (i in 1:numBands) { all.bands[[i]] <- vector() } repeat { x1 <- sample(1:nlon, 1) y1 <- sample(1:nlat, 1) x2 <- sample(1:nlon, 1) y2 <- sample(1:nlat, 1) line.length <- sqrt((x2 - x1)^2 + (y2 - y1)^2) if (line.length > max.radius) { next } band.index <- 1 + floor(line.length / band.size) point.count <- length(all.bands[[band.index]]) if (point.count >= observations.needed) { next } all.bands[[band.index]][point.count + 1] <- compute.covariance(SAvals.3D, start.day, cov.days, x1, y1, x2, y2) if (point.count + 1 == observations.needed) { if (check.done(all.bands)) { break } } } all.bands } print.year.data <- function(year) { all.bands <- compute.bands((year - 1950) * 365, 365) cat(sprintf("%d, ", year)) for (i in 1:numBands) { median.val <- median(all.bands[[i]]) if (i < numBands) { cat(sprintf("%f, ", median.val)) } else { cat(sprintf("%f\n", median.val)) } } } for (year in 1950:1979) { print.year.data(year) }