Pearson.chisq = function (x, y = NULL, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = FALSE, B = 2000) { DNAME <- deparse(substitute(x)) if (is.data.frame(x)) x <- as.matrix(x) if (is.matrix(x)) { if (min(dim(x)) == 1L) x <- as.vector(x) } if (!is.matrix(x) && !is.null(y)) { if (length(x) != length(y)) stop("'x' and 'y' must have the same length") DNAME2 <- deparse(substitute(y)) xname <- if (length(DNAME) > 1L || nchar(DNAME, "w") > 30) "" else DNAME yname <- if (length(DNAME2) > 1L || nchar(DNAME2, "w") > 30) "" else DNAME2 OK <- complete.cases(x, y) x <- factor(x[OK]) y <- factor(y[OK]) if ((nlevels(x) < 2L) || (nlevels(y) < 2L)) stop("'x' and 'y' must have at least 2 levels") x <- table(x, y) names(dimnames(x)) <- c(xname, yname) DNAME <- paste(paste(DNAME, collapse = "\n"), "and", paste(DNAME2, collapse = "\n")) } if (any(x < 0) || anyNA(x)) stop("all entries of 'x' must be nonnegative and finite") if ((n <- sum(x)) == 0) stop("at least one entry of 'x' must be positive") if (simulate.p.value) { setMETH <- function() METHOD <<- paste(METHOD, "with simulated p-value\n\t (based on", B, "replicates)") almost.1 <- 1 - 64 * .Machine$double.eps } if (is.matrix(x)) { METHOD <- "Pearson's Chi-squared test" nr <- as.integer(nrow(x)) nc <- as.integer(ncol(x)) if (is.na(nr) || is.na(nc) || is.na(nr * nc)) stop("invalid nrow(x) or ncol(x)", domain = NA) sr <- rowSums(x) sc <- colSums(x) E <- outer(sr, sc, "*")/n v <- function(r, c, n) c * r * (n - r) * (n - c)/n^3 V <- outer(sr, sc, v, n) dimnames(E) <- dimnames(x) if (simulate.p.value && all(sr > 0) && all(sc > 0)) { setMETH() tmp <- .Call(C_chisq_sim, sr, sc, B, E) STATISTIC <- sum(sort((x - E)^2/E, decreasing = TRUE)) PARAMETER <- NA PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC))/(B + 1) } else { if (simulate.p.value) warning("cannot compute simulated p-value with zero marginals") if (correct && nrow(x) == 2L && ncol(x) == 2L) { YATES <- min(0.5, abs(x - E)) if (YATES > 0) METHOD <- paste(METHOD, "(with correction)", sep="") } else YATES <- 0 STATISTIC <- sum((abs(x - E) - YATES)^2/E) PARAMETER <- (nr - 1L) * (nc - 1L) PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) } } else { if (length(dim(x)) > 2L) stop("invalid 'x'") if (length(x) == 1L) stop("'x' must at least have 2 elements") if (length(x) != length(p)) stop("'x' and 'p' must have the same number of elements") if (any(p < 0)) stop("probabilities must be non-negative.") if (abs(sum(p) - 1) > sqrt(.Machine$double.eps)) { if (rescale.p) p <- p/sum(p) else stop("probabilities must sum to 1.") } METHOD <- "Chi-squared test" E <- n * p V <- n * p * (1 - p) STATISTIC <- sum((x - E)^2/E) names(E) <- names(x) if (simulate.p.value) { setMETH() nx <- length(x) sm <- matrix(sample.int(nx, B * n, TRUE, prob = p), nrow = n) ss <- apply(sm, 2L, function(x, E, k) { sum((table(factor(x, levels = 1L:k)) - E)^2/E) }, E = E, k = nx) PARAMETER <- NA PVAL <- (1 + sum(ss >= almost.1 * STATISTIC))/(B + 1) } else { PARAMETER <- length(x) - 1 PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) } } names(STATISTIC) <- "STATISTIC" names(PARAMETER) <- "df" if (any(E < 5) && is.finite(PARAMETER)) warning("Chi-squared approximation may be incorrect") ## inference=data.frame(cbind(ts.stats = round(STATISTIC,4), p.value = round(PVAL, 4), d.f = PARAMETER, method = METHOD)) row.names(inference) = "" ## list(inference = inference, observed = x, expected = E, residuals = (x - E)/sqrt(E), stdres = (x - E)/sqrt(V)) } table2x2 = function(x, conf.level=0.95) { # x = data frame of the two-by-two table # if (class(x)[1] == "data.frame") { table2x2 <- as.matrix(x) } else { if ("matrix" %in% class(x) || "table" %in% class(x)) { if ("table" %in% class(x)) { table2x2 <- as.matrix(x) } else table2x2 <- x } else { stop("first argument 'x' must be a matrix or a data.frame") } } if (NROW(x) != 2) stop("Matrix must have exactly 2 rows") if (NCOL(x) != 2) stop("Matrix must have exactly 2 columns") ### alpha = 1 - conf.level a <- table2x2[1, 1] b <- table2x2[1, 2] c <- table2x2[2, 1] d <- table2x2[2, 2] p1 <- a/(a + b) p2 <- c/(c + d) ### rd <- round((p1 - p2), 4) se.rd <- round(sqrt(p1 * (1 - p1)/(a + b) + p2 * (1 - p2)/(c + d)), 4) rd.lower <- round(rd - qnorm(1 - alpha/2) * se.rd, 4) rd.upper <- round(rd + qnorm(1 - alpha/2) * se.rd, 4) rd.significance = "Yes" if (rd.lower*rd.upper < 0) rd.significance = "No" risk.difference <- c(rd, se.rd, rd.lower, rd.upper, rd.significance) ### rr <- round(p1/p2, 4) se.rr <- round(sqrt((1 - p1)/a + (1 - p2)/c), 4) rr.lower <- round(rr * exp(-qnorm(1 - alpha/2) * se.rr), 4) rr.upper <- round(rr * exp(qnorm(1 - alpha/2) * se.rr), 4) rr.significance = "Yes" if ((rr.lower < 1 & rr.upper >1)|(rr.lower > 1 & rr.upper <1)) rr.significance = "No" relative.risk <- c(rr, se.rr, rr.lower, rr.upper, rr.significance) ### or <- round((a * d)/(b * c), 4) se.or <- round(sqrt(1/a + 1/b + 1/c + 1/d), 4) or.lower <- round(exp(log(or) - qnorm(1 - alpha/2) * se.or), 4) or.upper <- round(exp(log(or) + qnorm(1 - alpha/2) * se.or), 4) or.significance = "Yes" if ((or.lower < 1 & or.upper >1)|(or.lower > 1 & or.upper <1)) or.significance = "No" odds.ratio <- c(or, se.or, or.lower, or.upper, or.significance) ### summary.table = as.data.frame(rbind(relative.risk = relative.risk, risk.difference = risk.difference, odds.ratio = odds.ratio)) colnames(summary.table) = c("measure", "SE", paste("Lower(",100*conf.level,"%).CI", sep=""), paste("Upper(",100*conf.level,"%).CI", sep = ""), paste("significance(",alpha,")", sep="")) ## summary.table }