# Chapter 5-多元正态分布的参数估计 #################### # Exam 1: MLEs for Multivariate Normal Distribution ###################### mu <- c(0, 1, 2) Sig <- matrix(c(1, 0.6, 0, 0.6, 1, -0.4, 0, -0.4, 1), 3, 3) n <- 1000 library(MASS) library(mvtnorm) X <- mvrnorm(n, mu, Sig) S <- cov(X) ### sample covariance matrix mu_mle <- colMeans(X) ### mle for mean Sig_mle <- (n-1)*S/n ### mle for covariance matrix = V/n mu_mle Sig_mle vars <- diag(sqrt(1/diag(Sig_mle))) Rhat <- vars%*%Sig_mle%*%vars ### mle for correlation matrix Rhat cor(X) ### for comparison purpose between Rhat and cor(X) Rhat-cor(X) #################### # Exam 2: Hypothesis testing H0: rho13 = 0; H1: rho13 ≠ 0 ###################### rho.hat = Rhat[1,3] #### test statisitc t.stat = rho.hat*sqrt((n-2)/(1-rho.hat^2)) #### compute p vlaue 2-2*pt(abs(t.stat), df=n-2) #################### # Exam 3: Confidence interval of rho13 ###################### zl <- rho.hat-qnorm(0.975)/sqrt(n-3) zu <- rho.hat+qnorm(0.975)/sqrt(n-3) lower <- (exp(2*zl)-1)/(exp(2*zl)+1) upper <- (exp(2*zu)-1)/(exp(2*zu)+1) print(c(lower,upper)) #################### # Exam 4: Wishart distribution ###################### S = toeplitz((10:1)/10) set.seed(11) W = rWishart(1000, df=20, Sigma=S) dim(W) #### 10 10 1000 mW = apply(W, 1:2, mean) #### ~= E[Wish(W, 20)] = 20 * W #################### # Exam 5: Wishart distribution: A test for covariance matrix ###################### Sig = matrix(c(1, 0.8, 0.4, 0.8, 1, 0.2, 0.4, 0.2, 1), nrow = 3) p = dim(Sig)[1] df = 4 Sig.vec = c(Sig) #Empirical (Using the simulated data) set.seed(9524) reps = 100000 #### number of obs in our sampling dist W.empr = rWishart(reps, df=df, Sigma=Sig) W.empr.vec = aperm(W.empr, c(3,2,1)) #### convert tha array to matrix dim(W.empr.vec) = c(reps, p*p) round(apply(W.empr.vec,2, mean), 2) #### sample mean df*Sig.vec #### theoretical mean #Comparet the variance Eij = function(n, i, j){ Eij = matrix(0, n, n) Eij[i,j] = 1 Eij } K = matrix(0, p*p, p*p) for(i in 1:p) for(j in 1:p) { EIJ = Eij(p, i, j) K = K+kronecker(EIJ, t(EIJ)) } W.theor = df*(diag(p*p)+K)%*%kronecker(Sig,Sig) round(cov(W.empr.vec), 2) #### sample covriance matrix of Wishart samples W.theor #################### # Exam 6: Test of Normality ###################### #Exam 6.1: Test of Normality for univariate #QQplot par(mfrow=c(2,2));set.seed(16); X = rnorm(50);qqnorm(X);qqline(X);legend("topleft",legend=c("n=50")) X = rnorm(100);qqnorm(X);qqline(X);legend("topleft",legend=c("n=100")) X = rnorm(1000);qqnorm(X);qqline(X);legend("topleft",legend=c("n=1000")) X = rnorm(10000);qqnorm(X);qqline(X);legend("topleft",legend=c("n=10000")) #Exam 6.2: See the data set in Table 4-1 of Johnson and Wichern (2008) # change directory to code file mdat <- read.table("T4-1.dat",header=F) mdat <- cbind(1:nrow(mdat), mdat) names(mdat) <- c('Oven', 'Radiation') head(mdat) mmean = mean(mdat$Radiation) mmean mvar = var(mdat$Radiation) mvar qqnorm(mdat$Radiation, pch = 20, main="Normal Probability Plot") qqline(mdat$Radiation) library(qqtest) par(mfrow=c(1,2)) library(qqtest) qqtest(mdat$Radiation,main="Radiation") # install.packages("nortest") library(nortest) library(help='nortest') shapiro.test(mdat$Radiation) mdat$logRad = log(mdat$Radiation) hist(mdat$logRad) qqnorm(mdat$logRad, pch = 19, main="Normal Probability Plot") qqline(mdat$logRad) qqtest(mdat$logRad,main="Log Radiation") shapiro.test(mdat$logRad) lillie.test(mdat$logRad) # Exam 6.3: Test normality for multivariates # Chi squred distribution Q-Q plot library(MASS) mu <- c(0,1) Sig <- matrix(c(1, 0.6, 0.6, 1), 2, 2) n <- 1000 biv <- mvrnorm(n, mu, Sig) Sbiv <- cov(biv) D2 <- mahalanobis(biv, colMeans(biv), Sbiv) qqplot(qchisq(ppoints(n), df = 2), D2, main = expression("Q-Q plot for" ~~ {chi^2}[nu == 2])) abline(c(0,1)) chiqqplot = function(x){ if(!is.matrix(x)) x = as.matrix(x) n = nrow(x); p = ncol(x) D2 = mahalanobis(x,colMeans(x),cov(x)) qqplot(qchisq(ppoints(n), df = p), D2,xlab="Theoretical Q of Chisquare", ylab="Mah distance", main = expression("Q-Q plot for" ~~ {chi^2}[p]),pch=19) abline(c(0,1)) } chiqqplot(mdat$logRad) # Exam 6.4: Multiple hypothesis testing testnormality = function(X, numproj = 1000) { p = ncol(X); n = nrow(X) x = matrix(rnorm(numproj * p), nrow = p) #### Generate 1,000 standard p-variate normal random variables. y = matrix(sqrt(apply(x^2, 2, sum)), nrow = p, ncol = numproj, by = T) z = x / y tempdat = as.matrix(X) %*% z #### This gives rise to a p x numproj matrix #### Perform Shapiro-Wilks' test and calculate individual p-values on each of the numproj observation sets. pvals = as.numeric(matrix(unlist(apply(tempdat,2,shapiro.test)),ncol=4, by =T)[,2]) #### Multiple hypthesis testing return(min(p.adjust(pvals,method="BH"))) } pvals <- testnormality(mdat, numproj = 10000) pvals pvals = testnormality(biv, numproj =10000) pvals # Exam 6.5: Energy Statistics library(energy) mvnorm.etest(mdat) data(iris) mvnorm.e(iris[1:50, 1:4]) normal.e(iris[1:50, 1]) ## Test if the iris Setosa data has multivariate normal distribution mvnorm.etest(iris[1:50,1:4], R = 199) ## Test a univariate sample for normality x = runif(50, 0, 10) mvnorm.etest(x, R = 199) # Example 6.6: Test of Normality # Data set in Table 5-1 of Johnson and Wichern (2008) # change directory to file x <- read.table("T5-1.dat") qqnorm(x[,1]) qqnorm(x[,2]) qqnorm(x[,3]) pvals = testnormality(mdat) pvals mvnorm.etest(x, R = 199) # 多元正态分布的离群点检测 # install.packages("mvoutlier") library(mvoutlier) library(help='mvoutlier') load(file = "3dExample.rda") pairs(dat) #引入离群点 outFactor = 1.5 dat1 = rbind(dat, outFactor*c(-1,-1.2,0.7)) pairs(dat) pairs(dat1, col = c(rep(1,300), 2), pch = c(rep(1,300), 3), cex = c(rep(1,300), 2)) library(mvoutlier) chisq.plot(dat) abline(c(0,1)) library(rgl) plot3d(dat1, col = c(rep(1,300), 2)) # 自动检测离群点 aq.plot(dat) #################### # Exam 7: box-cox变换 ###################### m1 = read.table(file ="T4-1.dat", header=F) #### microwave.door.close m2 = read.table("T4-5.dat", header=F) #### microwave.door.open # install.packages("car") library(car) mdat1 = as.matrix(cbind(m1,m2)) colnames(mdat1) = c("close", "open") bc = powerTransform(mdat1~1) #### Find the optimal box-cox parameter vector lambda summary(bc) bc.mdat = bcPower(mdat1,bc$lambda) #### Save the transformed values #检验正态性 library(energy) plot(bc.mdat) #chiqqplot(bc.mdat) 这是matlab命令 mvnorm.etest(bc.mdat,R=999) #非参数密度函数估计 library(MASS) f1 <- kde2d(bc.mdat[,1],bc.mdat[,2], h = c(width.SJ(bc.mdat[,1]), width.SJ(bc.mdat[,2]))) persp(f1, phi = 30, theta = 20, d = 5) #对结果进行比较 bc.close = powerTransform(mdat1[,1]) bc.nclose = bcPower(mdat1[,1],bc.close$lambda) bc.open = powerTransform(mdat1[,2]) bc.nopen = bcPower(mdat1[,2],bc.open$lambda) bc.ndat = cbind(bc.nclose,bc.nopen) #对不同lambda的结果进行比较 bc$lambda c(bc.close$lambda, bc.open$lambda) #检验正态性 plot(bc.ndat) chisq.plot(bc.ndat) mvnorm.etest(bc.ndat,R=999) #非参数密度函数估计 f2 = kde2d(bc.ndat[,1],bc.ndat[,2], h = c(width.SJ(bc.ndat[,1]), width.SJ(bc.ndat[,2]))) persp(f2, phi = 30, theta = 20, d = 5)