{cat("--------------- Multivariate Methods ---------------\n");T} { #Function: princomp #Data: SAS/STAT User's Guide, p. 801 #Reference(s): Chapter 21, The FACTOR Procedure. in SAS Institute Inc., # SAS/STAT User's Guide, Version 6, Fourth Edition, Volume 1, # NC: SAS Institute Inc., 1989. 943 pp. # # Harman, H.H. 1976. Modern Factor Analysis, 3rd edition. # University of Chicago Press, Chicago. #Description: 5 socioeconomic variables for 12 census tracts; calculate # principal components based on correlation; check sdev^2 # (eigenvalues), loadings (eigenvectors), proportion of # variance, cumulative proportion, and scores by components # against SAS run using proc princomp data=sevars out=sevpca; tol1 <- 5e-6 tol2 <- 5e-7 tol3 <- 2e-1 sevars <- matrix(c(5700,1000,3400,3800,4000,8200,1200,9100,9900,9600, 9600,9400,12.8,10.9,8.8,13.6,12.8,8.3,11.4,11.5,12.5, 13.7,9.6,11.4,2500,600,1000,1700,1600,2600,400,3300, 3400,3600,3300,4000,270,10,10,140,140,60,10,60,180, 390,80,100,25000,10000,9000,25000,25000,12000,16000, 14000,18000,25000,12000,13000),ncol=5, dimnames=list(NULL,c("POP","SCHOOL","EMPLOY", "SERVICES","HOUSE"))) sevars.pca <- princomp(sevars,cor=T) all(c(abs(sevars.pca$sdev^2 - c(2.87331,1.79666,0.21484,0.09993,0.01526)) < tol1, abs(sevars.pca$loadings[,1] - c(0.342730,0.452507,0.396695,0.550057, 0.466738)) < tol2, abs(sevars.pca$loadings[-4,2] - c(-0.601629,0.406414,-0.541665, 0.416429)) < tol2, abs(sevars.pca$loadings[-1,3] - c(0.688822,0.247958,-.664076, -.139649)) < tol2, abs(sevars.pca$loadings[-3,4] - c(-0.204033,0.353571,0.500386, -.763182)) < tol2, abs(sevars.pca$loadings[1:3,5] - c(0.689497,0.174861,-.698014)) < tol1, abs(summary(sevars.pca)$varmat[2,] - c(0.574663,0.359332,0.042967, 0.019987,0.003051)) < tol2, abs(summary(sevars.pca)$varmat[3,] - c(0.57466,0.93399,0.97696,0.99695, 1.00000)) < tol1, abs(sevars.pca$scores[,1] - c(1.64367,-2.25697,-2.49521,0.77913,0.56446, -1.17306,-1.73452,0.09745,1.32993,3.18609, -0.38486,0.44388)) < tol3, abs(sevars.pca$scores[,2] - c(0.95519,1.01710,-0.12032,1.73058,1.55725, -1.54174,1.57553,-1.14587,-0.75909,0.07906, -1.78280,-1.56488)) < tol3, abs(sevars.pca$scores[,3] - c(-0.48972,0.14813,-0.51818,0.37720,0.05224, -0.66299,0.17283,0.68236,0.32063,-0.54888, -0.11326,0.57964)) < tol3, abs(sevars.pca$scores[,4] - c(-0.01151,0.59215,0.14666,-0.29172,-0.46006, -0.40843,-0.03619,-0.08115,0.11051,0.43743, -0.16004,0.16234)) < tol3, abs(sevars.pca$scores[,5] - c(-0.17307,-0.03814,0.02537,-0.02556,-0.00754, -0.00015,0.08569,0.07390,0.22398,0.07799, 0.01402,-0.25651)) < tol3)) } { #Function: factanal #Data: Tabachinick and Fidell, Table 12.4 p. 611 #Reference: Tabachnick, B.G. and L.S. Fidell. 1989. Using Multivariate # Statistics, 2nd edition. Harper and Row, Publishers #Description: Hypothetical data set ranking importance of each of 4 variables # to selection of a ski resort; calculate 2 factors using # principal component extraction and varimax rotation; check # loadings, sum of squares of loadings, proportion of # variance explained by each factor, cumlative variance, and # communalities (1 - uniquenesses) tol1 <- 2e-3 tol2 <- 4e-3 skiers <- matrix(c(32,61,59,36,62,64,37,40,62,46,65,62,45,34,43,67,65, 43,35,40),ncol=4,dimnames=list(NULL,c("COST","LIFT","DEPTH", "POWDER"))) old.op <- options(warn=-1) skiers.fa <- factanal(skiers,factors=2) options(old.op) all(c(abs(skiers.fa$loadings[3:4,1] - c(0.994,0.997)) < tol1, abs(skiers.fa$loadings[1:2,2] - c(-0.981,0.977)) < tol1, abs(summary(skiers.fa)$varex[1,] - c(1.994,1.919)) < tol1, abs(summary(skiers.fa)$varex[2,] - c(0.50,0.48)) < tol1, abs(summary(skiers.fa)$varex[3,2] - 0.98) < tol1, abs((1 - skiers.fa$uniquenesses) - c(0.970,0.960,0.989,0.996)) < tol2)) } { #Function: factanal #Data: Mardia, Kent and Bibby, Table 1.2.1 p. 3 #Reference: Mardia K.V., Kent, J.T. and J.M. Bibby. 1979. Multivariate # Analysis. Academic Press. #Description: Data consist of exam marks on 5 topics for 88 students; # calculate 2 factors using maximum likekihood factor solutions # and varimax rotation; check factor loadings against results # on p. 266 tol <- 3e-2 mech <- c(77,63,75,55,63,53,51,59,62,64,52,55,50,65,31,60,44,42,62,31,44,49, 12,49,54,54,44,18,46,32,30,46,40,31,36,56,46,45,42,40,23,48,41,46, 46,40,49,22,35,48,31,17,49,59,37,40,35,38,43,39,62,48,34,18,35,59, 41,31,17,34,46,10,46,30,13,49,18, 8,23,30, 3, 7,15,15, 5,12, 5, 0) vect <- c(82,78,73,72,63,61,67,70,60,72,64,67,50,63,55,64,69,69,46,49,61,41, 58,53,49,53,56,44,52,45,69,49,27,42,59,40,56,42,60,63,55,48,63,52, 61,57,49,58,60,56,57,53,57,50,56,43,35,44,43,46,44,38,42,51,36,53, 41,52,51,30,40,46,37,34,51,50,32,42,38,24, 9,51,40,38,30,30,26,40) algb <- c(67,80,71,63,65,72,65,68,58,60,60,59,64,58,60,56,53,61,61,62,52,61, 61,49,56,46,55,50,65,49,50,53,54,48,51,56,57,55,54,53,59,49,49,53, 46,51,45,53,47,49,50,57,47,47,49,48,41,54,38,46,36,41,50,40,46,37, 43,37,52,50,47,36,45,43,50,38,31,48,36,43,51,43,43,39,44,32,15,21) anal <- c(67,70,66,70,70,64,65,62,62,62,63,62,55,56,57,54,53,55,57,63,62,49, 63,62,47,59,61,57,50,57,52,59,61,54,45,54,49,56,49,54,53,51,46,41, 38,52,48,56,54,42,54,43,39,15,28,21,51,47,34,32,22,44,47,56,48,22, 30,27,35,47,29,47,15,46,25,23,45,26,48,33,47,17,23,28,36,35,20, 9) stat <- c(81,81,81,68,63,73,68,56,70,45,54,44,63,37,73,40,53,45,45,62,46,64, 67,47,53,44,36,81,35,64,45,37,61,68,51,35,32,40,33,25,44,37,34,40, 41,31,39,41,33,32,34,51,26,46,45,61,50,24,49,43,42,33,29,30,29,19, 33,40,31,36,17,39,30,18,31, 9,40,40,15,25,40,22,18,17,18,21,20,14) scores.mat <- cbind(mech,vect,algb,anal,stat) scores.fa <- factanal(scores.mat,method="mle",factors=2) all(c(abs(scores.fa$loadings[,1] - c(0.243,0.337,0.717,0.732,0.689)) < tol, abs(scores.fa$loadings[,2] - c(0.693,0.682,0.534,0.343,0.312)) < tol)) } { #Function: princomp #Data: Mardia, Kent and Bibby, Table 1.2.1 p. 3 #Reference: Mardia K.V., Kent, J.T. and J.M. Bibby. 1979. Multivariate # Analysis. Academic Press. #Description: Data consist of exam marks on 5 topics for 88 students, from # previous test; calculate principal components based on # covariance matrix; check principal loadings against results # on p. 218; check scores for first 4 students on the first 2 # principal components against results on p. 249 tol1 <- 5e-3 tol2 <- 9e-2 scores.pca <- princomp(scores.mat) load <- scores.pca$loadings sscores <- scores.pca$scores[1:4,1:2] all(c(abs(load[,1] - c(0.51,0.37,0.35,0.45,0.53)) < tol1, abs(load[-3,2] - c(-0.75,-0.21,0.30,0.55)) < tol1, abs(load[,3] - c(-0.30,0.42,0.15,0.60,-0.60)) < tol1, abs(load[-3,4] - c(0.30,-0.78,0.52,-0.18)) < tol1, abs(load[-1,5] - c(0.19,-0.92,0.29,0.15)) < tol1, abs(sscores[,1] - c(66.4,63.7,63.0,44.6)) < tol2, abs(sscores[,2] - c(-6.5,6.8,-3.1,5.6)) < tol2)) } { #Function: princomp #Data: Mardia, Kent and Bibby, Exercise 8.2.1 p. 247 #Reference: Mardia K.V., Kent, J.T. and J.M. Bibby. 1979. Multivariate # Analysis. Academic Press. #Description: Measurements on 5 meterological variables over 11 years; # calculate principal components based on # covariance matrix; check principal loadings and proportion of # explained variance against results on p. 248 tol1 <- 7e-4 tol2 <- 5e-3 x1 <- c(87.9,89.9,153.0,132.1,88.8,220.9,117.7,109.0,156.1,181.5,181.4) x2 <- c(19.6,15.2,19.7,17.0,18.3,17.8,17.8,18.3,17.8,16.8,17.0) x3 <- c(1.0,90.1,56.6,91.0,93.7,106.9,65.5,41.8,57.4,140.6,74.3) x4 <- c(1661,968,1353,1293,1153,1286,1104,1574,1222,902,1150) x.mat <- cbind(x1,x2,x3,x4) x.pca <- princomp(x.mat,cor=T) load <- x.pca$loadings all(c(abs(load[,1] - c(-.291,.506,-.577,.571)) < tol1, abs(load[,2] - c(-.871,-.425,-.136,-.205)) < tol1, abs(load[,3] - c(.332,-.742,-.418,.405)) < tol1, abs(load[,4] - c(-.214,-.111,.688,.685)) < tol1, abs(summary(x.pca)$varmat[2,] - c(.65,.24,.07,.04)) < tol2)) } { #Function: princomp #Data: Mardia, Kent and Bibby, Exercise 8.2.1 p. 247 #Reference: Mardia K.V., Kent, J.T. and J.M. Bibby. 1979. Multivariate # Analysis. Academic Press. #Description: Measurements on 5 meterological variables over 11 years, from # previous test; calculate principal components based on # correlation matrix; check sdev (eigenvalues) and principal # loadings against results on p. 248 tol1 <- 6e-4 tol2 <- 4e-1 x.pca <- princomp(x.mat) load <- x.pca$loadings all(c(abs(load[,1] - c(-.049,.004,-.129,.990)) < tol1, abs(load[,2] - c(-.954,-.003,-.288,-.084)) < tol1, abs(load[,3] - c(-.296,-.008,.949,.109)) < tol1, abs(load[,4] - c(.005,-1.000,-.008,.003)) < tol1, abs(x.pca$sdev^2 - c(49023,1817,283,0.6)) < tol2)) } { #Clean up after multivar cleanup <- T rm(load,x.pca,tol,tol1,tol2,tol3,x1,x2,x3,x4,x.mat,scores.pca,sscores) rm(mech,vect,algb,anal,stat,scores.mat,scores.fa,skiers,old.op,skiers.fa) rm(sevars,sevars.pca) T }