[931] | 1 | <html> |
---|
| 2 | <head> |
---|
| 3 | <title>NIST PCA Example</title> |
---|
| 4 | </head> |
---|
| 5 | <P> |
---|
| 6 | <H2>NIST PCA Example</H2> |
---|
| 7 | <pre> |
---|
| 8 | |
---|
| 9 | # A Principal Components Analysis example |
---|
| 10 | # |
---|
| 11 | # walk through the NIST/Sematech PCA example data section 6.5 |
---|
| 12 | # http://www.itl.nist.gov/div898/handbook/ |
---|
| 13 | # |
---|
| 14 | # This file is part of the Hume La Package |
---|
| 15 | # (C)Copyright 2001, Hume Integration Services |
---|
| 16 | # |
---|
| 17 | # These calculations use the Hume Linear Algebra package, La |
---|
| 18 | # |
---|
| 19 | package require La |
---|
| 20 | catch { namespace import La::*} |
---|
| 21 | |
---|
| 22 | |
---|
| 23 | # Look at the bottom of this file to see the console output of this procedure. |
---|
| 24 | |
---|
| 25 | proc nist_pca {} { |
---|
| 26 | puts {Matrix of observation data X[10,3]} |
---|
| 27 | set X {2 10 3 7 4 3 4 1 8 6 3 5 8 6 1 8 5 7 7 2 9 5 3 3 9 5 8 7 4 5 8 2 2} |
---|
| 28 | puts [show $X] |
---|
| 29 | puts {Compute column norms} |
---|
| 30 | mnorms_br X Xmeans Xsigmas |
---|
| 31 | puts "Means=[show $Xmeans]" |
---|
| 32 | puts "Sigmas=[show $Xsigmas %.6f]" |
---|
| 33 | puts {Normalize observation data creating Z[10,3]} |
---|
| 34 | puts [show [set Z [mnormalize $X $Xmeans $Xsigmas]] %10.6f]\n |
---|
| 35 | puts {Naive computation method that is sensitive to round-off error} |
---|
| 36 | puts {ZZ is (Ztranspose)(Z) } |
---|
| 37 | puts "ZZ=\n[show [set ZZ [mmult [transpose $Z] $Z]] %10.6f]" |
---|
| 38 | puts {the correlation matrix, R = (Zt)(Z)/(n-1)} |
---|
| 39 | puts "R=\n[show [set R [mscale $ZZ [expr 1/9.0]]] %8.4f]" |
---|
| 40 | puts {Continuing the naive example use R for the eigensolution} |
---|
| 41 | set V $R |
---|
| 42 | mevsvd_br V evals |
---|
| 43 | puts "eigenvalues=[show $evals %8.4f]" |
---|
| 44 | puts "eigenvectors:\n[show $V %8.4f]\n" |
---|
| 45 | puts "Good computation method- perform SVD of Z" |
---|
| 46 | set U $Z |
---|
| 47 | msvd_br U S V |
---|
| 48 | puts "Singular values, S=[show $S]" |
---|
| 49 | puts "square S and divide by (number of observations)-1 to get the eigenvalues" |
---|
| 50 | set SS [mprod $S $S] |
---|
| 51 | set evals [mscale $SS [expr 1.0/9.0]] |
---|
| 52 | puts "eigenvalues=[show $evals %8.4f]" |
---|
| 53 | puts "eigenvectors:\n[show $V %8.4f]" |
---|
| 54 | puts "(the 3rd vector/column has inverted signs, thats ok the math still works)" |
---|
| 55 | puts "\nverify V x transpose = I" |
---|
| 56 | puts "As computed\n[show [mmult $V [transpose $V]] %20g]\n" |
---|
| 57 | puts "Rounded off\n[show [mround [mmult $V [transpose $V]]]]" |
---|
| 58 | #puts "Diagonal matrix of eigenvalues, L" |
---|
| 59 | #set L [mdiag $evals] |
---|
| 60 | # |
---|
| 61 | |
---|
| 62 | puts "\nsquare root of eigenvalues as a diagonal matrix" |
---|
| 63 | proc sqrt x { return [expr sqrt($x)] } |
---|
| 64 | set evals_sr [mat_unary_op $evals sqrt] |
---|
| 65 | set Lhalf [mdiag $evals_sr] |
---|
| 66 | puts [show $Lhalf %8.4f] |
---|
| 67 | |
---|
| 68 | puts "Factor Structure Matrix S" |
---|
| 69 | set S [mmult $V $Lhalf] |
---|
| 70 | puts [show $S %8.4f]\n |
---|
| 71 | set S2 [mrange $S 0 1] |
---|
| 72 | puts "Use first two eigenvalues\n[show $S2 %8.4f]" |
---|
| 73 | # Nash discusses better ways to compute this than multiplying SSt |
---|
| 74 | # the NIST method is sensitive to round-off error |
---|
| 75 | set S2S2 [mmult $S2 [transpose $S2]] |
---|
| 76 | puts "SSt for first two components, communality is the diagonal" |
---|
| 77 | puts [show $S2S2 %8.4f]\n |
---|
| 78 | |
---|
| 79 | |
---|
| 80 | # define reciprocal square root function |
---|
| 81 | proc recipsqrt x { return [expr 1.0/sqrt($x)] } |
---|
| 82 | # use it for Lrhalf calculation |
---|
| 83 | set Lrhalf [mdiag [mat_unary_op $evals recipsqrt]] |
---|
| 84 | |
---|
| 85 | set B [mmult $V $Lrhalf] |
---|
| 86 | puts "Factor score coefficients B Matrix:\n[show $B %12.4f]" |
---|
| 87 | |
---|
| 88 | puts "NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)" |
---|
| 89 | |
---|
| 90 | puts "\nWork with first normalized observation row" |
---|
| 91 | set z1 {2 3 0 0.065621795889 0.316227766017 -0.7481995314} |
---|
| 92 | puts [show $z1 %.4f] |
---|
| 93 | |
---|
| 94 | puts "compute the \"factor scores\" from multiplying by B" |
---|
| 95 | set t [mmult [transpose $z1] $B] |
---|
| 96 | puts [show $t %.4f] |
---|
| 97 | puts "NIST implies you might chart these" |
---|
| 98 | |
---|
| 99 | #set T2 [dotprod $t $t] |
---|
| 100 | #puts "Hotelling T2=[format %.4f $T2]" |
---|
| 101 | |
---|
| 102 | puts "Compute the scores using V, these are more familiar" |
---|
| 103 | set t [mmult [transpose $z1] $V] |
---|
| 104 | puts [show $t %.4f] |
---|
| 105 | puts "Calculate T2 from the scores, sum ti*ti/evi" |
---|
| 106 | |
---|
| 107 | set T2 [msum [transpose [mdiv [mprod $t $t] [transpose $evals]]]] |
---|
| 108 | puts "Hotelling T2=[format %.4f $T2]" |
---|
| 109 | |
---|
| 110 | puts "Fit z1 using first two principal components" |
---|
| 111 | set p [mrange $V 0 1] |
---|
| 112 | puts p=\n[show $p %10.4f] |
---|
| 113 | set t [mmult [transpose $z1] $p] |
---|
| 114 | set zhat [mmult $t [transpose $p]] |
---|
| 115 | puts " z1=[show $z1 %10.6f]" |
---|
| 116 | puts "zhat=[show $zhat %10.6f]" |
---|
| 117 | set diffs [msub [transpose $z1] $zhat] |
---|
| 118 | puts "diff=[show $diffs %10.6f]" |
---|
| 119 | puts "Q statistic - distance from the model measurement for the first observation" |
---|
| 120 | set Q [dotprod $diffs $diffs] |
---|
| 121 | puts Q=[format %.4f $Q] |
---|
| 122 | puts "Some experts would compute a corrected Q that is larger since the |
---|
| 123 | observation was used in building the model. The Q calculation just used |
---|
| 124 | properly applies to the analysis of new observations." |
---|
| 125 | set corr [expr 10.0/(10.0 - 2 -1)] ;# N/(N - Ncomp - 1) |
---|
| 126 | puts "Corrected Q=[format %.4f [expr $Q * $corr]] (factor=[format %.4f $corr])" |
---|
| 127 | } |
---|
| 128 | |
---|
| 129 | return |
---|
| 130 | ########################################################################## |
---|
| 131 | Console Printout |
---|
| 132 | ########################################################################## |
---|
| 133 | Matrix of observation data X[10,3] |
---|
| 134 | 7 4 3 |
---|
| 135 | 4 1 8 |
---|
| 136 | 6 3 5 |
---|
| 137 | 8 6 1 |
---|
| 138 | 8 5 7 |
---|
| 139 | 7 2 9 |
---|
| 140 | 5 3 3 |
---|
| 141 | 9 5 8 |
---|
| 142 | 7 4 5 |
---|
| 143 | 8 2 2 |
---|
| 144 | Compute column norms |
---|
| 145 | Means=6.9 3.5 5.1 |
---|
| 146 | Sigmas=1.523884 1.581139 2.806738 |
---|
| 147 | Normalize observation data creating Z[10,3] |
---|
| 148 | 0.065622 0.316228 -0.748200 |
---|
| 149 | -1.903032 -1.581139 1.033228 |
---|
| 150 | -0.590596 -0.316228 -0.035629 |
---|
| 151 | 0.721840 1.581139 -1.460771 |
---|
| 152 | 0.721840 0.948683 0.676942 |
---|
| 153 | 0.065622 -0.948683 1.389513 |
---|
| 154 | -1.246814 -0.316228 -0.748200 |
---|
| 155 | 1.378058 0.948683 1.033228 |
---|
| 156 | 0.065622 0.316228 -0.035629 |
---|
| 157 | 0.721840 -0.948683 -1.104485 |
---|
| 158 | |
---|
| 159 | Naive computation method that is sensitive to round-off error |
---|
| 160 | ZZ is (Ztranspose)(Z) |
---|
| 161 | ZZ= |
---|
| 162 | 9.000000 6.017916 -0.911824 |
---|
| 163 | 6.017916 9.000000 -2.591349 |
---|
| 164 | -0.911824 -2.591349 9.000000 |
---|
| 165 | the correlation matrix, R = (Zt)(Z)/(n-1) |
---|
| 166 | R= |
---|
| 167 | 1.0000 0.6687 -0.1013 |
---|
| 168 | 0.6687 1.0000 -0.2879 |
---|
| 169 | -0.1013 -0.2879 1.0000 |
---|
| 170 | Continuing the naive example use R for the eigensolution |
---|
| 171 | eigenvalues= 1.7688 0.9271 0.3041 |
---|
| 172 | eigenvectors: |
---|
| 173 | 0.6420 0.3847 -0.6632 |
---|
| 174 | 0.6864 0.0971 0.7207 |
---|
| 175 | -0.3417 0.9179 0.2017 |
---|
| 176 | |
---|
| 177 | Good computation method- perform SVD of Z |
---|
| 178 | Singular values, S=3.98985804571 2.88854344819 1.65449373616 |
---|
| 179 | square S and divide by (number of observations)-1 to get the eigenvalues |
---|
| 180 | eigenvalues= 1.7688 0.9271 0.3041 |
---|
| 181 | eigenvectors: |
---|
| 182 | 0.6420 0.3847 0.6632 |
---|
| 183 | 0.6864 0.0971 -0.7207 |
---|
| 184 | -0.3417 0.9179 -0.2017 |
---|
| 185 | (the 3rd vector/column has inverted signs, thats ok the math still works) |
---|
| 186 | |
---|
| 187 | verify V x transpose = I |
---|
| 188 | As computed |
---|
| 189 | 1 -5.55112e-017 1.38778e-016 |
---|
| 190 | -5.55112e-017 1 5.55112e-017 |
---|
| 191 | 1.38778e-016 5.55112e-017 1 |
---|
| 192 | |
---|
| 193 | Rounded off |
---|
| 194 | 1.0 0.0 0.0 |
---|
| 195 | 0.0 1.0 0.0 |
---|
| 196 | 0.0 0.0 1.0 |
---|
| 197 | |
---|
| 198 | square root of eigenvalues as a diagonal matrix |
---|
| 199 | 1.3300 0.0000 0.0000 |
---|
| 200 | 0.0000 0.9628 0.0000 |
---|
| 201 | 0.0000 0.0000 0.5515 |
---|
| 202 | Factor Structure Matrix S |
---|
| 203 | 0.8538 0.3704 0.3658 |
---|
| 204 | 0.9128 0.0935 -0.3975 |
---|
| 205 | -0.4544 0.8838 -0.1112 |
---|
| 206 | |
---|
| 207 | Use first two eigenvalues |
---|
| 208 | 0.8538 0.3704 |
---|
| 209 | 0.9128 0.0935 |
---|
| 210 | -0.4544 0.8838 |
---|
| 211 | SSt for first two components, communality is the diagonal |
---|
| 212 | 0.8662 0.8140 -0.0606 |
---|
| 213 | 0.8140 0.8420 -0.3321 |
---|
| 214 | -0.0606 -0.3321 0.9876 |
---|
| 215 | |
---|
| 216 | Factor score coefficients B Matrix: |
---|
| 217 | 0.4827 0.3995 1.2026 |
---|
| 218 | 0.5161 0.1009 -1.3069 |
---|
| 219 | -0.2569 0.9533 -0.3657 |
---|
| 220 | NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo) |
---|
| 221 | |
---|
| 222 | Work with first normalized observation row |
---|
| 223 | 0.0656 0.3162 -0.7482 |
---|
| 224 | compute the "factor scores" from multiplying by B |
---|
| 225 | 0.3871 -0.6552 -0.0608 |
---|
| 226 | NIST implies you might chart these |
---|
| 227 | Compute the scores using V, these are more familiar |
---|
| 228 | 0.5148 -0.6308 -0.0335 |
---|
| 229 | Calculate T2 from the scores, sum ti*ti/evi |
---|
| 230 | Hotelling T2=0.5828 |
---|
| 231 | Fit z1 using first two principal components |
---|
| 232 | p= |
---|
| 233 | 0.6420 0.3847 |
---|
| 234 | 0.6864 0.0971 |
---|
| 235 | -0.3417 0.9179 |
---|
| 236 | z1= 0.065622 0.316228 -0.748200 |
---|
| 237 | zhat= 0.087847 0.292075 -0.754958 |
---|
| 238 | diff= -0.022225 0.024153 0.006758 |
---|
| 239 | Q statistic - distance from the model measurement for the first observation |
---|
| 240 | Q=0.0011 |
---|
| 241 | Some experts would compute a corrected Q that is larger since the |
---|
| 242 | observation was used in building the model. The Q calculation just used |
---|
| 243 | properly applies to the analysis of new observations. |
---|
| 244 | Corrected Q=0.0016 (factor=1.4286) |
---|
| 245 | </PRE> |
---|
| 246 | </html> |
---|