# source:trunk/la1.0/NIST.html@931

Last change on this file since 931 was 931, checked in by toby, 11 years ago

include rest of files

File size: 8.1 KB
Line
1<html>
3<title>NIST PCA Example</title>
5<P>
6<H2>NIST PCA Example</H2>
7<pre>
8
9# A Principal Components Analysis example
10#
11# walk through the NIST/Sematech PCA example data section 6.5
12# http://www.itl.nist.gov/div898/handbook/
13#
14# This file is part of the Hume La Package
15# (C)Copyright 2001, Hume Integration Services
16#
17# These calculations use the Hume Linear Algebra package, La
18#
19package require La
20catch { namespace import La::*}
21
22
23# Look at the bottom of this file to see the console output of this procedure.
24
25proc nist_pca {} {
26    puts {Matrix of observation data X[10,3]}
27    set X {2 10 3 7 4 3 4 1 8 6 3 5 8 6 1 8 5 7 7 2 9 5 3 3 9 5 8 7 4 5 8 2 2}
28    puts [show \$X]
29    puts {Compute column norms}
30    mnorms_br X Xmeans Xsigmas
31    puts "Means=[show \$Xmeans]"
32    puts "Sigmas=[show \$Xsigmas %.6f]"
33    puts {Normalize observation data creating Z[10,3]}
34    puts [show [set Z [mnormalize \$X \$Xmeans \$Xsigmas]] %10.6f]\n
35    puts {Naive computation method that is sensitive to round-off error}
36    puts {ZZ is (Ztranspose)(Z) }
37    puts "ZZ=\n[show [set ZZ [mmult [transpose \$Z] \$Z]] %10.6f]"
38    puts {the correlation matrix, R = (Zt)(Z)/(n-1)}
39    puts "R=\n[show [set R [mscale \$ZZ [expr 1/9.0]]] %8.4f]"
40    puts {Continuing the naive example use R for the eigensolution}
41    set V \$R
42    mevsvd_br V evals
43    puts "eigenvalues=[show \$evals %8.4f]"
44    puts "eigenvectors:\n[show \$V %8.4f]\n"
45    puts "Good computation method- perform SVD of Z"
46    set U \$Z
47    msvd_br U S V
48    puts "Singular values, S=[show \$S]"
49    puts "square S and divide by (number of observations)-1 to get the eigenvalues"
50    set SS [mprod \$S \$S]
51    set evals [mscale \$SS [expr 1.0/9.0]]
52    puts "eigenvalues=[show \$evals %8.4f]"
53    puts "eigenvectors:\n[show \$V %8.4f]"
54    puts "(the 3rd vector/column has inverted signs, thats ok the math still works)"
55    puts "\nverify V x transpose = I"
56    puts "As computed\n[show [mmult \$V [transpose \$V]] %20g]\n"
57    puts "Rounded off\n[show [mround [mmult \$V [transpose \$V]]]]"
58    #puts "Diagonal matrix of eigenvalues, L"
59    #set L [mdiag \$evals]
60    #
61
62    puts "\nsquare root of eigenvalues as a diagonal matrix"
63    proc sqrt x { return [expr sqrt(\$x)] }
64    set evals_sr [mat_unary_op \$evals sqrt]
65    set Lhalf [mdiag \$evals_sr]
66    puts [show \$Lhalf %8.4f]
67
68    puts "Factor Structure Matrix S"
69    set S [mmult \$V \$Lhalf]
70    puts [show \$S %8.4f]\n
71    set S2 [mrange \$S 0 1]
72    puts "Use first two eigenvalues\n[show \$S2 %8.4f]"
73    # Nash discusses better ways to compute this than multiplying SSt
74    # the NIST method is sensitive to round-off error
75    set S2S2 [mmult \$S2 [transpose \$S2]]
76    puts "SSt for first two components, communality is the diagonal"
77    puts [show \$S2S2 %8.4f]\n
78
79
80    # define reciprocal square root function
81    proc recipsqrt x { return [expr 1.0/sqrt(\$x)] }
82    # use it for Lrhalf calculation
83    set Lrhalf [mdiag [mat_unary_op \$evals recipsqrt]]
84
85    set B [mmult \$V \$Lrhalf]
86    puts "Factor score coefficients B Matrix:\n[show \$B %12.4f]"
87
88    puts "NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)"
89
90    puts "\nWork with first normalized observation row"
91    set z1 {2 3 0 0.065621795889 0.316227766017 -0.7481995314}
92    puts [show \$z1 %.4f]
93
94    puts "compute the \"factor scores\" from multiplying by B"
95    set t [mmult [transpose \$z1] \$B]
96    puts [show \$t %.4f]
97    puts "NIST implies you might chart these"
98
99    #set T2 [dotprod \$t \$t]
100    #puts "Hotelling T2=[format %.4f \$T2]"
101
102    puts "Compute the scores using V, these are more familiar"
103    set t [mmult [transpose \$z1] \$V]
104    puts [show \$t %.4f]
105    puts "Calculate T2 from the scores, sum ti*ti/evi"
106
107    set T2 [msum [transpose [mdiv [mprod \$t \$t] [transpose \$evals]]]]
108    puts "Hotelling T2=[format %.4f \$T2]"
109
110    puts "Fit z1 using first two principal components"
111    set p [mrange \$V 0 1]
112    puts p=\n[show \$p %10.4f]
113    set t [mmult [transpose \$z1] \$p]
114    set zhat [mmult \$t [transpose \$p]]
115    puts "  z1=[show \$z1 %10.6f]"
116    puts "zhat=[show \$zhat %10.6f]"
117    set diffs [msub [transpose \$z1] \$zhat]
118    puts "diff=[show \$diffs %10.6f]"
119    puts "Q statistic - distance from the model measurement for the first observation"
120    set Q [dotprod \$diffs \$diffs]
121    puts Q=[format %.4f \$Q]
122    puts "Some experts would compute a corrected Q that is larger since the
123observation was used in building the model.  The Q calculation just used
124properly applies to the analysis of new observations."
125    set corr [expr 10.0/(10.0 - 2 -1)]  ;# N/(N - Ncomp - 1)
126    puts "Corrected Q=[format %.4f [expr \$Q * \$corr]] (factor=[format %.4f \$corr])"
127    }
128
129return
130##########################################################################
131Console Printout
132##########################################################################
133Matrix of observation data X[10,3]
1347 4 3
1354 1 8
1366 3 5
1378 6 1
1388 5 7
1397 2 9
1405 3 3
1419 5 8
1427 4 5
1438 2 2
144Compute column norms
145Means=6.9 3.5 5.1
146Sigmas=1.523884 1.581139 2.806738
147Normalize observation data creating Z[10,3]
148  0.065622   0.316228  -0.748200
149 -1.903032  -1.581139   1.033228
150 -0.590596  -0.316228  -0.035629
151  0.721840   1.581139  -1.460771
152  0.721840   0.948683   0.676942
153  0.065622  -0.948683   1.389513
154 -1.246814  -0.316228  -0.748200
155  1.378058   0.948683   1.033228
156  0.065622   0.316228  -0.035629
157  0.721840  -0.948683  -1.104485
158
159Naive computation method that is sensitive to round-off error
160ZZ is (Ztranspose)(Z)
161ZZ=
162  9.000000   6.017916  -0.911824
163  6.017916   9.000000  -2.591349
164 -0.911824  -2.591349   9.000000
165the correlation matrix, R = (Zt)(Z)/(n-1)
166R=
167  1.0000   0.6687  -0.1013
168  0.6687   1.0000  -0.2879
169 -0.1013  -0.2879   1.0000
170Continuing the naive example use R for the eigensolution
171eigenvalues=  1.7688   0.9271   0.3041
172eigenvectors:
173  0.6420   0.3847  -0.6632
174  0.6864   0.0971   0.7207
175 -0.3417   0.9179   0.2017
176
177Good computation method- perform SVD of Z
178Singular values, S=3.98985804571 2.88854344819 1.65449373616
179square S and divide by (number of observations)-1 to get the eigenvalues
180eigenvalues=  1.7688   0.9271   0.3041
181eigenvectors:
182  0.6420   0.3847   0.6632
183  0.6864   0.0971  -0.7207
184 -0.3417   0.9179  -0.2017
185(the 3rd vector/column has inverted signs, thats ok the math still works)
186
187verify V x transpose = I
188As computed
189                   1        -5.55112e-017         1.38778e-016
190       -5.55112e-017                    1         5.55112e-017
191        1.38778e-016         5.55112e-017                    1
192
193Rounded off
1941.0 0.0 0.0
1950.0 1.0 0.0
1960.0 0.0 1.0
197
198square root of eigenvalues as a diagonal matrix
199  1.3300   0.0000   0.0000
200  0.0000   0.9628   0.0000
201  0.0000   0.0000   0.5515
202Factor Structure Matrix S
203  0.8538   0.3704   0.3658
204  0.9128   0.0935  -0.3975
205 -0.4544   0.8838  -0.1112
206
207Use first two eigenvalues
208  0.8538   0.3704
209  0.9128   0.0935
210 -0.4544   0.8838
211SSt for first two components, communality is the diagonal
212  0.8662   0.8140  -0.0606
213  0.8140   0.8420  -0.3321
214 -0.0606  -0.3321   0.9876
215
216Factor score coefficients B Matrix:
217      0.4827       0.3995       1.2026
218      0.5161       0.1009      -1.3069
219     -0.2569       0.9533      -0.3657
220NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)
221
222Work with first normalized observation row
2230.0656 0.3162 -0.7482
224compute the "factor scores" from multiplying by B
2250.3871 -0.6552 -0.0608
226NIST implies you might chart these
227Compute the scores using V, these are more familiar
2280.5148 -0.6308 -0.0335
229Calculate T2 from the scores, sum ti*ti/evi
230Hotelling T2=0.5828
231Fit z1 using first two principal components
232p=
233    0.6420     0.3847
234    0.6864     0.0971
235   -0.3417     0.9179
236  z1=  0.065622   0.316228  -0.748200
237zhat=  0.087847   0.292075  -0.754958
238diff= -0.022225   0.024153   0.006758
239Q statistic - distance from the model measurement for the first observation
240Q=0.0011
241Some experts would compute a corrected Q that is larger since the
242observation was used in building the model.  The Q calculation just used
243properly applies to the analysis of new observations.
244Corrected Q=0.0016 (factor=1.4286)
245</PRE>
246</html>
Note: See TracBrowser for help on using the repository browser.