source: trunk/la1.0/NIST.html @ 931

Last change on this file since 931 was 931, checked in by toby, 11 years ago

include rest of files

File size: 8.1 KB
Line 
1<html>
2<head>
3<title>NIST PCA Example</title>
4</head>
5<P>
6<H2>NIST PCA Example</H2>
7<pre>
8
9# A Principal Components Analysis example
10#
11# walk through the NIST/Sematech PCA example data section 6.5
12# http://www.itl.nist.gov/div898/handbook/
13#
14# This file is part of the Hume La Package
15# (C)Copyright 2001, Hume Integration Services
16#
17# These calculations use the Hume Linear Algebra package, La
18#
19package require La
20catch { namespace import La::*}
21
22
23# Look at the bottom of this file to see the console output of this procedure.
24
25proc nist_pca {} {
26    puts {Matrix of observation data X[10,3]}
27    set X {2 10 3 7 4 3 4 1 8 6 3 5 8 6 1 8 5 7 7 2 9 5 3 3 9 5 8 7 4 5 8 2 2}
28    puts [show $X]
29    puts {Compute column norms}
30    mnorms_br X Xmeans Xsigmas 
31    puts "Means=[show $Xmeans]"
32    puts "Sigmas=[show $Xsigmas %.6f]"
33    puts {Normalize observation data creating Z[10,3]}
34    puts [show [set Z [mnormalize $X $Xmeans $Xsigmas]] %10.6f]\n
35    puts {Naive computation method that is sensitive to round-off error}
36    puts {ZZ is (Ztranspose)(Z) }
37    puts "ZZ=\n[show [set ZZ [mmult [transpose $Z] $Z]] %10.6f]"
38    puts {the correlation matrix, R = (Zt)(Z)/(n-1)}
39    puts "R=\n[show [set R [mscale $ZZ [expr 1/9.0]]] %8.4f]"
40    puts {Continuing the naive example use R for the eigensolution}
41    set V $R
42    mevsvd_br V evals
43    puts "eigenvalues=[show $evals %8.4f]"
44    puts "eigenvectors:\n[show $V %8.4f]\n"
45    puts "Good computation method- perform SVD of Z"
46    set U $Z
47    msvd_br U S V
48    puts "Singular values, S=[show $S]"
49    puts "square S and divide by (number of observations)-1 to get the eigenvalues"
50    set SS [mprod $S $S]
51    set evals [mscale $SS [expr 1.0/9.0]]
52    puts "eigenvalues=[show $evals %8.4f]"
53    puts "eigenvectors:\n[show $V %8.4f]"
54    puts "(the 3rd vector/column has inverted signs, thats ok the math still works)"
55    puts "\nverify V x transpose = I"
56    puts "As computed\n[show [mmult $V [transpose $V]] %20g]\n"
57    puts "Rounded off\n[show [mround [mmult $V [transpose $V]]]]"
58    #puts "Diagonal matrix of eigenvalues, L"
59    #set L [mdiag $evals]
60    #
61   
62    puts "\nsquare root of eigenvalues as a diagonal matrix"
63    proc sqrt x { return [expr sqrt($x)] }
64    set evals_sr [mat_unary_op $evals sqrt]
65    set Lhalf [mdiag $evals_sr]
66    puts [show $Lhalf %8.4f]
67
68    puts "Factor Structure Matrix S"
69    set S [mmult $V $Lhalf]
70    puts [show $S %8.4f]\n
71    set S2 [mrange $S 0 1]
72    puts "Use first two eigenvalues\n[show $S2 %8.4f]"
73    # Nash discusses better ways to compute this than multiplying SSt
74    # the NIST method is sensitive to round-off error
75    set S2S2 [mmult $S2 [transpose $S2]]
76    puts "SSt for first two components, communality is the diagonal"
77    puts [show $S2S2 %8.4f]\n
78
79
80    # define reciprocal square root function
81    proc recipsqrt x { return [expr 1.0/sqrt($x)] }
82    # use it for Lrhalf calculation
83    set Lrhalf [mdiag [mat_unary_op $evals recipsqrt]]
84
85    set B [mmult $V $Lrhalf]
86    puts "Factor score coefficients B Matrix:\n[show $B %12.4f]"
87
88    puts "NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)"
89
90    puts "\nWork with first normalized observation row"
91    set z1 {2 3 0 0.065621795889 0.316227766017 -0.7481995314}
92    puts [show $z1 %.4f]
93
94    puts "compute the \"factor scores\" from multiplying by B"
95    set t [mmult [transpose $z1] $B]
96    puts [show $t %.4f]
97    puts "NIST implies you might chart these"
98
99    #set T2 [dotprod $t $t]
100    #puts "Hotelling T2=[format %.4f $T2]"
101   
102    puts "Compute the scores using V, these are more familiar"
103    set t [mmult [transpose $z1] $V]
104    puts [show $t %.4f]
105    puts "Calculate T2 from the scores, sum ti*ti/evi"
106
107    set T2 [msum [transpose [mdiv [mprod $t $t] [transpose $evals]]]]
108    puts "Hotelling T2=[format %.4f $T2]"
109 
110    puts "Fit z1 using first two principal components"
111    set p [mrange $V 0 1]
112    puts p=\n[show $p %10.4f]
113    set t [mmult [transpose $z1] $p]
114    set zhat [mmult $t [transpose $p]]
115    puts "  z1=[show $z1 %10.6f]"
116    puts "zhat=[show $zhat %10.6f]"
117    set diffs [msub [transpose $z1] $zhat]
118    puts "diff=[show $diffs %10.6f]"
119    puts "Q statistic - distance from the model measurement for the first observation"
120    set Q [dotprod $diffs $diffs]
121    puts Q=[format %.4f $Q]
122    puts "Some experts would compute a corrected Q that is larger since the
123observation was used in building the model.  The Q calculation just used
124properly applies to the analysis of new observations."
125    set corr [expr 10.0/(10.0 - 2 -1)]  ;# N/(N - Ncomp - 1)
126    puts "Corrected Q=[format %.4f [expr $Q * $corr]] (factor=[format %.4f $corr])"
127    }
128
129return
130##########################################################################
131Console Printout
132##########################################################################
133Matrix of observation data X[10,3]
1347 4 3
1354 1 8
1366 3 5
1378 6 1
1388 5 7
1397 2 9
1405 3 3
1419 5 8
1427 4 5
1438 2 2
144Compute column norms
145Means=6.9 3.5 5.1
146Sigmas=1.523884 1.581139 2.806738
147Normalize observation data creating Z[10,3]
148  0.065622   0.316228  -0.748200
149 -1.903032  -1.581139   1.033228
150 -0.590596  -0.316228  -0.035629
151  0.721840   1.581139  -1.460771
152  0.721840   0.948683   0.676942
153  0.065622  -0.948683   1.389513
154 -1.246814  -0.316228  -0.748200
155  1.378058   0.948683   1.033228
156  0.065622   0.316228  -0.035629
157  0.721840  -0.948683  -1.104485
158
159Naive computation method that is sensitive to round-off error
160ZZ is (Ztranspose)(Z)
161ZZ=
162  9.000000   6.017916  -0.911824
163  6.017916   9.000000  -2.591349
164 -0.911824  -2.591349   9.000000
165the correlation matrix, R = (Zt)(Z)/(n-1)
166R=
167  1.0000   0.6687  -0.1013
168  0.6687   1.0000  -0.2879
169 -0.1013  -0.2879   1.0000
170Continuing the naive example use R for the eigensolution
171eigenvalues=  1.7688   0.9271   0.3041
172eigenvectors:
173  0.6420   0.3847  -0.6632
174  0.6864   0.0971   0.7207
175 -0.3417   0.9179   0.2017
176
177Good computation method- perform SVD of Z
178Singular values, S=3.98985804571 2.88854344819 1.65449373616
179square S and divide by (number of observations)-1 to get the eigenvalues
180eigenvalues=  1.7688   0.9271   0.3041
181eigenvectors:
182  0.6420   0.3847   0.6632
183  0.6864   0.0971  -0.7207
184 -0.3417   0.9179  -0.2017
185(the 3rd vector/column has inverted signs, thats ok the math still works)
186
187verify V x transpose = I
188As computed
189                   1        -5.55112e-017         1.38778e-016
190       -5.55112e-017                    1         5.55112e-017
191        1.38778e-016         5.55112e-017                    1
192
193Rounded off
1941.0 0.0 0.0
1950.0 1.0 0.0
1960.0 0.0 1.0
197
198square root of eigenvalues as a diagonal matrix
199  1.3300   0.0000   0.0000
200  0.0000   0.9628   0.0000
201  0.0000   0.0000   0.5515
202Factor Structure Matrix S
203  0.8538   0.3704   0.3658
204  0.9128   0.0935  -0.3975
205 -0.4544   0.8838  -0.1112
206
207Use first two eigenvalues
208  0.8538   0.3704
209  0.9128   0.0935
210 -0.4544   0.8838
211SSt for first two components, communality is the diagonal
212  0.8662   0.8140  -0.0606
213  0.8140   0.8420  -0.3321
214 -0.0606  -0.3321   0.9876
215
216Factor score coefficients B Matrix:
217      0.4827       0.3995       1.2026
218      0.5161       0.1009      -1.3069
219     -0.2569       0.9533      -0.3657
220NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)
221
222Work with first normalized observation row
2230.0656 0.3162 -0.7482
224compute the "factor scores" from multiplying by B
2250.3871 -0.6552 -0.0608
226NIST implies you might chart these
227Compute the scores using V, these are more familiar
2280.5148 -0.6308 -0.0335
229Calculate T2 from the scores, sum ti*ti/evi
230Hotelling T2=0.5828
231Fit z1 using first two principal components
232p=
233    0.6420     0.3847
234    0.6864     0.0971
235   -0.3417     0.9179
236  z1=  0.065622   0.316228  -0.748200
237zhat=  0.087847   0.292075  -0.754958
238diff= -0.022225   0.024153   0.006758
239Q statistic - distance from the model measurement for the first observation
240Q=0.0011
241Some experts would compute a corrected Q that is larger since the
242observation was used in building the model.  The Q calculation just used
243properly applies to the analysis of new observations.
244Corrected Q=0.0016 (factor=1.4286)
245</PRE>
246</html>
Note: See TracBrowser for help on using the repository browser.