class Statsample::Factor::PrincipalAxis
Principal Axis Analysis for a covariance or correlation matrix.
For PCA, use Statsample::Factor::PCA
Usage:¶ ↑
require 'statsample' a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale ds={'a'=>a,'b'=>b}.to_dataset cor_matrix=Statsample::Bivariate.correlation_matrix(ds) pa=Statsample::Factor::PrincipalAxis.new(cor_matrix) pa.iterate(1) pa.m => 1 pca.component_matrix => GSL::Matrix [ 9.622e-01 9.622e-01 ] pca.communalities => [0.962964636346122, 0.962964636346122]
References:¶ ↑
-
SPSS Manual
-
Smith, L. (2002). A tutorial on Principal Component Analysis. Available on courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
Principal Axis Analysis for a covariance or correlation matrix.
For PCA, use Statsample::Factor::PCA
Usage:¶ ↑
require 'statsample' a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale ds={'a'=>a,'b'=>b}.to_dataset cor_matrix=Statsample::Bivariate.correlation_matrix(ds) pa=Statsample::Factor::PrincipalAxis.new(cor_matrix) pa.iterate(1) pa.m => 1 pca.component_matrix => GSL::Matrix [ 9.622e-01 9.622e-01 ] pca.communalities => [0.962964636346122, 0.962964636346122]
References:¶ ↑
-
SPSS Manual
-
Smith, L. (2002). A tutorial on Principal Component Analysis. Available on courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
Constants
- DELTA
Minimum difference between succesive iterations on sum of communalities
- MAX_ITERATIONS
Maximum number of iterations
Attributes
Eigenvalues of factor analysis
Tolerance for iterations
Initial eigenvalues
Number of iterations required to converge
Number of factors. Set by default to the number of factors with eigenvalues > 1 (Kaiser criterion).
Warning: Kaiser criterion overfactors! Give yourself some time and use Horn's Parallel Analysis.
Maximum number of iterations
Name of analysis
Use SMC(squared multiple correlations) as diagonal. If false, use 1
Public Class Methods
# File lib/statsample/factor/principalaxis.rb, line 65 def initialize(matrix, opts=Hash.new) @matrix=matrix if @matrix.respond_to? :fields @fields=@matrix.fields else @fields=@matrix.row_size.times.map {|i| _("Variable %d") % (i+1)} end @n_variables=@matrix.row_size @name="" @m=nil @initial_eigenvalues=nil @initial_communalities=nil @component_matrix=nil @delta=DELTA @smc=true @max_iterations=MAX_ITERATIONS opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k } if @matrix.respond_to? :fields @variables_names=@matrix.fields else @variables_names=@n_variables.times.map {|i| "V#{i+1}"} end if @m.nil? pca=PCA.new(::Matrix.rows(@matrix.to_a)) @m=pca.m end @clean=true end
Returns two matrixes from a correlation matrix with regressors correlation matrix and criteria xy matrix.
# File lib/statsample/factor/principalaxis.rb, line 178 def self.separate_matrices(matrix, y) ac=[] matrix.column_size.times do |i| ac.push(matrix[y,i]) if i!=y end rxy=Matrix.columns([ac]) rows=[] matrix.row_size.times do |i| if i!=y row=[] matrix.row_size.times do |j| row.push(matrix[i,j]) if j!=y end rows.push(row) end end rxx=Matrix.rows(rows) [rxx,rxy] end
Public Instance Methods
Communality for all variables given m factors
# File lib/statsample/factor/principalaxis.rb, line 97 def communalities(m=nil) if m!=@m or @clean iterate(m) raise "Can't calculate comunality" if @communalities.nil? end @communalities end
Component matrix for m factors
# File lib/statsample/factor/principalaxis.rb, line 105 def component_matrix(m=nil) if m!=@m or @clean iterate(m) end @component_matrix end
# File lib/statsample/factor/principalaxis.rb, line 154 def initial_communalities if @initial_communalities.nil? if @smc # Based on O'Connors(2000) @initial_communalities=@matrix.inverse.diagonal.map{|i| 1-(1.quo(i))} @initial_communalities=@matrix.column_size.times.collect {|i| rxx , rxy = PrincipalAxis.separate_matrices(@matrix,i) matrix=(rxy.t*rxx.inverse*rxy) matrix[0,0] } else @initial_communalities=[1.0]*@matrix.column_size end end @initial_communalities end
Iterate to find the factors
# File lib/statsample/factor/principalaxis.rb, line 112 def iterate(m=nil) @clean=false m||=@m @m=m t = @max_iterations work_matrix=@matrix.to_a prev_com=initial_communalities pca=PCA.new(::Matrix.rows(work_matrix)) @initial_eigenvalues=pca.eigenvalues prev_sum=prev_com.inject(0) {|ac,v| ac+v} @iterations=0 t.times do |i| "#{@name}: Iteration #{i}" if $DEBUG @iterations+=1 prev_com.each_with_index{|v,it| work_matrix[it][it]=v } pca=PCA.new(::Matrix.rows(work_matrix)) @communalities=pca.communalities(m) @eigenvalues=pca.eigenvalues com_sum = @communalities.inject(0) {|ac,v| ac+v} #jump=true break if (com_sum-prev_sum).abs < @delta @communalities.each_with_index do |v2,i2| raise "Variable #{i2} with communality > 1" if v2>1.0 end prev_sum=com_sum prev_com=@communalities end @component_matrix=pca.component_matrix(m) @component_matrix.extend CovariateMatrix @component_matrix.name=_("Factor Matrix") @component_matrix.fields_x = @variables_names @component_matrix.fields_y = m.times.map {|i| "factor_#{i+1}"} end
# File lib/statsample/factor/principalaxis.rb, line 197 def report_building(generator) iterate if @clean generator.section(:name=>@name) do |s| s.text _("Number of factors: %d") % m s.text _("Iterations: %d") % @iterations s.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t| communalities(m).each_with_index {|com,i| t.row([@fields[i], sprintf("%0.4f", initial_communalities[i]), sprintf("%0.3f", com)]) } end s.table(:name=>_("Total Variance"), :header=>[_("Factor"), _("I.E.Total"), _("I.E. %"), _("I.E.Cum. %"), _("S.L.Total"), _("S.L. %"), _("S.L.Cum. %") ]) do |t| ac_eigen,ac_i_eigen=0,0 @initial_eigenvalues.each_with_index {|eigenvalue,i| ac_i_eigen+=eigenvalue ac_eigen+=@eigenvalues[i] new_row=[ _("Factor %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)), sprintf("%0.3f",ac_i_eigen*100.quo(@n_variables)) ] if i<@m new_row.concat [ sprintf("%0.3f", @eigenvalues[i]), sprintf("%0.3f%%", @eigenvalues[i]*100.quo(@n_variables)), sprintf("%0.3f",ac_eigen*100.quo(@n_variables)) ] else new_row.concat ["","",""] end t.row new_row } end s.parse_element(component_matrix) end end