class Statsample::Factor::PCA

Principal Component Analysis (PCA) of a covariance or correlation matrix..

NOTE: Sign of second and later eigenvalues could be different using Ruby or GSL, so values for PCs and component matrix should differ, because extendmatrix and gsl's methods to calculate eigenvectors are different. Using R is worse, cause first eigenvector could have negative values! For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis

Usage:

require 'statsample'
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
ds={'a'=>a,'b'=>b}.to_dataset
cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
pca=Statsample::Factor::PCA.new(cor_matrix)
pca.m
=> 1
pca.eigenvalues
=> [1.92592927269225, 0.0740707273077545]
pca.component_matrix
=> GSL::Matrix
[  9.813e-01 
  9.813e-01 ]
pca.communalities
=> [0.962964636346122, 0.962964636346122]

References:

Principal Component Analysis (PCA) of a covariance or correlation matrix..

NOTE: Sign of second and later eigenvalues could be different using Ruby or GSL, so values for PCs and component matrix should differ, because extendmatrix and gsl's methods to calculate eigenvectors are different. Using R is worse, cause first eigenvector could have negative values! For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis

Usage:

require 'statsample'
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
ds={'a'=>a,'b'=>b}.to_dataset
cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
pca=Statsample::Factor::PCA.new(cor_matrix)
pca.m
=> 1
pca.eigenvalues
=> [1.92592927269225, 0.0740707273077545]
pca.component_matrix
=> GSL::Matrix
[  9.813e-01 
  9.813e-01 ]
pca.communalities
=> [0.962964636346122, 0.962964636346122]

References:

Attributes

m[RW]

Number of factors. Set by default to the number of factors with eigen values > 1

matrix_type[RW]
name[RW]

Name of analysis

rotation_type[RW]

Type of rotation. By default, Statsample::Factor::Rotation::Varimax

summary_parallel_analysis[RW]

Add to the summary a parallel analysis report

summary_rotation[RW]

Add to the summary a rotation report

use_gsl[RW]

Use GSL if available

Public Class Methods

new(matrix, opts=Hash.new) click to toggle source
# File lib/statsample/factor/pca.rb, line 53
def initialize(matrix, opts=Hash.new)
  @use_gsl=nil
  @name=_("Principal Component Analysis")
  @matrix=matrix
  @n_variables=@matrix.column_size      
  @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
  
  @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
  
  @m=nil
  
  @rotation_type=Statsample::Factor::Varimax
  
  opts.each{|k,v|
    self.send("#{k}=",v) if self.respond_to? k
  }
  if @use_gsl.nil?
    @use_gsl=Statsample.has_gsl?
  end
  if @matrix.respond_to? :fields
    @variables_names=@matrix.fields
  else
    @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
  end
  calculate_eigenpairs
  
  if @m.nil?
    # Set number of factors with eigenvalues > 1
    @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
  end
  
end

Public Instance Methods

communalities(m=nil) click to toggle source
# File lib/statsample/factor/pca.rb, line 185
def communalities(m=nil)
  
  m||=@m
  h=[]
  @n_variables.times do |i|
    sum=0
    m.times do |j|
      sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
    end
    h.push(sum)
  end
  h
end
component_matrix(m=nil) click to toggle source
# File lib/statsample/factor/pca.rb, line 142
def component_matrix(m=nil)
  var="component_matrix_#{matrix_type}"
  send(var,m)
end
component_matrix_correlation(m=nil) click to toggle source

Matrix with correlations between components and variables

# File lib/statsample/factor/pca.rb, line 167
def component_matrix_correlation(m=nil)
  m||=@m
  raise "m should be > 0" if m<1
  omega_m=::Matrix.build(@n_variables, m) {0}
  gammas=[]
  m.times {|i|
    omega_m.column=i, @eigenpairs[i][1]
    gammas.push(Math::sqrt(@eigenpairs[i][0]))
  }
  gamma_m=::Matrix.diagonal(*gammas)
  cm=(omega_m*(gamma_m)).to_matrix
  
  cm.extend CovariateMatrix
  cm.name=_("Component matrix")
  cm.fields_x = @variables_names
  cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
  cm
end
component_matrix_covariance(m=nil) click to toggle source

Matrix with correlations between components and variables. Based on Härdle & Simar (2003, p.243)

# File lib/statsample/factor/pca.rb, line 148
def component_matrix_covariance(m=nil)
  m||=@m
  raise "m should be > 0" if m<1
  ff=feature_matrix(m)
  cm=::Matrix.build(@n_variables, m) {0}
  @n_variables.times {|i|
    m.times {|j|
      cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
    }
  }
  cm.extend NamedMatrix
  cm.name=_("Component matrix (from covariance)")
  cm.fields_x = @variables_names
  cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
  
  cm
end
eigenvalues() click to toggle source

Array with eigenvalues

# File lib/statsample/factor/pca.rb, line 199
def eigenvalues
  @eigenpairs.collect {|c| c[0] }
end
eigenvectors() click to toggle source
# File lib/statsample/factor/pca.rb, line 202
def eigenvectors
  @eigenpairs.collect {|c| 
    @use_gsl ? c[1].to_gsl : c[1].to_vector
  }
end
feature_matrix(m=nil) click to toggle source

Feature matrix for m factors Returns m eigenvectors as columns. So, i=variable, j=component

# File lib/statsample/factor/pca.rb, line 103
def feature_matrix(m=nil)
  m||=@m
  if @use_gsl
    omega_m=GSL::Matrix.zeros(@n_variables,m)
    ev=eigenvectors
    m.times do |i|
      omega_m.set_column(i,ev[i])
    end
    omega_m
  else
    omega_m=::Matrix.build(@n_variables, m) {0}
    m.times do |i|
      omega_m.column= i, @eigenpairs[i][1]
    end
    omega_m
  end
end
principal_components(input, m=nil) click to toggle source

Returns Principal Components for input matrix or dataset The number of PC to return is equal to parameter m. If m isn't set, m set to number of PCs selected at object creation. Use covariance matrix

# File lib/statsample/factor/pca.rb, line 125
def principal_components(input, m=nil)
  if @use_gsl
    data_matrix=input.to_gsl
  else
    data_matrix=input.to_matrix
  end
  m||=@m
  
  raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
  
  fv=feature_matrix(m)
  pcs=(fv.transpose*data_matrix.transpose).transpose
  
  pcs.extend Statsample::NamedMatrix
  pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
  pcs.to_dataset
end
rotation() click to toggle source
# File lib/statsample/factor/pca.rb, line 85
def rotation
  @rotation_type.new(component_matrix)
end
total_eigenvalues() click to toggle source
# File lib/statsample/factor/pca.rb, line 88
def total_eigenvalues
  eigenvalues.inject(0) {|ac,v| ac+v}
end