class StatsampleFactorTestCase

require 'rserve' require 'statsample/rserve_extension'

require 'rserve' require 'statsample/rserve_extension'

require 'rserve' require 'statsample/rserve_extension'

require 'rserve' require 'statsample/rserve_extension'

Public Instance Methods

pca_set(pca,type) click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 156
def pca_set(pca,type)
    expected_eigenvalues=[1.284, 0.0490]
    expected_eigenvalues.each_with_index{|ev,i|
      assert_in_delta(ev,pca.eigenvalues[i],0.001)
    }
    expected_communality=[0.590, 0.694]
    expected_communality.each_with_index{|ev,i|
      assert_in_delta(ev,pca.communalities[i],0.001)
    }
    expected_cm=[0.768, 0.833]
    obs=pca.component_matrix_correlation(1).column(0).to_a
    expected_cm.each_with_index{|ev,i|
      assert_in_delta(ev,obs[i],0.001)
    }

    assert(pca.summary)
end
principalcomponents(gsl) click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 85
def principalcomponents(gsl)
  ran=Distribution::Normal.rng
  samples=50
  x1=samples.times.map { ran.call()}.to_scale
  x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale
  ds={'x1'=>x1,'x2'=>x2}.to_dataset
  
  cm=ds.correlation_matrix
  r=cm[0,1]
  pca=Statsample::Factor::PCA.new(cm,:m=>2,:use_gsl=>gsl)
  assert_in_delta(1+r,pca.eigenvalues[0],1e-10)
  assert_in_delta(1-r,pca.eigenvalues[1],1e-10)
  hs=1.0 / Math.sqrt(2)
  assert_equal_vector(Vector[1, 1]*hs, pca.eigenvectors[0])
  m_1=gsl ? Vector[-1,1] : Vector[1,-1]
  
  assert_equal_vector(hs*m_1, pca.eigenvectors[1])    
  
  pcs=pca.principal_components(ds)
  exp_pc_1=ds.collect_with_index {|row,i|
    hs*(row['x1']+row['x2'])
  }
  exp_pc_2=ds.collect_with_index {|row,i|
    gsl ? hs*(row['x2']-row['x1']) : hs*(row['x1']-row['x2'])

  }
  assert_similar_vector(exp_pc_1, pcs["PC_1"])
  assert_similar_vector(exp_pc_2, pcs["PC_2"])
end
setup() click to toggle source

Based on Hardle and Simar

# File pkg/statsample-1.4.0/test/test_factor.rb, line 8
def setup
  @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
end
test_antiimage() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 114
def test_antiimage
  cor=Matrix[[1,0.964, 0.312],[0.964,1,0.411],[0.312,0.411,1]]
  expected=Matrix[[0.062,-0.057, 0.074],[-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]]
  ai=Statsample::Factor.anti_image_covariance_matrix(cor)
  assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected.to_s} not equal to #{ai.to_s}")
end
test_covariance_matrix() click to toggle source

Based on Hurdle example

# File pkg/statsample-1.4.0/test/test_factor.rb, line 12
def test_covariance_matrix
  ds=Statsample::PlainText.read(@fixtures_dir+"/bank2.dat", %w{v1 v2 v3 v4 v5 v6})
  ds.fields.each {|f|
    ds[f]=ds[f].centered
  }
  cm=ds.covariance_matrix
  pca =Statsample::Factor::PCA.new( cm, :m=>6)
  #puts pca.summary
  #puts pca.feature_matrix
  exp_eig=[2.985, 0.931,0.242, 0.194, 0.085, 0.035].to_scale
  assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1)
  pcs=pca.principal_components(ds)
  k=6
  comp_matrix=pca.component_matrix()
  k.times {|i|
    pc_id="PC_#{i+1}"
    k.times {|j| # variable
        ds_id="v#{j+1}"
        r= Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
        assert_in_delta( r, comp_matrix[j,i]) 
      }
  }
  
end
test_kmo() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 120
def test_kmo
    @v1=[1 ,2 ,3 ,4 ,7 ,8 ,9 ,10,14,15,20,50,60,70].to_scale
    @v2=[5 ,6 ,11,12,13,16,17,18,19,20,30,0,0,0].to_scale
    @v3=[10,3 ,20,30,40,50,80,10,20,30,40,2,3,4].to_scale
    # KMO: 0.490
    ds={'v1'=>@v1,'v2'=>@v2,'v3'=>@v3}.to_dataset
    cor=Statsample::Bivariate.correlation_matrix(ds)
   kmo=Statsample::Factor.kmo(cor)
   assert_in_delta(0.667, kmo,0.001)
   assert_in_delta(0.81, Statsample::Factor.kmo(harman_817),0.01)
   
end
test_kmo_univariate() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 132
def test_kmo_univariate
  m=harman_817
  expected=[0.73,0.76,0.84,0.87,0.53,0.93,0.78,0.86]
  m.row_size.times.map {|i|
    assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m,i),0.01)
  }
end
test_parallelanalysis() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor_pa.rb, line 45
def test_parallelanalysis
  pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
  assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
  assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
  assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
  assert(pa.summary.size>0)
end
test_parallelanalysis_with_data() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor_pa.rb, line 11
def test_parallelanalysis_with_data
  if Statsample.has_gsl?
    samples=100
    variables=10
    iterations=50
    rng = Distribution::Normal.rng
    f1=samples.times.collect {rng.call}.to_scale
    f2=samples.times.collect {rng.call}.to_scale    
    vectors={}
    variables.times do |i|
      if i<5
        vectors["v#{i}"]=samples.times.collect {|nv|
          f1[nv]*5+f2[nv]*2+rng.call
        }.to_scale
      else
        vectors["v#{i}"]=samples.times.collect {|nv|
          f2[nv]*5+f1[nv]*2+rng.call
        }.to_scale
      end
      
    end
    ds=vectors.to_dataset
    
    pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
    pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
    3.times do |n|
      var="ev_0000#{n+1}"
      assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean,0.05)
    end
  else
    skip("Too slow without GSL")
  end
  
end
test_pca() click to toggle source

Tested with SPSS and R

# File pkg/statsample-1.4.0/test/test_factor.rb, line 140
def test_pca
    a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
    b=[2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale
    a.recode! {|c| c-a.mean}
    b.recode! {|c| c-b.mean}
    ds={'a'=>a,'b'=>b}.to_dataset
    cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
    if Statsample.has_gsl?
      pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>true)
      pca_set(pca,"gsl")
    else
      skip("Eigenvalues could be calculated with GSL (requires gsl)")
    end
    pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>false)
    pca_set(pca,"ruby")
end
test_principalaxis() click to toggle source

Tested with R

# File pkg/statsample-1.4.0/test/test_factor.rb, line 175
def test_principalaxis
    matrix=::Matrix[
    [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807],  [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
    
    
    fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1, :max_iterations=>50)

    cm=::Matrix[[0.923],[0.912],[0.507],[0.483]]
    
    assert_equal_matrix(cm,fa.component_matrix,0.001)
    
    h2=[0.852,0.832,0.257,0.233]
    h2.each_with_index{|ev,i|
      assert_in_delta(ev,fa.communalities[i],0.001)
    }
    eigen1=2.175
    assert_in_delta(eigen1, fa.eigenvalues[0],0.001)
    assert(fa.summary.size>0)
    fa=Statsample::Factor::PrincipalAxis.new(matrix,:smc=>false)
          
    assert_raise RuntimeError do
      fa.iterate
    end

end
test_principalcomponents() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 80
def test_principalcomponents()
principalcomponents(true)
principalcomponents(false)

end
test_principalcomponents_ruby_gsl() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 36
  def test_principalcomponents_ruby_gsl
    
    ran=Distribution::Normal.rng
    
#    @r=::Rserve::Connection.new

    samples=20
    [3,5,7].each {|k|
      v={}
      v["x0"]=samples.times.map { ran.call()}.to_scale.centered
      (1...k).each {|i|
        v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered
      }
      
      ds=v.to_dataset
      cm=ds.covariance_matrix
#      @r.assign('ds',ds)
#      @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
#      puts "eigenvalues"
#      puts @r.eval('v').to_ruby.to_s
      pca_ruby=Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>false )
      pca_gsl =Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>true  )
      pc_ruby = pca_ruby.principal_components(ds)
      pc_gsl  = pca_gsl.principal_components(ds)
      # Test component matrix correlation!
      cm_ruby=pca_ruby.component_matrix
      #puts cm_ruby.summary
      k.times {|i|
        pc_id="PC_#{i+1}"
        assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i],1e-10)
        # Revert gsl component values
        pc_gsl_data= (pc_gsl[pc_id][0]-pc_ruby[pc_id][0]).abs>1e-6 ? pc_gsl[pc_id].recode {|v| -v} : pc_gsl[pc_id] 
        assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6,"PC for #{k} variables")
        if false
        k.times {|j| # variable
          ds_id="x#{j}"
          r= Statsample::Bivariate.correlation(ds[ds_id],pc_ruby[pc_id])
          puts "#{pc_id}-#{ds_id}:#{r}"
        }
        end
      }
    }
    #@r.close
  end
test_rotation_varimax() click to toggle source
# File pkg/statsample-1.4.0/test/test_factor.rb, line 202
def test_rotation_varimax
  a = Matrix[ [ 0.4320,  0.8129,  0.3872]  ,
    [0.7950, -0.5416,  0.2565]  ,
    [0.5944,  0.7234, -0.3441],
  [0.8945, -0.3921, -0.1863] ]

  expected= Matrix[[-0.0204423,     0.938674,    -0.340334],
    [0.983662, 0.0730206, 0.134997],
    [0.0826106, 0.435975, -0.893379],
  [0.939901, -0.0965213, -0.309596]]
  varimax=Statsample::Factor::Varimax.new(a)
  assert(!varimax.rotated.nil?, "Rotated shouldn't be empty")
  assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty")
  assert(!varimax.h2.nil?, "H2 shouldn't be empty")
  
  assert_equal_matrix(expected,varimax.rotated,1e-6)
  assert(varimax.summary.size>0)
end