class Statsample::Regression::Multiple::BaseEngine

Base class for Multiple Regression Engines

Base class for Multiple Regression Engines

Attributes

cases[R]

Minimum number of valid cases for pairwise correlations

digits[RW]
name[RW]

Name of analysis

total_cases[R]

Number of total cases (dataset.cases)

valid_cases[R]

Number of valid cases (listwise)

Public Class Methods

new(ds, y_var, opts = Hash.new) click to toggle source
# File lib/statsample/regression/multiple/baseengine.rb, line 20
# Sets up the state shared by the multiple regression engines.
#
# ds    :: dataset; must respond to +fields+ and +cases+.
# y_var :: name of the dependent variable.
# opts  :: optional settings, merged over the default <tt>:digits => 3</tt>.
#          Each key is applied through its writer (<tt>key=</tt>) when the
#          object has one; keys without a writer are ignored.
def initialize(ds, y_var, opts = Hash.new)
  @ds = ds
  # One field is discounted from the predictor count
  # (assumes y_var is one of ds.fields -- TODO confirm).
  @predictors_n = @ds.fields.size - 1
  @total_cases = @ds.cases
  @cases = @ds.cases
  @y_var = y_var
  @r2 = nil
  @name = _("Multiple Regression:  %s over %s") % [ds.fields.join(","), @y_var]

  opts_default = {:digits => 3}
  @opts = opts_default.merge opts

  @opts.each do |k, v|
    writer = "#{k}="
    # Gate on the writer, not the reader: a read-only attribute such as
    # +cases+ answers respond_to?(:cases) but has no +cases=+, so checking
    # the reader made send raise NoMethodError for such keys.
    self.send(writer, v) if self.respond_to?(writer, true)
  end
end
univariate?() click to toggle source
# File lib/statsample/regression/multiple/baseengine.rb, line 17
# Class-level flag; this base implementation always answers +true+.
def self.univariate?
  true
end

Public Instance Methods

anova() click to toggle source

Calculate F Test

# File lib/statsample/regression/multiple/baseengine.rb, line 39
# Returns the one-way ANOVA object used for the F test of the regression.
# The object is built on first use from ssr/sse and df_r/df_e and then
# memoized in @anova.
def anova
  return @anova if @anova

  @anova = Statsample::Anova::OneWay.new(
    :ss_num => ssr,
    :ss_den => sse,
    :df_num => df_r,
    :df_den => df_e,
    :name_numerator => _("Regression"),
    :name_denominator => _("Error"),
    :name => "ANOVA"
  )
end
assign_names(c) click to toggle source
# File lib/statsample/regression/multiple/baseengine.rb, line 212
# Pairs each entry of @fields with the element of +c+ at the same index.
#
# c :: any object indexable by integer position.
#
# Returns a Hash mapping field name => c[i].
def assign_names(c)
  @fields.each_with_index.each_with_object({}) do |(field, idx), named|
    named[field] = c[idx]
  end
end
coeffs_se() click to toggle source

Standard Error for coefficients

# File lib/statsample/regression/multiple/baseengine.rb, line 149
# Standard error of every coefficient.
#
# For each key of +coeffs+ the value is
#   sqrt(MSE / (sum_of_squares(variable) * tolerance(variable)))
# where MSE is sse / df_e.
#
# Returns a Hash keyed like +coeffs+.
def coeffs_se
  error_mean_square = sse.quo(df_e)
  coeffs.each_with_object({}) do |(name, _value), errors|
    spread = @ds[name].sum_of_squares * tolerance(name)
    errors[name] = Math.sqrt(error_mean_square / spread)
  end
end
coeffs_t() click to toggle source

T values for coeffs

# File lib/statsample/regression/multiple/baseengine.rb, line 99
# t value of every coefficient: the coefficient divided by its standard
# error (see coeffs_se).
#
# Returns a Hash keyed like +coeffs+.
def coeffs_t
  errors = coeffs_se
  coeffs.each_with_object({}) do |(name, value), t_values|
    t_values[name] = value / errors[name]
  end
end
coeffs_tolerances() click to toggle source

Tolerances for each coefficient

# File lib/statsample/regression/multiple/baseengine.rb, line 142
# Tolerance of every entry in @fields.
#
# Returns a Hash mapping field name => tolerance(field).
def coeffs_tolerances
  @fields.each_with_object({}) do |field, result|
    result[field] = tolerance(field)
  end
end
constant_se() click to toggle source

Standard error for constant

# File lib/statsample/regression/multiple/baseengine.rb, line 182
# Standard error of the constant: the [0, 0] entry of
# estimated_variance_covariance_matrix.
def constant_se
  se_matrix = estimated_variance_covariance_matrix
  se_matrix[0, 0]
end
constant_t() click to toggle source

T for constant

# File lib/statsample/regression/multiple/baseengine.rb, line 178
# t value of the constant: constant / constant_se.
#
# Returns nil when constant_se is nil. estimated_variance_covariance_matrix
# yields nil for negative entries, and report_building already tests
# +constant_t.nil?+, so dividing by nil (a TypeError) is avoided here.
def constant_t
  se = constant_se
  return nil if se.nil?

  constant.to_f / se
end
df_e() click to toggle source

Degrees of freedom for error

# File lib/statsample/regression/multiple/baseengine.rb, line 120
# Degrees of freedom for error: valid cases minus the number of
# predictors minus one.
def df_e
  # The extra unit is presumably the constant term -- TODO confirm.
  extra_term = 1
  @valid_cases - @predictors_n - extra_term
end
df_r() click to toggle source

Degrees of freedom for regression

# File lib/statsample/regression/multiple/baseengine.rb, line 116
# Degrees of freedom for regression: the number of predictors
# (@predictors_n, set in the initializer).
def df_r
  @predictors_n
end
estimated_variance_covariance_matrix() click to toggle source

Estimated Variance-Covariance Matrix. Used to calculate the standard error of the constant.

# File lib/statsample/regression/multiple/baseengine.rb, line 165
# Builds the matrix used to obtain the standard error of the constant.
#
# The design matrix has a leading column of 1.0 (one per valid case)
# followed by the data of every field of @ds_valid except @y_var. The
# result is inverse(X' * X) * mse with every entry replaced by its square
# root; negative entries become nil.
#
# Returns a Matrix.
def estimated_variance_covariance_matrix
  design_columns = []
  @ds_valid.fields.each do |field|
    vector = @ds_valid[field]
    next if field == @y_var

    design_columns << vector.data
  end
  # The column of ones goes first.
  design_columns.unshift([1.0] * @valid_cases)

  design = Matrix.columns(design_columns)
  scaled = (design.t * design).inverse * mse
  scaled.collect { |cell| cell >= 0 ? Math.sqrt(cell) : nil }
end
f() click to toggle source

Fisher's F statistic from the ANOVA

# File lib/statsample/regression/multiple/baseengine.rb, line 124
# F value, delegated to the object returned by +anova+.
def f
  f_test = anova
  f_test.f
end
mse() click to toggle source

Mean Square Error

# File lib/statsample/regression/multiple/baseengine.rb, line 112
# Mean square error: sse divided by df_e.
def mse
  error_sum = sse
  error_sum.quo(df_e)
end
msr() click to toggle source

Mean square Regression

# File lib/statsample/regression/multiple/baseengine.rb, line 108
# Mean square of the regression: ssr divided by df_r.
def msr
  regression_sum = ssr
  regression_sum.quo(df_r)
end
predicted() click to toggle source

Retrieves a vector with predicted values for y

# File lib/statsample/regression/multiple/baseengine.rb, line 47
# Predicted value of y for every case.
#
# For each index below @total_cases the values of all @dep_columns are
# gathered; when any of them is nil the prediction is nil, otherwise the
# row is handed to +process+.
#
# Returns the result of calling to_vector(:scale) on the collected array.
def predicted
  fitted = (0...@total_cases).map do |idx|
    row = @dep_columns.map { |column| column[idx] }
    row.any? { |value| value.nil? } ? nil : process(row)
  end
  fitted.to_vector(:scale)
end
probability() click to toggle source

p-value of the F test

# File lib/statsample/regression/multiple/baseengine.rb, line 128
# p-value, delegated to the object returned by +anova+.
def probability
  f_test = anova
  f_test.probability
end
process(v) click to toggle source
# File lib/statsample/regression/multiple/baseengine.rb, line 240
# Evaluates the regression equation for one row of predictor values.
#
# v :: predictor values, positionally aligned with @fields.
#
# Returns constant + sum(coeffs[field] * v[i]) over all fields.
def process(v)
  weights = coeffs
  @fields.each_with_index.inject(constant) do |sum, (field, idx)|
    sum + weights[field] * v[idx]
  end
end
r() click to toggle source

R Multiple

# File lib/statsample/regression/multiple/baseengine.rb, line 75
# Multiple R. Placeholder: this version always raises a RuntimeError
# telling the caller that it has to be implemented.
def r
  raise RuntimeError, "You should implement this"
end
r2_adjusted() click to toggle source

R^2 Adjusted. Estimates the population R^2 using the Ezekiel formula. Always lower than the sample R^2.

Reference:

  • Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.

# File lib/statsample/regression/multiple/baseengine.rb, line 87
# Adjusted R^2: r2 - ((1 - r2) * predictors) / df_e.
def r2_adjusted
  rsq = r2
  shrinkage = ((1 - rsq) * @predictors_n).quo(df_e)
  rsq - shrinkage
end
report_building(b) click to toggle source
# File lib/statsample/regression/multiple/baseengine.rb, line 185
# Writes the regression report into the report builder +b+.
#
# Inside a section named after @name it emits, in order: the engine class,
# the case counts, R, R^2, adjusted R^2, the standard error of estimate,
# the regression equation, the ANOVA element and a table with one row for
# the constant and one row per field (b, beta, se, t).
#
# b :: builder responding to +section+; the object it yields must respond
#      to +text+, +parse_element+ and +table+.
def report_building(b)
  number_format = "%0.#{digits}f"
  b.section(:name => @name) do |g|
    betas = coeffs
    g.text(_("Engine: %s") % self.class)
    g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
    g.text(_("R=") + (number_format % r))
    g.text(_("R^2=") + (number_format % r2))
    g.text(_("R^2 Adj=") + (number_format % r2_adjusted))
    g.text(_("Std.Error R=") + (number_format % se_estimate))

    # Each term is the formatted coefficient immediately followed by the
    # field name.
    equation = _("Equation") + "=" + sprintf(number_format, constant) + " + "
    equation += @fields.collect { |k| sprintf("#{number_format}%s", betas[k], k) }.join(' + ')
    g.text(equation)

    g.parse_element(anova)
    std_betas = standarized_coeffs
    std_errors = coeffs_se

    table_name = _("Beta coefficients")
    header = %w{coeff b beta se t}.collect { |field| _(field) }
    g.table(:name => table_name, :header => header) do |t|
      # The constant's se and t may be nil; those cells are left blank.
      constant_row = [_("Constant"), sprintf(number_format, constant), "-"]
      constant_row << (constant_se.nil? ? "" : sprintf(number_format, constant_se))
      constant_row << (constant_t.nil? ? "" : sprintf(number_format, constant_t))
      t.row(constant_row)

      @fields.each do |f|
        t.row([
          f,
          sprintf(number_format, betas[f]),
          sprintf(number_format, std_betas[f]),
          sprintf(number_format, std_errors[f]),
          sprintf(number_format, betas[f].quo(std_errors[f]))
        ])
      end
    end
  end
end
residuals() click to toggle source

Retrieves a vector with residuals values for y

# File lib/statsample/regression/multiple/baseengine.rb, line 63
# Residual (observed y minus predicted y) for every case.
#
# A case yields nil when any value of @dep_columns at that index is nil or
# when the observed value of @y_var in @ds is nil.
#
# Returns the result of calling to_vector(:scale) on the collected array.
def residuals
  differences = (0...@total_cases).map do |idx|
    row = @dep_columns.map { |column| column[idx] }
    next nil if row.any? { |value| value.nil? }

    observed = @ds[@y_var][idx]
    observed.nil? ? nil : observed - process(row)
  end
  differences.to_vector(:scale)
end
se_estimate() click to toggle source

Standard error of estimate

# File lib/statsample/regression/multiple/baseengine.rb, line 43
# Standard error of estimate: square root of sse / df_e.
def se_estimate
  error_variance = sse.quo(df_e)
  Math.sqrt(error_variance)
end
se_r2() click to toggle source

Standard error of R^2 (the original author flagged this formula as uncertain: "????")

# File lib/statsample/regression/multiple/baseengine.rb, line 159
# Standard error of R^2, computed as
#   sqrt(4 * r2 * (1 - r2)^2 * df_e^2 / ((cases^2 - 1) * (cases + 3)))
# NOTE(review): uses @cases rather than @valid_cases -- confirm this is
# the intended sample size.
def se_r2
  rsq = r2
  numerator = 4 * rsq * (1 - rsq)**2 * df_e**2
  denominator = (@cases**2 - 1) * (@cases + 3)
  Math.sqrt(numerator.quo(denominator))
end
sse() click to toggle source

Sum of squares (Error)

# File lib/statsample/regression/multiple/baseengine.rb, line 95
# Sum of squares of the error: sst minus ssr.
def sse
  total_sum = sst
  total_sum - ssr
end
sse_direct() click to toggle source
# File lib/statsample/regression/multiple/baseengine.rb, line 237
# Sum of squares of the error, computed as sst minus ssr.
# NOTE(review): identical to +sse+; despite the name it does not use
# ssr_direct or the residuals -- confirm whether that is intended.
def sse_direct
  total_sum = sst
  total_sum - ssr
end
ssr() click to toggle source

Sum of squares (regression)

# File lib/statsample/regression/multiple/baseengine.rb, line 91
# Sum of squares of the regression: r2 multiplied by sst.
def ssr
  explained_share = r2
  explained_share * sst
end
ssr_direct() click to toggle source

Sum of squares of regression using the predicted value minus y mean

# File lib/statsample/regression/multiple/baseengine.rb, line 222
# Sum of squares of the regression computed case by case: the squared
# difference between each predicted value (+process+) and the mean of @dy,
# summed over every index below @ds.cases.
#
# Cases where any of the @dep_columns holds nil are skipped.
def ssr_direct
  mean = @dy.mean
  # inject (not Array#sum) keeps the plain left-to-right accumulation.
  (0...@ds.cases).inject(0) do |acc, i|
    row = @dep_columns.collect { |column| column[i] }
    if row.any? { |value| value.nil? }
      acc
    else
      acc + ((process(row) - mean)**2)
    end
  end
end
sst() click to toggle source

Sum of squares Total

# File lib/statsample/regression/multiple/baseengine.rb, line 79
# Total sum of squares. Placeholder: this version always raises a
# RuntimeError telling the caller that it has to be implemented.
def sst
  raise RuntimeError, "You should implement this"
end
standarized_predicted() click to toggle source

Retrieves a vector with standarized values for y

# File lib/statsample/regression/multiple/baseengine.rb, line 59
# Returns the +standarized+ form of the vector produced by +predicted+.
def standarized_predicted
  fitted = predicted
  fitted.standarized
end
tolerance(var) click to toggle source

Tolerance for a given variable. See talkstats.com/showthread.php?t=5056

# File lib/statsample/regression/multiple/baseengine.rb, line 133
# Tolerance of +var+: 1 minus the R^2 of a regression, run with this same
# engine class, that uses +var+ as the dependent variable over a dataset
# built from @dep_columns.
#
# var :: name of the variable whose tolerance is wanted.
def tolerance(var)
  # Name each column, then convert it to a :scale vector.
  vectors = assign_names(@dep_columns).each_with_object({}) do |(name, column), acc|
    acc[name] = column.to_vector(:scale)
  end
  auxiliary = self.class.new(vectors.to_dataset, var)
  1 - auxiliary.r2
end