class Statsample::Regression::Multiple::BaseEngine
Base class for Multiple Regression Engines
Base class for Multiple Regression Engines
Attributes
Minimum number of valid cases for each pair of variables in a correlation
Name of analysis
Number of total cases (dataset.cases)
Number of valid cases (listwise)
Public Class Methods
# File lib/statsample/regression/multiple/baseengine.rb, line 20
# Sets up the state shared by the multiple regression engines.
#
# ds    :: dataset; +ds.fields+ and +ds.cases+ are read from it.
# y_var :: name of the dependent variable. The predictor count is taken as
#          ds.fields.size - 1, so y_var is presumably one of the fields
#          (TODO confirm against callers).
# opts  :: optional settings, merged over the default {:digits => 3}. Each
#          key is applied through its "key=" writer when the object exposes
#          one and is skipped otherwise.
def initialize(ds, y_var, opts = {})
  @ds = ds
  @predictors_n = @ds.fields.size - 1
  @total_cases = @ds.cases
  @cases = @ds.cases
  @y_var = y_var
  @r2 = nil
  @name = _("Multiple Regression: %s over %s") % [ds.fields.join(","), @y_var]
  opts_default = { :digits => 3 }
  @opts = opts_default.merge(opts)
  @opts.each do |key, value|
    writer = "#{key}="
    # Probe for the writer that is actually invoked; probing the reader
    # raised NoMethodError for read-only attributes.
    send(writer, value) if respond_to?(writer)
  end
end
# File lib/statsample/regression/multiple/baseengine.rb, line 17
# Class-level predicate: the base engine always answers +true+.
def self.univariate?
  true
end
Public Instance Methods
Calculate F Test
# File lib/statsample/regression/multiple/baseengine.rb, line 39
# Builds the one-way ANOVA object for the regression from ssr, sse, df_r and
# df_e. The object is memoized in @anova, so later calls return the same one.
def anova
  return @anova if @anova

  @anova = Statsample::Anova::OneWay.new(
    :ss_num => ssr,
    :ss_den => sse,
    :df_num => df_r,
    :df_den => df_e,
    :name_numerator => _("Regression"),
    :name_denominator => _("Error"),
    :name => "ANOVA"
  )
end
# File lib/statsample/regression/multiple/baseengine.rb, line 212
# Pairs each entry of @fields with the element of +c+ at the same index.
#
# c :: any object indexable with an integer (+c[i]+).
# Returns a new Hash of field name => c[i].
def assign_names(c)
  @fields.each_with_index.each_with_object({}) do |(field, idx), named|
    named[field] = c[idx]
  end
end
Standard Error for coefficients
# File lib/statsample/regression/multiple/baseengine.rb, line 149
# Standard error of each coefficient:
#   sqrt( (sse / df_e) / (sum_of_squares(field) * tolerance(field)) )
# Returns a Hash keyed like +coeffs+.
def coeffs_se
  error_variance = sse.quo(df_e)
  coeffs.each_with_object({}) do |(name, _value), errors|
    spread = @ds[name].sum_of_squares * tolerance(name)
    errors[name] = Math.sqrt(error_variance / spread)
  end
end
T values for coeffs
# File lib/statsample/regression/multiple/baseengine.rb, line 99
# t value of each coefficient: the coefficient divided by its standard error.
# Returns a Hash keyed like +coeffs+.
def coeffs_t
  errors = coeffs_se
  coeffs.each_with_object({}) do |(name, value), t_values|
    t_values[name] = value / errors[name]
  end
end
Tolerances for each coefficient
# File lib/statsample/regression/multiple/baseengine.rb, line 142
# Maps every entry of @fields to its tolerance (see #tolerance).
def coeffs_tolerances
  @fields.each_with_object({}) do |field, result|
    result[field] = tolerance(field)
  end
end
Standard error for constant
# File lib/statsample/regression/multiple/baseengine.rb, line 182
# Standard error of the constant: the [0, 0] entry of the matrix returned by
# #estimated_variance_covariance_matrix.
def constant_se
  se_matrix = estimated_variance_covariance_matrix
  se_matrix[0, 0]
end
T for constant
# File lib/statsample/regression/multiple/baseengine.rb, line 178
# t value of the constant: constant / constant_se.
#
# Returns nil when no standard error is available. report_building tests
# +constant_t.nil?+, so nil is the expected "not available" signal; dividing
# by a nil standard error raised TypeError instead.
def constant_t
  se = constant_se
  return nil if se.nil?

  constant.to_f / se
end
Degrees of freedom for error
# File lib/statsample/regression/multiple/baseengine.rb, line 120
# Error degrees of freedom: valid cases minus the number of predictors,
# minus one.
def df_e
  estimated_parameters = @predictors_n + 1
  @valid_cases - estimated_parameters
end
Degrees of freedom for regression
# File lib/statsample/regression/multiple/baseengine.rb, line 116
# Regression degrees of freedom: the number of predictors.
def df_r
  @predictors_n
end
Estimated Variance-Covariance Matrix. Used to calculate the standard error of the constant.
# File lib/statsample/regression/multiple/baseengine.rb, line 165
# Builds the design matrix X (a leading column of 1.0 followed by the data of
# every @ds_valid field except @y_var), computes (X'X)^-1 * mse, and returns
# that matrix with the square root taken element by element.
#
# Negative entries have no real square root and come back as nil.
def estimated_variance_covariance_matrix
  columns = @ds_valid.fields.each_with_object([]) do |field, acc|
    vector = @ds_valid[field]
    acc << vector.data unless field == @y_var
  end
  # Intercept column goes first so that entry [0, 0] belongs to the constant.
  columns.unshift([1.0] * @valid_cases)
  design = Matrix.columns(columns)
  scaled = (design.t * design).inverse * mse
  scaled.collect { |entry| Math.sqrt(entry) if entry >= 0 }
end
Fisher for Anova
# File lib/statsample/regression/multiple/baseengine.rb, line 124
# F statistic, delegated to the object returned by #anova.
def f
  f_test = anova
  f_test.f
end
Mean Square Error
# File lib/statsample/regression/multiple/baseengine.rb, line 112
# Mean square error: sse divided by the error degrees of freedom.
def mse
  residual_ss = sse
  residual_ss.quo(df_e)
end
Mean square Regression
# File lib/statsample/regression/multiple/baseengine.rb, line 108
# Mean square regression: ssr divided by the regression degrees of freedom.
def msr
  regression_ss = ssr
  regression_ss.quo(df_r)
end
Retrieves a vector with predicted values for y
# File lib/statsample/regression/multiple/baseengine.rb, line 47
# Predicted y for every case index in 0...@total_cases.
#
# A case with a nil value in any of @dep_columns gets nil; otherwise its
# values are passed to #process. The resulting array is converted with
# to_vector(:scale).
def predicted
  fitted = @total_cases.times.map do |row|
    values = @dep_columns.collect { |column| column[row] }
    values.any?(&:nil?) ? nil : process(values)
  end
  fitted.to_vector(:scale)
end
p-value of Fisher
# File lib/statsample/regression/multiple/baseengine.rb, line 128
# p-value of the F test, delegated to the object returned by #anova.
def probability
  f_test = anova
  f_test.probability
end
# File lib/statsample/regression/multiple/baseengine.rb, line 240
# Evaluates the regression equation for one case.
#
# v :: predictor values, indexed in the same order as @fields.
# Returns constant + sum over fields of coeffs[field] * v[index].
def process(v)
  weights = coeffs
  @fields.each_with_index.inject(constant) do |sum, (field, idx)|
    sum + weights[field] * v[idx]
  end
end
R Multiple
# File lib/statsample/regression/multiple/baseengine.rb, line 75
# Multiple R. Not provided by the base engine: calling it here always raises
# RuntimeError ("You should implement this").
def r
  raise RuntimeError, "You should implement this"
end
Adjusted R^2. Estimates the population R^2 using Ezekiel's formula. Always lower than the sample R^2.
Reference:
-
Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
# File lib/statsample/regression/multiple/baseengine.rb, line 87
# Adjusted R^2: r2 - ((1 - r2) * predictors) / df_e.
def r2_adjusted
  fit = r2
  shrinkage = ((1 - fit) * @predictors_n).quo(df_e)
  fit - shrinkage
end
# File lib/statsample/regression/multiple/baseengine.rb, line 185
# Writes the regression report into a report builder.
#
# b :: builder; must respond to +section+, yielding an object that responds
#      to +text+, +parse_element+ and +table+ (whose block receives an object
#      responding to +row+).
#
# Emits summary lines (engine, cases, R, R^2, adjusted R^2, standard error,
# equation), the ANOVA element, and a coefficient table with one row for the
# constant plus one row per entry in @fields. Numbers are formatted with
# +digits+ decimals.
def report_building(b)
  di = "%0.#{digits}f"
  b.section(:name => @name) do |g|
    c = coeffs
    g.text(_("Engine: %s") % self.class)
    g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
    g.text(_("R=") + (di % r))
    g.text(_("R^2=") + (di % r2))
    g.text(_("R^2 Adj=") + (di % r2_adjusted))
    g.text(_("Std.Error R=") + (di % se_estimate))

    terms = @fields.collect { |k| sprintf("#{di}%s", c[k], k) }.join(' + ')
    g.text(_("Equation") + "=" + sprintf(di, constant) + " + " + terms)

    g.parse_element(anova)
    sc = standarized_coeffs
    cse = coeffs_se

    header = %w{coeff b beta se t}.collect { |field| _(field) }
    g.table(:name => _("Beta coefficients"), :header => header) do |t|
      # Evaluate each once: both go through the variance-covariance matrix,
      # which is rebuilt and inverted on every call.
      const_se = constant_se
      const_t = constant_t
      t.row([_("Constant"),
             sprintf(di, constant),
             "-",
             const_se.nil? ? "" : sprintf(di, const_se),
             const_t.nil? ? "" : sprintf(di, const_t)])
      @fields.each do |f|
        t.row([f,
               sprintf(di, c[f]),
               sprintf(di, sc[f]),
               sprintf(di, cse[f]),
               sprintf(di, c[f].quo(cse[f]))])
      end
    end
  end
end
Retrieves a vector with residuals values for y
# File lib/statsample/regression/multiple/baseengine.rb, line 63
# Residual (observed y minus the value from #process) for every case index
# in 0...@total_cases.
#
# A case gets nil when any of @dep_columns is nil at that index, or when the
# observed @ds[@y_var] value is nil. The resulting array is converted with
# to_vector(:scale).
def residuals
  errors = (0...@total_cases).map do |row|
    values = @dep_columns.collect { |column| column[row] }
    next nil if values.any?(&:nil?) || @ds[@y_var][row].nil?

    @ds[@y_var][row] - process(values)
  end
  errors.to_vector(:scale)
end
Standard error of estimate
# File lib/statsample/regression/multiple/baseengine.rb, line 43
# Standard error of the estimate: square root of sse / df_e.
def se_estimate
  error_variance = sse.quo(df_e)
  Math.sqrt(error_variance)
end
Standard error of R^2 (the original author marked this formula as uncertain)
# File lib/statsample/regression/multiple/baseengine.rb, line 159
# Standard error of R^2, computed as
#   sqrt( 4 * r2 * (1 - r2)^2 * df_e^2 / ((n^2 - 1) * (n + 3)) )
# with n = @cases.
# NOTE(review): n is @cases while df_e is based on @valid_cases -- confirm
# which case count is intended when the two differ.
def se_r2
  numerator = 4 * r2 * (1 - r2)**2 * df_e**2
  denominator = (@cases**2 - 1) * (@cases + 3)
  Math.sqrt(numerator.quo(denominator))
end
Sum of squares (Error)
# File lib/statsample/regression/multiple/baseengine.rb, line 95
# Error sum of squares: total sum of squares minus regression sum of squares.
def sse
  total_ss = sst
  total_ss - ssr
end
# File lib/statsample/regression/multiple/baseengine.rb, line 237
# Error sum of squares, computed as sst - ssr.
# NOTE(review): this is the same expression as #sse; given the name, it may
# have been meant to subtract #ssr_direct -- confirm before relying on it.
def sse_direct
  total_ss = sst
  total_ss - ssr
end
Sum of squares (regression)
# File lib/statsample/regression/multiple/baseengine.rb, line 91
# Regression sum of squares: r2 multiplied by the total sum of squares.
def ssr
  fit = r2
  fit * sst
end
Sum of squares of regression using the predicted value minus y mean
# File lib/statsample/regression/multiple/baseengine.rb, line 222
# Regression sum of squares computed directly from the predictions: the sum,
# over case indexes 0...@ds.cases, of (process(values) - @dy.mean)^2.
#
# Cases with a nil value in any of @dep_columns are skipped.
def ssr_direct
  mean = @dy.mean
  (0...@ds.cases).inject(0) do |total, row|
    values = @dep_columns.collect { |column| column[row] }
    next total if values.any?(&:nil?)

    total + (process(values) - mean)**2
  end
end
Sum of squares Total
# File lib/statsample/regression/multiple/baseengine.rb, line 79
# Total sum of squares. Not provided by the base engine: calling it here
# always raises RuntimeError ("You should implement this").
def sst
  raise RuntimeError, "You should implement this"
end
Retrieves a vector with standardized predicted values for y
# File lib/statsample/regression/multiple/baseengine.rb, line 59
# Returns the result of calling +standarized+ on the vector from #predicted.
def standarized_predicted
  fitted = predicted
  fitted.standarized
end
Tolerance for a given variable. See talkstats.com/showthread.php?t=5056
# File lib/statsample/regression/multiple/baseengine.rb, line 133
# Tolerance of +var+: 1 minus the r2 of a fresh engine of the same class,
# built with +var+ as the dependent variable.
#
# The dataset for that engine is made from @dep_columns, named through
# #assign_names and converted with to_vector(:scale) and to_dataset.
def tolerance(var)
  named_columns = assign_names(@dep_columns)
  named_columns.keys.each do |name|
    named_columns[name] = named_columns[name].to_vector(:scale)
  end
  auxiliary = self.class.new(named_columns.to_dataset, var)
  1 - auxiliary.r2
end