class Statsample::StratifiedSample

Public Class Methods

new(ms,strata_sizes) click to toggle source
# File lib/statsample/multiset.rb, line 202
# Builds a stratified sample on top of a multiset.
#
# ms::           a Statsample::Multiset
# strata_sizes:: hash mapping every dataset name to the size of its stratum
#
# Raises TypeError when ms is not a Multiset and ArgumentError when the
# sorted keys of strata_sizes differ from ms.datasets_names.
def initialize(ms,strata_sizes)
  unless ms.is_a?(Statsample::Multiset)
    raise TypeError, "ms should be a Multiset"
  end
  @ms = ms
  if strata_sizes.keys.sort != ms.datasets_names
    raise ArgumentError, "You should put a strata size for each dataset"
  end
  @strata_sizes = strata_sizes
  # Population size is the sum of the declared stratum sizes.
  @population_size = @strata_sizes.inject(0) { |total, (_name, size)| total + size }
  @strata_number = @ms.n_datasets
  # Sample size is the sum of the case counts of every dataset.
  @sample_size = @ms.datasets.inject(0) { |total, (_name, dataset)| total + dataset.cases }
end

Public Instance Methods

calculate_n_total(es) click to toggle source
# File lib/statsample/multiset.rb, line 120
# Adds up the 'N' entry of every stratum description.
#
# es:: enumerable of hashes, each carrying an 'N' key
#
# Returns the total, or 0 when es is empty.
def calculate_n_total(es)
  total = 0
  es.each { |stratum| total += stratum['N'] }
  total
end
mean(*vectors) click to toggle source

Pooled mean of all values across the given vectors (sum of every vector's sum divided by the total number of elements).

# File lib/statsample/multiset.rb, line 98
# Pooled mean over several vectors.
#
# vectors:: any number of objects responding to +size+ and +sum+
#
# Returns the sum of all vector sums divided by the total number of
# elements, as a Float (NaN when no elements are supplied).
def mean(*vectors)
  case_count = 0
  grand_total = 0
  vectors.each do |vector|
    case_count += vector.size
    grand_total += vector.sum
  end
  grand_total.to_f / case_count
end
population_size() click to toggle source

Population size. Equal to the sum of the strata sizes. Symbol: N<sub>h</sub>

# File lib/statsample/multiset.rb, line 217
# Reader for the population size stored in @population_size.
def population_size
  @population_size
end
proportion(field, v=1) click to toggle source

Population proportion based on strata

# File lib/statsample/multiset.rb, line 234
# Proportion of value +v+ in +field+, weighted by stratum.
#
# field:: field name handed to @ms.sum_field
# v::     value whose proportion is measured in each vector (default 1)
#
# For each stratum the block yields stratum_ponderation(name) multiplied by
# the vector's proportion of +v+; the result is whatever @ms.sum_field
# returns for those values (presumably their sum).
def proportion(field, v=1)
  @ms.sum_field(field) do |s_name, vector|
    weight = stratum_ponderation(s_name)
    weight * vector.proportion(v)
  end
end
proportion_sd_esd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 197
# Square root of proportion_variance_ksd_wor(es).
#
# es:: enumerable of stratum hashes, forwarded unchanged
#
# NOTE(review): the name says "esd" but the variance used is the "ksd"
# one -- confirm whether that is intentional.
def proportion_sd_esd_wor(es)
  variance = proportion_variance_ksd_wor(es)
  Math.sqrt(variance)
end
proportion_sd_ksd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 171
# Square root of proportion_variance_ksd_wor(es).
#
# es:: enumerable of stratum hashes, forwarded unchanged
def proportion_sd_ksd_wor(es)
  variance = proportion_variance_ksd_wor(es)
  Math.sqrt(variance)
end
proportion_sd_ksd_wr(es) click to toggle source
# File lib/statsample/multiset.rb, line 176
# Standard deviation of a stratified proportion ("ksd, wr" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 'p'
#
# Returns (1 / sum of N) * sqrt(sum over strata of N**2 * p * (1 - p) / n).
def proportion_sd_ksd_wr(es)
  n_total = calculate_n_total(es)
  weighted = es.inject(0) do |acc, stratum|
    size = stratum['N']
    prop = stratum['p']
    acc + (size**2 * prop * (1 - prop)) / stratum['n'].to_f
  end
  Math.sqrt(weighted) * (1.0 / n_total)
end
proportion_standard_error(field,v=1) click to toggle source
# File lib/statsample/multiset.rb, line 288
# Standard error of the stratified proportion of value +v+ in +field+.
#
# field:: field name handed to proportion and to @ms.sum_field
# v::     value whose proportion is measured (default 1)
#
# Every stratum contributes
#   N_h**2 * (1 - n_h / N_h) * p * (1 - p) / (n_h - 1)
# where N_h is the declared stratum size, n_h the size of the stratum's
# vector and p the value of proportion(field, v). The result is the square
# root of what @ms.sum_field returns (presumably the sum of the
# contributions) divided by the population size.
#
# NOTE(review): p is the overall proportion, not the per-stratum one --
# confirm that this is intended.
def proportion_standard_error(field,v=1)
  prop=proportion(field,v)
  sum=@ms.sum_field(field) {|s_name,vector|
    nh=vector.size
    s_size=@strata_sizes[s_name]
    # Float division on purpose: with two Integers nh / s_size truncates to
    # 0 and the finite-population correction silently becomes 1.
    fpc=1-(nh.to_f / s_size)
    (s_size**2 * fpc * prop * (1-prop) / (nh - 1 ))
  }
  (1.quo(@population_size)) * Math::sqrt(sum)
end
proportion_variance_esd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 188
# Variance of a stratified proportion ("esd, wor" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 'p'
#
# Returns
#   (1 / N**2) * sum_h N_h**2 * (N_h - n_h) * p_h * (1 - p_h) /
#                      ((n_h - 1) * (N_h - 1))
# with N the sum of all 'N' entries.
#
# The per-stratum term used to be assigned to the accumulator while an
# undefined +val+ was added to it, so any non-empty input raised NameError.
# The result is a variance, so no square root is taken.
def proportion_variance_esd_wor(es)
  n_total=calculate_n_total(es)

  sum=es.inject(0){|a,h|
    val=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1)).to_f
    a+val
  }
  sum / (n_total**2).to_f
end
proportion_variance_ksd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 164
# Variance of a stratified proportion ("ksd, wor" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 'p'
#
# Returns the sum over strata of
#   (N_h / N)**2 * p_h * (1 - p_h) / n_h * (1 - n_h / N_h)
# with N the sum of all 'N' entries.
def proportion_variance_ksd_wor(es)
  n_total = calculate_n_total(es)
  es.inject(0) do |acc, stratum|
    size = stratum['N']
    drawn = stratum['n']
    prop = stratum['p']
    weight_sq = (size.to_f / n_total)**2
    acc + ((weight_sq * prop * (1 - prop)) / drawn) * (1 - (drawn.to_f / size))
  end
end
proportion_variance_ksd_wr(es) click to toggle source
# File lib/statsample/multiset.rb, line 184
# Variance of a stratified proportion ("ksd, wr" variant).
#
# es:: enumerable of stratum hashes, forwarded to proportion_sd_ksd_wr
#
# Returns the square of proportion_sd_ksd_wr(es). The previous version
# squared proportion_variance_ksd_wor(es), i.e. the square of a *variance*
# from the "wor" family, which is neither a variance nor the "wr" variant.
def proportion_variance_ksd_wr(es)
  proportion_sd_ksd_wr(es)**2
end
sample_size() click to toggle source

Sample size. Equal to the sum of the sample sizes of all strata.

# File lib/statsample/multiset.rb, line 221
# Reader for the sample size stored in @sample_size.
def sample_size
  @sample_size
end
standard_error_esd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 148
# Square root of variance_ksd_wor(es).
#
# es:: enumerable of stratum hashes, forwarded unchanged
def standard_error_esd_wor(es)
  variance = variance_ksd_wor(es)
  Math.sqrt(variance)
end
standard_error_esd_wr(es) click to toggle source
# File lib/statsample/multiset.rb, line 160
# Square root of variance_esd_wr(es).
#
# es:: enumerable of stratum hashes, forwarded unchanged
def standard_error_esd_wr(es)
  variance = variance_esd_wr(es)
  Math.sqrt(variance)
end
standard_error_ksd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 132
# Square root of variance_ksd_wor(es).
#
# es:: enumerable of stratum hashes, forwarded unchanged
def standard_error_ksd_wor(es)
  variance = variance_ksd_wor(es)
  Math.sqrt(variance)
end
standard_error_ksd_wr(es) click to toggle source
# File lib/statsample/multiset.rb, line 107
# Standard error of a stratified mean ("ksd, wr" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 's'
#
# Returns (1 / sum of N) * sqrt(sum over strata of N**2 * s**2 / n).
def standard_error_ksd_wr(es)
  n_total = 0
  weighted = 0
  es.each do |stratum|
    n_total += stratum['N']
    weighted += (stratum['N']**2 * stratum['s']**2) / stratum['n'].to_f
  end
  (1.to_f / n_total) * Math.sqrt(weighted)
end
standard_error_wor(field) click to toggle source

Standard error with estimated population variance and without replacement. Source: Cochran (1972)

# File lib/statsample/multiset.rb, line 254
# Standard error with estimated population variance and without replacement.
#
# field:: field name handed to @ms.collect_vector
#
# Describes every stratum as a hash with 'N' (declared stratum size),
# 'n' (vector size) and 's' (vector.sds) and passes the collection to
# StratifiedSample.standard_error_esd_wor.
def standard_error_wor(field)
  es = @ms.collect_vector(field) do |s_n, vector|
    { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
  end

  StratifiedSample.standard_error_esd_wor(es)
end
standard_error_wor_2(field) click to toggle source

Standard error with estimated population variance and without replacement. Source: stattrek.com/Lesson6/STRAnalysis.aspx

# File lib/statsample/multiset.rb, line 265
# Standard error with estimated population variance and without
# replacement, computed directly from the vectors.
#
# field:: field name handed to @ms.sum_field
#
# Every stratum contributes
#   N_h**2 * (1 - n_h / N_h) * variance_sample / n_h
# and the result is sqrt of what @ms.sum_field returns (presumably the sum
# of the contributions) divided by the population size.
def standard_error_wor_2(field)
  total = @ms.sum_field(field) do |s_name, vector|
    s_size = @strata_sizes[s_name]
    n_h = vector.size.to_f
    fpc = 1 - (n_h / s_size)
    s_size**2 * fpc * vector.variance_sample / n_h
  end
  (1 / @population_size.to_f) * Math.sqrt(total)
end
standard_error_wr(field) click to toggle source
# File lib/statsample/multiset.rb, line 273
# Standard error computed through StratifiedSample.standard_error_esd_wr.
#
# field:: field name handed to @ms.collect_vector
#
# Describes every stratum as a hash with 'N' (declared stratum size),
# 'n' (vector size) and 's' (vector.sds) and passes the collection on.
def standard_error_wr(field)
  es = @ms.collect_vector(field) do |s_n, vector|
    { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
  end

  StratifiedSample.standard_error_esd_wr(es)
end
strata_number() click to toggle source

Number of strata

# File lib/statsample/multiset.rb, line 212
# Reader for the number of strata stored in @strata_number.
def strata_number
  @strata_number
end
stratum_ponderation(h) click to toggle source

Stratum ponderation. Symbol: W<sub>h</sub>

# File lib/statsample/multiset.rb, line 241
# Weight of stratum +h+: its declared size divided by the population size.
#
# h:: key into @strata_sizes
#
# Returns a Float.
def stratum_ponderation(h)
  size_h = @strata_sizes[h]
  size_h.to_f / @population_size
end
Also aliased as: wh
stratum_size(h) click to toggle source

Size of stratum h

# File lib/statsample/multiset.rb, line 225
# Declared size of stratum +h+, looked up in @strata_sizes.
#
# h:: key into @strata_sizes
def stratum_size(h)
  @strata_sizes[h]
end
variance_esd_wor(es) click to toggle source
# File lib/statsample/multiset.rb, line 138
# Variance of a stratified mean ("esd, wor" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 's'
#
# Returns (1 / N**2) * sum over strata of N_h * (N_h - n_h) * s_h**2 / n_h,
# with N the sum of all 'N' entries.
def variance_esd_wor(es)
  n_total = calculate_n_total(es)
  weighted = es.inject(0) do |acc, stratum|
    size = stratum['N']
    drawn = stratum['n']
    acc + size * (size - drawn) * (stratum['s']**2 / drawn.to_f)
  end
  (1.0 / (n_total**2)) * weighted
end
variance_esd_wr(es) click to toggle source

Based on stattrek.com/Lesson6/STRAnalysis.aspx

# File lib/statsample/multiset.rb, line 152
# Variance of a stratified mean ("esd, wr" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 's'
#
# Returns (1 / N**2) * sum over strata of s_h**2 * N_h**2 / n_h,
# with N the sum of all 'N' entries.
def variance_esd_wr(es)
  n_total = calculate_n_total(es)
  weighted = es.inject(0) do |acc, stratum|
    acc + ((stratum['s']**2 * stratum['N']**2) / stratum['n'].to_f)
  end
  (1.0 / (n_total**2)) * weighted
end
variance_ksd_wor(es) click to toggle source

Source : Cochran (1972)

# File lib/statsample/multiset.rb, line 125
# Variance of a stratified mean ("ksd, wor" variant).
#
# es:: enumerable of stratum hashes with keys 'N', 'n' and 's'
#
# Returns the sum over strata of
#   (N_h / N)**2 * (s_h**2 / n_h) * (1 - n_h / N_h)
# with N the sum of all 'N' entries.
def variance_ksd_wor(es)
  n_total = calculate_n_total(es)
  es.inject(0) do |acc, stratum|
    size = stratum['N']
    drawn = stratum['n']
    weight_sq = (size.to_f / n_total)**2
    acc + weight_sq * (stratum['s']**2 / drawn.to_f) * (1 - (drawn.to_f / size))
  end
end
variance_ksd_wr(es) click to toggle source
# File lib/statsample/multiset.rb, line 117
# Square of standard_error_ksd_wr(es).
#
# es:: enumerable of stratum hashes, forwarded unchanged
def variance_ksd_wr(es)
  standard_error = standard_error_ksd_wr(es)
  standard_error**2
end
variance_pst(field,v=1) click to toggle source

Cochran(1971), p. 150

# File lib/statsample/multiset.rb, line 298
# Variance of the stratified proportion of value +v+ in +field+.
#
# field:: field looked up in every dataset
# v::     value whose proportion is measured (default 1)
#
# Returns
#   (1 / N**2) * sum_h N_h**2 * (N_h - n_h) / (N_h - 1) *
#                      p_h * (1 - p_h) / (n_h - 1)
# where N is @population_size, N_h the declared stratum size, n_h the
# dataset's case count and p_h the proportion of +v+ in the dataset's field.
def variance_pst(field,v=1)
  total = 0
  @ms.datasets.each do |stratum_name, ds|
    nh = ds.cases.to_f
    s_size = @strata_sizes[stratum_name]
    prop = ds[field].proportion(v)
    total += ((s_size**2 * (s_size - nh)) / (s_size - 1)) * (prop * (1 - prop) / (nh - 1))
  end
  (1 / @population_size.to_f**2) * total
end
vectors_by_field(field) click to toggle source
# File lib/statsample/multiset.rb, line 228
# Collects, for every dataset in @ms.datasets, the entry stored under
# +field+.
#
# field:: key used to index each dataset
#
# Returns an array with one element per dataset.
def vectors_by_field(field)
  @ms.datasets.collect { |_name, ds| ds[field] }
end
wh(h)
Alias for: stratum_ponderation