class Statsample::Multiset

Multiset joins multiple datasets that share the same fields and vectors but have different numbers of cases. This is the base class for stratified and cluster sampling estimation.

Multiset joins multiple datasets that share the same fields and vectors but have different numbers of cases. This is the base class for stratified and cluster sampling estimation.

Attributes

datasets[R]

Hash of Statsample::Dataset objects, indexed by the key given to add_dataset

fields[R]

Name of fields

Public Class Methods

new(fields) click to toggle source

To create a multiset

  • ::new(%w{f1 f2 f3}) # define only fields

# File lib/statsample/multiset.rb, line 12
# Builds an empty multiset.
#
# fields:: names of the fields every dataset added later must have
#          (add_dataset compares a dataset's fields against this value).
#
# The dataset store starts out as an empty Hash.
def initialize(fields)
  @datasets = {}
  @fields = fields
end
new_empty_vectors(fields,ds_names) click to toggle source
# File lib/statsample/multiset.rb, line 16
# Creates a Multiset for +fields+ and registers one freshly built
# Dataset (constructed from the same +fields+) under each name in
# +ds_names+.
#
# fields::   field names shared by the multiset and every dataset
# ds_names:: collection of keys, one per dataset to create
#
# Returns the populated Multiset.
def self.new_empty_vectors(fields, ds_names)
  Multiset.new(fields).tap do |multiset|
    ds_names.each do |name|
      multiset.add_dataset(name, Dataset.new(fields))
    end
  end
end

Public Instance Methods

[](i) click to toggle source
# File lib/statsample/multiset.rb, line 85
# Looks up a dataset in the internal store.
#
# i:: key under which the dataset was registered
#
# Returns whatever the store holds for +i+.
def [](i)
  @datasets[i]
end
add_dataset(key,ds) click to toggle source
# File lib/statsample/multiset.rb, line 59
# Registers +ds+ in the multiset under +key+.
#
# key:: identifier for the dataset
# ds::  object responding to +fields+
#
# Raises ArgumentError when the dataset's fields differ from the
# multiset's fields. Otherwise stores the dataset and returns it.
def add_dataset(key, ds)
  if ds.fields != @fields
    raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
  end

  @datasets[key] = ds
end
collect_vector(field) { |k, v| ... } click to toggle source
# File lib/statsample/multiset.rb, line 74
# Maps over every dataset, yielding its key together with the value the
# dataset returns for +field+.
#
# field:: name used to index each dataset
#
# Returns an Array with the block's result for each dataset.
def collect_vector(field)
  @datasets.map do |name, dataset|
    yield name, dataset[field]
  end
end
datasets_names() click to toggle source
# File lib/statsample/multiset.rb, line 53
# Returns the keys of all registered datasets in sorted order.
def datasets_names
  names = @datasets.keys
  names.sort
end
each(&block) click to toggle source
# File lib/statsample/multiset.rb, line 88
# Calls the block once per stored dataset, passing the dataset's key and
# the dataset itself. Datasets whose +cases+ equals 0 are skipped.
#
# When no block is given an Enumerator over the same (key, dataset)
# pairs is returned instead of failing on a nil block.
#
# Returns the internal dataset store when a block is given.
def each(&block)
  return enum_for(:each) unless block

  @datasets.each do |k, ds|
    next if ds.cases == 0

    block.call(k, ds)
  end
end
each_vector(field) { |k, v| ... } click to toggle source
# File lib/statsample/multiset.rb, line 80
# Walks every dataset, yielding its key together with the value the
# dataset returns for +field+.
#
# field:: name used to index each dataset
#
# Returns the internal dataset store.
def each_vector(field)
  @datasets.each_pair do |name, dataset|
    yield name, dataset[field]
  end
end
n_datasets() click to toggle source
# File lib/statsample/multiset.rb, line 56
# Returns how many datasets are currently registered.
def n_datasets
  @datasets.length
end
sum_field(field) { |stratum_name,vector| ... } click to toggle source
# File lib/statsample/multiset.rb, line 66
# Accumulates, starting from 0, the block's result for every dataset.
#
# field:: name used to index each dataset
#
# The block receives the dataset's key and the value the dataset
# returns for +field+; whatever it returns is added to the running
# total with +.
#
# Returns the final total (0 when there are no datasets).
def sum_field(field)
  @datasets.inject(0) do |total, (stratum_name, dataset)|
    total + yield(stratum_name, dataset[field])
  end
end
union() { |k,ds| ... } click to toggle source

Generates a new dataset as the union of the partial datasets. If a block is given, it is applied to a copy of each dataset before the union.

# File lib/statsample/multiset.rb, line 25
# Builds a single dataset by concatenating, field by field, the data of
# every dataset visited by +each+.
#
# If a block is given, each dataset is +dup+ed and the block is called
# with the key and that copy before its data is read.
#
# For every field, the type, name and labels are taken from the first
# visited dataset that supplies a truthy value for them.
#
# Returns the object produced by +to_dataset+ on the merged vectors,
# with its fields set to the multiset's fields.
def union(&block)
  merged = {}
  types = {}
  names = {}
  labels = {}

  each do |key, dataset|
    if block
      # Hand the block a copy rather than the stored dataset.
      dataset = dataset.dup
      yield key, dataset
    end

    @fields.each do |field|
      (merged[field] ||= []).concat(dataset[field].data)
      types[field] ||= dataset[field].type
      names[field] ||= dataset[field].name
      labels[field] ||= dataset[field].labels
    end
  end

  # Turn each accumulated Array into a vector carrying the recorded metadata.
  @fields.each do |field|
    merged[field] = merged[field].to_vector(types[field])
    merged[field].name = names[field]
    merged[field].labels = labels[field]
  end

  merged.to_dataset.tap { |result| result.fields = @fields }
end