class Statsample::Histogram

A histogram consists of a set of bins which count the number of events falling into a given range of a continuous variable x.

This implementations follows convention of GSL for specification.

* Verbatim: *

The range for bin[i] is given by range[i] to range[i+1]. 
For n bins there are n+1 entries in the array range. 
Each bin is inclusive at the lower end and exclusive at the upper end. 
Mathematically this means that the bins are defined 
by the following inequality,

 bin[i] corresponds to range[i] <= x < range[i+1]

Here is a diagram of the correspondence between ranges and bins
on the number-line for x,

    [ bin[0] )[ bin[1] )[ bin[2] )[ bin[3] )[ bin[4] )
 ---|---------|---------|---------|---------|---------|---  x
  r[0]      r[1]      r[2]      r[3]      r[4]      r[5]

In this picture the values of the range array are denoted by r. 
On the left-hand side of each bin the square bracket ‘[’ denotes 
an inclusive lower bound ( r <= x), and the round parentheses ‘)’ 
on the right-hand side denote an exclusive upper bound (x < r). 
Thus any samples which fall on the upper end of the histogram are 
excluded. 
If you want to include this value for the last bin you will need to 
add an extra bin to your histogram.

Reference:

A histogram consists of a set of bins which count the number of events falling into a given range of a continuous variable x.

This implementations follows convention of GSL for specification.

* Verbatim: *

The range for bin[i] is given by range[i] to range[i+1]. 
For n bins there are n+1 entries in the array range. 
Each bin is inclusive at the lower end and exclusive at the upper end. 
Mathematically this means that the bins are defined 
by the following inequality,

 bin[i] corresponds to range[i] <= x < range[i+1]

Here is a diagram of the correspondence between ranges and bins
on the number-line for x,

    [ bin[0] )[ bin[1] )[ bin[2] )[ bin[3] )[ bin[4] )
 ---|---------|---------|---------|---------|---------|---  x
  r[0]      r[1]      r[2]      r[3]      r[4]      r[5]

In this picture the values of the range array are denoted by r. 
On the left-hand side of each bin the square bracket ‘[’ denotes 
an inclusive lower bound ( r <= x), and the round parentheses ‘)’ 
on the right-hand side denote an exclusive upper bound (x < r). 
Thus any samples which fall on the upper end of the histogram are 
excluded. 
If you want to include this value for the last bin you will need to 
add an extra bin to your histogram.

Reference:

Attributes

bin[R]
name[RW]
range[R]

Public Class Methods

new(p1, min_max=false, opts=Hash.new) click to toggle source
# File lib/statsample/histogram.rb, line 67
def initialize(p1, min_max=false, opts=Hash.new)
  
  if p1.is_a? Array
    range=p1
    @n_bins=p1.size-1
  elsif p1.is_a? Integer
    @n_bins=p1
  end
  
  @bin=[0.0]*(@n_bins)
  if(min_max)
    min, max=min_max[0], min_max[1]
    range=Array.new(@n_bins+1)
    (@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) }
  end
  range||=[0.0]*(@n_bins+1)
  set_ranges(range)
  @name=""
  opts.each{|k,v|
  self.send("#{k}=",v) if self.respond_to? k
  }
end

Public Instance Methods

alloc(n_bins, range=nil, opts=Hash.new) click to toggle source

Alloc n_bins, using range as ranges of bins

# File lib/statsample/histogram.rb, line 44
def alloc(n_bins, range=nil, opts=Hash.new)
  Histogram.new(n_bins, range, opts)
  
end
alloc_uniform(n_bins, p1=nil,p2=nil) click to toggle source

Alloc n_bins bins, using p1 as minimum and p2 as maximum

# File lib/statsample/histogram.rb, line 50
def alloc_uniform(n_bins, p1=nil,p2=nil)
  if p1.is_a? Array
    min,max=p1
  else
    min,max=p1,p2
  end
  range=max - min
  step=range / n_bins.to_f
  range=(n_bins+1).times.map {|i| min + (step*i)}
  Histogram.new(range)
end
bins() click to toggle source

Number of bins

# File lib/statsample/histogram.rb, line 90
def bins
  @n_bins
end
each() { |arg| ... } click to toggle source
# File lib/statsample/histogram.rb, line 125
def each
  bins.times.each do |i|
    r=get_range(i)
    arg={:i=>i, :low=>r[0],:high=>r[1], :middle=>(r[0]+r[1]) / 2.0,  :value=>@bin[i]}
    yield arg
  end
end
estimated_mean() click to toggle source
# File lib/statsample/histogram.rb, line 144
def estimated_mean
  sum,n=0,0
  each do |v|
    sum+= v[:value]* v[:middle]
    n+=v[:value]
  end
  sum / n
end
Also aliased as: mean, mean
estimated_standard_deviation() click to toggle source
# File lib/statsample/histogram.rb, line 141
def estimated_standard_deviation
  Math::sqrt(estimated_variance)
end
Also aliased as: sigma, sigma
estimated_variance() click to toggle source
# File lib/statsample/histogram.rb, line 132
def estimated_variance
  sum,n=0,0
  mean=estimated_mean
  each do |v|
    sum+=v[:value]*(v[:middle]-mean)**2
    n+=v[:value]
  end
  sum / (n-1)
end
get_range(i) click to toggle source
# File lib/statsample/histogram.rb, line 110
def get_range(i)
  [@range[i],@range[i+1]]
end
increment(x, w=1) click to toggle source
# File lib/statsample/histogram.rb, line 94
def increment(x, w=1)
  if x.respond_to? :each
    x.each{|y| increment(y,w) }
  elsif x.is_a? Numeric
    (range.size-1).times do |i|
      if x>=range[i] and x<range[i+1]
        @bin[i]+=w
        break
      end
    end
  end
end
max() click to toggle source
# File lib/statsample/histogram.rb, line 113
def max
  @range.last
end
max_val() click to toggle source
# File lib/statsample/histogram.rb, line 119
def max_val
  @bin.max
end
mean()
Alias for: estimated_mean
min() click to toggle source
# File lib/statsample/histogram.rb, line 116
def min
  @range.first
end
min_val() click to toggle source
# File lib/statsample/histogram.rb, line 122
def min_val
  @bin.min
end
report_building(generator) click to toggle source
# File lib/statsample/histogram.rb, line 160
def report_building(generator)
  hg=Statsample::Graph::Histogram.new(self)
  generator.parse_element(hg)
end
report_building_text(generator) click to toggle source
# File lib/statsample/histogram.rb, line 164
def report_building_text(generator)
  @range.each_with_index do |r,i|
    next if i==@bin.size
    generator.text(sprintf("%5.2f : %d", r, @bin[i]))
  end
end
set_ranges(range) click to toggle source
# File lib/statsample/histogram.rb, line 106
def set_ranges(range)
  raise "Range size should be bin+1" if range.size!=@bin.size+1
  @range=range
end
sigma()
sum(start=nil,_end=nil) click to toggle source
# File lib/statsample/histogram.rb, line 155
def sum(start=nil,_end=nil)
  start||=0
  _end||=@n_bins-1
  (start.._end).inject(0) {|ac,i| ac+@bin[i]}
end