Add more discrete distributions (#25)

estebanz01 · web-flow · commit 4ca57b1e9dbc · 2018-07-04T16:02:26.000-05:00
* distributions: discrete: Negative binomial.

Add negative binomial distribution and unit tests.

* distribution: discrete: Geometric distribution.

This change adds the geometric distribution to the list of discrete
distributions.

* distribution: discrete: Bernoulli distribution.

* distribution: discrete: LogSeries distribution.

* gem: Bump to version 2.0.5.

* distribution: beta: Add uncovered case where alpha + beta == 0.
diff --git a/lib/statistics/distribution/bernoulli.rb b/lib/statistics/distribution/bernoulli.rb
@@ -0,0 +1,46 @@
+module Statistics
+  module Distribution
+    # Bernoulli distribution: a single trial whose outcome is 1 with
+    # probability p and 0 with probability 1 - p. Every method is stateless and
+    # takes p explicitly; p is used as given, without range validation.
+    class Bernoulli
+      # Probability mass at outcome n: 1.0 - p for 0, p for 1.
+      # Returns nil for any n outside the support {0, 1}.
+      def self.density_function(n, p)
+        return unless n == 0 || n == 1
+
+        n == 0 ? (1.0 - p) : p
+      end
+
+      # Cumulative probability at outcome n: 1.0 - p for 0, 1.0 for 1.
+      # Returns nil for any n outside the support {0, 1}.
+      def self.cumulative_function(n, p)
+        return unless n == 0 || n == 1
+
+        n == 0 ? (1.0 - p) : 1.0
+      end
+
+      # Variance: p * (1 - p).
+      def self.variance(p)
+        complement = 1.0 - p
+        p * complement
+      end
+
+      # Skewness: (1 - 2p) / sqrt(p * (1 - p)).
+      def self.skewness(p)
+        numerator = (1.0 - 2.0 * p).to_f
+        spread = Math.sqrt(p * (1.0 - p))
+
+        numerator / spread
+      end
+
+      # Excess kurtosis: (6p^2 - 6p + 1) / (p * (1 - p)).
+      def self.kurtosis(p)
+        numerator = 6.0 * (p**2) - 6 * p + 1
+        denominator = p * (1.0 - p)
+
+        numerator / denominator
+      end
+    end
+  end
+end
diff --git a/lib/statistics/distribution/geometric.rb b/lib/statistics/distribution/geometric.rb
@@ -0,0 +1,99 @@
+module Statistics
+  module Distribution
+    # Geometric distribution with success probability p.
+    #
+    # The always_success flag selects the support: when true, outcomes start
+    # at 0 and the mass is (1 - p)^k * p; when false (the default), outcomes
+    # start at 1 and the mass is (1 - p)^(k - 1) * p. The density and
+    # cumulative functions return nil for an outcome below the support.
+    class Geometric
+      attr_accessor :probability_of_success, :always_success_allowed
+
+      # p is coerced with to_f; no range validation is performed.
+      def initialize(p, always_success: false)
+        self.probability_of_success = p.to_f
+        self.always_success_allowed = always_success
+      end
+
+      # Probability mass at outcome k (truncated with to_i).
+      def density_function(k)
+        k = k.to_i
+
+        if always_success_allowed
+          return if k < 0
+
+          ((1.0 - probability_of_success) ** k) * probability_of_success
+        else
+          return if k <= 0
+
+          ((1.0 - probability_of_success) ** (k - 1.0)) * probability_of_success
+        end
+      end
+
+      # Cumulative probability up to and including outcome k (truncated with to_i).
+      def cumulative_function(k)
+        k = k.to_i
+
+        if always_success_allowed
+          return if k < 0
+
+          1.0 - ((1.0 - probability_of_success) ** (k + 1.0))
+        else
+          return if k <= 0
+
+          1.0 - ((1.0 - probability_of_success) ** k)
+        end
+      end
+
+      # Mean: (1 - p) / p when the support starts at 0, 1 / p otherwise.
+      def mean
+        if always_success_allowed
+          (1.0 - probability_of_success) / probability_of_success
+        else
+          1.0 / probability_of_success
+        end
+      end
+
+      # Median from the closed form ceil(-1 / log2(1 - p)), shifted down by one
+      # when the support starts at 0.
+      #
+      # At p == 1.0 the quotient is 0.0, which would put the result one step
+      # below the support (0 instead of 1, or -1.0 instead of 0.0), so the value
+      # is clamped to the smallest outcome. For 0 < p < 1 the ceiling is already
+      # at least 1 and the clamp changes nothing.
+      def median
+        raw = (-1.0 / Math.log2(1.0 - probability_of_success)).ceil
+
+        if always_success_allowed
+          [raw - 1.0, 0.0].max
+        else
+          [raw, 1].max
+        end
+      end
+
+      # Most likely outcome: the first value of the support.
+      def mode
+        if always_success_allowed
+          0.0
+        else
+          1.0
+        end
+      end
+
+      # Variance: (1 - p) / p^2, the same for both supports.
+      def variance
+        (1.0 - probability_of_success) / (probability_of_success ** 2)
+      end
+
+      # Skewness: (2 - p) / sqrt(1 - p).
+      def skewness
+        (2.0 - probability_of_success) / Math.sqrt(1.0 - probability_of_success)
+      end
+
+      # Excess kurtosis: 6 + p^2 / (1 - p).
+      def kurtosis
+        6.0 + ((probability_of_success ** 2) / (1.0 - probability_of_success))
+      end
+    end
+  end
+end
diff --git a/lib/statistics/distribution/logseries.rb b/lib/statistics/distribution/logseries.rb
@@ -0,0 +1,55 @@
+module Statistics
+  module Distribution
+    # Logarithmic (log-series) distribution over k = 1, 2, 3, ... with
+    # parameter p. Every method is stateless and takes p explicitly.
+    class LogSeries
+      # Probability mass at k: (-1 / ln(1 - p)) * p^k / k.
+      # Returns nil when k, truncated with to_i, is not positive.
+      def self.density_function(k, p)
+        # Truncate before validating: a fractional k in (0, 1) would otherwise
+        # pass the guard, become 0 and divide by zero below.
+        k = k.to_i
+        return if k <= 0
+
+        left = (-1.0 / Math.log(1.0 - p))
+        right = (p ** k).to_f
+
+        left * right / k
+      end
+
+      # Cumulative probability up to and including k; nil when k <= 0.
+      def self.cumulative_function(k, p)
+        return if k <= 0
+
+        # Sadly, the incomplete beta function is converging
+        # too fast to zero and breaking the calculation on logs.
+        # So, we default to the basic definition of the CDF, P(X <= k),
+        # which for a discrete distribution is the sum of the mass
+        # function over 1..k.
+        #
+        # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p)
+        # 1.0 + right
+
+        1.upto(k).reduce(0.0) { |total, number| total + density_function(number, p) }
+      end
+
+      # Mode: always 1.0.
+      def self.mode
+        1.0
+      end
+
+      # Mean: (-1 / ln(1 - p)) * p / (1 - p).
+      def self.mean(p)
+        (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p))
+      end
+
+      # Variance: -p * (p + ln(1 - p)) / ((1 - p)^2 * ln(1 - p)^2).
+      def self.variance(p)
+        up = p + Math.log(1.0 - p)
+        down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2)
+
+        (-1.0 * p) * (up / down.to_f)
+      end
+    end
+  end
+end
diff --git a/lib/statistics/distribution/negative_binomial.rb b/lib/statistics/distribution/negative_binomial.rb
@@ -0,0 +1,66 @@
+module Statistics
+  module Distribution
+    # Negative binomial distribution parameterised by r (stored as
+    # number_of_failures, truncated with to_i) and p (stored as
+    # probability_per_trial, kept as given).
+    class NegativeBinomial
+      attr_accessor :number_of_failures, :probability_per_trial
+
+      def initialize(r, p)
+        @number_of_failures = r.to_i
+        @probability_per_trial = p
+      end
+
+      # Mass at k: C(k + r - 1, k) * (1 - p)^r * p^k.
+      # Returns nil when r or k is negative, or when k exceeds r.
+      #
+      # NOTE(review): the textbook support is every k >= 0; confirm that
+      # rejecting k > r is intentional.
+      def probability_mass_function(k)
+        r = number_of_failures
+        return if r < 0 || k < 0 || k > r
+
+        prob = probability_per_trial
+        coefficient = Math.combination(k + r - 1, k)
+
+        coefficient * (((1 - prob) ** r) * (prob ** k))
+      end
+
+      # Cumulative probability at k (truncated with to_i after validation),
+      # computed as 1 - Math.incomplete_beta_function(p, k + 1, r).
+      # Returns nil when k is negative or exceeds r.
+      def cumulative_function(k)
+        return if k < 0 || k > number_of_failures
+
+        tail = Math.incomplete_beta_function(probability_per_trial, k.to_i + 1, number_of_failures)
+        1.0 - tail
+      end
+
+      # Mean: p * r / (1 - p).
+      def mean
+        scaled = probability_per_trial * number_of_failures
+        scaled / (1 - probability_per_trial).to_f
+      end
+
+      # Variance: p * r / (1 - p)^2.
+      def variance
+        scaled = probability_per_trial * number_of_failures
+        scaled / ((1 - probability_per_trial) ** 2).to_f
+      end
+
+      # Skewness: (1 + p) / sqrt(p * r).
+      def skewness
+        numerator = (1 + probability_per_trial).to_f
+        numerator / Math.sqrt(probability_per_trial * number_of_failures)
+      end
+
+      # Mode: floor(p * (r - 1) / (1 - p)) when r > 1, otherwise 0.0.
+      def mode
+        return 0.0 if number_of_failures <= 1
+
+        up = probability_per_trial * (number_of_failures - 1)
+        (up / (1 - probability_per_trial).to_f).floor
+      end
+    end
+  end
+end
diff --git a/lib/statistics/version.rb b/lib/statistics/version.rb
@@ -1,3 +1,3 @@
 module Statistics
-  VERSION = "2.0.4"
+  VERSION = "2.0.5"
 end
diff --git a/spec/statistics/distribution/bernoulli_spec.rb b/spec/statistics/distribution/bernoulli_spec.rb
@@ -0,0 +1,64 @@
+require 'spec_helper'
+
+# Checks the density, cumulative, variance, skewness and kurtosis functions of
+# the Bernoulli distribution, using a random p for each example.
+describe Statistics::Distribution::Bernoulli do
+  describe '.density_function' do
+    it 'is not defined when the outcome is different from zero or one' do
+      expect(described_class.density_function(rand(2..10), rand)).to be_nil
+      expect(described_class.density_function(rand(-5..-1), rand)).to be_nil
+    end
+
+    it 'returns the expected value when the outcome is zero' do
+      p = rand
+      expect(described_class.density_function(0, p)).to eq(1.0 - p)
+    end
+
+    it 'returns the expected value when the outcome is one' do
+      p = rand
+      expect(described_class.density_function(1, p)).to eq p
+    end
+  end
+
+  describe '.cumulative_function' do
+    it 'is not defined when the outcome is different from zero or one' do
+      expect(described_class.cumulative_function(rand(2..10), rand)).to be_nil
+      expect(described_class.cumulative_function(rand(-5..-1), rand)).to be_nil
+    end
+
+    it 'returns the expected value when the outcome is zero' do
+      p = rand
+      expect(described_class.cumulative_function(0, p)).to eq(1.0 - p)
+    end
+
+    it 'returns the expected value when the outcome is one' do
+      expect(described_class.cumulative_function(1, rand)).to eq 1.0
+    end
+  end
+
+  describe '.variance' do
+    it 'returns the expected value for the bernoulli distribution' do
+      p = rand
+
+      expect(described_class.variance(p)).to eq p * (1.0 - p)
+    end
+  end
+
+  describe '.skewness' do
+    it 'returns the expected value for the bernoulli distribution' do
+      p = rand
+      expected_value = (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p))
+
+      expect(described_class.skewness(p)).to eq expected_value
+    end
+  end
+
+  describe '.kurtosis' do
+    it 'returns the expected value for the bernoulli distribution' do
+      p = rand
+      expected_value = (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p))
+
+      expect(described_class.kurtosis(p)).to eq expected_value
+    end
+  end
+end
diff --git a/spec/statistics/distribution/beta_spec.rb b/spec/statistics/distribution/beta_spec.rb
@@ -62,11 +62,21 @@
       expect(described_class.new(alpha, beta).mean).to be_nil
     end
 
+    it 'returns nil if the sum of alpha and beta is zero' do
+      alpha = -1
+      beta = 1
+
+      expect(described_class.new(alpha, beta).mean).to be_nil
+    end
+
     it 'calculates the expected mean for the beta distribution' do
       alpha = rand(-5..5)
       beta = rand(-5..5)
 
-      alpha = 1 if alpha + beta == 0 # To avoid NaN results.
+      if alpha + beta == 0 # To avoid NaN results.
+        alpha = 1
+        beta = 1
+      end
 
       expect(described_class.new(alpha, beta).mean).to eq alpha.to_f/(alpha.to_f + beta.to_f)
     end
diff --git a/spec/statistics/distribution/geometric_spec.rb b/spec/statistics/distribution/geometric_spec.rb
diff --git a/spec/statistics/distribution/logseries_spec.rb b/spec/statistics/distribution/logseries_spec.rb
diff --git a/spec/statistics/distribution/negative_binomial_spec.rb b/spec/statistics/distribution/negative_binomial_spec.rb