From 7fbc0ce6c7a1cc89c0ccc20a948ccea7acb55ff9 Mon Sep 17 00:00:00 2001 From: Oliver Czulo Date: Sun, 7 Apr 2024 13:56:58 +0200 Subject: [PATCH] Added functions to the chi_square test library to test contingency tables for independence of observations --- .../statistical_test/chi_squared_test.rb | 59 +++++++++++++++++++ ruby-statistics.gemspec | 1 + .../statistical_test/chi_squared_test_spec.rb | 38 ++++++++++++ 3 files changed, 98 insertions(+) diff --git a/lib/ruby-statistics/statistical_test/chi_squared_test.rb b/lib/ruby-statistics/statistical_test/chi_squared_test.rb index 982d2f2..370af78 100644 --- a/lib/ruby-statistics/statistical_test/chi_squared_test.rb +++ b/lib/ruby-statistics/statistical_test/chi_squared_test.rb @@ -1,6 +1,9 @@ module RubyStatistics module StatisticalTest class ChiSquaredTest + + require 'matrix' + def self.chi_statistic(expected, observed) # If the expected is a number, we asumme that all expected observations # has the same probability to occur, hence we expect to see the same number @@ -37,6 +40,62 @@ def self.goodness_of_fit(alpha, expected, observed) alternative: p_value <= alpha, confidence_level: 1 - alpha } end + + # The following three functions serve to calculate a test of independence for contingency + # tables (short: ct) of the type + # + # A B + # X 20 18 + # Y 7 35 + # + # They have been tested using 2x2 and 3x3 tables. Tables are implemented as type Matrix. 
+ # + def self.test_of_independence(alpha, observed_matrix) + expected_matrix = calculate_expected_matrix(observed_matrix) + df = (observed_matrix.row_size - 1) * (observed_matrix.column_size - 1) + chi_score = chi_statistic_matrix(observed_matrix, expected_matrix) + probability = Distribution::ChiSquared.new(df).cumulative_function(chi_score) + p_value = 1.0 - probability + + { + chi_score: chi_score, + df: df, + probability: probability, + p_value: p_value, + alpha: alpha, + null: alpha < p_value, + alternative: p_value <= alpha, + confidence_level: 1 - alpha, + expected: expected_matrix + } + end + + # For a contingency table of observed values, calculate the expected values + def self.calculate_expected_matrix(observed_matrix) + row_sums = observed_matrix.row_vectors.map { |row| row.to_a.sum.to_r } + col_sums = observed_matrix.column_vectors.map { |col| col.to_a.sum.to_r } + total_sum = row_sums.sum + + # create a mutable array from the Matrix of observed values + # so we have a 'template' for our Matrix of expected values + expected = observed_matrix.to_a + # calculate the expected values + observed_matrix.each_with_index do |i, row, col| + expected[row][col] = (row_sums[row] * col_sums[col]) / total_sum + end + Matrix.rows(expected) + end + + def self.chi_statistic_matrix(observed_matrix, expected_matrix) + sum = 0.0 + observed_matrix.each_with_index do |i, row, col| + sum += (observed_matrix[row, col] - expected_matrix[row, col])**2 / expected_matrix[row, col] + end + sum + end + + private_class_method :chi_statistic_matrix + end end end diff --git a/ruby-statistics.gemspec b/ruby-statistics.gemspec index ff4d555..83bccfd 100644 --- a/ruby-statistics.gemspec +++ b/ruby-statistics.gemspec @@ -32,4 +32,5 @@ Gem::Specification.new do |spec| spec.add_development_dependency "grb", '~> 0.4.1', '>= 0.4.1' spec.add_development_dependency 'byebug', '>= 9.1.0' spec.add_development_dependency 'pry' + spec.add_development_dependency 'matrix' end diff --git 
a/spec/ruby-statistics/statistical_test/chi_squared_test_spec.rb b/spec/ruby-statistics/statistical_test/chi_squared_test_spec.rb index a8ad853..534dfbd 100644 --- a/spec/ruby-statistics/statistical_test/chi_squared_test_spec.rb +++ b/spec/ruby-statistics/statistical_test/chi_squared_test_spec.rb @@ -1,4 +1,5 @@ require 'spec_helper' +require 'matrix' describe RubyStatistics::StatisticalTest::ChiSquaredTest do describe '.chi_statistic' do @@ -82,4 +83,41 @@ expect(result[:alternative]).to be false end end + + describe '.calculate_expected_matrix' do + + it 'calculate expected values for a 2*3 contingency table of observed values' do + + observed = Matrix[[388,51692],[119,45633],[271,40040]] + result = described_class.calculate_expected_matrix(observed) + + expect(result.map(&:to_i)).to eq(Matrix[[(40518240/138143), (7153969200/138143)], [(35595056/138143), (6284723480/138143)], [(31361958/138143), (5537320515/138143)]]) + + end + + end + + describe '.test_of_independence' do + + it 'calculate test of independence for a 2*3 contingency table' do + + observed = Matrix[[388,51692],[119,45633],[271,40040]] + alpha = 0.05 + result = {} + + expect do + result = described_class.test_of_independence(alpha, observed) + end.not_to raise_error + + expect(result[:chi_score].round(4)).to eq(114.3600) + expect(result[:p_value]).to eq(0.0) + expect(result[:df]).to eq(2) + expect(result[:null]).to be false + expect(result[:alternative]).to be true + + end + + end + + end