|
| 1 | +require 'spec_helper' |
| 2 | + |
# Specs for Statistics::SpearmanRankCoefficient, exercising its two class-level
# entry points used here: `.rank` and `.coefficient`.
describe Statistics::SpearmanRankCoefficient do
  describe '.rank' do
    context 'when only ranks are needed' do
      it 'returns an array of elements corresponding to the expected ranks without altering order' do
        # The expectation ranks the largest value (30) as 1 and keeps the
        # ranks in the same positions as the input elements.
        expected_ranks = [4, 1, 3, 2, 5]

        result = described_class.rank(data: [10, 30, 12, 15, 3], return_ranks_only: true)

        expect(result).to eq expected_ranks
      end
    end

    context 'when ranks and passed elements are needed' do
      it 'returns a hash composed by the elements and ranking information' do
        # Keyed by element; with no repeated values every counter is 1 and
        # rank equals tie_rank.
        expected_ranks = {
          30 => { counter: 1, rank: 1, tie_rank: 1 },
          15 => { counter: 1, rank: 2, tie_rank: 2 },
          12 => { counter: 1, rank: 3, tie_rank: 3 },
          10 => { counter: 1, rank: 4, tie_rank: 4 },
          3 => { counter: 1, rank: 5, tie_rank: 5 }
        }

        result = described_class.rank(data: [10, 30, 12, 15, 3], return_ranks_only: false)

        expect(result).to eq expected_ranks
      end
    end

    context 'when there are ties' do
      # 61 appears twice. In descending order its two occurrences occupy
      # positions 6 and 7, hence the expected tie_rank of 6.5 (their mean).
      # NOTE(review): the expected `rank: 13` looks like the sum of those two
      # positions (6 + 7) -- confirm against the implementation.
      let(:data) { [56, 75, 45, 71, 61, 64, 58, 80, 76, 61] }

      it 'returns a ranking list with solved ties when ranks only are needed' do
        expected_ranking = [9, 3, 10, 4, 6.5, 5, 8, 1, 2, 6.5]

        result = described_class.rank(data: data, return_ranks_only: true)

        expect(result).to eq expected_ranking
      end

      it 'returns a hash composed by the elements and some ranking information' do
        expected_ranks = {
          80 => { counter: 1, rank: 1, tie_rank: 1 },
          76 => { counter: 1, rank: 2, tie_rank: 2 },
          75 => { counter: 1, rank: 3, tie_rank: 3 },
          71 => { counter: 1, rank: 4, tie_rank: 4 },
          64 => { counter: 1, rank: 5, tie_rank: 5 },
          61 => { counter: 2, rank: 13, tie_rank: 6.5 },
          58 => { counter: 1, rank: 8, tie_rank: 8 },
          56 => { counter: 1, rank: 9, tie_rank: 9 },
          45 => { counter: 1, rank: 10, tie_rank: 10 }
        }

        result = described_class.rank(data: data, return_ranks_only: false)

        expect(result).to include(expected_ranks)
      end

      it 'returns a hash containing information about the existing ties' do
        tie_rank = { 61 => { counter: 2, tie_rank: 6.5, rank: 13 } }

        result = described_class.rank(data: data, return_ranks_only: false)

        expect(result).to include(tie_rank)
      end
    end
  end

  describe '.coefficient' do
    # In the numeric examples below, `.rank` is called with `data:` only, so
    # `.coefficient` receives whatever `.rank` returns by default
    # (presumably the ranks-only array -- verify against the implementation).

    it 'raises an error when the groups have different number of cases' do
      expect do
        described_class.coefficient([1, 2, 3], [1, 2, 3, 4])
      end.to raise_error(StandardError, 'Both group sets must have the same number of cases.')
    end

    it 'returns nothing when both groups have a size of zero cases' do
      expect(described_class.coefficient([], [])).to be_nil
    end

    context 'when there are ties in the data' do
      it 'calculates the spearman rank coefficient for example one' do
        # Example taken from http://www.biostathandbook.com/spearman.html
        volume = [
          1760, 2040, 2440, 2550, 2730, 2740, 3010, 3080, 3370,
          3740, 4910, 5090, 5090, 5380, 5850, 6730, 6990, 7960
        ]
        frequency = [
          529, 566, 473, 461, 465, 532, 484, 527, 488,
          485, 478, 434, 468, 449, 425, 389, 421, 416
        ]

        volume_rank = described_class.rank(data: volume)
        frequency_rank = described_class.rank(data: frequency)

        rho = described_class.coefficient(volume_rank, frequency_rank)

        # Parentheses keep the negative literal from being parsed ambiguously.
        expect(rho.round(3)).to eq(-0.763)
      end

      it 'calculates the spearman rank coefficient for example two' do
        # Example taken from https://geographyfieldwork.com/SpearmansRank.htm
        # Results from R:
        # cor(c(50, 175, 270, 375, 425, 580, 710, 790, 890, 980),
        #     c(1.80, 1.20, 2.0, 1.0, 1.0, 1.20, 0.80, 0.60, 1.0, 0.85),
        #     method = 'spearman')
        # [1] -0.7570127
        distance = [50, 175, 270, 375, 425, 580, 710, 790, 890, 980]
        price = [1.80, 1.20, 2.0, 1.0, 1.0, 1.20, 0.80, 0.60, 1.0, 0.85]

        distance_rank = described_class.rank(data: distance)
        price_rank = described_class.rank(data: price)

        rho = described_class.coefficient(distance_rank, price_rank)

        expect(rho.round(7)).to eq(-0.7570127)
      end

      it 'calculates the spearman rank coefficient for example three' do
        # Example taken from
        # http://www.real-statistics.com/correlation/spearmans-rank-correlation/spearmans-rank-correlation-detailed/
        life_exp = [80, 78, 60, 53, 85, 84, 73, 79, 81, 75, 68, 72, 58, 92, 65]
        cigarettes = [5, 23, 25, 48, 17, 8, 4, 26, 11, 19, 14, 35, 29, 4, 23]

        life_rank = described_class.rank(data: life_exp)
        cigarettes_rank = described_class.rank(data: cigarettes)

        rho = described_class.coefficient(life_rank, cigarettes_rank)

        expect(rho.round(5)).to eq(-0.67442)
      end
    end

    context 'when there are no ties in the data' do
      it 'calculates the spearman rank coefficient for example one' do
        # Example taken from here:
        # https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide-2.php
        english_data = [56, 75, 45, 71, 62, 64, 58, 80, 76, 61]
        math_data = [66, 70, 40, 60, 65, 56, 59, 77, 67, 63]

        english_rank = described_class.rank(data: english_data)
        math_rank = described_class.rank(data: math_data)

        rho = described_class.coefficient(english_rank, math_rank)

        expect(rho.round(2)).to eq 0.67
      end

      it 'calculates the spearman rank coefficient for example two' do
        # Example taken from here:
        # https://www.statisticshowto.datasciencecentral.com/spearman-rank-correlation-definition-calculate/
        physics = [35, 23, 47, 17, 10, 43, 9, 6, 28]
        math = [30, 33, 45, 23, 8, 49, 12, 4, 31]

        physics_rank = described_class.rank(data: physics)
        math_rank = described_class.rank(data: math)

        rho = described_class.coefficient(physics_rank, math_rank)

        # The value is not rounded here, so compare with a tolerance instead
        # of exact float equality.
        expect(rho).to be_within(1e-9).of(0.9)
      end
    end
  end
end
0 commit comments