Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Statistics/Sample/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ module Statistics.Sample.Internal
(
robustSumVar
, sum
, sumF
) where

import Numeric.Sum (kbn, sumVector)
import qualified Numeric.Sum as Sum
import Prelude hiding (sum)
import Statistics.Function (square)
import qualified Data.Vector.Generic as G
Expand All @@ -26,5 +27,9 @@ robustSumVar m = sum . G.map (square . subtract m)
{-# INLINE robustSumVar #-}

sum :: (G.Vector v Double) => v Double -> Double
sum = sumVector kbn
sum = Sum.sumVector Sum.kbn
{-# INLINE sum #-}

sumF :: Foldable f => f Double -> Double
sumF = Sum.sum Sum.kbn
{-# INLINE sumF #-}
125 changes: 66 additions & 59 deletions Statistics/Test/Bartlett.hs
Original file line number Diff line number Diff line change
@@ -1,92 +1,99 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-|
Module : Statistics.Test.Bartlett
Description : Bartlett's test for homogeneity of variances.
Copyright : (c) Praneya Kumar, 2025
Copyright : (c) Praneya Kumar, Alexey Khudyakov, 2025
License : BSD-3-Clause

Implements Bartlett's test to check if multiple groups have equal variances.
Assesses equality of variances assuming normal distribution, sensitive to non-normality.
Bartlett's test is used to check that multiple groups of observations
come from distributions with equal variances. This test assumes that
samples come from normal distribution. If this is not the case it may
simple test for non-normality and Levene's ("Statistics.Test.Levene")
is preferred

>>> import qualified Data.Vector.Unboxed as VU
>>> import Statistics.Test.Bartlett
>>> :{
let a = VU.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
b = VU.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
c = VU.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
in bartlettTest [a,b,c]
:}
Right (Test {testSignificance = mkPValue 1.1254782518843598e-5, testStatistics = 22.789434813726768, testDistribution = chiSquared 2})

-}
module Statistics.Test.Bartlett (
bartlettTest,
module Statistics.Distribution.ChiSquared
) where

import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import Statistics.Distribution (cumulative)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Primitive as VP
#if MIN_VERSION_vector(0,13,2)
import qualified Data.Vector.Strict as VV
#endif

import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared, ChiSquared(..))
import Statistics.Sample (varianceUnbiased)
import Statistics.Types (mkPValue)
import Statistics.Test.Types (Test(..))

-- | Perform Bartlett's test for equal variances.
-- The input is a list of vectors, where each vector represents a group of observations.
-- Returns Either an error message or a Test ChiSquared containing the test statistic and p-value.
bartlettTest :: [U.Vector Double] -> Either String (Test ChiSquared)
-- | Perform Bartlett's test for equal variances. The input is a list
-- of vectors, where each vector represents a group of observations.
bartlettTest :: VG.Vector v Double => [v Double] -> Either String (Test ChiSquared)
bartlettTest groups
| length groups < 2 = Left "At least two groups are required for Bartlett's test."
| any ((< 2) . G.length) groups = Left "Each group must have at least two observations."
| any (<= 0) groupVariances = Left "All groups must have positive variance."
| otherwise = Right $ Test
| length groups < 2 = Left "At least two groups are required for Bartlett's test."
| any ((< 2) . VG.length) groups = Left "Each group must have at least two observations."
| any ((<= 0) . var) groupVariances = Left "All groups must have positive variance."
| otherwise = Right Test
{ testSignificance = pValue
, testStatistics = tStatistic
, testDistribution = chiDist
}
where
-- Number of groups
k = length groups

-- Sample sizes for each group
ni = map G.length groups
ni' = map fromIntegral ni

ni = map (fromIntegral . VG.length) groups
-- Total number of observations across all groups
nTotal = sum ni

-- Variance for each group (unbiased estimate)
groupVariances = map varianceUnbiased groups

-- Pooled variance calculation
sumWeightedVars = sum [ (n - 1) * v | (n, v) <- zip ni' groupVariances ]
pooledVariance = sumWeightedVars / fromIntegral (nTotal - k)

n_tot = sum $ fromIntegral . VG.length <$> groups
-- Variance estimates
groupVariances = toVar <$> groups
sumWeightedVars = sum [ (n - 1) * v | Var{sampleN=n, var=v} <- groupVariances ]
pooledVariance = sumWeightedVars / fromIntegral (n_tot - k)
-- Numerator of Bartlett's statistic
numerator =
fromIntegral (nTotal - k) * log pooledVariance -
sum [ (n - 1) * log v | (n, v) <- zip ni' groupVariances ]

fromIntegral (n_tot - k) * log pooledVariance -
sum [ (n - 1) * log v | Var{sampleN=n, var=v} <- groupVariances ]
-- Denominator correction term
sumReciprocals = sum [1 / (n - 1) | n <- ni']
sumReciprocals = sum [1 / (n - 1) | n <- ni]
denomCorrection =
1 + (sumReciprocals - 1 / fromIntegral (nTotal - k)) / (3 * (fromIntegral k - 1))
1 + (sumReciprocals - 1 / fromIntegral (n_tot - k)) / (3 * (fromIntegral k - 1))

-- Test statistic T
-- Test statistic and test distrubution
tStatistic = max 0 $ numerator / denomCorrection

-- Degrees of freedom and chi-squared distribution
df = k - 1
chiDist = chiSquared df
pValue = mkPValue $ 1 - cumulative chiDist tStatistic


-- Example usage:
-- import qualified Data.Vector.Unboxed as U
-- import Statistics.Test.Bartlett

-- main :: IO ()
-- main = do
-- let a = U.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
-- b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
-- c = U.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]

-- case bartlettTest [a,b,c] of
-- Left err -> putStrLn $ "Error: " ++ err
-- Right test -> do
-- putStrLn $ "Bartlett's Test Statistic: " ++ show (testStatistics test)
-- putStrLn $ "P-Value: " ++ show (testSignificance test)

-- Sample Output
-- Bartlett's Test Statistic: ~32
-- P-Value: ~1e-5
chiDist = chiSquared (k - 1)
pValue = mkPValue $ complCumulative chiDist tStatistic
{-# SPECIALIZE bartlettTest :: [V.Vector Double] -> Either String (Test ChiSquared) #-}
{-# SPECIALIZE bartlettTest :: [VU.Vector Double] -> Either String (Test ChiSquared) #-}
{-# SPECIALIZE bartlettTest :: [VS.Vector Double] -> Either String (Test ChiSquared) #-}
{-# SPECIALIZE bartlettTest :: [VP.Vector Double] -> Either String (Test ChiSquared) #-}
#if MIN_VERSION_vector(0,13,2)
{-# SPECIALIZE bartlettTest :: [VV.Vector Double] -> Either String (Test ChiSquared) #-}
#endif

-- Estimate of variance
data Var = Var
{ sampleN :: !Double -- ^ N of elements
, var :: !Double -- ^ Sample variance
}

toVar :: VG.Vector v Double => v Double -> Var
toVar xs = Var { sampleN = fromIntegral $ VG.length xs
, var = varianceUnbiased xs
}
Loading
Loading