Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions Statistics/Test/Bartlett.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{-# LANGUAGE FlexibleContexts #-}
{-|
Module : Statistics.Test.Bartlett
Description : Bartlett's test for homogeneity of variances.
Copyright : (c) Praneya Kumar, 2025
License : BSD-3-Clause

Implements Bartlett's test to check if multiple groups have equal variances.
Assesses equality of variances assuming normal distribution, sensitive to non-normality.
-}
module Statistics.Test.Bartlett (
bartlettTest,
module Statistics.Distribution.ChiSquared
) where

import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import Statistics.Distribution (cumulative)
import Statistics.Distribution.ChiSquared (chiSquared, ChiSquared(..))
import Statistics.Sample (varianceUnbiased)
import Statistics.Types (mkPValue)
import Statistics.Test.Types (Test(..))

-- | Perform Bartlett's test for equal variances.
-- The input is a list of vectors, where each vector represents a group of observations.
-- Returns Either an error message or a Test ChiSquared containing the test statistic and p-value.
bartlettTest :: [U.Vector Double] -> Either String (Test ChiSquared)
bartlettTest groups
| length groups < 2 = Left "At least two groups are required for Bartlett's test."
| any ((< 2) . G.length) groups = Left "Each group must have at least two observations."
| any (<= 0) groupVariances = Left "All groups must have positive variance."
| otherwise = Right $ Test
{ testSignificance = pValue
, testStatistics = tStatistic
, testDistribution = chiDist
}
where
-- Number of groups
k = length groups

-- Sample sizes for each group
ni = map G.length groups
ni' = map fromIntegral ni

-- Total number of observations across all groups
nTotal = sum ni

-- Variance for each group (unbiased estimate)
groupVariances = map varianceUnbiased groups

-- Pooled variance calculation
sumWeightedVars = sum [ (n - 1) * v | (n, v) <- zip ni' groupVariances ]
pooledVariance = sumWeightedVars / fromIntegral (nTotal - k)

-- Numerator of Bartlett's statistic
numerator =
fromIntegral (nTotal - k) * log pooledVariance -
sum [ (n - 1) * log v | (n, v) <- zip ni' groupVariances ]

-- Denominator correction term
sumReciprocals = sum [1 / (n - 1) | n <- ni']
denomCorrection =
1 + (sumReciprocals - 1 / fromIntegral (nTotal - k)) / (3 * (fromIntegral k - 1))

-- Test statistic T
tStatistic = max 0 $ numerator / denomCorrection

-- Degrees of freedom and chi-squared distribution
df = k - 1
chiDist = chiSquared df
pValue = mkPValue $ 1 - cumulative chiDist tStatistic
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

complCumulative should calculates 1-CDF without loss of precision

AFAIR χ² doesn't implement it but it may at some point



-- Example usage:
-- import qualified Data.Vector.Unboxed as U
-- import Statistics.Test.Bartlett

-- main :: IO ()
-- main = do
-- let a = U.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
-- b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
-- c = U.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]

-- case bartlettTest [a,b,c] of
-- Left err -> putStrLn $ "Error: " ++ err
-- Right test -> do
-- putStrLn $ "Bartlett's Test Statistic: " ++ show (testStatistics test)
-- putStrLn $ "P-Value: " ++ show (testSignificance test)

-- Sample Output
-- Bartlett's Test Statistic: ~32
-- P-Value: ~1e-5
152 changes: 152 additions & 0 deletions Statistics/Test/Levene.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
{-# LANGUAGE FlexibleContexts #-}

{-|
Module : Statistics.Test.Levene
Description : Levene's test for homogeneity of variances.
Copyright : (c) Praneya Kumar, 2025
License : BSD-3-Clause

Implements Levene's test to check if multiple groups have equal variances.
Assesses equality of variances, robust to non-normality, and versatile with mean or median centering.
-}
module Statistics.Test.Levene (
Center(..),
levenesTest
) where

import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Algorithms.Merge as VA
import Statistics.Distribution (cumulative)
import Statistics.Distribution.FDistribution (fDistribution, FDistribution)
import Statistics.Types (mkPValue)
import Statistics.Test.Types (Test(..))
import qualified Statistics.Sample as Sample
import Control.Exception (assert)

-- | Center calculation method
data Center =
Mean -- ^ Use arithmetic mean
| Median -- ^ Use median
| Trimmed Double -- ^ Trimmed mean with given proportion to cut from each end
deriving (Eq, Show)

-- | Trim data from both ends with error handling and performance optimization
trimboth :: (Ord a, Fractional a)
=> V.Vector a
-> Double
-> Either String (V.Vector a)
trimboth vec prop
| prop < 0 || prop > 1 = Left "Proportion must be between 0 and 1"
| V.null vec = Right vec
| otherwise = do
let sorted = V.modify VA.sort vec
n = V.length sorted
lowerCut = floor (prop * fromIntegral n)
upperCut = n - lowerCut
assert (upperCut >= lowerCut) $
Right $ V.slice lowerCut (upperCut - lowerCut) sorted

-- | Calculate median using pre-sorted vector
vectorMedian :: (Fractional a, Ord a)
=> V.Vector a
-> Either String a
vectorMedian vec
| V.null vec = Left "Empty vector in median calculation"
| otherwise = Right $
if odd len
then sorted V.! mid
else (sorted V.! (mid - 1) + sorted V.! mid) / 2
where
sorted = V.modify VA.sort vec
len = V.length sorted
mid = len `div` 2

-- | Main Levene's test function with full error handling
levenesTest :: Double -- ^ Significance level (alpha)
-> Center -- ^ Centering method
-> [V.Vector Double] -- ^ Input samples
-> Either String (Test FDistribution)
levenesTest alpha center samples
| alpha < 0 || alpha > 1 = Left "Significance level must be between 0 and 1"
| length samples < 2 = Left "At least two samples required"
| otherwise = do
processed <- mapM processSample samples
let (deviationsList, niList) = unzip processed
deviations = V.fromList deviationsList -- V.Vector (U.Vector Double)
ni = V.fromList niList -- V.Vector Int
zbari = V.map Sample.mean deviations -- V.Vector Double
k = V.length deviations
n = V.sum ni
zbar = V.sum (V.zipWith (\z n' -> z * fromIntegral n') zbari ni) / fromIntegral n

-- Numerator: Sum over (ni * (zbari - zbar)^2)
numerator = V.sum $ V.zipWith (\n z -> fromIntegral n * (z - zbar) ** 2) ni zbari

-- Denominator: Sum over sum((dev_ij - zbari)^2)
denominator = V.sum $ V.zipWith (\dev z -> U.sum (U.map (\x -> (x - z) ** 2) dev)) deviations zbari

-- Handle division by zero and invalid values
if denominator <= 0 || isNaN denominator || isInfinite denominator
then Left "Invalid denominator in W-statistic calculation"
else do
let wStat = (fromIntegral (n - k) / fromIntegral (k - 1)) * (numerator / denominator)
df1 = k - 1
df2 = n - k
fDist = fDistribution df1 df2
pVal = mkPValue $ 1 - cumulative fDist wStat

-- Validate distribution parameters
if df1 < 1 || df2 < 1
then Left "Invalid degrees of freedom"
else Right $ Test
{ testStatistics = wStat
, testSignificance = pVal
, testDistribution = fDist
}
where
-- Process samples with error handling and optimized sorting
processSample vec = case center of
Mean -> do
let dev = V.map (abs . subtract (Sample.mean vec)) vec
return (U.convert dev, V.length vec)

Median -> do
sortedVec <- Right $ V.modify VA.sort vec
m <- vectorMedian sortedVec
let dev = V.map (abs . subtract m) sortedVec
return (U.convert dev, V.length vec)

Trimmed p -> do
trimmed_for_center_calculation <- trimboth vec p
let robust_center = Sample.mean trimmed_for_center_calculation
-- Calculate deviations for ALL ORIGINAL points from the robust_center
deviations_from_robust_center = V.map (abs . subtract robust_center) vec -- Use 'vec' (original data)
-- Return deviations and the ORIGINAL sample size
return (U.convert deviations_from_robust_center, V.length vec) -- Use 'V.length vec'


-- Example usage:
-- import qualified Data.Vector as V
-- import LevenesTest (Center(..), levenesTest)
-- import Statistics.Test.Types (testStatistics, testSignificance)
-- import Statistics.Types (pValue)

-- main :: IO ()
-- main = do
-- let a = V.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
-- b = V.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
-- c = V.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]

-- case levenesTest (Trimmed 0.05) [a, b, c] of
-- Left err -> putStrLn $ "Error: " ++ err
-- Right test -> do
-- putStrLn $ "Levene's W Statistic: " ++ show (testStatistics test)
-- putStrLn $ "P-Value: " ++ show (pValue (testSignificance test))
-- putStrLn $ "Reject null hypothesis at α=0.05: " ++ show (testSignificance test < 0.05)


-- Sample Output
-- Levene's W Statistic: 7.905
-- P-Value: 0.002
-- Reject null hypothesis at α=0.05: True
10 changes: 10 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
## Unreleased

- **New Features**:
- Implemented Bartlett's test (`Statistics.Test.Bartlett`) for homogeneity of variances.
- Implemented Levene's test (`Statistics.Test.Levene`) for homogeneity of variances.
- Resolves [#137](https://github.com/haskell/statistics/issues/137).

- **Documentation**:
- Added usage examples and Haddock comments for both tests.

## Changes in 0.16.3.0

* `S.Sample.correlation`, `S.Sample.covariance`,
Expand Down
2 changes: 2 additions & 0 deletions statistics.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ library
Statistics.Sample.KernelDensity.Simple
Statistics.Sample.Normalize
Statistics.Sample.Powers
Statistics.Test.Bartlett
Statistics.Test.Levene
Statistics.Test.ChiSquared
Statistics.Test.KolmogorovSmirnov
Statistics.Test.KruskalWallis
Expand Down
62 changes: 58 additions & 4 deletions tests/Tests/Parametric.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,18 @@ import Data.Maybe (fromJust)
import Statistics.Test.StudentT
import Statistics.Types
import qualified Data.Vector.Unboxed as U
import Test.Tasty (testGroup)
import Tests.Helpers (testEquality)
import qualified Data.Vector as V
import Test.Tasty (testGroup, TestTree)
import Test.Tasty.HUnit (testCase, assertBool)
import Tests.Helpers (testEquality)
import qualified Test.Tasty as Tst

import Statistics.Test.Levene
import Statistics.Test.Bartlett


tests :: Tst.TestTree
tests = testGroup "Parametric tests" studentTTests
tests = testGroup "Parametric tests" (studentTTests ++ bartlettTests ++ leveneTests)

-- 2 samples x 20 obs data
--
Expand Down Expand Up @@ -77,7 +83,7 @@ testTTest name pVal test =
, testEquality name (isSignificant (mkPValue $ pValue pVal + 1e-5) test)
Significant
]

studentTTests :: [Tst.TestTree]
studentTTests = concat
[ -- R: t.test(sample1, sample2, alt="two.sided", var.equal=T)
Expand All @@ -100,3 +106,51 @@ studentTTests = concat
(mkPValue 0.01705) (fromJust $ pairedTTest BGreater sample12)
]
where sample12 = U.zip sample1 sample2


------------------------------------------------------------
-- Bartlett's Test
------------------------------------------------------------

bartlettTests :: [TestTree]
bartlettTests =
let
a = U.fromList [9.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 9.36, 9.18, 8.67, 9.05]
c = U.fromList [8.95, 8.12, 8.95, 8.85, 8.03, 8.84, 8.07, 8.98, 8.86, 8.98]
result = bartlettTest [a, b, c]
in case result of
Left err -> [testCase "Bartlett test - failed" (assertBool err False)]
Right test ->
[ testApproxEqual "Bartlett's Chi-Square Statistic" 1e-3 (testStatistics test) 1.802
, testApproxEqual "Bartlett's P-Value" 1e-3 (pValue $ testSignificance test) 0.406
, testEquality "Reject null hypothesis at alpha = 0.05"
(isSignificant (mkPValue 0.05) test) NotSignificant
]

------------------------------------------------------------
-- Levene's Test (Trimmed Mean)
------------------------------------------------------------

-- Local helper for approximate equality
testApproxEqual :: String -> Double -> Double -> Double -> TestTree
testApproxEqual name epsilon actual expected =
testCase name $
let diff = abs (actual - expected)
in assertBool (name ++ ": expected ≈ " ++ show expected ++ ", got " ++ show actual) (diff < epsilon)

leveneTests :: [TestTree]
leveneTests =
let
a = V.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
b = V.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
c = V.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
result = levenesTest 0.05 (Trimmed 0.05) [a, b, c]
in case result of
Left err -> [testCase "Levene trimmed mean test - failed" (assertBool err False)]
Right test ->
[ testApproxEqual "Levene's W Statistic (Trimmed 0.05)" 1e-3 (testStatistics test) 7.905
, testApproxEqual "Levene's P-Value (Trimmed 0.05)" 1e-3 (pValue $ testSignificance test) 0.002
, testEquality "Reject null hypothesis at alpha = 0.05"
(isSignificant (mkPValue 0.05) test) Significant
]