haskell · Shimuuar · Jun 29, 2025 · Apr 5, 2025 · Apr 13, 2025 · May 21, 2025
diff --git a/Statistics/Test/Bartlett.hs b/Statistics/Test/Bartlett.hs
@@ -0,0 +1,92 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-|
+Module      : Statistics.Test.Bartlett
+Description : Bartlett's test for homogeneity of variances.
+Copyright   : (c) Praneya Kumar, 2025
+License     : BSD-3-Clause
+
+Implements Bartlett's test to check if multiple groups have equal variances.
+Assesses equality of variances assuming normal distribution, sensitive to non-normality.
+-}
+module Statistics.Test.Bartlett (
+    bartlettTest,
+    module Statistics.Distribution.ChiSquared
+) where
+
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector.Unboxed as U
+import Statistics.Distribution (cumulative)
+import Statistics.Distribution.ChiSquared (chiSquared, ChiSquared(..))
+import Statistics.Sample (varianceUnbiased)
+import Statistics.Types (mkPValue)
+import Statistics.Test.Types (Test(..))
+
+-- | Perform Bartlett's test for equal variances.
+-- The input is a list of vectors, where each vector represents a group of observations.
+-- Returns Either an error message or a Test ChiSquared containing the test statistic and p-value.
+bartlettTest :: [U.Vector Double] -> Either String (Test ChiSquared)
+bartlettTest groups
+  | length groups < 2 = Left "At least two groups are required for Bartlett's test."
+  | any ((< 2) . G.length) groups = Left "Each group must have at least two observations."
+  | any (<= 0) groupVariances = Left "All groups must have positive variance."
+  | otherwise = Right $ Test
+      { testSignificance = pValue
+      , testStatistics   = tStatistic
+      , testDistribution = chiDist
+      }
+  where
+    -- Number of groups
+    k = length groups
+
+    -- Sample sizes for each group
+    ni = map G.length groups
+    ni' = map fromIntegral ni
+
+    -- Total number of observations across all groups
+    nTotal = sum ni
+
+    -- Variance for each group (unbiased estimate)
+    groupVariances = map varianceUnbiased groups
+
+    -- Pooled variance calculation
+    sumWeightedVars = sum [ (n - 1) * v | (n, v) <- zip ni' groupVariances ]
+    pooledVariance = sumWeightedVars / fromIntegral (nTotal - k)
+
+    -- Numerator of Bartlett's statistic
+    numerator =
+      fromIntegral (nTotal - k) * log pooledVariance -
+      sum [ (n - 1) * log v | (n, v) <- zip ni' groupVariances ]
+
+    -- Denominator correction term
+    sumReciprocals = sum [1 / (n - 1) | n <- ni']
+    denomCorrection =
+      1 + (sumReciprocals - 1 / fromIntegral (nTotal - k)) / (3 * (fromIntegral k - 1))
+
+    -- Test statistic T
+    tStatistic = max 0 $ numerator / denomCorrection
+
+    -- Degrees of freedom and chi-squared distribution
+    df = k - 1
+    chiDist = chiSquared df
+    pValue = mkPValue $ 1 - cumulative chiDist tStatistic
+
+
+-- Example usage:
+-- import qualified Data.Vector.Unboxed as U
+-- import Statistics.Test.Bartlett
+
+-- main :: IO ()
+-- main = do
+--     let a = U.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
+--         b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
+--         c = U.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
+
+--     case bartlettTest [a,b,c] of
+--         Left err -> putStrLn $ "Error: " ++ err
+--         Right test -> do
+--             putStrLn $ "Bartlett's Test Statistic: " ++ show (testStatistics test)
+--             putStrLn $ "P-Value: " ++ show (testSignificance test)
+
+-- Sample Output
+-- Bartlett's Test Statistic: ~32
+-- P-Value: ~1e-5
diff --git a/Statistics/Test/Levene.hs b/Statistics/Test/Levene.hs
@@ -0,0 +1,152 @@
+{-# LANGUAGE FlexibleContexts #-}
+
+{-|
+Module      : Statistics.Test.Levene
+Description : Levene's test for homogeneity of variances.
+Copyright   : (c) Praneya Kumar, 2025
+License     : BSD-3-Clause
+
+Implements Levene's test to check if multiple groups have equal variances.
+Assesses equality of variances, robust to non-normality, and versatile with mean or median centering.
+-}
+module Statistics.Test.Levene (
+    Center(..),
+    levenesTest
+) where
+
+import qualified Data.Vector as V
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Algorithms.Merge as VA
+import Statistics.Distribution (cumulative)
+import Statistics.Distribution.FDistribution (fDistribution, FDistribution)
+import Statistics.Types (mkPValue)
+import Statistics.Test.Types (Test(..))
+import qualified Statistics.Sample as Sample
+import Control.Exception (assert)
+
+-- | Center calculation method 
+data Center = 
+    Mean               -- ^ Use arithmetic mean
+  | Median             -- ^ Use median
+  | Trimmed Double     -- ^ Trimmed mean with given proportion to cut from each end
+  deriving (Eq, Show)
+
+-- | Trim data from both ends with error handling and performance optimization
+trimboth :: (Ord a, Fractional a) 
+         => V.Vector a 
+         -> Double 
+         -> Either String (V.Vector a)
+trimboth vec prop
+  | prop < 0 || prop > 1 = Left "Proportion must be between 0 and 1"
+  | V.null vec = Right vec
+  | otherwise = do
+      let sorted = V.modify VA.sort vec
+          n = V.length sorted
+          lowerCut = floor (prop * fromIntegral n)
+          upperCut = n - lowerCut
+      assert (upperCut >= lowerCut) $ 
+        Right $ V.slice lowerCut (upperCut - lowerCut) sorted
+
+-- | Calculate median using pre-sorted vector
+vectorMedian :: (Fractional a, Ord a) 
+             => V.Vector a 
+             -> Either String a
+vectorMedian vec
+  | V.null vec = Left "Empty vector in median calculation"
+  | otherwise = Right $ 
+      if odd len 
+        then sorted V.! mid 
+        else (sorted V.! (mid - 1) + sorted V.! mid) / 2
+  where
+    sorted = V.modify VA.sort vec
+    len = V.length sorted
+    mid = len `div` 2
+
+-- | Main Levene's test function with full error handling
+levenesTest :: Double              -- ^ Significance level (alpha)
+            -> Center             -- ^ Centering method
+            -> [V.Vector Double]  -- ^ Input samples
+            -> Either String (Test FDistribution)
+levenesTest alpha center samples
+  | alpha < 0 || alpha > 1 = Left "Significance level must be between 0 and 1"
+  | length samples < 2 = Left "At least two samples required"
+  | otherwise = do
+      processed <- mapM processSample samples
+      let (deviationsList, niList) = unzip processed
+          deviations = V.fromList deviationsList  -- V.Vector (U.Vector Double)
+          ni = V.fromList niList                  -- V.Vector Int
+          zbari = V.map Sample.mean deviations    -- V.Vector Double
+          k = V.length deviations
+          n = V.sum ni
+          zbar = V.sum (V.zipWith (\z n' -> z * fromIntegral n') zbari ni) / fromIntegral n
+
+          -- Numerator: Sum over (ni * (zbari - zbar)^2)
+          numerator = V.sum $ V.zipWith (\n z -> fromIntegral n * (z - zbar) ** 2) ni zbari
+
+          -- Denominator: Sum over sum((dev_ij - zbari)^2)
+          denominator = V.sum $ V.zipWith (\dev z -> U.sum (U.map (\x -> (x - z) ** 2) dev)) deviations zbari
+
+      -- Handle division by zero and invalid values
+      if denominator <= 0 || isNaN denominator || isInfinite denominator
+        then Left "Invalid denominator in W-statistic calculation"
+        else do
+          let wStat = (fromIntegral (n - k) / fromIntegral (k - 1)) * (numerator / denominator)
+              df1 = k - 1
+              df2 = n - k
+              fDist = fDistribution df1 df2
+              pVal = mkPValue $ 1 - cumulative fDist wStat
+
+          -- Validate distribution parameters
+          if df1 < 1 || df2 < 1
+            then Left "Invalid degrees of freedom"
+            else Right $ Test
+                  { testStatistics   = wStat
+                  , testSignificance = pVal
+                  , testDistribution = fDist
+                  }
+  where
+    -- Process samples with error handling and optimized sorting
+    processSample vec = case center of
+      Mean -> do
+        let dev = V.map (abs . subtract (Sample.mean vec)) vec
+        return (U.convert dev, V.length vec)
+
+      Median -> do
+        sortedVec <- Right $ V.modify VA.sort vec
+        m <- vectorMedian sortedVec
+        let dev = V.map (abs . subtract m) sortedVec
+        return (U.convert dev, V.length vec)
+
+      Trimmed p -> do
+        trimmed_for_center_calculation <- trimboth vec p
+        let robust_center = Sample.mean trimmed_for_center_calculation
+            -- Calculate deviations for ALL ORIGINAL points from the robust_center
+            deviations_from_robust_center = V.map (abs . subtract robust_center) vec -- Use 'vec' (original data)
+        -- Return deviations and the ORIGINAL sample size
+        return (U.convert deviations_from_robust_center, V.length vec) -- Use 'V.length vec'
+
+
+-- Example usage:
+-- import qualified Data.Vector as V
+-- import LevenesTest (Center(..), levenesTest)
+-- import Statistics.Test.Types (testStatistics, testSignificance)
+-- import Statistics.Types (pValue)
+
+-- main :: IO ()
+-- main = do
+--     let a = V.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
+--         b = V.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
+--         c = V.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
+
+--     case levenesTest (Trimmed 0.05) [a, b, c] of
+--         Left err -> putStrLn $ "Error: " ++ err
+--         Right test -> do
+--             putStrLn $ "Levene's W Statistic: " ++ show (testStatistics test)
+--             putStrLn $ "P-Value: " ++ show (pValue (testSignificance test))
+--             putStrLn $ "Reject null hypothesis at α=0.05: " ++ show (testSignificance test < 0.05)
+
+
+-- Sample Output
+-- Levene's W Statistic: 7.905
+-- P-Value: 0.002
+-- Reject null hypothesis at α=0.05: True
diff --git a/changelog.md b/changelog.md
@@ -1,3 +1,13 @@
+## Unreleased
+
+- **New Features**:
+  - Implemented Bartlett's test (`Statistics.Test.Bartlett`) for homogeneity of variances.
+  - Implemented Levene's test (`Statistics.Test.Levene`) for homogeneity of variances.
+  - Resolves [#137](https://github.com/haskell/statistics/issues/137).
+
+- **Documentation**:
+  - Added usage examples and Haddock comments for both tests.
+
 ## Changes in 0.16.3.0
 
  * `S.Sample.correlation`, `S.Sample.covariance`,

diff --git a/statistics.cabal b/statistics.cabal
@@ -107,6 +107,8 @@ library
     Statistics.Sample.KernelDensity.Simple
     Statistics.Sample.Normalize
     Statistics.Sample.Powers
+    Statistics.Test.Bartlett
+    Statistics.Test.Levene
     Statistics.Test.ChiSquared
     Statistics.Test.KolmogorovSmirnov
     Statistics.Test.KruskalWallis

diff --git a/tests/Tests/Parametric.hs b/tests/Tests/Parametric.hs
@@ -4,12 +4,18 @@ import Data.Maybe (fromJust)
 import Statistics.Test.StudentT
 import Statistics.Types
 import qualified Data.Vector.Unboxed as U
-import Test.Tasty (testGroup)
-import Tests.Helpers  (testEquality)
+import qualified Data.Vector as V
+import Test.Tasty (testGroup, TestTree)
+import Test.Tasty.HUnit (testCase, assertBool)
+import Tests.Helpers (testEquality)
 import qualified Test.Tasty as Tst
 
+import Statistics.Test.Levene
+import Statistics.Test.Bartlett
+
+
 tests :: Tst.TestTree
-tests = testGroup "Parametric tests" studentTTests
+tests = testGroup "Parametric tests" (studentTTests ++ bartlettTests ++ leveneTests)
 
 -- 2 samples x 20 obs data
 --
@@ -77,7 +83,7 @@ testTTest name pVal test =
   , testEquality name (isSignificant (mkPValue $ pValue pVal + 1e-5) test)
     Significant
   ]
-  
+
 studentTTests :: [Tst.TestTree]
 studentTTests = concat
   [ -- R: t.test(sample1, sample2, alt="two.sided", var.equal=T)
@@ -100,3 +106,51 @@ studentTTests = concat
       (mkPValue 0.01705) (fromJust $ pairedTTest BGreater sample12)
   ]
   where sample12 = U.zip sample1 sample2
+
+
+------------------------------------------------------------
+-- Bartlett's Test
+------------------------------------------------------------
+
+bartlettTests :: [TestTree]
+bartlettTests =
+  let
+    a = U.fromList [9.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
+    b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 9.36, 9.18, 8.67, 9.05]
+    c = U.fromList [8.95, 8.12, 8.95, 8.85, 8.03, 8.84, 8.07, 8.98, 8.86, 8.98]
+    result = bartlettTest [a, b, c]
+  in case result of
+       Left err -> [testCase "Bartlett test - failed" (assertBool err False)]
+       Right test ->
+         [ testApproxEqual "Bartlett's Chi-Square Statistic" 1e-3 (testStatistics test) 1.802
+         , testApproxEqual "Bartlett's P-Value" 1e-3 (pValue $ testSignificance test) 0.406
+         , testEquality "Reject null hypothesis at alpha = 0.05"
+             (isSignificant (mkPValue 0.05) test) NotSignificant
+         ]
+
+------------------------------------------------------------
+-- Levene's Test (Trimmed Mean)
+------------------------------------------------------------
+
+-- Local helper for approximate equality
+testApproxEqual :: String -> Double -> Double -> Double -> TestTree
+testApproxEqual name epsilon actual expected =
+  testCase name $
+    let diff = abs (actual - expected)
+    in assertBool (name ++ ": expected ≈ " ++ show expected ++ ", got " ++ show actual) (diff < epsilon)
+
+leveneTests :: [TestTree]
+leveneTests =
+  let
+    a = V.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
+    b = V.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
+    c = V.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
+    result = levenesTest 0.05 (Trimmed 0.05) [a, b, c]
+  in case result of
+       Left err -> [testCase "Levene trimmed mean test - failed" (assertBool err False)]
+       Right test ->
+         [ testApproxEqual "Levene's W Statistic (Trimmed 0.05)" 1e-3 (testStatistics test) 7.905
+         , testApproxEqual "Levene's P-Value (Trimmed 0.05)" 1e-3 (pValue $ testSignificance test) 0.002
+         , testEquality "Reject null hypothesis at alpha = 0.05"
+             (isSignificant (mkPValue 0.05) test) Significant
+         ]