Skip to content

Commit 218ffe5

Browse files
committed
Make statistical functions return Optionals.
1 parent f37c61a commit 218ffe5

2 files changed

Lines changed: 46 additions & 62 deletions

File tree

src/Data/DataFrame/Internal.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,14 @@ reduceColumn f (UnboxedColumn (column :: c)) = case testEquality (typeRep @c) (t
286286
Just Refl -> f column
287287
Nothing -> error $ "Can't reduce. Incompatible types: " ++ show (typeRep @a) ++ " " ++ show (typeRep @a)
288288

289+
-- | Total counterpart of 'reduceColumn'.
--
-- Applies @f@ to the vector stored inside the column, but only when that
-- vector's type is exactly @a@. On a type mismatch the result is 'Nothing'
-- instead of a runtime error.
safeReduceColumn :: forall a b. (Typeable a) => (a -> b) -> Column -> Maybe b
safeReduceColumn f (BoxedColumn (column :: c)) =
  case testEquality (typeRep @c) (typeRep @a) of
    -- Matching on Refl proves c ~ a, which is what lets f accept the vector.
    Just Refl -> Just (f column)
    Nothing -> Nothing
safeReduceColumn f (UnboxedColumn (column :: c)) =
  case testEquality (typeRep @c) (typeRep @a) of
    Just Refl -> Just (f column)
    Nothing -> Nothing
296+
289297
longZipColumns :: Column -> Column -> Column
290298
longZipColumns (BoxedColumn column) (BoxedColumn other) = BoxedColumn (V.generate (max (VG.length column) (VG.length other)) (\i -> (column VG.!? i, other VG.!? i)))
291299
longZipColumns (BoxedColumn column) (UnboxedColumn other) = BoxedColumn (V.generate (max (VG.length column) (VG.length other)) (\i -> (column VG.!? i, other VG.!? i)))

src/Data/DataFrame/Operations.hs

Lines changed: 38 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ import Text.Read (readMaybe)
9494
import Type.Reflection
9595
import GHC.IO.Unsafe (unsafePerformIO)
9696
import Control.Monad (foldM_)
97+
import Control.Applicative (asum)
9798

9899
-- | /O(n)/ Adds a vector to the dataframe.
99100
addColumn ::
@@ -814,100 +815,75 @@ frequencies name df = case name `MS.lookup` DI.columnIndices df of
814815
initDf = DI.empty & addColumn "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])
815816
in L.foldl' (\df (col, k) -> addColumn (vText col) (V.fromList [k, k * 100 `div` total]) df) initDf counts
816817

817-
mean :: T.Text -> DataFrame -> Double
818+
-- | Runs @SS.mean@ over the named column via 'applyStatistic'.
--
-- The result is 'Nothing' whenever 'applyStatistic' yields 'Nothing'.
mean :: T.Text -> DataFrame -> Maybe Double
mean name df = applyStatistic SS.mean name df
819820

820-
median :: T.Text -> DataFrame -> Double
821+
-- | Runs @SS.median SS.medianUnbiased@ over the named column via
-- 'applyStatistic'.
--
-- The result is 'Nothing' whenever 'applyStatistic' yields 'Nothing'.
median :: T.Text -> DataFrame -> Maybe Double
median name df = applyStatistic (SS.median SS.medianUnbiased) name df
822823

823-
standardDeviation :: T.Text -> DataFrame -> Double
824+
-- | Runs @SS.fastStdDev@ over the named column via 'applyStatistic'.
--
-- The result is 'Nothing' whenever 'applyStatistic' yields 'Nothing'.
standardDeviation :: T.Text -> DataFrame -> Maybe Double
standardDeviation name df = applyStatistic SS.fastStdDev name df
825826

826-
skewness :: T.Text -> DataFrame -> Double
827+
-- | Runs @SS.skewness@ over the named column via 'applyStatistic'.
--
-- The result is 'Nothing' whenever 'applyStatistic' yields 'Nothing'.
skewness :: T.Text -> DataFrame -> Maybe Double
skewness name df = applyStatistic SS.skewness name df
828829

829-
variance :: T.Text -> DataFrame -> Double
830+
-- | Runs @SS.variance@ over the named column via 'applyStatistic'.
--
-- The result is 'Nothing' whenever 'applyStatistic' yields 'Nothing'.
variance :: T.Text -> DataFrame -> Maybe Double
variance name df = applyStatistic SS.variance name df
831832

832-
interQuartileRange :: T.Text -> DataFrame -> Double
833+
-- | Runs @SS.midspread SS.medianUnbiased 4@ over the named column via
-- 'applyStatistic'.
--
-- The result is 'Nothing' whenever 'applyStatistic' yields 'Nothing'.
interQuartileRange :: T.Text -> DataFrame -> Maybe Double
interQuartileRange name df = applyStatistic (SS.midspread SS.medianUnbiased 4) name df
834835

835836
-- | Correlation between two named columns.
--
-- Both columns are looked up with 'DI.getColumn', paired with
-- 'DI.zipColumns', and the paired column is reduced with @SS.correlation@
-- when it holds a @VU.Vector (Double, Double)@.
--
-- Returns 'Nothing' if either lookup yields 'Nothing', or if the zipped
-- column does not hold that exact vector type. The reduction goes through
-- 'DI.safeReduceColumn' rather than 'DI.reduceColumn' because the latter
-- calls 'error' on a type mismatch, which would defeat the 'Maybe' result.
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation first second df = do
  left <- DI.getColumn first df
  right <- DI.getColumn second df
  DI.safeReduceColumn @(VU.Vector (Double, Double)) SS.correlation (DI.zipColumns left right)
837838

838-
sum :: T.Text -> DataFrame -> Double
839-
sum name df = case name `MS.lookup` DI.columnIndices df of
840-
Nothing -> throw $ ColumnNotFoundException name "apply" (map fst $ M.toList $ DI.columnIndices df)
841-
Just i -> case DI.columns df V.!? i of
842-
Nothing -> error "Internal error: Column is empty"
843-
Just c -> case c of
844-
Just ((UnboxedColumn (column :: VU.Vector a'))) -> case testEquality (typeRep @a') (typeRep @Int) of
845-
Just Refl -> VG.sum (VU.map fromIntegral column)
846-
Nothing -> case testEquality (typeRep @a') (typeRep @Double) of
847-
Just Refl -> VG.sum column
848-
Nothing -> error $ "Cannot get mean of non-numeric column: " ++ T.unpack name -- Not sure what to do with no numeric - return nothing???
849-
Nothing -> error $ "Cannot get mean of non numeric column" ++ T.unpack name
850-
851-
applyStatistic :: (forall v . (VG.Vector v Double)
852-
=> v Double -> Double) -> T.Text -> DataFrame -> Double
853-
applyStatistic f name df = case name `MS.lookup` DI.columnIndices df of
854-
Nothing -> throw $ ColumnNotFoundException name "apply" (map fst $ M.toList $ DI.columnIndices df)
855-
Just i -> case DI.columns df V.!? i of
856-
Nothing -> error "Internal error: Column is empty"
857-
Just c -> case c of
858-
Just ((UnboxedColumn (column :: VU.Vector a'))) -> case testEquality (typeRep @a') (typeRep @Int) of
859-
Just Refl -> f (VU.map fromIntegral column)
860-
Nothing -> case testEquality (typeRep @a') (typeRep @Double) of
861-
Just Refl -> f column
862-
Nothing -> case testEquality (typeRep @a') (typeRep @Float) of
863-
Just Refl -> f (VG.map realToFrac column)
864-
Nothing -> error $ "Cannot get mean of non-numeric column: " ++ T.unpack name -- Not sure what to do with no numeric - return nothing???
865-
_ -> error $ "Cannot get mean of non numeric column: " ++ T.unpack name
866-
867-
applyStatistics :: (VU.Vector Double -> VU.Vector Double) -> T.Text -> DataFrame -> VU.Vector Double
868-
applyStatistics f name df = case name `MS.lookup` DI.columnIndices df of
869-
Nothing -> throw $ ColumnNotFoundException name "apply" (map fst $ M.toList $ DI.columnIndices df)
870-
Just i -> case DI.columns df V.!? i of
871-
Nothing -> error "Internal error: Column is empty"
872-
Just c -> case c of
873-
Just ((UnboxedColumn (column :: VU.Vector a'))) -> case testEquality (typeRep @a') (typeRep @Int) of
874-
Just Refl -> f (VU.map fromIntegral column)
875-
Nothing -> case testEquality (typeRep @a') (typeRep @Double) of
876-
Just Refl -> f column
877-
Nothing -> case testEquality (typeRep @a') (typeRep @Float) of
878-
Just Refl -> f (VG.map realToFrac column)
879-
Nothing -> error $ "Cannot get mean of non-numeric column: " ++ T.unpack name -- Not sure what to do with no numeric - return nothing???
880-
_ -> error $ "Cannot get mean of non numeric column: " ++ T.unpack name
839+
-- | Sum of the values in the named column, widened to 'Double'.
--
-- Supported element types are 'Int' (via 'fromIntegral'), 'Double', and
-- 'Float' (via 'realToFrac'), matching the types 'applyStatistics' accepts.
--
-- Returns 'Nothing' when 'DI.getColumn' yields 'Nothing', when the column is
-- not an 'UnboxedColumn', or when its element type is not one of the above.
sum :: T.Text -> DataFrame -> Maybe Double
sum name df = case DI.getColumn name df of
  Just (UnboxedColumn (column :: VU.Vector a'))
    | Just Refl <- testEquality (typeRep @a') (typeRep @Int) ->
        Just $ VG.sum (VU.map fromIntegral column)
    | Just Refl <- testEquality (typeRep @a') (typeRep @Double) ->
        Just $ VG.sum column
    | Just Refl <- testEquality (typeRep @a') (typeRep @Float) ->
        Just $ VG.sum (VU.map realToFrac column)
  -- Catch-all: a missing column, a boxed column, or an unsupported element
  -- type. Without it a boxed column would hit a non-exhaustive pattern
  -- failure at runtime.
  _ -> Nothing
847+
848+
-- | Applies a scalar statistic to the named column.
--
-- The column is fetched with 'DI.getColumn', converted by the first
-- successful candidate in @widen@, and then reduced with
-- 'DI.safeReduceColumn', which only succeeds when the column holds a
-- @VU.Vector Double@. Any failing step makes the whole result 'Nothing'.
applyStatistic :: (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df =
  DI.getColumn name df >>= widen >>= DI.safeReduceColumn f
  where
    -- Candidates are tried in order: Int -> Double, Float -> Double, and
    -- finally the column unchanged.
    -- NOTE(review): assumes DI.transform returns Nothing when the column's
    -- element type does not match the function's argument type -- confirm.
    widen column =
      asum
        [ DI.transform (fromIntegral :: Int -> Double) column
        , DI.transform (realToFrac :: Float -> Double) column
        , Just column
        ]
855+
856+
-- | Applies a vector-to-vector statistic to the named column.
--
-- Unboxed 'Int' columns are widened with 'fromIntegral', unboxed 'Float'
-- columns with 'realToFrac', and unboxed 'Double' columns are passed through
-- unchanged. Every other outcome of 'DI.getColumn' produces 'Nothing'.
applyStatistics :: (VU.Vector Double -> VU.Vector Double) -> T.Text -> DataFrame -> Maybe (VU.Vector Double)
applyStatistics f name df = case DI.getColumn name df of
  Just (UnboxedColumn (column :: VU.Vector a'))
    | Just Refl <- testEquality (typeRep @a') (typeRep @Int) ->
        Just $ f (VU.map fromIntegral column)
    | Just Refl <- testEquality (typeRep @a') (typeRep @Double) ->
        Just $ f column
    | Just Refl <- testEquality (typeRep @a') (typeRep @Float) ->
        Just $ f (VG.map realToFrac column)
  -- Reached when no guard above matched, or the column is absent or boxed.
  _ -> Nothing
881866

882867
summarize :: DataFrame -> DataFrame
883868
summarize df = fold columnStats (columnNames df) (fromList [("Statistic", DI.toColumn ["Mean" :: T.Text, "Minimum", "25%" ,"Median", "75%", "Max", "StdDev", "IQR", "Skewness"])])
884869
where columnStats name d = if all isJust (stats name) then addUnboxedColumn name (VU.fromList (map (roundTo 2 . fromMaybe 0) $ stats name)) d else d
885870
stats name = let
886-
quantiles = valuesOrNothing $! applyStatistics (SS.quantilesVec SS.medianUnbiased (VU.fromList [0,1,2,3,4]) 4) name
871+
quantiles = applyStatistics (SS.quantilesVec SS.medianUnbiased (VU.fromList [0,1,2,3,4]) 4) name df
887872
min' = flip (VG.!) 0 <$> quantiles
888873
quartile1 = flip (VG.!) 1 <$> quantiles
889874
median' = flip (VG.!) 2 <$> quantiles
890875
quartile3 = flip (VG.!) 3 <$> quantiles
891876
max' = flip (VG.!) 4 <$> quantiles
892877
iqr = (-) <$> quartile3 <*> quartile1
893-
in [valueOrNothing $! mean name,
878+
in [mean name df,
894879
min',
895880
quartile1,
896881
median',
897882
quartile3,
898883
max',
899-
valueOrNothing $! standardDeviation name,
884+
standardDeviation name df,
900885
iqr,
901-
valueOrNothing $! skewness name]
902-
--
903-
valueOrNothing f = unsafePerformIO $ catch
904-
(seq (Just $! f df) (return $ Just $! f df))
905-
(\(e::SomeException) ->
906-
return Nothing)
907-
valuesOrNothing f = unsafePerformIO $ catch
908-
(seq (Just $! f df) (return $ Just $! f df))
909-
(\(e::SomeException) ->
910-
return Nothing)
886+
skewness name df]
911887
roundTo :: Int -> Double -> Double
912888
roundTo n x = fromInteger (round $ x * (10^n)) / (10.0^^n)
913889

0 commit comments

Comments
 (0)