{-# OPTIONS_GHC -Wall #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Main (main) where
import Frames
import qualified Data.Foldable as F
import qualified Data.List as L
import Lens.Micro.Extras
import Control.Foldl (fold, sum)
import qualified Control.Foldl as Foldl
import Prelude hiding (sum)
import qualified Frames.MapReduce as FMR
import qualified Frames.Folds as FF
-- Template Haskell splice (this is why the TemplateHaskell pragma is enabled).
-- The rest of this module relies on it for the 'Purchases' row type, the
-- 'Country', 'Amount' and 'Discount' column types and the 'amount' lens --
-- presumably derived from the header row of the CSV file (TODO confirm the
-- header is: country, amount, discount). The path is the same file that
-- 'loadBenchmarks' reads at run time.
tableTypes "Purchases" "purchases.csv"
-- | Read the rows of @purchases.csv@ with 'readTable' and collect them into a
-- 'Frame' of 'Purchases' via 'inCoreAoS'.
loadBenchmarks :: IO (Frame Purchases)
loadBenchmarks = inCoreAoS $ readTable "purchases.csv"
-- | Unpack step of the map-reduce: every row is passed through unchanged.
--
-- No filtering or reshaping is needed before grouping, so this uses the
-- library's identity unpacker instead of a field filter whose predicate is
-- always true.
unpack :: FMR.Unpack Purchases Purchases
unpack = FMR.noUnpack
-- | Assign step of the map-reduce: splits each row on the 'Country' column.
-- Per the type signature, the grouping key is the @Country@ field and the
-- data handed to the reducer is the remaining @Amount@ and @Discount@ fields.
assign :: FMR.Assign (Record '[Country]) Purchases (Record '[Amount, Discount])
assign = FMR.splitOnKeys @'[Country]
-- | Reduce step of the map-reduce: within each 'Country' group, sums the
-- 'Amount' and 'Discount' columns and re-attaches the group's key, giving
-- one output row per country.
--
-- The continuation lines of the signature must be indented; at column 0 they
-- would be parsed as separate top-level declarations.
reduce
  :: FMR.Reduce
       (Record '[Country])
       (Record '[Amount, Discount])
       (Frame Purchases)
reduce = FMR.foldAndAddKey $ FF.foldAllConstrained @Num @'[Amount, Discount] sum
-- | The complete map-reduce as a single fold over purchase rows: 'unpack',
-- then 'assign', then 'reduce', with the per-group results concatenated into
-- one result frame.
mrFold :: FMR.Fold Purchases (Frame Purchases)
mrFold = FMR.concatFold (FMR.mapReduceFold unpack assign reduce)
-- | Print the first six rows of a frame, one row per line (fewer if the
-- frame is shorter).
rhead :: Show a => Frame a -> IO ()
rhead = mapM_ print . take 6 . F.toList
-- | Loads the purchases table, prints its first rows, prints the grand total
-- of the @amount@ column, then prints the per-country sums computed by
-- 'mrFold'.
main :: IO ()
main = do
  -- The statements of a do block must be indented past 'main'; at column 0
  -- they are parsed as top-level declarations and the module fails to compile.
  ms <- loadBenchmarks
  rhead ms
  -- Grand total of the amount column across every row.
  print $ fold sum (view amount <$> ms)
  let result = FMR.fold mrFold ms
  -- One result row per line, with no blank lines between rows.
  putStrLn . L.intercalate "\n" . fmap show $ fold Foldl.list result
{country :-> "USA", amount :-> 2000, discount :-> 10}
{country :-> "USA", amount :-> 3500, discount :-> 15}
{country :-> "USA", amount :-> 3000, discount :-> 20}
{country :-> "Canada", amount :-> 120, discount :-> 12}
{country :-> "Canada", amount :-> 180, discount :-> 18}
{country :-> "Canada", amount :-> 3100, discount :-> 21}
17210
{country :-> "Australia", amount :-> 600, discount :-> 60}
{country :-> "Brazil", amount :-> 460, discount :-> 46}
{country :-> "Canada", amount :-> 3400, discount :-> 51}
{country :-> "France", amount :-> 500, discount :-> 50}
{country :-> "Germany", amount :-> 570, discount :-> 57}
{country :-> "India", amount :-> 720, discount :-> 72}
{country :-> "Italy", amount :-> 630, discount :-> 63}
{country :-> "Japan", amount :-> 690, discount :-> 69}
{country :-> "Spain", amount :-> 660, discount :-> 66}
{country :-> "UK", amount :-> 480, discount :-> 48}
{country :-> "USA", amount :-> 8500, discount :-> 45}
Rasmus Bååth wrote an interesting blog post (https://www.sumsar.net/blog/pandas-feels-clunky-when-coming-from-r/) in which he remarks that Pandas feels clunky compared to R's data frames. So I thought I would try to reproduce his example using Frames and Frames-map-reduce (https://hackage.haskell.org/package/Frames-map-reduce, by @adamConnerSax), hoping it would be as easy as in R. This is as far as I got, but it doesn't seem as slick as the R version. Does anyone have any better ideas?
which gives