diff --git a/analyze/examples/MoneySpent.hs b/analyze/examples/MoneySpent.hs new file mode 100644 index 0000000..cae32d8 --- /dev/null +++ b/analyze/examples/MoneySpent.hs @@ -0,0 +1,118 @@ +module MoneySpent +( + main, + Prices (..), + Items (..) +) +where + + +import qualified Data.HashMap.Strict as HM +import qualified Data.Text as T +import qualified Data.Text.Read as TR +import qualified Data.Text.Show as TS +import qualified Data.Vector as V + +import Control.Monad.Catch (catch) +import Data.Maybe (fromJust, isJust, maybe) + +import qualified Anaylze.RFrame as F +import qualified Anaylze.Common as C +import qualified Anaylze.CSV as CSV + +lookupFilter :: (Data k) => + (k -> Bool) -> Vector k -> HashMap k Int -> [Int] +lookupFilter pred keys lkup = map ((flip HM.lookup) lkup) + $ V.filter pred keys + +removeLegalFees :: (Data v, MonadThrow m) => + RFrame T.Text v -> RFrame T.Text v -> RFrame T.Text v +removeLegalFees = + F.filter (\keys lkup i values -> not $ + (T.isInfixOf (T.pack "legal fees") $ values !! + lookupFilter (T.isInfixOf (T.pack "item")) keys lkup)) + +groupBy :: (Data v, MonadThrow m) => + RFrame T.Text v -> T.Text -> m [RFrame T.Text v] +groupBy items tCol = V.mapM (\item -> F.filter (\keys lkup i values -> + item == values !! lookupFilter tCol keys lkup) items) + $ col tCol items + +addPriceCol :: (Data v, MonadThrow m) => + [RFrame T.Text v] -> RFrame T.Text v -> m [RFrame T.Text v] +addPriceCol splitItems prices = do + priceTag <- col (T.pack "price") prices + + return $ V.map (\(s,p) -> + addColumn s (T.pack "price") $ V.take + (length $ rframeData s / length $ rframeKeys s) + $ V.fromList $ repeat p) + $ V.zip splitItems priceTag + +merge :: (Data v, MonadThrow m) => + m [R.Frame T.Text v] -> m (R.Frame T.Text v) +merge = foldM appendRows F.empty + +filterDates :: (Data v) => + R.Frame T.Text v -> R.Frame T.Text v +filterDates frame = + F.filter (\keys lkup i values -> + let date = (TR.readMaybe $ values !! + lookupFilter (T.pack "date") keys lkup) :: Maybe Int + in maybe False ((>) 6) date) + +totalPrice :: + V.Vector T.Text -> V.Vector (V.Vector T.Text) -> Maybe Double +totalPrice keys values = (*) <$> price <*> units + where + lookupRead str = (TR.reads $ values !! + lookupFilter (T.pack str) keys (C.makeLookup keys)) :: Double + price = lookupRead "price" + units = lookupRead "units" + +accumSumCol :: (Data v) => + RFrame T.Text v -> V.Vector T.Text +accumSumCol items = + V.map TS.show $ V.foldl (\acc values -> + let price = totalPrice keys values + in if isJust price then + snoc acc $ (last acc) + + (fromJust price) + else acc + ) V.empty (rframeData items) + where keys = rframeKeys items + +mean :: RFrame T.Text T.Text -> Double +mean items = + V.foldl (\acc values -> + let price = totalPrice keys values + in if isJust price then + acc + price + else + acc + ) + (0::Double) (rframeData items) / (V.length $ rframeData items) + where keys = rframeKeys items + + +main = do + -- Load Csv files + prices <- CSV.loadCsvFileWithHeader "./data/prices.csv" + items <- CSV.loadCsvFileWithHeader "./data/items.csv" + + -- remove Legal fees + let pricesWithoutL = removeLegalFees prices + let itemsWithoutL = removeLegalFees items + + -- merge price and purchase data + splitItems <- groupBy itemsWithoutL (T.pack "item-bought") + priceItems <- merge $ addPriceCol splitItems pricesWithoutL + + -- filter dates, group by people + -- get accumlative sum and append it to the groups, then merge them + accsum <- mapM (\person -> addColumn person (T.pack "accsum") accumSumCol) + $ groupBy (filterDates priceItems) (T.pack "person") + + let finalItems = merge accsum + --show mean + print $ mean finalItems diff --git a/analyze/examples/data/items.csv b/analyze/examples/data/items.csv new file mode 100644 index 0000000..0404d88 --- /dev/null +++ b/analyze/examples/data/items.csv @@ -0,0 +1,6 @@ +date , person , item-bought , units-bought +7 , bob , car , 1 +5 , alice , car , 1 +4 , bob , legal fees (1 hour) , 20 +3 , alice , computer , 2 +1 , bob , computer , 1 diff --git a/analyze/examples/data/prices.csv b/analyze/examples/data/prices.csv new file mode 100644 index 0000000..a494e06 --- /dev/null +++ b/analyze/examples/data/prices.csv @@ -0,0 +1,4 @@ +item , price +computer , 1000 +car , 5000 +legal fees (1 hour) , 400 diff --git a/analyze/test/Money.hs b/analyze/test/Money.hs new file mode 100644 index 0000000..b0b93bf --- /dev/null +++ b/analyze/test/Money.hs @@ -0,0 +1,68 @@ +module Main where + +import qualified Analyze as A +import Test.Tasty +import Test.Tasty.HUnit +import Test.Tasty.Quickcheck +import Data.Text.Encoding (decodeUtf8, encodeUtf8) + +import MoneyFixtures +import MoneySpent + +testRemoveLegalFees :: TestTree +testRemoveLegalFees = testCase "removeLegalFees" $ do + table <- decodeWithHeader $ encodeUtf8 exampleUnitsCsv + noFees <- decodeWithHeader noLegalFees + removeLegalFees table ?@= noFees + +testGroupBy :: TestTree +testGroupBy = testCase "groupBy (person)" $ do + grouped <- mapM decodeWithHeader personsGrouped + table <- decodeWithHeader $ encodeUtf8 exampleUnitsCsv + people <- groupBy table (T.pack "person") + grouped ?@= people + +testWithPriceCol :: TestTree +testWithPriceCol = testCase "addPriceCol" $ do + table <- decodeWithHeader $ encodeUtf8 exampleUnitsCsv + prices <- decodeWithHeader $ encodeUtf8 examplePriceCsv + examplePC <- mapM decodeWithHeader priceCol + splitItems <- groupBy table (T.pack "item-bought") + priceCol <- addPriceCol splitItems prices + priceCol ?@= examplePC + +testDatesFiltered :: TestTree +testDatesFiltered = testCase "datesFiltered" $ do + table <- decodeWithHeader $ encodeUtf8 exampleUnitsCsv + filtered <- decodeWithHeader datesFiltered + filterDates table ?@= filtered + +testAccSum :: TestTree +testAccSum = testCase "accumSumCol" $ do + table <- decodeWithHeader $ head personsGrouped + accumSumCol table ?@= accumSum + +testMerge :: TestTree +testMerge = testCase "merge" $ do + tables <- mapM decodeWithHeader personsGrouped + full <- encodeWithHeader $ encodeUtf8 exampleUnitsCsv + merge tables ?@= full + +testMean :: TestTree +testMean = testCase "mean" $ do + pc <- mapM decodeWithHeader withPriceCol + table <- merge pc + mean table ?@= totalMean + +tests :: TestTree +tests = [ + testGroupBy, + testWithPriceCol, + testDatesFiltered, + testAccSum, + testMerge, + testMean + ] + +main :: IO () +main = defaultMain tests diff --git a/analyze/test/MoneyFixtures.hs b/analyze/test/MoneyFixtures.hs new file mode 100644 index 0000000..8659a0a --- /dev/null +++ b/analyze/test/MoneyFixtures.hs @@ -0,0 +1,48 @@ +module MoneyFixtures where + +import qualified Data.Text as T +import qualified Data.ByteString.Lazy as LBS +import qualified Data.Vector as V + +import Data.Text (Text) +import Data.Vector (Vector) +import Data.Text.Encoding (decodeUtf8, encodeUtf8) + +noLegalFees :: LBS.ByteString +noLegalFees = encodeUtf8 (T.pack ("date,item-bought,person,units\n" `mappend` + "6,motorbike,bob,2\n" `mappend` "6,computer,alice,2")) + +datesFiltered :: LBS.ByteString +datesFiltered = encodeUtf8 + (T.pack $ "date,item-bought,person,units\n" `mappend` + "5,legal fees,bob,3\n") + +personsGrouped :: [LBS.ByteString] +personsGrouped = map encodeUtf8 [bob, alice] + where bob = T.pack "date,item-bought,person,units\n" `mappend` + "Legal fees,bob,3\n" `mappend` "5,motorbike,bob,2\n" + alice = T.pack "date,item-bought,person,units\n" `mappend` + "6,computer,alice,2" + +withpriceCol :: [LBS.ByteString] +withpriceCol = map encodeUtf8 [bob, alice] + where bob = T.pack "date,item-bought,person,units,price\n" `mappend` + "legal fees,bob,3,300\n" `mappend` "5,motorbike,bob,2,200\n" + alice = T.pack "date,item-bought,person,units\n" `mappend` + "6,computer,alice,2,100" + +accumSum :: Vector Text +accumSum = V.fromList $ ["900", "1300"] + +totalMean :: Double +totalMean = 500 + +examplePriceCsv :: Text +examplePriceCsv = "item,price\n" `mappend` "legal fees,300" `mappend` + "motorbike,200" `mappend` "computer,100" + +exampleUnitsCsv :: Text +exampleUnitsCsv = "date,item-bought,person,units\n" `mappend` + "5,legal fees,bob,3\n" `mappend` "6,computer,alice,2" `mappend` + "6,motorbike,bob,2" +