{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

-- | Core 'DataFrame' and 'GroupedDataFrame' types together with their
-- equality, display and column-lookup primitives.
module DataFrame.Internal.DataFrame where

import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU

import Control.DeepSeq (NFData (..), rnf)
import Control.Exception (throw)
import Data.Function (on)
import Data.List (sortBy, transpose, (\\))
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import DataFrame.Display.Terminal.PrettyPrint
import DataFrame.Errors
import DataFrame.Internal.Column
import DataFrame.Internal.Expression
import Text.Printf
import Type.Reflection (typeRep)

-- | A table stored as a vector of columns plus a name-to-position index.
data DataFrame = DataFrame
    { columns :: V.Vector Column
    -- ^ Column storage, addressed by the positions held in 'columnIndices'.
    , columnIndices :: M.Map T.Text Int
    -- ^ Maps each column name to its position in 'columns'.
    , dataframeDimensions :: (Int, Int)
    -- ^ The first component is reported as the row count by the 'Show'
    -- instance; the second is presumably the column count (TODO confirm).
    , derivingExpressions :: M.Map T.Text UExpr
    -- ^ Expressions keyed by column name.
    -- NOTE(review): presumably the expression each derived column was
    -- computed from; confirm against the code that populates it.
    }

-- | Forces the columns, the name index and the dimensions.
-- 'derivingExpressions' is deliberately left unevaluated.
instance NFData DataFrame where
    rnf :: DataFrame -> ()
    rnf (DataFrame cols idx dims _exprs) =
        rnf cols `seq` rnf idx `seq` rnf dims

-- | A 'DataFrame' paired with the columns it is grouped by and three
-- index vectors describing the grouping.
data GroupedDataFrame = Grouped
    { fullDataframe :: DataFrame
    -- ^ The underlying, ungrouped frame.
    , groupedColumns :: [T.Text]
    -- ^ Names of the key columns (printed as @keyColumns@ by 'show').
    , valueIndices :: VU.Vector Int
    -- ^ NOTE(review): presumably row indices ordered by group; confirm.
    , offsets :: VU.Vector Int
    -- ^ NOTE(review): presumably group boundaries into 'valueIndices'; confirm.
    , rowToGroup :: VU.Vector Int
    -- ^ NOTE(review): presumably the group id of each row; confirm.
    }

-- | Shows the key columns and every remaining column name of the
-- underlying frame; the index vectors are not printed.
instance Show GroupedDataFrame where
    show :: GroupedDataFrame -> String
    show (Grouped df keyCols _ _ _) =
        printf
            "{ keyColumns: %s groupedColumns: %s }"
            (show keyCols)
            (show (M.keys (columnIndices df) \\ keyCols))

-- | Two grouped frames are equal when their underlying frames and key
-- column lists are equal. The index vectors are not compared.
instance Eq GroupedDataFrame where
    (==) :: GroupedDataFrame -> GroupedDataFrame -> Bool
    Grouped df keyCols _ _ _ == Grouped df' keyCols' _ _ _ =
        df == df' && keyCols == keyCols'

-- | Two frames are equal when they have the same set of column names and
-- every name resolves to equal columns in both. Column positions,
-- 'dataframeDimensions' and 'derivingExpressions' are not compared.
instance Eq DataFrame where
    (==) :: DataFrame -> DataFrame -> Bool
    a == b =
        names == M.keys (columnIndices b)
            && all sameColumn names
      where
        names = M.keys (columnIndices a)
        -- 'getColumn' is total, so no partial map lookup is needed here.
        sameColumn name = getColumn name a == getColumn name b

-- | Renders at most the first 20 rows as a table. When the frame has more
-- rows than that, a trailing line states how many rows were shown.
instance Show DataFrame where
    show :: DataFrame -> String
    show d = T.unpack (asText preview False) ++ footer
      where
        maxRows :: Int
        maxRows = 20

        (totalRows, totalCols) = dataframeDimensions d
        shownRows = min maxRows totalRows

        -- The same frame with every column cut down to the preview length.
        preview =
            d
                { columns = V.map (takeColumn maxRows) (columns d)
                , dataframeDimensions = (shownRows, totalCols)
                }

        footer :: String
        footer
            | totalRows > maxRows =
                "\nShowing " ++ show shownRows ++ " rows out of " ++ show totalRows
            | otherwise = ""

-- | Renders the whole frame through 'asText' with the markdown flag set.
toMarkdownTable :: DataFrame -> T.Text
toMarkdownTable df = asText df True

-- | Renders a frame as text via 'showTable'.
--
-- The header lists column names ordered by their position in
-- 'columnIndices'. The 'Bool' is passed straight through to 'showTable'
-- ('toMarkdownTable' passes 'True', 'show' passes 'False').
asText :: DataFrame -> Bool -> T.Text
asText d properMarkdown = showTable properMarkdown header types rows
  where
    header :: [T.Text]
    header = map fst (sortBy (compare `on` snd) (M.toList (columnIndices d)))

    -- NOTE(review): types follow the storage order of 'columns', whereas
    -- 'header' follows index order. They line up only if the indices map
    -- one-to-one onto the vector slots; confirm that invariant.
    types :: [T.Text]
    types = V.toList (V.filter (/= "") (V.map getType (columns d)))

    -- One rendered column per header name, transposed into rows.
    rows :: [[T.Text]]
    rows = transpose (map (V.toList . renderColumn . (`getColumn` d)) header)

    -- Name of the element type carried by a column.
    getType :: Column -> T.Text
    getType (BoxedColumn (_ :: V.Vector a)) = T.pack (show (typeRep @a))
    getType (UnboxedColumn (_ :: VU.Vector a)) = T.pack (show (typeRep @a))
    getType (OptionalColumn (_ :: V.Vector a)) = T.pack (show (typeRep @a))

    -- Text and String columns are special-cased so their values are not
    -- passed through 'show'.
    renderColumn :: Maybe Column -> V.Vector T.Text
    renderColumn (Just (BoxedColumn (column :: V.Vector a))) =
        case testEquality (typeRep @a) (typeRep @T.Text) of
            Just Refl -> column
            Nothing -> case testEquality (typeRep @a) (typeRep @String) of
                Just Refl -> V.map T.pack column
                Nothing -> V.map (T.pack . show) column
    renderColumn (Just (UnboxedColumn column)) =
        V.map (T.pack . show) (V.convert column)
    renderColumn (Just (OptionalColumn column)) =
        V.map (T.pack . show) column
    renderColumn Nothing = V.empty

-- | A frame with no columns, an empty name index, dimensions @(0, 0)@ and
-- no deriving expressions.
empty :: DataFrame
empty =
    DataFrame
        { columns = V.empty
        , columnIndices = M.empty
        , dataframeDimensions = (0, 0)
        , derivingExpressions = M.empty
        }

-- | Looks a column up by name. Returns 'Nothing' when the name is not in
-- 'columnIndices' or when its index falls outside 'columns'.
getColumn :: T.Text -> DataFrame -> Maybe Column
getColumn name df = columnIndices df M.!? name >>= (columns df V.!?)

-- | Like 'getColumn', but throws 'ColumnNotFoundException' (carrying the
-- requested name and the list of known column names) when the lookup
-- fails.
unsafeGetColumn :: T.Text -> DataFrame -> Column
unsafeGetColumn name df = case getColumn name df of
    Just col -> col
    Nothing ->
        throw (ColumnNotFoundException name "" (M.keys (columnIndices df)))

-- | 'True' when the frame holds no columns. Only the 'columns' vector is
-- inspected; the row count is not.
null :: DataFrame -> Bool
null df = V.null (columns df)