{-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE InstanceSigs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} module DataFrame.Internal.Row where import qualified Data.List as L import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Vector as V import qualified Data.Vector.Algorithms.Merge as VA import qualified Data.Vector.Generic as VG import qualified Data.Vector.Unboxed as VU import Control.DeepSeq (NFData (..)) import Control.Exception (throw) import Control.Monad.ST (runST) import Data.Function (on) import Data.Maybe (fromMaybe) import Data.Type.Equality (TestEquality (..)) import Data.Typeable (type (:~:) (..)) import DataFrame.Errors (DataFrameException (..)) import DataFrame.Internal.Column import DataFrame.Internal.DataFrame import DataFrame.Internal.Expression (Expr (..)) import Text.ParserCombinators.ReadPrec (ReadPrec) import Text.Read ( Lexeme (Ident), lexP, parens, readListPrec, readListPrecDefault, readPrec, ) import Type.Reflection (typeOf, typeRep) data Any where Value :: (Columnable a) => a -> Any instance Eq Any where (==) :: Any -> Any -> Bool (Value a a) == :: Any -> Any -> Bool == (Value a b) = Bool -> Maybe Bool -> Bool forall a. a -> Maybe a -> a fromMaybe Bool False (Maybe Bool -> Bool) -> Maybe Bool -> Bool forall a b. (a -> b) -> a -> b $ do a :~: a Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a a) (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a b) Bool -> Maybe Bool forall a. a -> Maybe a forall (m :: * -> *) a. Monad m => a -> m a return (Bool -> Maybe Bool) -> Bool -> Maybe Bool forall a b. (a -> b) -> a -> b $ a a a -> a -> Bool forall a. Eq a => a -> a -> Bool == a a b instance Ord Any where (<=) :: Any -> Any -> Bool (Value a a) <= :: Any -> Any -> Bool <= (Value a b) = Bool -> Maybe Bool -> Bool forall a. a -> Maybe a -> a fromMaybe Bool False (Maybe Bool -> Bool) -> Maybe Bool -> Bool forall a b. (a -> b) -> a -> b $ do a :~: a Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a a) (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a b) Bool -> Maybe Bool forall a. a -> Maybe a forall (m :: * -> *) a. Monad m => a -> m a return (Bool -> Maybe Bool) -> Bool -> Maybe Bool forall a b. (a -> b) -> a -> b $ a a a -> a -> Bool forall a. Ord a => a -> a -> Bool <= a a b instance Show Any where show :: Any -> String show :: Any -> [Char] show (Value a a) = Text -> [Char] T.unpack (a -> Text forall a. Columnable a => a -> Text showValue a a) instance NFData Any where rnf :: Any -> () rnf (Value a a) = a -> () forall a. NFData a => a -> () rnf a a showValue :: forall a. (Columnable a) => a -> T.Text showValue :: forall a. Columnable a => a -> Text showValue a v = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @a) (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @T.Text) of Just a :~: Text Refl -> a Text v Maybe (a :~: Text) Nothing -> case TypeRep a -> TypeRep [Char] -> Maybe (a :~: [Char]) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @a) (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @String) of Just a :~: [Char] Refl -> [Char] -> Text T.pack a [Char] v Maybe (a :~: [Char]) Nothing -> ([Char] -> Text T.pack ([Char] -> Text) -> (a -> [Char]) -> a -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . a -> [Char] forall a. Show a => a -> [Char] show) a v instance Read Any where readListPrec :: ReadPrec [Any] readListPrec :: ReadPrec [Any] readListPrec = ReadPrec [Any] forall a. Read a => ReadPrec [a] readListPrecDefault readPrec :: ReadPrec Any readPrec :: ReadPrec Any readPrec = ReadPrec Any -> ReadPrec Any forall a. ReadPrec a -> ReadPrec a parens (ReadPrec Any -> ReadPrec Any) -> ReadPrec Any -> ReadPrec Any forall a b. (a -> b) -> a -> b $ do Ident [Char] "Value" <- ReadPrec Lexeme lexP ReadPrec Any forall a. Read a => ReadPrec a readPrec toAny :: forall a. (Columnable a) => a -> Any toAny :: forall a. Columnable a => a -> Any toAny = a -> Any forall a. Columnable a => a -> Any Value fromAny :: forall a. (Columnable a) => Any -> Maybe a fromAny :: forall a. Columnable a => Any -> Maybe a fromAny (Value (a v :: b)) = do a :~: a Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @a) (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @b) a -> Maybe a forall a. a -> Maybe a forall (f :: * -> *) a. Applicative f => a -> f a pure a a v type Row = V.Vector Any (!?) :: [a] -> Int -> Maybe a !? :: forall a. [a] -> Int -> Maybe a (!?) [] Int _ = Maybe a forall a. Maybe a Nothing (!?) (a x : [a] _) Int 0 = a -> Maybe a forall a. a -> Maybe a Just a x (!?) (a x : [a] xs) Int n = [a] -> Int -> Maybe a forall a. [a] -> Int -> Maybe a (!?) [a] xs (Int n Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1) mkColumnFromRow :: Int -> [[Any]] -> Column mkColumnFromRow :: Int -> [[Any]] -> Column mkColumnFromRow Int i [[Any]] rows = case [[Any]] rows of [] -> [Text] -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => [a] -> Column fromList ([] :: [T.Text]) ([Any] row : [[Any]] _) -> case [Any] row [Any] -> Int -> Maybe Any forall a. [a] -> Int -> Maybe a !? Int i of Maybe Any Nothing -> [Text] -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => [a] -> Column fromList ([] :: [T.Text]) Just (Value (a v :: a)) -> [a] -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => [a] -> Column fromList ([a] -> Column) -> [a] -> Column forall a b. (a -> b) -> a -> b $ [a] -> [a] forall a. [a] -> [a] reverse ([a] -> [a]) -> [a] -> [a] forall a b. (a -> b) -> a -> b $ ([a] -> [Any] -> [a]) -> [a] -> [[Any]] -> [a] forall b a. (b -> a -> b) -> b -> [a] -> b forall (t :: * -> *) b a. Foldable t => (b -> a -> b) -> b -> t a -> b L.foldl' [a] -> [Any] -> [a] addToList [a v] (Int -> [[Any]] -> [[Any]] forall a. Int -> [a] -> [a] drop Int 1 [[Any]] rows) where addToList :: [a] -> [Any] -> [a] addToList [a] acc [Any] r = case [Any] r [Any] -> Int -> Maybe Any forall a. [a] -> Int -> Maybe a !? Int i of Maybe Any Nothing -> [a] acc Just (Value (a v' :: b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @a) (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @b) of Maybe (a :~: a) Nothing -> [a] acc Just a :~: a Refl -> a v' a -> [a] -> [a] forall a. a -> [a] -> [a] : [a] [a] acc toRowList :: DataFrame -> [[(T.Text, Any)]] toRowList :: DataFrame -> [[(Text, Any)]] toRowList DataFrame df = let names :: [Text] names = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text] forall a b. (a -> b) -> [a] -> [b] map (Text, Int) -> Text forall a b. (a, b) -> a fst (((Text, Int) -> (Text, Int) -> Ordering) -> [(Text, Int)] -> [(Text, Int)] forall a. (a -> a -> Ordering) -> [a] -> [a] L.sortBy (Int -> Int -> Ordering forall a. Ord a => a -> a -> Ordering compare (Int -> Int -> Ordering) -> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c `on` (Text, Int) -> Int forall a b. (a, b) -> b snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)] forall a b. (a -> b) -> a -> b $ Map Text Int -> [(Text, Int)] forall k a. Map k a -> [(k, a)] M.toList (DataFrame -> Map Text Int columnIndices DataFrame df)) in (Int -> [(Text, Any)]) -> [Int] -> [[(Text, Any)]] forall a b. (a -> b) -> [a] -> [b] map ([Text] -> [Any] -> [(Text, Any)] forall a b. [a] -> [b] -> [(a, b)] zip [Text] names ([Any] -> [(Text, Any)]) -> (Int -> [Any]) -> Int -> [(Text, Any)] forall b c a. (b -> c) -> (a -> b) -> a -> c . Vector Any -> [Any] forall a. Vector a -> [a] V.toList (Vector Any -> [Any]) -> (Int -> Vector Any) -> Int -> [Any] forall b c a. (b -> c) -> (a -> b) -> a -> c . DataFrame -> [Text] -> Int -> Vector Any mkRowRep DataFrame df [Text] names) [Int 0 .. ((Int, Int) -> Int forall a b. (a, b) -> a fst (DataFrame -> (Int, Int) dataframeDimensions DataFrame df) Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1)] toRowVector :: [T.Text] -> DataFrame -> V.Vector Row toRowVector :: [Text] -> DataFrame -> Vector (Vector Any) toRowVector [Text] names DataFrame df = Int -> (Int -> Vector Any) -> Vector (Vector Any) forall a. Int -> (Int -> a) -> Vector a V.generate ((Int, Int) -> Int forall a b. (a, b) -> a fst (DataFrame -> (Int, Int) dataframeDimensions DataFrame df)) (DataFrame -> [Text] -> Int -> Vector Any mkRowRep DataFrame df [Text] names) rowValue :: forall a. Expr a -> [(T.Text, Any)] -> Maybe a rowValue :: forall a. Expr a -> [(Text, Any)] -> Maybe a rowValue (Col Text name) [(Text, Any)] row = Text -> [(Text, Any)] -> Maybe Any forall a b. Eq a => a -> [(a, b)] -> Maybe b lookup Text name [(Text, Any)] row Maybe Any -> (Any -> Maybe a) -> Maybe a forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= forall a. Columnable a => Any -> Maybe a fromAny @a rowValue Expr a _ [(Text, Any)] _ = [Char] -> Maybe a forall a. HasCallStack => [Char] -> a error [Char] "Can only get rowValue of column reference" mkRowFromArgs :: [T.Text] -> DataFrame -> Int -> Row mkRowFromArgs :: [Text] -> DataFrame -> Int -> Vector Any mkRowFromArgs [Text] names DataFrame df Int i = (Text -> Any) -> Vector Text -> Vector Any forall a b. (a -> b) -> Vector a -> Vector b V.map Text -> Any get ([Text] -> Vector Text forall a. [a] -> Vector a V.fromList [Text] names) where get :: Text -> Any get Text name = case Text -> DataFrame -> Maybe Column getColumn Text name DataFrame df of Maybe Column Nothing -> DataFrameException -> Any forall a e. Exception e => e -> a throw (DataFrameException -> Any) -> DataFrameException -> Any forall a b. (a -> b) -> a -> b $ Text -> Text -> [Text] -> DataFrameException ColumnNotFoundException Text name Text "[INTERNAL] mkRowFromArgs" (Map Text Int -> [Text] forall k a. Map k a -> [k] M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text] forall a b. (a -> b) -> a -> b $ DataFrame -> Map Text Int columnIndices DataFrame df) Just (BoxedColumn Vector a column) -> a -> Any forall a. Columnable a => a -> Any toAny (Vector a column Vector a -> Int -> a forall a. Vector a -> Int -> a V.! Int i) Just (UnboxedColumn Vector a column) -> a -> Any forall a. Columnable a => a -> Any toAny (Vector a column Vector a -> Int -> a forall a. Unbox a => Vector a -> Int -> a VU.! Int i) Just (OptionalColumn Vector (Maybe a) column) -> Maybe a -> Any forall a. Columnable a => a -> Any toAny (Vector (Maybe a) column Vector (Maybe a) -> Int -> Maybe a forall a. Vector a -> Int -> a V.! Int i) mkRowRep :: DataFrame -> [T.Text] -> Int -> Row mkRowRep :: DataFrame -> [Text] -> Int -> Vector Any mkRowRep DataFrame df [Text] names Int i = Int -> (Int -> Any) -> Vector Any forall a. Int -> (Int -> a) -> Vector a V.generate ([Text] -> Int forall a. [a] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int L.length [Text] names) (\Int index -> Text -> Any get (Vector Text names' Vector Text -> Int -> Text forall a. Vector a -> Int -> a V.! Int index)) where names' :: Vector Text names' = [Text] -> Vector Text forall a. [a] -> Vector a V.fromList [Text] names throwError :: Text -> Any throwError Text name = [Char] -> Any forall a. HasCallStack => [Char] -> a error ([Char] -> Any) -> [Char] -> Any forall a b. (a -> b) -> a -> b $ [Char] "Column " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ Text -> [Char] T.unpack Text name [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ [Char] " has less items than " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ [Char] "the other columns at index " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ Int -> [Char] forall a. Show a => a -> [Char] show Int i get :: Text -> Any get Text name = case Text -> DataFrame -> Maybe Column getColumn Text name DataFrame df of Just (BoxedColumn Vector a c) -> case Vector a c Vector a -> Int -> Maybe a forall a. Vector a -> Int -> Maybe a V.!? Int i of Just a e -> a -> Any forall a. Columnable a => a -> Any toAny a e Maybe a Nothing -> Text -> Any throwError Text name Just (OptionalColumn Vector (Maybe a) c) -> case Vector (Maybe a) c Vector (Maybe a) -> Int -> Maybe (Maybe a) forall a. Vector a -> Int -> Maybe a V.!? Int i of Just Maybe a e -> Maybe a -> Any forall a. Columnable a => a -> Any toAny Maybe a e Maybe (Maybe a) Nothing -> Text -> Any throwError Text name Just (UnboxedColumn Vector a c) -> case Vector a c Vector a -> Int -> Maybe a forall a. Unbox a => Vector a -> Int -> Maybe a VU.!? Int i of Just a e -> a -> Any forall a. Columnable a => a -> Any toAny a e Maybe a Nothing -> Text -> Any throwError Text name Maybe Column Nothing -> DataFrameException -> Any forall a e. Exception e => e -> a throw (DataFrameException -> Any) -> DataFrameException -> Any forall a b. (a -> b) -> a -> b $ Text -> Text -> [Text] -> DataFrameException ColumnNotFoundException Text name Text "mkRowRep" (Map Text Int -> [Text] forall k a. Map k a -> [k] M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text] forall a b. (a -> b) -> a -> b $ DataFrame -> Map Text Int columnIndices DataFrame df) sortedIndexes' :: [Bool] -> V.Vector Row -> VU.Vector Int sortedIndexes' :: [Bool] -> Vector (Vector Any) -> Vector Int sortedIndexes' [Bool] flipCompare Vector (Vector Any) rows = (forall s. ST s (Vector Int)) -> Vector Int forall a. (forall s. ST s a) -> a runST ((forall s. ST s (Vector Int)) -> Vector Int) -> (forall s. ST s (Vector Int)) -> Vector Int forall a b. (a -> b) -> a -> b $ do MVector s (Int, Vector Any) withIndexes <- Vector (Int, Vector Any) -> ST s (Mutable Vector (PrimState (ST s)) (Int, Vector Any)) forall (m :: * -> *) (v :: * -> *) a. (PrimMonad m, Vector v a) => v a -> m (Mutable v (PrimState m) a) VG.thaw (Vector (Vector Any) -> Vector (Int, Vector Any) forall a. Vector a -> Vector (Int, a) V.indexed Vector (Vector Any) rows) Comparison (Int, Vector Any) -> MVector (PrimState (ST s)) (Int, Vector Any) -> ST s () forall (m :: * -> *) (v :: * -> * -> *) e. (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () VA.sortBy ([Bool] -> Vector Any -> Vector Any -> Ordering produceOrderingFromRow [Bool] flipCompare (Vector Any -> Vector Any -> Ordering) -> ((Int, Vector Any) -> Vector Any) -> Comparison (Int, Vector Any) forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c `on` (Int, Vector Any) -> Vector Any forall a b. (a, b) -> b snd) MVector s (Int, Vector Any) MVector (PrimState (ST s)) (Int, Vector Any) withIndexes Vector (Int, Vector Any) sorted <- Mutable Vector (PrimState (ST s)) (Int, Vector Any) -> ST s (Vector (Int, Vector Any)) forall (m :: * -> *) (v :: * -> *) a. (PrimMonad m, Vector v a) => Mutable v (PrimState m) a -> m (v a) VG.unsafeFreeze Mutable Vector (PrimState (ST s)) (Int, Vector Any) MVector s (Int, Vector Any) withIndexes Vector Int -> ST s (Vector Int) forall a. a -> ST s a forall (m :: * -> *) a. Monad m => a -> m a return (Vector Int -> ST s (Vector Int)) -> Vector Int -> ST s (Vector Int) forall a b. (a -> b) -> a -> b $ Int -> (Int -> Int) -> Vector Int forall a. Unbox a => Int -> (Int -> a) -> Vector a VU.generate (Vector (Vector Any) -> Int forall (v :: * -> *) a. Vector v a => v a -> Int VG.length Vector (Vector Any) rows) (\Int i -> (Int, Vector Any) -> Int forall a b. (a, b) -> a fst (Vector (Int, Vector Any) sorted Vector (Int, Vector Any) -> Int -> (Int, Vector Any) forall (v :: * -> *) a. (HasCallStack, Vector v a) => v a -> Int -> a VG.! Int i)) produceOrderingFromRow :: [Bool] -> Row -> Row -> Ordering produceOrderingFromRow :: [Bool] -> Vector Any -> Vector Any -> Ordering produceOrderingFromRow [Bool] mustFlips Vector Any v1 Vector Any v2 = (Ordering -> Ordering -> Ordering) -> Ordering -> Vector Ordering -> Ordering forall a b. (a -> b -> b) -> b -> Vector a -> b V.foldr Ordering -> Ordering -> Ordering forall a. Semigroup a => a -> a -> a (<>) Ordering forall a. Monoid a => a mempty Vector Ordering vZipped where vFlip :: Vector Bool vFlip = [Bool] -> Vector Bool forall a. [a] -> Vector a V.fromList [Bool] mustFlips vZipped :: Vector Ordering vZipped = (Bool -> Any -> Any -> Ordering) -> Vector Bool -> Vector Any -> Vector Any -> Vector Ordering forall a b c d. (a -> b -> c -> d) -> Vector a -> Vector b -> Vector c -> Vector d V.zipWith3 (\Bool b Any e1 Any e2 -> if Bool b then Any -> Any -> Ordering forall a. Ord a => a -> a -> Ordering compare Any e1 Any e2 else Any -> Any -> Ordering forall a. Ord a => a -> a -> Ordering compare Any e2 Any e1) Vector Bool vFlip Vector Any v1 Vector Any v2