{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE UndecidableSuperClasses #-}

-- | Column-level transformations over a 'DataFrame': mapping a function over
-- a column, deriving columns from expressions, conditional updates and
-- imputation of optional columns.
--
-- The non-@safe@ entry points report failures by calling 'throw' on a
-- 'DataFrameException' from pure code.
module DataFrame.Operations.Transformations where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V

import Control.Exception (throw)
import Data.Maybe
import DataFrame.Errors (DataFrameException (..), TypeErrorContext (..))
import DataFrame.Internal.Column (
    Column (..),
    Columnable,
    TypedColumn (..),
    ifoldrColumn,
    imapColumn,
    mapColumn,
 )
import DataFrame.Internal.DataFrame (DataFrame (..), getColumn)
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Nullable (BaseType)
import DataFrame.Operations.Core

-- | Map a function over the named column and store the result under the same
-- name.
--
-- This is 'safeApply' with the 'Left' case turned into a thrown exception.
-- A 'TypeMismatchException' has its @callingFunctionName@ set to @"apply"@
-- before being thrown; any other exception is thrown unchanged.
apply ::
    forall b c.
    (Columnable b, Columnable c) =>
    (b -> c) -> T.Text -> DataFrame -> DataFrame
apply f columnName d = case safeApply f columnName d of
    Left (TypeMismatchException context) ->
        throw $
            TypeMismatchException (context{callingFunctionName = Just "apply"})
    Left exception -> throw exception
    Right df -> df

-- | Map a function over the named column, returning failures as 'Left'.
--
-- Returns 'ColumnNotFoundException' (tagged with @"apply"@ and the frame's
-- column names) when the column is absent, and otherwise whatever
-- 'mapColumn' returns for the given function and column. On success the
-- mapped column is inserted under the original name.
safeApply ::
    forall b c.
    (Columnable b, Columnable c) =>
    (b -> c) -> T.Text -> DataFrame -> Either DataFrameException DataFrame
safeApply f columnName d = case getColumn columnName d of
    Nothing ->
        Left $
            ColumnNotFoundException columnName "apply" (M.keys $ columnIndices d)
    Just column -> do
        column' <- mapColumn f column
        pure $ insertColumn columnName column' d

-- | Evaluate an expression against the frame and insert the result as a
-- column called @name@.
--
-- The expression is passed through 'normalize' before interpretation, but
-- the expression recorded in 'derivingExpressions' under @name@ is the one
-- the caller supplied. Interpretation failures are thrown.
derive :: forall a. (Columnable a) => T.Text -> Expr a -> DataFrame -> DataFrame
derive name expr df = case interpret @a df (normalize expr) of
    Left e -> throw e
    Right (TColumn value) ->
        (insertColumn name value df)
            { derivingExpressions =
                M.insert name (UExpr expr) (derivingExpressions df)
            }

-- | Like 'derive', but also returns a @Col name@ expression referring to the
-- newly inserted column, paired with the updated frame.
deriveWithExpr ::
    forall a.
    (Columnable a) =>
    T.Text -> Expr a -> DataFrame -> (Expr a, DataFrame)
deriveWithExpr name expr df = case interpret @a df (normalize expr) of
    Left e -> throw e
    Right (TColumn value) ->
        ( Col name
        , (insertColumn name value df)
            { derivingExpressions =
                M.insert name (UExpr expr) (derivingExpressions df)
            }
        )

-- | Insert one column per named expression.
--
-- Every expression is interpreted against the frame passed to 'deriveMany'
-- (@df@), not against the accumulated frame, so an expression does not see
-- columns produced by other entries of the same list. Interpretation
-- failures are thrown.
--
-- NOTE(review): unlike 'derive', this neither calls 'normalize' nor records
-- anything in 'derivingExpressions'; confirm whether that difference and the
-- use of @df@ rather than the accumulator are intended.
deriveMany :: [NamedExpr] -> DataFrame -> DataFrame
deriveMany exprs df =
    let
        -- The pattern signature brings the existential type @a@ into scope
        -- so it can be passed to 'interpret' by type application.
        f (name, UExpr (expr :: Expr a)) d =
            case interpret @a df expr of
                Left e -> throw e
                Right (TColumn value) -> insertColumn name value d
     in
        fold f exprs df

-- | Run 'apply' with the same function over each listed column, left to
-- right.
applyMany ::
    (Columnable b, Columnable c) =>
    (b -> c) -> [T.Text] -> DataFrame -> DataFrame
applyMany f names df = L.foldl' (flip (apply f)) df names

-- | 'apply' with the input type fixed to 'Int'.
applyInt ::
    (Columnable b) =>
    (Int -> b) -> T.Text -> DataFrame -> DataFrame
applyInt = apply

-- | 'apply' with the input type fixed to 'Double'.
applyDouble ::
    (Columnable b) =>
    (Double -> b) -> T.Text -> DataFrame -> DataFrame
applyDouble = apply

-- | Apply @f@ to the entries of @columnName@ at every row index where
-- @condition@ holds for the value of @filterColumnName@.
--
-- * Throws 'ColumnNotFoundException' (tagged @"applyWhere"@) when either
--   column is missing. The target column is only looked up when at least one
--   row satisfies the condition; otherwise the frame is returned unchanged.
-- * Throws whatever 'ifoldrColumn' or 'imapColumn' report for the given
--   columns and element types.
--
-- The selection is computed once as a boolean mask and the target column is
-- rewritten in a single 'imapColumn' pass, rather than re-mapping the whole
-- column once per matching row.
applyWhere ::
    forall a b.
    (Columnable a, Columnable b) =>
    (a -> Bool) -> T.Text -> (b -> b) -> T.Text -> DataFrame -> DataFrame
applyWhere condition filterColumnName f columnName df =
    case getColumn filterColumnName df of
        Nothing ->
            throw $
                ColumnNotFoundException
                    filterColumnName
                    "applyWhere"
                    (M.keys $ columnIndices df)
        Just filterColumn ->
            case ifoldrColumn collectFlag [] filterColumn of
                Left e -> throw e
                Right flags
                    | or flags -> applyMasked (V.fromList flags)
                    | otherwise -> df
  where
    -- One flag per element of the filter column. Consing onto a list keeps
    -- each step constant-time (unlike 'V.cons', which copies the vector).
    collectFlag :: Int -> a -> [Bool] -> [Bool]
    collectFlag _ val acc = condition val : acc

    -- Rewrite the target column, touching only the indices flagged in the
    -- mask.
    applyMasked :: V.Vector Bool -> DataFrame
    applyMasked mask = case getColumn columnName df of
        Nothing ->
            throw $
                ColumnNotFoundException
                    columnName
                    "applyWhere"
                    (M.keys $ columnIndices df)
        Just target ->
            case imapColumn (\i value -> if selected i then f value else value) target of
                Left e -> throw e
                Right target' -> insertColumn columnName target' df
      where
        -- Indices outside the mask are treated as "not selected", so the
        -- lookup is total even if the two columns differ in length.
        selected i = fromMaybe False (mask V.!? i)

-- | Apply @f@ to the single entry of @columnName@ whose index equals @i@,
-- leaving every other entry as it was.
--
-- Throws 'ColumnNotFoundException' (tagged @"applyAtIndex"@) when the column
-- is missing, and whatever 'imapColumn' reports otherwise. No entry has an
-- index equal to @i@ when @i@ is out of range, so the values are passed
-- through unchanged in that case.
applyAtIndex ::
    forall a.
    (Columnable a) =>
    Int -> (a -> a) -> T.Text -> DataFrame -> DataFrame
applyAtIndex i f columnName df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException
                columnName
                "applyAtIndex"
                (M.keys $ columnIndices df)
    Just column ->
        case imapColumn (\index value -> if index == i then f value else value) column of
            Left e -> throw e
            Right column' -> insertColumn columnName column' df

-- | Replace the 'Nothing' entries of an optional column with @value@.
--
-- Only a bare column reference (@Col name@) is handled:
--
-- * missing column: throws 'ColumnNotFoundException' tagged @"impute"@;
-- * 'OptionalColumn': maps @fromMaybe value@ over it via 'safeApply',
--   re-tagging a 'TypeMismatchException' with @"impute"@ before throwing;
-- * any other column constructor: the frame is returned unchanged.
--
-- Any expression other than @Col@ also returns the frame unchanged.
imputeCore ::
    forall b.
    (Columnable b) =>
    Expr (Maybe b) -> b -> DataFrame -> DataFrame
imputeCore (Col columnName) value df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException columnName "impute" (M.keys $ columnIndices df)
    Just (OptionalColumn _) ->
        case safeApply (fromMaybe value) columnName df of
            Left (TypeMismatchException context) ->
                throw $
                    TypeMismatchException
                        (context{callingFunctionName = Just "impute"})
            Left exception -> throw exception
            Right res -> res
    _ -> df
imputeCore _ _ df = df

-- | Type-directed imputation, dispatched on the expression's element type.
--
-- NOTE(review): presumably a more specific instance (e.g. for @Maybe@
-- element types) is defined elsewhere and performs the actual work; only the
-- overlappable fallback is visible here.
class (Columnable a) => ImputeOp a where
    -- | Impute using a fixed replacement value of the base type.
    runImpute :: Expr a -> BaseType a -> DataFrame -> DataFrame

    -- | Impute using a replacement derived by transforming an expression over
    -- the base type.
    runImputeWith ::
        (Columnable (BaseType a)) =>
        (Expr (BaseType a) -> Expr (BaseType a)) -> Expr a -> DataFrame -> DataFrame

-- | Fallback instance: both operations return the frame unchanged.
instance {-# OVERLAPPABLE #-} (Columnable a) => ImputeOp a where
    runImpute _ _ df = df
    runImputeWith _ _ df = df

-- | Public entry point for imputation; delegates to 'runImpute' for the
-- expression's element type.
impute ::
    forall a.
    (ImputeOp a) =>
    Expr a -> BaseType a -> DataFrame -> DataFrame
impute = runImpute