{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Subset where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import qualified Prelude

import Control.Exception (throw)
import Data.Function ((&))
import Data.Maybe (
    fromJust,
    fromMaybe,
    isJust,
    isNothing,
 )
import Data.Type.Equality (TestEquality (..))
import DataFrame.Errors (
    DataFrameException (..),
    TypeErrorContext (..),
 )
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (
    DataFrame (..),
    derivingExpressions,
    empty,
    getColumn,
    unsafeGetColumn,
 )
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Operations.Core
import DataFrame.Operations.Merge ()
import DataFrame.Operations.Transformations (apply)
import System.Random
import Type.Reflection
import Prelude hiding (filter, take)


{- | Keep at most the first @n@ rows of every column.

The requested count is clamped to the range @[0, rows]@ before it is used, so
the recorded row dimension never exceeds the number of rows in the input.
-}
take :: Int -> DataFrame -> DataFrame
take n d =
    d
        { columns = V.map (takeColumn kept) (columns d)
        , dataframeDimensions = (kept, colCount)
        }
  where
    (rowCount, colCount) = dataframeDimensions d
    -- Clamp so negative or oversized requests stay within the frame.
    kept = clip n 0 rowCount


{- | Keep at most the last @n@ rows of every column.

The requested count is clamped to the range @[0, rows]@ before it is handed to
'takeLastColumn', and the row dimension is updated to the clamped value.
-}
takeLast :: Int -> DataFrame -> DataFrame
takeLast n d =
    d
        { columns = V.map (takeLastColumn kept) (columns d)
        , dataframeDimensions = (kept, colCount)
        }
  where
    (rowCount, colCount) = dataframeDimensions d
    -- Clamp so negative or oversized requests stay within the frame.
    kept = clip n 0 rowCount


{- | Discard the first @n@ rows of every column.

The number of rows to skip is clamped to @[0, rows]@; whatever is left
(@rows - skipped@) becomes the new row dimension.
-}
drop :: Int -> DataFrame -> DataFrame
drop n d =
    d
        { columns = V.map (sliceColumn skipped remaining) (columns d)
        , dataframeDimensions = (remaining, colCount)
        }
  where
    (rowCount, colCount) = dataframeDimensions d
    skipped = clip n 0 rowCount
    -- 'sliceColumn' is called with a start offset and a row count here.
    remaining = max (rowCount - skipped) 0


{- | Discard the last @n@ rows of every column.

The number of rows kept is @rows - n@ clamped to @[0, rows]@; the slice always
starts at row 0.
-}
dropLast :: Int -> DataFrame -> DataFrame
dropLast n d =
    d
        { columns = V.map (sliceColumn 0 kept) (columns d)
        , dataframeDimensions = (kept, colCount)
        }
  where
    (rowCount, colCount) = dataframeDimensions d
    kept = clip (rowCount - n) 0 rowCount


{- | Keep the rows in the half-open interval @[start, end)@.

Both bounds are clamped to @[0, rows]@ independently, and the slice length is
derived from the clamped bounds. This guarantees @from + len <= rows@, so an
@end@ past the last row (or a negative @start@) can no longer request a slice
that runs beyond the column. An interval with @end <= start@ yields zero rows.
-}
range :: (Int, Int) -> DataFrame -> DataFrame
range (start, end) d =
    d
        { columns = V.map (sliceColumn from len) (columns d)
        , dataframeDimensions = (len, colCount)
        }
  where
    (rowCount, colCount) = dataframeDimensions d
    from = clip start 0 rowCount
    upTo = clip end 0 rowCount
    -- Length is measured from the clamped start, never from the raw one.
    len = max 0 (upTo - from)

{- | Clamp @n@ into the interval @[left, right]@.

The lower bound is applied first and the upper bound last, so when
@left > right@ the result is @right@.
-}
clip :: Int -> Int -> Int -> Int
clip n left right = min right (max n left)


{- | Keep the rows for which a predicate holds on the value of an expression.

When the expression is a plain column reference the column is looked up
directly and filtered through 'filterByVector'; a missing column raises
'ColumnNotFoundException'. Any other expression is normalised and interpreted
first, and exceptions reported by the interpreter or by 'findIndices' are
rethrown.
-}
filter ::
    forall a.
    (Columnable a) =>
    -- | Expression (often a column reference) producing the values to test
    Expr a ->
    -- | Predicate a row's value must satisfy for the row to be kept
    (a -> Bool) ->
    -- | Dataframe to filter
    DataFrame ->
    DataFrame
filter (Col filterColumnName) condition df =
    case getColumn filterColumnName df of
        Nothing -> throw missingColumn
        Just (BoxedColumn (column :: V.Vector b)) ->
            filterByVector filterColumnName column condition df
        Just (OptionalColumn (column :: V.Vector b)) ->
            filterByVector filterColumnName column condition df
        Just (UnboxedColumn (column :: VU.Vector b)) ->
            filterByVector filterColumnName column condition df
  where
    missingColumn =
        ColumnNotFoundException
            filterColumnName
            "filter"
            (M.keys (columnIndices df))
filter expr condition df =
    df
        { columns = V.map (atIndicesStable keptRows) (columns df)
        , dataframeDimensions = (VU.length keptRows, colCount)
        }
  where
    -- Evaluate the expression to a column; interpreter failures are rethrown.
    TColumn evaluated = either throw id (interpret @a df (normalize expr))
    -- Row positions whose value satisfies the predicate.
    keptRows = either throw id (findIndices condition evaluated)
    colCount = snd (dataframeDimensions df)

{- | Filter a dataframe using an already-extracted column vector.

The predicate's argument type @a@ must be the same as the vector's element
type @b@; this is checked at runtime with 'testEquality'. On a mismatch a
'TypeMismatchException' is thrown naming the column and @"filter"@ as the
calling function. On a match, the indices of the elements satisfying the
predicate are used to select rows from every column.
-}
filterByVector ::
    forall a b v.
    (VG.Vector v b, VG.Vector v Int, Columnable a, Columnable b) =>
    T.Text -> v b -> (a -> Bool) -> DataFrame -> DataFrame
filterByVector filterColumnName column condition df =
    case testEquality (typeRep @a) (typeRep @b) of
        Nothing -> throw (TypeMismatchException mismatch)
        Just Refl ->
            -- The equality proof lets the predicate run on the vector directly.
            let keptRows = VG.convert (VG.findIndices condition column)
             in df
                    { columns = V.map (atIndicesStable keptRows) (columns df)
                    , dataframeDimensions =
                        (VG.length keptRows, snd (dataframeDimensions df))
                    }
  where
    mismatch =
        MkTypeErrorContext
            { userType = Right (typeRep @a)
            , expectedType = Right (typeRep @b)
            , errorColumnName = Just (T.unpack filterColumnName)
            , callingFunctionName = Just "filter"
            }


{- | 'filter' with the predicate and the expression swapped.

Convenient when the predicate is known first and the dataframe is threaded
through a pipeline.
-}
filterBy :: (Columnable a) => (a -> Bool) -> Expr a -> DataFrame -> DataFrame
filterBy condition expr = filter expr condition


{- | Keep the rows for which a boolean expression evaluates to 'True'.

The expression is normalised and interpreted against the dataframe; the
resulting column is scanned with 'findIndices' and the matching row positions
are selected from every column. Exceptions reported by either step are
rethrown.
-}
filterWhere :: Expr Bool -> DataFrame -> DataFrame
filterWhere expr df =
    df
        { columns = V.map (atIndicesStable keptRows) (columns df)
        , dataframeDimensions = (VU.length keptRows, colCount)
        }
  where
    -- Boolean mask column produced by evaluating the expression.
    TColumn mask = either throw id (interpret @Bool df (normalize expr))
    -- Positions where the mask is True.
    keptRows = either throw id (findIndices id mask)
    colCount = snd (dataframeDimensions df)


{- | Drop the rows where the named optional column is 'Nothing' and unwrap the
remaining values.

* If the column does not exist, 'ColumnNotFoundException' is thrown.
* If the column is an 'OptionalColumn', rows holding 'Nothing' are removed and
  the column is then mapped with 'fromJust'.
* Any other kind of column leaves the dataframe unchanged.
-}
filterJust :: T.Text -> DataFrame -> DataFrame
filterJust name df = case getColumn name df of
    Nothing ->
        throw (ColumnNotFoundException name "filterJust" (M.keys (columnIndices df)))
    Just (OptionalColumn (_ :: V.Vector (Maybe a))) ->
        -- 'fromJust' runs only after every 'Nothing' row has been filtered out.
        apply @(Maybe a) fromJust name (filter (Col @(Maybe a) name) isJust df)
    Just _ -> df


{- | Keep only the rows where the named optional column is 'Nothing'.

* If the column does not exist, 'ColumnNotFoundException' is thrown.
* If the column is an 'OptionalColumn', rows holding a 'Just' are removed.
* Any other kind of column leaves the dataframe unchanged.
-}
filterNothing :: T.Text -> DataFrame -> DataFrame
filterNothing name df = case getColumn name df of
    Nothing ->
        throw (ColumnNotFoundException name "filterNothing" (M.keys (columnIndices df)))
    Just (OptionalColumn (_ :: V.Vector (Maybe a))) ->
        filter (Col @(Maybe a) name) isNothing df
    Just _ -> df


{- | Apply 'filterJust' to every column of the dataframe.

The column names are taken from the input frame and folded over from the
right, each step filtering the result of the previous one.
-}
filterAllJust :: DataFrame -> DataFrame
filterAllJust df = foldr filterJust df (columnNames df)


{- | Apply 'filterNothing' to every column of the dataframe.

The column names are taken from the input frame and folded over from the
right, each step filtering the result of the previous one.
-}
filterAllNothing :: DataFrame -> DataFrame
filterAllNothing df = foldr filterNothing df (columnNames df)


{- | Take the top-left corner of a dataframe.

The pair is @(rows, columns)@: the columns at positions @0 .. columns - 1@ are
selected first, then the first @rows@ rows are kept.
-}
cube :: (Int, Int) -> DataFrame -> DataFrame
cube (rowCount, colCount) df =
    take rowCount (selectBy [ColumnIndexRange (0, colCount - 1)] df)


{- | Build a dataframe containing only the named columns, inserted in the
order given.

* An empty list of names yields the 'empty' dataframe.
* If any requested name is not a column of the input,
  'ColumnNotFoundException' is thrown; its first field is the shown list of
  the missing names.
* Deriving expressions are carried over only for the selected columns.
-}
select ::
    -- | Names of the columns to keep
    [T.Text] ->
    DataFrame ->
    DataFrame
select cs df
    | L.null cs = empty
    | any (`notElem` available) cs =
        throw $
            ColumnNotFoundException
                (T.pack (show (cs L.\\ available)))
                "select"
                available
    | otherwise = selected{derivingExpressions = keptExprs}
  where
    available = columnNames df
    selected = L.foldl' addColumn empty cs
    keptExprs =
        M.filterWithKey (\key _ -> key `L.elem` cs) (derivingExpressions df)
    -- Copy one column from the source frame into the frame being built. The
    -- guard above makes a failed lookup unexpected; if it ever happens the
    -- accumulator is returned untouched rather than being replaced by the
    -- whole source frame.
    addColumn acc key =
        maybe acc (\col -> insertColumn key col acc) (getColumn key df)

{- | Criteria understood by 'selectBy' for choosing columns.

* 'ColumnProperty': columns whose contents satisfy a predicate.
* 'ColumnNameProperty': columns whose name satisfies a predicate.
* 'ColumnTextRange': the run of columns from one name to another, inclusive.
* 'ColumnIndexRange': columns whose position lies in an inclusive index range.
* 'ColumnName': a single column, by name.
-}
data SelectionCriteria
    = ColumnProperty (Column -> Bool)
    | ColumnNameProperty (T.Text -> Bool)
    | ColumnTextRange (T.Text, T.Text)
    | ColumnIndexRange (Int, Int)
    | ColumnName T.Text


-- | Criterion selecting the column with exactly this name.
byName :: T.Text -> SelectionCriteria
byName name = ColumnName name


-- | Criterion selecting every column whose contents satisfy the predicate.
byProperty :: (Column -> Bool) -> SelectionCriteria
byProperty predicate = ColumnProperty predicate


-- | Criterion selecting every column whose name satisfies the predicate.
byNameProperty :: (T.Text -> Bool) -> SelectionCriteria
byNameProperty predicate = ColumnNameProperty predicate


-- | Criterion selecting the columns between two names, both ends included.
byNameRange :: (T.Text, T.Text) -> SelectionCriteria
byNameRange bounds = ColumnTextRange bounds


-- | Criterion selecting the columns whose position lies in an inclusive range.
byIndexRange :: (Int, Int) -> SelectionCriteria
byIndexRange bounds = ColumnIndexRange bounds


{- | Select the columns matching at least one of the given criteria.

The names matched by all criteria are collected into a set, and the columns
of the input are then kept in their existing order if their name is in that
set. Names that do not occur in the dataframe are therefore ignored rather
than reported. The final projection is delegated to 'select'.

Criteria are interpreted as follows:

* 'ColumnName': that one name.
* 'ColumnNameProperty': names satisfying the predicate.
* 'ColumnTextRange': names from the first occurrence of the lower name up to
  the last occurrence of the upper name, inclusive; empty if either is absent.
* 'ColumnIndexRange': names at positions @from .. to@ inclusive. A negative
  @from@ is treated as 0 so that it cannot widen the range.
* 'ColumnProperty': names whose column index belongs to a column satisfying
  the predicate.
-}
selectBy :: [SelectionCriteria] -> DataFrame -> DataFrame
selectBy xs df = select finalSelection df
  where
    names = columnNames df
    finalSelection = Prelude.filter (`S.member` wanted) names
    wanted = S.fromList (concatMap matching xs)

    matching (ColumnName name) = [name]
    matching (ColumnNameProperty f) = L.filter f names
    matching (ColumnTextRange (from, to)) =
        -- Trim everything before 'from', then everything after 'to'.
        L.dropWhileEnd (to /=) (dropWhile (from /=) names)
    matching (ColumnIndexRange (from, to)) =
        let lo = max 0 from
         in Prelude.take (to - lo + 1) (Prelude.drop lo names)
    matching (ColumnProperty f) =
        [key | (key, ix) <- M.toAscList (columnIndices df), ix `S.member` hits]
      where
        -- Indices of the columns accepted by the predicate.
        hits =
            V.ifoldl'
                (\found i c -> if f c then S.insert i found else found)
                S.empty
                (columns df)


{- | Remove the named columns from a dataframe.

The remaining column names are computed with list difference and passed to
'select', so excluding every column yields the 'empty' dataframe.
-}
exclude ::
    -- | Names of the columns to remove
    [T.Text] ->
    DataFrame ->
    DataFrame
exclude cs df = select (columnNames df L.\\ cs) df


{- | Randomly keep a subset of the rows.

A temporary @__rand__@ column of random doubles (one per row, produced by
'generateRandomVector') is attached, rows whose draw is @>= 1 - p@ are kept,
and the temporary column is removed again.

NOTE(review): the draws are presumably uniform in @[0, 1]@, which would make
@p@ the expected fraction of rows kept — confirm against
'generateRandomVector'.
-}
sample :: (RandomGen g) => g -> Double -> DataFrame -> DataFrame
sample pureGen p df =
    exclude ["__rand__"] (filterWhere keepRow withRand)
  where
    rand = generateRandomVector pureGen (fst (dataframeDimensions df))
    withRand = insertUnboxedVector "__rand__" rand df
    -- Expression: __rand__ >= 1 - p
    keepRow = Binary atLeast (Col @Double "__rand__") (Lit (1 - p))
    atLeast =
        MkBinaryOp
            { binaryFn = (>=)
            , binaryName = "geq"
            , binarySymbol = Just ">="
            , binaryCommutative = False
            , binaryPrecedence = 1
            }


{- | Split the rows of a dataframe into two disjoint parts at random.

A temporary @__rand__@ column of random doubles (one per row, produced by
'generateRandomVector') is attached. The first result holds the rows whose
draw is @<= p@, the second the rows whose draw is @> p@; the temporary column
is removed from both.

NOTE(review): the draws are presumably uniform in @[0, 1]@, which would make
@p@ the expected fraction of rows in the first part — confirm against
'generateRandomVector'.
-}
randomSplit ::
    (RandomGen g) => g -> Double -> DataFrame -> (DataFrame, DataFrame)
randomSplit pureGen p df =
    ( part (compareRand (<=) "leq" "<=")
    , part (compareRand (>) "gt" ">")
    )
  where
    rand = generateRandomVector pureGen (fst (dataframeDimensions df))
    withRand = insertUnboxedVector "__rand__" rand df

    -- Filter on the random column, then drop it from the result.
    part :: Expr Bool -> DataFrame
    part condition = exclude ["__rand__"] (filterWhere condition withRand)

    -- Expression comparing the random column against the threshold @p@.
    compareRand :: (Double -> Double -> Bool) -> T.Text -> T.Text -> Expr Bool
    compareRand op name symbol =
        Binary
            ( MkBinaryOp
                { binaryFn = op
                , binaryName = name
                , binarySymbol = Just symbol
                , binaryCommutative = False
                , binaryPrecedence = 1
                }
            )
            (Col @Double "__rand__")
            (Lit p)


{- | Partition the rows of a dataframe into @folds@ random, disjoint folds.

A temporary @__rand__@ column of random doubles (one per row, produced by
'generateRandomVector') is attached. Starting with @n = folds - 1@ and
counting down to 0, each step emits the rows of the remaining frame whose
draw is @>= n / folds@ and continues with the rows whose draw is below that
bound. The temporary column is removed from every fold.

A non-positive @folds@ yields an empty list. The recursion stops on any
negative counter, so a negative @folds@ terminates instead of counting down
forever.
-}
kFolds :: (RandomGen g) => g -> Int -> DataFrame -> [DataFrame]
kFolds pureGen folds df =
    map (exclude ["__rand__"]) (go (folds - 1) withRand)
  where
    rand = generateRandomVector pureGen (fst (dataframeDimensions df))
    withRand = insertUnboxedVector "__rand__" rand df

    partitionSize :: Double
    partitionSize = 1 / fromIntegral folds

    -- Lower edge of fold @n@ on the random column.
    lowerBound :: Int -> Double
    lowerBound n = fromIntegral n * partitionSize

    -- Expression comparing the random column against a bound.
    compareRand ::
        (Double -> Double -> Bool) -> T.Text -> T.Text -> Double -> Expr Bool
    compareRand op name symbol bound =
        Binary
            ( MkBinaryOp
                { binaryFn = op
                , binaryName = name
                , binarySymbol = Just symbol
                , binaryCommutative = False
                , binaryPrecedence = 1
                }
            )
            (Col @Double "__rand__")
            (Lit bound)

    go :: Int -> DataFrame -> [DataFrame]
    go n remaining
        | n < 0 = []
        | otherwise = thisFold : go (n - 1) rest
      where
        thisFold = filterWhere (compareRand (>=) "geq" ">=" (lowerBound n)) remaining
        rest = filterWhere (compareRand (<) "lt" "<" (lowerBound n)) remaining

-- | Generate @k@ pseudo-random 'Double's, each drawn with
-- @uniformR (0, 1)@, threading the generator from one draw to the next.
--
-- The first element is drawn from @pureGen@, the second from the generator
-- returned by that draw, and so on. A non-positive @k@ yields an empty
-- vector.
generateRandomVector :: (RandomGen g) => g -> Int -> VU.Vector Double
generateRandomVector pureGen k =
    -- 'VU.unfoldrN' emits at most @k@ elements and treats a negative bound as
    -- zero, so the vector is filled directly (no intermediate list) and the
    -- function terminates for every @k@.
    VU.unfoldrN k (Just . uniformR (0 :: Double, 1 :: Double)) pureGen


-- | Render every element of a column as 'T.Text'.
--
-- A boxed column whose element type is already 'T.Text' is returned as is;
-- in every other case each element is rendered with 'show' and packed.
-- Unboxed columns are first converted to a boxed vector.
columnToTextVec :: Column -> V.Vector T.Text
columnToTextVec column = case column of
    BoxedColumn (values :: V.Vector a) ->
        -- Only skip the 'show' round trip when the element type is exactly Text.
        case testEquality (typeRep @a) (typeRep @T.Text) of
            Just Refl -> values
            Nothing -> V.map render values
    UnboxedColumn values -> V.map render (V.convert values)
    OptionalColumn values -> V.map render values
  where
    render :: (Show x) => x -> T.Text
    render = T.pack . show


-- | Group row positions by the textual rendering of each value in a column.
--
-- Keys are the values produced by 'columnToTextVec'; each key maps to the
-- zero-based positions at which it occurs, in ascending order.
groupByIndices :: Column -> M.Map T.Text (VU.Vector Int)
groupByIndices col = M.map (VU.fromList . L.reverse) positionsByKey
  where
    positionsByKey = V.ifoldl' record M.empty (columnToTextVec col)
    -- New positions are prepended to the list already stored for the key, so
    -- each list is built back to front and reversed once at the end.
    record acc rowIx key = M.insertWith (++) key [rowIx] acc


-- | Build a frame from the rows of @df@ selected by @ixs@.
--
-- Every column is passed through @atIndicesStable ixs@; the row count becomes
-- the number of indices and the column count is carried over unchanged.
rowsAtIndices :: VU.Vector Int -> DataFrame -> DataFrame
rowsAtIndices ixs df =
    df{columns = pickedColumns, dataframeDimensions = (rowCount, columnCount)}
  where
    pickedColumns = V.map (atIndicesStable ixs) (columns df)
    rowCount = VU.length ixs
    (_, columnCount) = dataframeDimensions df


-- | Sample each stratum of a frame independently and concatenate the results.
--
-- Rows are grouped by the textual rendering of the strata expression (see
-- 'groupByIndices'). A bare column reference is looked up directly with
-- 'unsafeGetColumn'; any other expression is interpreted against @df@, and an
-- interpretation failure is thrown as an exception.
--
-- Each stratum is sampled with 'sample' using its own generator, obtained by
-- repeatedly splitting @gen@. @p@ is forwarded to 'sample' unchanged
-- (presumably the fraction of rows to keep; confirm against 'sample').
-- Strata are combined with '<>' in ascending key order.
stratifiedSample ::
    forall a g.
    (SplitGen g, RandomGen g, Columnable a) =>
    g -> Double -> Expr a -> DataFrame -> DataFrame
stratifiedSample gen p strataCol df =
    foldr (<>) mempty (zipWith sampleStratum generators strata)
  where
    keyColumn :: Column
    keyColumn = case strataCol of
        Col name -> unsafeGetColumn name df
        _ -> unwrapTypedColumn (either throw id (interpret @a df strataCol))

    strata :: [VU.Vector Int]
    strata = M.elems (groupByIndices keyColumn)

    -- The i-th stratum uses the left half of the i-th split; the right half
    -- seeds the next split.
    generators :: [g]
    generators = L.unfoldr (Just . splitGen) gen

    sampleStratum :: g -> VU.Vector Int -> DataFrame
    sampleStratum g ixs = sample g p (rowsAtIndices ixs df)


-- | Split each stratum of a frame independently and concatenate the halves.
--
-- Rows are grouped by the textual rendering of the strata expression (see
-- 'groupByIndices'). A bare column reference is looked up directly with
-- 'unsafeGetColumn'; any other expression is interpreted against @df@, and an
-- interpretation failure is thrown as an exception.
--
-- Each stratum is split with 'randomSplit' using its own generator, obtained
-- by repeatedly splitting @gen@. @p@ is forwarded to 'randomSplit' unchanged
-- (presumably the fraction routed to the first frame; confirm against
-- 'randomSplit'). The first components of all strata are combined with '<>'
-- into the first result, and likewise for the second components, in
-- ascending key order.
stratifiedSplit ::
    forall a g.
    (SplitGen g, RandomGen g, Columnable a) =>
    g -> Double -> Expr a -> DataFrame -> (DataFrame, DataFrame)
stratifiedSplit gen p strataCol df =
    foldr merge (mempty, mempty) (zipWith splitStratum generators strata)
  where
    keyColumn :: Column
    keyColumn = case strataCol of
        Col name -> unsafeGetColumn name df
        _ -> unwrapTypedColumn (either throw id (interpret @a df strataCol))

    strata :: [VU.Vector Int]
    strata = M.elems (groupByIndices keyColumn)

    -- The i-th stratum uses the left half of the i-th split; the right half
    -- seeds the next split.
    generators :: [g]
    generators = L.unfoldr (Just . splitGen) gen

    splitStratum :: g -> VU.Vector Int -> (DataFrame, DataFrame)
    splitStratum g ixs = randomSplit g p (rowsAtIndices ixs df)

    -- Lazy patterns keep the pair constructor available without forcing
    -- either side.
    merge ::
        (DataFrame, DataFrame) ->
        (DataFrame, DataFrame) ->
        (DataFrame, DataFrame)
    merge ~(firstPart, secondPart) ~(firstAcc, secondAcc) =
        (firstPart <> firstAcc, secondPart <> secondAcc)