{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE InstanceSigs #-} {-# LANGUAGE OverloadedStrings #-} module DataFrame.Operations.Permutation where import qualified Data.List as L import qualified Data.Text as T import qualified Data.Vector as V import qualified Data.Vector.Unboxed as VU import qualified Data.Vector.Unboxed.Mutable as VUM import Control.Exception (throw) import Control.Monad.ST (runST) import Data.Vector.Internal.Check (HasCallStack) import DataFrame.Errors (DataFrameException (..)) import DataFrame.Internal.Column (Columnable, atIndicesStable) import DataFrame.Internal.DataFrame (DataFrame (..)) import DataFrame.Internal.Expression (Expr (Col)) import DataFrame.Internal.Row (sortedIndexes', toRowVector) import DataFrame.Operations.Core (columnNames, dimensions) import System.Random (Random (randomR), RandomGen) data SortOrder where Asc :: (Columnable a) => Expr a -> SortOrder Desc :: (Columnable a) => Expr a -> SortOrder instance Eq SortOrder where (==) :: SortOrder -> SortOrder -> Bool == :: SortOrder -> SortOrder -> Bool (==) (Asc Expr a _) (Asc Expr a _) = Bool True (==) (Desc Expr a _) (Desc Expr a _) = Bool True (==) SortOrder _ SortOrder _ = Bool False getSortColumnName :: SortOrder -> T.Text getSortColumnName :: SortOrder -> Text getSortColumnName (Asc (Col Text n)) = Text n getSortColumnName (Desc (Col Text n)) = Text n getSortColumnName SortOrder _ = [Char] -> Text forall a. HasCallStack => [Char] -> a error [Char] "Sorting on compound column" mustFlipCompare :: SortOrder -> Bool mustFlipCompare :: SortOrder -> Bool mustFlipCompare (Asc Expr a _) = Bool True mustFlipCompare (Desc Expr a _) = Bool False sortBy :: [SortOrder] -> DataFrame -> DataFrame sortBy :: [SortOrder] -> DataFrame -> DataFrame sortBy [SortOrder] sortOrds DataFrame df | (Text -> Bool) -> [Text] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool any (Text -> [Text] -> Bool forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool `notElem` DataFrame -> [Text] columnNames DataFrame df) [Text] names = DataFrameException -> DataFrame forall a e. Exception e => e -> a throw (DataFrameException -> DataFrame) -> DataFrameException -> DataFrame forall a b. (a -> b) -> a -> b $ Text -> Text -> [Text] -> DataFrameException ColumnNotFoundException ([Char] -> Text T.pack ([Char] -> Text) -> [Char] -> Text forall a b. (a -> b) -> a -> b $ [Text] -> [Char] forall a. Show a => a -> [Char] show ([Text] -> [Char]) -> [Text] -> [Char] forall a b. (a -> b) -> a -> b $ [Text] names [Text] -> [Text] -> [Text] forall a. Eq a => [a] -> [a] -> [a] L.\\ DataFrame -> [Text] columnNames DataFrame df) Text "sortBy" (DataFrame -> [Text] columnNames DataFrame df) | Bool otherwise = let indexes :: Vector Int indexes = [Bool] -> Vector Row -> Vector Int sortedIndexes' [Bool] mustFlips ([Text] -> DataFrame -> Vector Row toRowVector [Text] names DataFrame df) in DataFrame df{columns = V.map (atIndicesStable indexes) (columns df)} where names :: [Text] names = (SortOrder -> Text) -> [SortOrder] -> [Text] forall a b. (a -> b) -> [a] -> [b] map SortOrder -> Text getSortColumnName [SortOrder] sortOrds mustFlips :: [Bool] mustFlips = (SortOrder -> Bool) -> [SortOrder] -> [Bool] forall a b. (a -> b) -> [a] -> [b] map SortOrder -> Bool mustFlipCompare [SortOrder] sortOrds shuffle :: (RandomGen g) => g -> DataFrame -> DataFrame shuffle :: forall g. RandomGen g => g -> DataFrame -> DataFrame shuffle g pureGen DataFrame df = let indexes :: Vector Int indexes = g -> Int -> Vector Int forall g. (HasCallStack, RandomGen g) => g -> Int -> Vector Int shuffledIndices g pureGen ((Int, Int) -> Int forall a b. (a, b) -> a fst (DataFrame -> (Int, Int) dimensions DataFrame df)) in DataFrame df{columns = V.map (atIndicesStable indexes) (columns df)} shuffledIndices :: (HasCallStack, RandomGen g) => g -> Int -> VU.Vector Int shuffledIndices :: forall g. (HasCallStack, RandomGen g) => g -> Int -> Vector Int shuffledIndices g pureGen Int k | Int k Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 0 = [Char] -> Vector Int forall a. HasCallStack => [Char] -> a error ([Char] -> Vector Int) -> [Char] -> Vector Int forall a b. (a -> b) -> a -> b $ [Char] "Vector index may not be a neative number: " [Char] -> [Char] -> [Char] forall a. Semigroup a => a -> a -> a <> Int -> [Char] forall a. Show a => a -> [Char] show Int k | Int k Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 0 = Vector Int forall a. Unbox a => Vector a VU.empty | Bool otherwise = g -> Vector Int forall g. RandomGen g => g -> Vector Int shuffleVec g pureGen where shuffleVec :: (RandomGen g) => g -> VU.Vector Int shuffleVec :: forall g. RandomGen g => g -> Vector Int shuffleVec g g = (forall s. ST s (Vector Int)) -> Vector Int forall a. (forall s. ST s a) -> a runST ((forall s. ST s (Vector Int)) -> Vector Int) -> (forall s. ST s (Vector Int)) -> Vector Int forall a b. (a -> b) -> a -> b $ do MVector s Int vm <- Int -> (Int -> Int) -> ST s (MVector (PrimState (ST s)) Int) forall (m :: * -> *) a. (PrimMonad m, Unbox a) => Int -> (Int -> a) -> m (MVector (PrimState m) a) VUM.generate Int k Int -> Int forall a. a -> a id let (Int n, g nGen) = (Int, Int) -> g -> (Int, g) forall g. RandomGen g => (Int, Int) -> g -> (Int, g) forall a g. (Random a, RandomGen g) => (a, a) -> g -> (a, g) randomR (Int 1, Int k Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1) g g MVector (PrimState (ST s)) Int -> Int -> g -> ST s () forall {f :: * -> *} {t} {a}. (RandomGen t, PrimMonad f, Unbox a) => MVector (PrimState f) a -> Int -> t -> f () go MVector s Int MVector (PrimState (ST s)) Int vm Int n g nGen MVector (PrimState (ST s)) Int -> ST s (Vector Int) forall a (m :: * -> *). (Unbox a, PrimMonad m) => MVector (PrimState m) a -> m (Vector a) VU.unsafeFreeze MVector s Int MVector (PrimState (ST s)) Int vm go :: MVector (PrimState f) a -> Int -> t -> f () go MVector (PrimState f) a v (-1) t _ = () -> f () forall a. a -> f a forall (f :: * -> *) a. Applicative f => a -> f a pure () go MVector (PrimState f) a v Int 0 t _ = () -> f () forall a. a -> f a forall (f :: * -> *) a. Applicative f => a -> f a pure () go MVector (PrimState f) a v Int maxInd t gen = let (Int n, t nextGen) = (Int, Int) -> t -> (Int, t) forall g. RandomGen g => (Int, Int) -> g -> (Int, g) forall a g. (Random a, RandomGen g) => (a, a) -> g -> (a, g) randomR (Int 1, Int maxInd) t gen in MVector (PrimState f) a -> Int -> Int -> f () forall (m :: * -> *) a. (PrimMonad m, Unbox a) => MVector (PrimState m) a -> Int -> Int -> m () VUM.swap MVector (PrimState f) a v Int 0 Int n f () -> f () -> f () forall a b. f a -> f b -> f b forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b *> MVector (PrimState f) a -> Int -> t -> f () go (MVector (PrimState f) a -> MVector (PrimState f) a forall a s. Unbox a => MVector s a -> MVector s a VUM.tail MVector (PrimState f) a v) (Int maxInd Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1) t nextGen