{-# LANGUAGE BangPatterns #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} module DataFrame.IO.CSV where import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as C import qualified Data.ByteString.Lazy as BL import qualified Data.List as L import qualified Data.Map.Strict as M import qualified Data.Proxy as P import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.IO as TIO import qualified Data.Vector as V import qualified Data.Vector.Mutable as VM import qualified Data.Vector.Unboxed as VU import qualified Data.Vector.Unboxed.Mutable as VUM import Data.Csv.Streaming (Records (..)) import qualified Data.Csv.Streaming as CsvStream import Control.Monad import Data.Char import qualified Data.Csv as Csv import Data.Either import Data.Function (on) import Data.Functor import Data.IORef import Data.Maybe import Data.Type.Equality (TestEquality (testEquality)) import Data.Word (Word8) import DataFrame.Internal.Column import DataFrame.Internal.DataFrame (DataFrame (..)) import DataFrame.Internal.Parsing import DataFrame.Internal.Schema import DataFrame.Operations.Typing import System.IO import Type.Reflection import Prelude hiding (concat, takeWhile) chunkSize :: Int chunkSize :: Int chunkSize = Int 16_384 data PagedVector a = PagedVector { forall a. PagedVector a -> IORef [Vector a] pvChunks :: !(IORef [V.Vector a]) , forall a. PagedVector a -> IORef (IOVector a) pvActive :: !(IORef (VM.IOVector a)) , forall a. PagedVector a -> IORef Int pvCount :: !(IORef Int) } data PagedUnboxedVector a = PagedUnboxedVector { forall a. PagedUnboxedVector a -> IORef [Vector a] puvChunks :: !(IORef [VU.Vector a]) , forall a. 
PagedUnboxedVector a -> IORef (IOVector a) puvActive :: !(IORef (VUM.IOVector a)) , forall a. PagedUnboxedVector a -> IORef Int puvCount :: !(IORef Int) } data BuilderColumn = BuilderInt !(PagedUnboxedVector Int) !(PagedUnboxedVector Word8) | BuilderDouble !(PagedUnboxedVector Double) !(PagedUnboxedVector Word8) | BuilderText !(PagedVector T.Text) !(PagedUnboxedVector Word8) | BuilderBS !(PagedVector BS.ByteString) !(PagedUnboxedVector Word8) newPagedVector :: IO (PagedVector a) newPagedVector :: forall a. IO (PagedVector a) newPagedVector = do IOVector a active <- Int -> IO (MVector (PrimState IO) a) forall (m :: * -> *) a. PrimMonad m => Int -> m (MVector (PrimState m) a) VM.unsafeNew Int chunkSize IORef [Vector a] -> IORef (IOVector a) -> IORef Int -> PagedVector a forall a. IORef [Vector a] -> IORef (IOVector a) -> IORef Int -> PagedVector a PagedVector (IORef [Vector a] -> IORef (IOVector a) -> IORef Int -> PagedVector a) -> IO (IORef [Vector a]) -> IO (IORef (IOVector a) -> IORef Int -> PagedVector a) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> [Vector a] -> IO (IORef [Vector a]) forall a. a -> IO (IORef a) newIORef [] IO (IORef (IOVector a) -> IORef Int -> PagedVector a) -> IO (IORef (IOVector a)) -> IO (IORef Int -> PagedVector a) forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> IOVector a -> IO (IORef (IOVector a)) forall a. a -> IO (IORef a) newIORef IOVector a active IO (IORef Int -> PagedVector a) -> IO (IORef Int) -> IO (PagedVector a) forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> Int -> IO (IORef Int) forall a. a -> IO (IORef a) newIORef Int 0 newPagedUnboxedVector :: (VUM.Unbox a) => IO (PagedUnboxedVector a) newPagedUnboxedVector :: forall a. Unbox a => IO (PagedUnboxedVector a) newPagedUnboxedVector = do IOVector a active <- Int -> IO (MVector (PrimState IO) a) forall (m :: * -> *) a. 
(PrimMonad m, Unbox a) => Int -> m (MVector (PrimState m) a) VUM.unsafeNew Int chunkSize IORef [Vector a] -> IORef (IOVector a) -> IORef Int -> PagedUnboxedVector a forall a. IORef [Vector a] -> IORef (IOVector a) -> IORef Int -> PagedUnboxedVector a PagedUnboxedVector (IORef [Vector a] -> IORef (IOVector a) -> IORef Int -> PagedUnboxedVector a) -> IO (IORef [Vector a]) -> IO (IORef (IOVector a) -> IORef Int -> PagedUnboxedVector a) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> [Vector a] -> IO (IORef [Vector a]) forall a. a -> IO (IORef a) newIORef [] IO (IORef (IOVector a) -> IORef Int -> PagedUnboxedVector a) -> IO (IORef (IOVector a)) -> IO (IORef Int -> PagedUnboxedVector a) forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> IOVector a -> IO (IORef (IOVector a)) forall a. a -> IO (IORef a) newIORef IOVector a active IO (IORef Int -> PagedUnboxedVector a) -> IO (IORef Int) -> IO (PagedUnboxedVector a) forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> Int -> IO (IORef Int) forall a. a -> IO (IORef a) newIORef Int 0 appendPagedVector :: PagedVector a -> a -> IO () appendPagedVector :: forall a. PagedVector a -> a -> IO () appendPagedVector (PagedVector IORef [Vector a] chunksRef IORef (IOVector a) activeRef IORef Int countRef) !a val = do Int count <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int countRef IOVector a active <- IORef (IOVector a) -> IO (IOVector a) forall a. IORef a -> IO a readIORef IORef (IOVector a) activeRef if Int count Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int chunkSize then do MVector (PrimState IO) a -> Int -> a -> IO () forall (m :: * -> *) a. PrimMonad m => MVector (PrimState m) a -> Int -> a -> m () VM.unsafeWrite IOVector a MVector (PrimState IO) a active Int count a val IORef Int -> Int -> IO () forall a. 
IORef a -> a -> IO () writeIORef IORef Int countRef (Int -> IO ()) -> Int -> IO () forall a b. (a -> b) -> a -> b $! Int count Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1 else do Vector a frozen <- MVector (PrimState IO) a -> IO (Vector a) forall (m :: * -> *) a. PrimMonad m => MVector (PrimState m) a -> m (Vector a) V.unsafeFreeze IOVector a MVector (PrimState IO) a active IORef [Vector a] -> ([Vector a] -> [Vector a]) -> IO () forall a. IORef a -> (a -> a) -> IO () modifyIORef' IORef [Vector a] chunksRef (Vector a frozen Vector a -> [Vector a] -> [Vector a] forall a. a -> [a] -> [a] :) IOVector a newActive <- Int -> IO (MVector (PrimState IO) a) forall (m :: * -> *) a. PrimMonad m => Int -> m (MVector (PrimState m) a) VM.unsafeNew Int chunkSize MVector (PrimState IO) a -> Int -> a -> IO () forall (m :: * -> *) a. PrimMonad m => MVector (PrimState m) a -> Int -> a -> m () VM.unsafeWrite IOVector a MVector (PrimState IO) a newActive Int 0 a val IORef (IOVector a) -> IOVector a -> IO () forall a. IORef a -> a -> IO () writeIORef IORef (IOVector a) activeRef IOVector a newActive IORef Int -> Int -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Int countRef Int 1 {-# INLINE appendPagedVector #-} appendPagedUnboxedVector :: (VUM.Unbox a) => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector :: forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector (PagedUnboxedVector IORef [Vector a] chunksRef IORef (IOVector a) activeRef IORef Int countRef) !a val = do Int count <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int countRef IOVector a active <- IORef (IOVector a) -> IO (IOVector a) forall a. IORef a -> IO a readIORef IORef (IOVector a) activeRef if Int count Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int chunkSize then do MVector (PrimState IO) a -> Int -> a -> IO () forall (m :: * -> *) a. 
(PrimMonad m, Unbox a) => MVector (PrimState m) a -> Int -> a -> m () VUM.unsafeWrite IOVector a MVector (PrimState IO) a active Int count a val IORef Int -> Int -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Int countRef (Int -> IO ()) -> Int -> IO () forall a b. (a -> b) -> a -> b $! Int count Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1 else do Vector a frozen <- MVector (PrimState IO) a -> IO (Vector a) forall a (m :: * -> *). (Unbox a, PrimMonad m) => MVector (PrimState m) a -> m (Vector a) VU.unsafeFreeze IOVector a MVector (PrimState IO) a active IORef [Vector a] -> ([Vector a] -> [Vector a]) -> IO () forall a. IORef a -> (a -> a) -> IO () modifyIORef' IORef [Vector a] chunksRef (Vector a frozen Vector a -> [Vector a] -> [Vector a] forall a. a -> [a] -> [a] :) IOVector a newActive <- Int -> IO (MVector (PrimState IO) a) forall (m :: * -> *) a. (PrimMonad m, Unbox a) => Int -> m (MVector (PrimState m) a) VUM.unsafeNew Int chunkSize MVector (PrimState IO) a -> Int -> a -> IO () forall (m :: * -> *) a. (PrimMonad m, Unbox a) => MVector (PrimState m) a -> Int -> a -> m () VUM.unsafeWrite IOVector a MVector (PrimState IO) a newActive Int 0 a val IORef (IOVector a) -> IOVector a -> IO () forall a. IORef a -> a -> IO () writeIORef IORef (IOVector a) activeRef IOVector a newActive IORef Int -> Int -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Int countRef Int 1 {-# INLINE appendPagedUnboxedVector #-} freezePagedVector :: PagedVector a -> IO (V.Vector a) freezePagedVector :: forall a. PagedVector a -> IO (Vector a) freezePagedVector (PagedVector IORef [Vector a] chunksRef IORef (IOVector a) activeRef IORef Int countRef) = do Int count <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int countRef IOVector a active <- IORef (IOVector a) -> IO (IOVector a) forall a. IORef a -> IO a readIORef IORef (IOVector a) activeRef [Vector a] chunks <- IORef [Vector a] -> IO [Vector a] forall a. 
IORef a -> IO a readIORef IORef [Vector a] chunksRef IORef [Vector a] -> [Vector a] -> IO () forall a. IORef a -> a -> IO () writeIORef IORef [Vector a] chunksRef [] let frozenChunks :: [Vector a] frozenChunks = [Vector a] -> [Vector a] forall a. [a] -> [a] reverse [Vector a] chunks totalLen :: Int totalLen = Int count Int -> Int -> Int forall a. Num a => a -> a -> a + [Int] -> Int forall a. Num a => [a] -> a forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a sum ((Vector a -> Int) -> [Vector a] -> [Int] forall a b. (a -> b) -> [a] -> [b] map Vector a -> Int forall a. Vector a -> Int V.length [Vector a] frozenChunks) IOVector a mv <- Int -> IO (MVector (PrimState IO) a) forall (m :: * -> *) a. PrimMonad m => Int -> m (MVector (PrimState m) a) VM.unsafeNew Int totalLen let copyChunk :: Int -> Vector a -> IO Int copyChunk !Int offset Vector a chunk = do MVector (PrimState IO) a -> Vector a -> IO () forall (m :: * -> *) a. PrimMonad m => MVector (PrimState m) a -> Vector a -> m () V.copy (Int -> Int -> IOVector a -> IOVector a forall s a. Int -> Int -> MVector s a -> MVector s a VM.slice Int offset (Vector a -> Int forall a. Vector a -> Int V.length Vector a chunk) IOVector a mv) Vector a chunk Int -> IO Int forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure (Int offset Int -> Int -> Int forall a. Num a => a -> a -> a + Vector a -> Int forall a. Vector a -> Int V.length Vector a chunk) Int offset <- (Int -> Vector a -> IO Int) -> Int -> [Vector a] -> IO Int forall (t :: * -> *) (m :: * -> *) b a. (Foldable t, Monad m) => (b -> a -> m b) -> b -> t a -> m b foldM Int -> Vector a -> IO Int copyChunk Int 0 [Vector a] frozenChunks MVector (PrimState IO) a -> MVector (PrimState IO) a -> IO () forall (m :: * -> *) a. PrimMonad m => MVector (PrimState m) a -> MVector (PrimState m) a -> m () VM.copy (Int -> Int -> IOVector a -> IOVector a forall s a. 
Int -> Int -> MVector s a -> MVector s a VM.slice Int offset Int count IOVector a mv) (Int -> Int -> IOVector a -> IOVector a forall s a. Int -> Int -> MVector s a -> MVector s a VM.slice Int 0 Int count IOVector a active) MVector (PrimState IO) a -> IO (Vector a) forall (m :: * -> *) a. PrimMonad m => MVector (PrimState m) a -> m (Vector a) V.unsafeFreeze IOVector a MVector (PrimState IO) a mv freezePagedUnboxedVector :: (VUM.Unbox a) => PagedUnboxedVector a -> IO (VU.Vector a) freezePagedUnboxedVector :: forall a. Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector (PagedUnboxedVector IORef [Vector a] chunksRef IORef (IOVector a) activeRef IORef Int countRef) = do Int count <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int countRef IOVector a active <- IORef (IOVector a) -> IO (IOVector a) forall a. IORef a -> IO a readIORef IORef (IOVector a) activeRef [Vector a] chunks <- IORef [Vector a] -> IO [Vector a] forall a. IORef a -> IO a readIORef IORef [Vector a] chunksRef IORef [Vector a] -> [Vector a] -> IO () forall a. IORef a -> a -> IO () writeIORef IORef [Vector a] chunksRef [] let frozenChunks :: [Vector a] frozenChunks = [Vector a] -> [Vector a] forall a. [a] -> [a] reverse [Vector a] chunks totalLen :: Int totalLen = Int count Int -> Int -> Int forall a. Num a => a -> a -> a + [Int] -> Int forall a. Num a => [a] -> a forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a sum ((Vector a -> Int) -> [Vector a] -> [Int] forall a b. (a -> b) -> [a] -> [b] map Vector a -> Int forall a. Unbox a => Vector a -> Int VU.length [Vector a] frozenChunks) IOVector a mv <- Int -> IO (MVector (PrimState IO) a) forall (m :: * -> *) a. (PrimMonad m, Unbox a) => Int -> m (MVector (PrimState m) a) VUM.unsafeNew Int totalLen let copyChunk :: Int -> Vector a -> IO Int copyChunk !Int offset Vector a chunk = do MVector (PrimState IO) a -> Vector a -> IO () forall a (m :: * -> *). 
(Unbox a, PrimMonad m) => MVector (PrimState m) a -> Vector a -> m () VU.copy (Int -> Int -> IOVector a -> IOVector a forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a VUM.slice Int offset (Vector a -> Int forall a. Unbox a => Vector a -> Int VU.length Vector a chunk) IOVector a mv) Vector a chunk Int -> IO Int forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure (Int offset Int -> Int -> Int forall a. Num a => a -> a -> a + Vector a -> Int forall a. Unbox a => Vector a -> Int VU.length Vector a chunk) Int offset <- (Int -> Vector a -> IO Int) -> Int -> [Vector a] -> IO Int forall (t :: * -> *) (m :: * -> *) b a. (Foldable t, Monad m) => (b -> a -> m b) -> b -> t a -> m b foldM Int -> Vector a -> IO Int copyChunk Int 0 [Vector a] frozenChunks MVector (PrimState IO) a -> MVector (PrimState IO) a -> IO () forall (m :: * -> *) a. (PrimMonad m, Unbox a) => MVector (PrimState m) a -> MVector (PrimState m) a -> m () VUM.copy (Int -> Int -> IOVector a -> IOVector a forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a VUM.slice Int offset Int count IOVector a mv) (Int -> Int -> IOVector a -> IOVector a forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a VUM.slice Int 0 Int count IOVector a active) MVector (PrimState IO) a -> IO (Vector a) forall a (m :: * -> *). (Unbox a, PrimMonad m) => MVector (PrimState m) a -> m (Vector a) VU.unsafeFreeze IOVector a MVector (PrimState IO) a mv data = | UseFirstRow | ProvideNames [T.Text] deriving (HeaderSpec -> HeaderSpec -> Bool (HeaderSpec -> HeaderSpec -> Bool) -> (HeaderSpec -> HeaderSpec -> Bool) -> Eq HeaderSpec forall a. 
(a -> a -> Bool) -> (a -> a -> Bool) -> Eq a $c== :: HeaderSpec -> HeaderSpec -> Bool == :: HeaderSpec -> HeaderSpec -> Bool $c/= :: HeaderSpec -> HeaderSpec -> Bool /= :: HeaderSpec -> HeaderSpec -> Bool Eq, Int -> HeaderSpec -> ShowS [HeaderSpec] -> ShowS HeaderSpec -> [Char] (Int -> HeaderSpec -> ShowS) -> (HeaderSpec -> [Char]) -> ([HeaderSpec] -> ShowS) -> Show HeaderSpec forall a. (Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a $cshowsPrec :: Int -> HeaderSpec -> ShowS showsPrec :: Int -> HeaderSpec -> ShowS $cshow :: HeaderSpec -> [Char] show :: HeaderSpec -> [Char] $cshowList :: [HeaderSpec] -> ShowS showList :: [HeaderSpec] -> ShowS Show) data TypeSpec = InferFromSample Int | SpecifyTypes [(T.Text, SchemaType)] TypeSpec | NoInference data ReadOptions = ReadOptions { :: HeaderSpec , ReadOptions -> TypeSpec typeSpec :: TypeSpec , ReadOptions -> Bool safeRead :: Bool , ReadOptions -> [Char] dateFormat :: String , ReadOptions -> Char columnSeparator :: Char , ReadOptions -> Maybe Int numColumns :: Maybe Int , ReadOptions -> [Text] missingIndicators :: [T.Text] } shouldInferFromSample :: TypeSpec -> Bool shouldInferFromSample :: TypeSpec -> Bool shouldInferFromSample (InferFromSample Int _) = Bool True shouldInferFromSample (SpecifyTypes [(Text, SchemaType)] _ TypeSpec fallback) = TypeSpec -> Bool shouldInferFromSample TypeSpec fallback shouldInferFromSample TypeSpec _ = Bool False schemaTypeMap :: TypeSpec -> M.Map T.Text SchemaType schemaTypeMap :: TypeSpec -> Map Text SchemaType schemaTypeMap (SpecifyTypes [(Text, SchemaType)] xs TypeSpec _) = [(Text, SchemaType)] -> Map Text SchemaType forall k a. Ord k => [(k, a)] -> Map k a M.fromList [(Text, SchemaType)] xs schemaTypeMap TypeSpec _ = Map Text SchemaType forall k a. 
Map k a M.empty typeInferenceSampleSize :: TypeSpec -> Int typeInferenceSampleSize :: TypeSpec -> Int typeInferenceSampleSize (InferFromSample Int n) = Int n typeInferenceSampleSize (SpecifyTypes [(Text, SchemaType)] _ TypeSpec fallback) = TypeSpec -> Int typeInferenceSampleSize TypeSpec fallback typeInferenceSampleSize TypeSpec _ = Int 0 defaultReadOptions :: ReadOptions defaultReadOptions :: ReadOptions defaultReadOptions = ReadOptions { headerSpec :: HeaderSpec headerSpec = HeaderSpec UseFirstRow , typeSpec :: TypeSpec typeSpec = Int -> TypeSpec InferFromSample Int 100 , safeRead :: Bool safeRead = Bool True , dateFormat :: [Char] dateFormat = [Char] "%Y-%m-%d" , columnSeparator :: Char columnSeparator = Char ',' , numColumns :: Maybe Int numColumns = Maybe Int forall a. Maybe a Nothing , missingIndicators :: [Text] missingIndicators = [Text "Nothing", Text "NULL", Text "", Text " ", Text "nan", Text "null", Text "N/A", Text "NaN", Text "NAN", Text "NA"] } readCsv :: FilePath -> IO DataFrame readCsv :: [Char] -> IO DataFrame readCsv = ReadOptions -> [Char] -> IO DataFrame readSeparated ReadOptions defaultReadOptions readCsvWithOpts :: ReadOptions -> FilePath -> IO DataFrame readCsvWithOpts :: ReadOptions -> [Char] -> IO DataFrame readCsvWithOpts = ReadOptions -> [Char] -> IO DataFrame readSeparated readTsv :: FilePath -> IO DataFrame readTsv :: [Char] -> IO DataFrame readTsv = ReadOptions -> [Char] -> IO DataFrame readSeparated (ReadOptions defaultReadOptions{columnSeparator = '\t'}) readSeparated :: ReadOptions -> FilePath -> IO DataFrame readSeparated :: ReadOptions -> [Char] -> IO DataFrame readSeparated ReadOptions opts ![Char] path = do let stripUtf8Bom :: LazyByteString -> LazyByteString stripUtf8Bom LazyByteString bs = LazyByteString -> Maybe LazyByteString -> LazyByteString forall a. 
a -> Maybe a -> a fromMaybe LazyByteString bs (LazyByteString -> LazyByteString -> Maybe LazyByteString BL.stripPrefix LazyByteString "\xEF\xBB\xBF" LazyByteString bs) LazyByteString csvData <- LazyByteString -> LazyByteString stripUtf8Bom (LazyByteString -> LazyByteString) -> IO LazyByteString -> IO LazyByteString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> [Char] -> IO LazyByteString BL.readFile [Char] path ReadOptions -> LazyByteString -> IO DataFrame decodeSeparated ReadOptions opts LazyByteString csvData decodeSeparated :: ReadOptions -> BL.ByteString -> IO DataFrame decodeSeparated :: ReadOptions -> LazyByteString -> IO DataFrame decodeSeparated !ReadOptions opts LazyByteString csvData = do let sep :: Char sep = ReadOptions -> Char columnSeparator ReadOptions opts let decodeOpts :: DecodeOptions decodeOpts = DecodeOptions Csv.defaultDecodeOptions{Csv.decDelimiter = fromIntegral (ord sep)} let stream :: Records (Vector LazyByteString) stream = DecodeOptions -> HasHeader -> LazyByteString -> Records (Vector LazyByteString) forall a. FromRecord a => DecodeOptions -> HasHeader -> LazyByteString -> Records a CsvStream.decodeWith DecodeOptions decodeOpts HasHeader Csv.NoHeader LazyByteString csvData let peekStream :: Records a -> m (a, Records a) peekStream (Cons (Right a row) Records a rest) = (a, Records a) -> m (a, Records a) forall a. a -> m a forall (m :: * -> *) a. Monad m => a -> m a return (a row, Records a rest) peekStream (Cons (Left [Char] err) Records a _) = [Char] -> m (a, Records a) forall a. HasCallStack => [Char] -> a error ([Char] -> m (a, Records a)) -> [Char] -> m (a, Records a) forall a b. (a -> b) -> a -> b $ [Char] "Error parsing CSV header: " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ [Char] err peekStream (Nil Maybe [Char] Nothing LazyByteString _) = [Char] -> m (a, Records a) forall a. 
HasCallStack => [Char] -> a error [Char] "Empty CSV file" peekStream (Nil (Just [Char] err) LazyByteString _) = [Char] -> m (a, Records a) forall a. HasCallStack => [Char] -> a error [Char] err (Vector LazyByteString firstRowRaw, Records (Vector LazyByteString) dataStream) <- Records (Vector LazyByteString) -> IO (Vector LazyByteString, Records (Vector LazyByteString)) forall {m :: * -> *} {a}. Monad m => Records a -> m (a, Records a) peekStream Records (Vector LazyByteString) stream let ([Text] columnNames, Records (Vector LazyByteString) rowsToProcess) = case ReadOptions -> HeaderSpec headerSpec ReadOptions opts of HeaderSpec NoHeader -> ( (Int -> Text) -> [Int] -> [Text] forall a b. (a -> b) -> [a] -> [b] map ([Char] -> Text T.pack ([Char] -> Text) -> (Int -> [Char]) -> Int -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . Int -> [Char] forall a. Show a => a -> [Char] show) [Int 0 .. Vector LazyByteString -> Int forall a. Vector a -> Int V.length Vector LazyByteString firstRowRaw Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1] , Either [Char] (Vector LazyByteString) -> Records (Vector LazyByteString) -> Records (Vector LazyByteString) forall a. Either [Char] a -> Records a -> Records a Cons (Vector LazyByteString -> Either [Char] (Vector LazyByteString) forall a b. b -> Either a b Right Vector LazyByteString firstRowRaw) Records (Vector LazyByteString) dataStream ) HeaderSpec UseFirstRow -> ( (LazyByteString -> Text) -> [LazyByteString] -> [Text] forall a b. (a -> b) -> [a] -> [b] map (Text -> Text T.strip (Text -> Text) -> (LazyByteString -> Text) -> LazyByteString -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . ByteString -> Text TE.decodeUtf8Lenient (ByteString -> Text) -> (LazyByteString -> ByteString) -> LazyByteString -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . LazyByteString -> ByteString BL.toStrict) (Vector LazyByteString -> [LazyByteString] forall a. 
Vector a -> [a] V.toList Vector LazyByteString firstRowRaw) , Records (Vector LazyByteString) dataStream ) ProvideNames [Text] ns -> ( [Text] ns [Text] -> [Text] -> [Text] forall a. [a] -> [a] -> [a] ++ Int -> [Text] -> [Text] forall a. Int -> [a] -> [a] drop ([Text] -> Int forall a. [a] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length [Text] ns) ((Int -> Text) -> [Int] -> [Text] forall a b. (a -> b) -> [a] -> [b] map ([Char] -> Text T.pack ([Char] -> Text) -> (Int -> [Char]) -> Int -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . Int -> [Char] forall a. Show a => a -> [Char] show) [Int 0 .. Vector LazyByteString -> Int forall a. Vector a -> Int V.length Vector LazyByteString firstRowRaw Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1]) , Either [Char] (Vector LazyByteString) -> Records (Vector LazyByteString) -> Records (Vector LazyByteString) forall a. Either [Char] a -> Records a -> Records a Cons (Vector LazyByteString -> Either [Char] (Vector LazyByteString) forall a b. b -> Either a b Right Vector LazyByteString firstRowRaw) Records (Vector LazyByteString) dataStream ) (Vector LazyByteString sampleRow, Records (Vector LazyByteString) _) <- Records (Vector LazyByteString) -> IO (Vector LazyByteString, Records (Vector LazyByteString)) forall {m :: * -> *} {a}. Monad m => Records a -> m (a, Records a) peekStream Records (Vector LazyByteString) rowsToProcess [BuilderColumn] builderCols <- [Text] -> [LazyByteString] -> ReadOptions -> IO [BuilderColumn] initializeColumns [Text] columnNames (Vector LazyByteString -> [LazyByteString] forall a. Vector a -> [a] V.toList Vector LazyByteString sampleRow) ReadOptions opts let !builderColsV :: Vector BuilderColumn builderColsV = [BuilderColumn] -> Vector BuilderColumn forall a. 
[a] -> Vector a V.fromList [BuilderColumn] builderCols [Text] -> Records (Vector LazyByteString) -> Vector BuilderColumn -> Maybe Int -> IO () processStream (ReadOptions -> [Text] missingIndicators ReadOptions opts) Records (Vector LazyByteString) rowsToProcess Vector BuilderColumn builderColsV (ReadOptions -> Maybe Int numColumns ReadOptions opts) Vector Column frozenCols <- (BuilderColumn -> IO Column) -> Vector BuilderColumn -> IO (Vector Column) forall (m :: * -> *) a b. Monad m => (a -> m b) -> Vector a -> m (Vector b) V.mapM (ReadOptions -> BuilderColumn -> IO Column finalizeBuilderColumn ReadOptions opts) Vector BuilderColumn builderColsV let numRows :: Int numRows = Int -> (Column -> Int) -> Maybe Column -> Int forall b a. b -> (a -> b) -> Maybe a -> b maybe Int 0 Column -> Int columnLength (Vector Column frozenCols Vector Column -> Int -> Maybe Column forall a. Vector a -> Int -> Maybe a V.!? Int 0) let df :: DataFrame df = Vector Column -> Map Text Int -> (Int, Int) -> Map Text UExpr -> DataFrame DataFrame Vector Column frozenCols ([(Text, Int)] -> Map Text Int forall k a. Ord k => [(k, a)] -> Map k a M.fromList ([Text] -> [Int] -> [(Text, Int)] forall a b. [a] -> [b] -> [(a, b)] zip [Text] columnNames [Int 0 ..])) (Int numRows, Vector Column -> Int forall a. Vector a -> Int V.length Vector Column frozenCols) Map Text UExpr forall k a. Map k a M.empty DataFrame -> IO DataFrame forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure (DataFrame -> IO DataFrame) -> DataFrame -> IO DataFrame forall a b. 
(a -> b) -> a -> b $ Bool -> Map Text SchemaType -> DataFrame -> DataFrame parseWithTypes (ReadOptions -> Bool safeRead ReadOptions opts) (TypeSpec -> Map Text SchemaType schemaTypeMap (ReadOptions -> TypeSpec typeSpec ReadOptions opts)) DataFrame df initializeColumns :: [T.Text] -> [BL.ByteString] -> ReadOptions -> IO [BuilderColumn] initializeColumns :: [Text] -> [LazyByteString] -> ReadOptions -> IO [BuilderColumn] initializeColumns [Text] names [LazyByteString] row ReadOptions opts = (Text -> Maybe SchemaType -> IO BuilderColumn) -> [Text] -> [Maybe SchemaType] -> IO [BuilderColumn] forall (m :: * -> *) a b c. Applicative m => (a -> b -> m c) -> [a] -> [b] -> m [c] zipWithM Text -> Maybe SchemaType -> IO BuilderColumn initColumn [Text] names ((Text -> Maybe SchemaType) -> [Text] -> [Maybe SchemaType] forall a b. (a -> b) -> [a] -> [b] map Text -> Maybe SchemaType lookupType [Text] names) where typeMap :: Map Text SchemaType typeMap = TypeSpec -> Map Text SchemaType schemaTypeMap (ReadOptions -> TypeSpec typeSpec ReadOptions opts) shouldInfer :: Bool shouldInfer = case ReadOptions -> TypeSpec typeSpec ReadOptions opts of InferFromSample Int _ -> Bool True SpecifyTypes [(Text, SchemaType)] _ TypeSpec fallback -> TypeSpec -> Bool shouldInferFromSample TypeSpec fallback TypeSpec NoInference -> Bool False lookupType :: Text -> Maybe SchemaType lookupType Text name = Text -> Map Text SchemaType -> Maybe SchemaType forall k a. Ord k => k -> Map k a -> Maybe a M.lookup Text name Map Text SchemaType typeMap initColumn :: T.Text -> Maybe SchemaType -> IO BuilderColumn initColumn :: Text -> Maybe SchemaType -> IO BuilderColumn initColumn Text _ Maybe SchemaType Nothing | Bool shouldInfer = do PagedUnboxedVector Word8 validityRef <- IO (PagedUnboxedVector Word8) forall a. 
Unbox a => IO (PagedUnboxedVector a) newPagedUnboxedVector PagedVector ByteString -> PagedUnboxedVector Word8 -> BuilderColumn BuilderBS (PagedVector ByteString -> PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedVector ByteString) -> IO (PagedUnboxedVector Word8 -> BuilderColumn) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> IO (PagedVector ByteString) forall a. IO (PagedVector a) newPagedVector IO (PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedUnboxedVector Word8) -> IO BuilderColumn forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8) forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure PagedUnboxedVector Word8 validityRef initColumn Text _ Maybe SchemaType mtype = do PagedUnboxedVector Word8 validityRef <- IO (PagedUnboxedVector Word8) forall a. Unbox a => IO (PagedUnboxedVector a) newPagedUnboxedVector let t :: SchemaType t = SchemaType -> Maybe SchemaType -> SchemaType forall a. a -> Maybe a -> a fromMaybe (forall a. Columnable a => SchemaType schemaType @T.Text) Maybe SchemaType mtype case SchemaType t of SType (Proxy a _ :: P.Proxy a) -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @a) (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @Int) of Just a :~: Int Refl -> PagedUnboxedVector Int -> PagedUnboxedVector Word8 -> BuilderColumn BuilderInt (PagedUnboxedVector Int -> PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedUnboxedVector Int) -> IO (PagedUnboxedVector Word8 -> BuilderColumn) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> IO (PagedUnboxedVector Int) forall a. 
Unbox a => IO (PagedUnboxedVector a) newPagedUnboxedVector IO (PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedUnboxedVector Word8) -> IO BuilderColumn forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8) forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure PagedUnboxedVector Word8 validityRef Maybe (a :~: Int) Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @a) (forall a. Typeable a => TypeRep a forall {k} (a :: k). Typeable a => TypeRep a typeRep @Double) of Just a :~: Double Refl -> PagedUnboxedVector Double -> PagedUnboxedVector Word8 -> BuilderColumn BuilderDouble (PagedUnboxedVector Double -> PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedUnboxedVector Double) -> IO (PagedUnboxedVector Word8 -> BuilderColumn) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> IO (PagedUnboxedVector Double) forall a. Unbox a => IO (PagedUnboxedVector a) newPagedUnboxedVector IO (PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedUnboxedVector Word8) -> IO BuilderColumn forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8) forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure PagedUnboxedVector Word8 validityRef Maybe (a :~: Double) Nothing -> PagedVector Text -> PagedUnboxedVector Word8 -> BuilderColumn BuilderText (PagedVector Text -> PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedVector Text) -> IO (PagedUnboxedVector Word8 -> BuilderColumn) forall (f :: * -> *) a b. 
Functor f => (a -> b) -> f a -> f b <$> IO (PagedVector Text) forall a. IO (PagedVector a) newPagedVector IO (PagedUnboxedVector Word8 -> BuilderColumn) -> IO (PagedUnboxedVector Word8) -> IO BuilderColumn forall a b. IO (a -> b) -> IO a -> IO b forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8) forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure PagedUnboxedVector Word8 validityRef processStream :: [T.Text] -> CsvStream.Records (V.Vector BL.ByteString) -> V.Vector BuilderColumn -> Maybe Int -> IO () processStream :: [Text] -> Records (Vector LazyByteString) -> Vector BuilderColumn -> Maybe Int -> IO () processStream [Text] _ Records (Vector LazyByteString) _ Vector BuilderColumn _ (Just Int 0) = () -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return () processStream [Text] missing (Cons (Right Vector LazyByteString row) Records (Vector LazyByteString) rest) Vector BuilderColumn cols Maybe Int n = [Text] -> Vector LazyByteString -> Vector BuilderColumn -> IO () processRow [Text] missing Vector LazyByteString row Vector BuilderColumn cols IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> [Text] -> Records (Vector LazyByteString) -> Vector BuilderColumn -> Maybe Int -> IO () processStream [Text] missing Records (Vector LazyByteString) rest Vector BuilderColumn cols ((Int -> Int) -> Maybe Int -> Maybe Int forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ((Int -> Int -> Int) -> Int -> Int -> Int forall a b c. (a -> b -> c) -> b -> a -> c flip (-) Int 1) Maybe Int n) processStream [Text] missing (Cons (Left [Char] err) Records (Vector LazyByteString) _) Vector BuilderColumn _ Maybe Int _ = [Char] -> IO () forall a. HasCallStack => [Char] -> a error ([Char] "CSV Parse Error: " [Char] -> ShowS forall a. 
[a] -> [a] -> [a] ++ [Char] err) processStream [Text] missing (Nil Maybe [Char] _ LazyByteString _) Vector BuilderColumn _ Maybe Int _ = () -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return () processRow :: [T.Text] -> V.Vector BL.ByteString -> V.Vector BuilderColumn -> IO () processRow :: [Text] -> Vector LazyByteString -> Vector BuilderColumn -> IO () processRow [Text] missing !Vector LazyByteString vals !Vector BuilderColumn cols = (LazyByteString -> BuilderColumn -> IO ()) -> Vector LazyByteString -> Vector BuilderColumn -> IO () forall (m :: * -> *) a b c. Monad m => (a -> b -> m c) -> Vector a -> Vector b -> m () V.zipWithM_ LazyByteString -> BuilderColumn -> IO () processValue Vector LazyByteString vals Vector BuilderColumn cols where processValue :: LazyByteString -> BuilderColumn -> IO () processValue !LazyByteString bs !BuilderColumn col = do let !bs' :: ByteString bs' = LazyByteString -> ByteString BL.toStrict LazyByteString bs case BuilderColumn col of BuilderInt PagedUnboxedVector Int gv PagedUnboxedVector Word8 valid -> case HasCallStack => ByteString -> Maybe Int ByteString -> Maybe Int readByteStringInt ByteString bs' of Just !Int i -> PagedUnboxedVector Int -> Int -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Int gv Int i IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 1 Maybe Int Nothing -> PagedUnboxedVector Int -> Int -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Int gv Int 0 IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. 
Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 0 BuilderDouble PagedUnboxedVector Double gv PagedUnboxedVector Word8 valid -> case HasCallStack => ByteString -> Maybe Double ByteString -> Maybe Double readByteStringDouble ByteString bs' of Just !Double d -> PagedUnboxedVector Double -> Double -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Double gv Double d IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 1 Maybe Double Nothing -> PagedUnboxedVector Double -> Double -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Double gv Double 0.0 IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 0 BuilderText PagedVector Text gv PagedUnboxedVector Word8 valid -> do let !val :: Text val = Text -> Text T.strip (ByteString -> Text TE.decodeUtf8Lenient ByteString bs') if Text val Text -> [Text] -> Bool forall a. Eq a => a -> [a] -> Bool forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool `elem` [Text] missing then PagedVector Text -> Text -> IO () forall a. PagedVector a -> a -> IO () appendPagedVector PagedVector Text gv Text T.empty IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. 
Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 0 else PagedVector Text -> Text -> IO () forall a. PagedVector a -> a -> IO () appendPagedVector PagedVector Text gv Text val IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 1 BuilderBS PagedVector ByteString gv PagedUnboxedVector Word8 valid -> do let !bs'' :: ByteString bs'' = ByteString -> ByteString C.strip ByteString bs' if ByteString -> Text TE.decodeUtf8Lenient ByteString bs'' Text -> [Text] -> Bool forall a. Eq a => a -> [a] -> Bool forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool `elem` [Text] missing then PagedVector ByteString -> ByteString -> IO () forall a. PagedVector a -> a -> IO () appendPagedVector PagedVector ByteString gv ByteString BS.empty IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 0 else PagedVector ByteString -> ByteString -> IO () forall a. PagedVector a -> a -> IO () appendPagedVector PagedVector ByteString gv ByteString bs'' IO () -> IO () -> IO () forall a b. IO a -> IO b -> IO b forall (m :: * -> *) a b. Monad m => m a -> m b -> m b >> PagedUnboxedVector Word8 -> Word8 -> IO () forall a. Unbox a => PagedUnboxedVector a -> a -> IO () appendPagedUnboxedVector PagedUnboxedVector Word8 valid Word8 1 freezeBuilderColumn :: BuilderColumn -> IO Column freezeBuilderColumn :: BuilderColumn -> IO Column freezeBuilderColumn (BuilderInt PagedUnboxedVector Int gv PagedUnboxedVector Word8 validRef) = do Vector Int vec <- PagedUnboxedVector Int -> IO (Vector Int) forall a. 
Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector PagedUnboxedVector Int gv Vector Word8 valid <- PagedUnboxedVector Word8 -> IO (Vector Word8) forall a. Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector PagedUnboxedVector Word8 validRef if (Word8 -> Bool) -> Vector Word8 -> Bool forall a. Unbox a => (a -> Bool) -> Vector a -> Bool VU.all (Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 1) Vector Word8 valid then Column -> IO Column forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (Column -> IO Column) -> Column -> IO Column forall a b. (a -> b) -> a -> b $! Vector Int -> Column forall a. (Columnable a, Unbox a) => Vector a -> Column UnboxedColumn Vector Int vec else Vector Int -> Vector Word8 -> IO Column forall a. (Unbox a, Columnable a) => Vector a -> Vector Word8 -> IO Column constructOptional Vector Int vec Vector Word8 valid freezeBuilderColumn (BuilderDouble PagedUnboxedVector Double gv PagedUnboxedVector Word8 validRef) = do Vector Double vec <- PagedUnboxedVector Double -> IO (Vector Double) forall a. Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector PagedUnboxedVector Double gv Vector Word8 valid <- PagedUnboxedVector Word8 -> IO (Vector Word8) forall a. Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector PagedUnboxedVector Word8 validRef if (Word8 -> Bool) -> Vector Word8 -> Bool forall a. Unbox a => (a -> Bool) -> Vector a -> Bool VU.all (Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 1) Vector Word8 valid then Column -> IO Column forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (Column -> IO Column) -> Column -> IO Column forall a b. (a -> b) -> a -> b $! Vector Double -> Column forall a. (Columnable a, Unbox a) => Vector a -> Column UnboxedColumn Vector Double vec else Vector Double -> Vector Word8 -> IO Column forall a. 
(Unbox a, Columnable a) => Vector a -> Vector Word8 -> IO Column constructOptional Vector Double vec Vector Word8 valid freezeBuilderColumn (BuilderText PagedVector Text gv PagedUnboxedVector Word8 validRef) = do Vector Text vec <- PagedVector Text -> IO (Vector Text) forall a. PagedVector a -> IO (Vector a) freezePagedVector PagedVector Text gv Vector Word8 valid <- PagedUnboxedVector Word8 -> IO (Vector Word8) forall a. Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector PagedUnboxedVector Word8 validRef if (Word8 -> Bool) -> Vector Word8 -> Bool forall a. Unbox a => (a -> Bool) -> Vector a -> Bool VU.all (Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 1) Vector Word8 valid then Column -> IO Column forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (Column -> IO Column) -> Column -> IO Column forall a b. (a -> b) -> a -> b $! Vector Text -> Column forall a. Columnable a => Vector a -> Column BoxedColumn Vector Text vec else Vector Text -> Vector Word8 -> IO Column constructOptionalBoxed Vector Text vec Vector Word8 valid freezeBuilderColumn (BuilderBS PagedVector ByteString _ PagedUnboxedVector Word8 _) = [Char] -> IO Column forall a. HasCallStack => [Char] -> a error [Char] "freezeBuilderColumn: BuilderBS must be finalized via finalizeBuilderColumn" finalizeBuilderColumn :: ReadOptions -> BuilderColumn -> IO Column finalizeBuilderColumn :: ReadOptions -> BuilderColumn -> IO Column finalizeBuilderColumn ReadOptions opts (BuilderBS PagedVector ByteString gv PagedUnboxedVector Word8 validRef) = do Vector ByteString vec <- PagedVector ByteString -> IO (Vector ByteString) forall a. PagedVector a -> IO (Vector a) freezePagedVector PagedVector ByteString gv Vector Word8 valid <- PagedUnboxedVector Word8 -> IO (Vector Word8) forall a. Unbox a => PagedUnboxedVector a -> IO (Vector a) freezePagedUnboxedVector PagedUnboxedVector Word8 validRef Column -> IO Column forall a. a -> IO a forall (m :: * -> *) a. 
Monad m => a -> m a return (Column -> IO Column) -> Column -> IO Column forall a b. (a -> b) -> a -> b $! ReadOptions -> Vector ByteString -> Vector Word8 -> Column inferColumnFromBS ReadOptions opts Vector ByteString vec Vector Word8 valid finalizeBuilderColumn ReadOptions _ BuilderColumn bc = BuilderColumn -> IO Column freezeBuilderColumn BuilderColumn bc inferColumnFromBS :: ReadOptions -> V.Vector BS.ByteString -> VU.Vector Word8 -> Column inferColumnFromBS :: ReadOptions -> Vector ByteString -> Vector Word8 -> Column inferColumnFromBS ReadOptions opts Vector ByteString vec Vector Word8 valid = let sampleN :: Int sampleN = let n :: Int n = TypeSpec -> Int typeInferenceSampleSize (ReadOptions -> TypeSpec typeSpec ReadOptions opts) in if Int n Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 0 then Int 100 else Int n dfmt :: [Char] dfmt = ReadOptions -> [Char] dateFormat ReadOptions opts asMaybeFull :: Vector (Maybe ByteString) asMaybeFull = Int -> (Int -> Maybe ByteString) -> Vector (Maybe ByteString) forall a. Int -> (Int -> a) -> Vector a V.generate (Vector ByteString -> Int forall a. Vector a -> Int V.length Vector ByteString vec) ((Int -> Maybe ByteString) -> Vector (Maybe ByteString)) -> (Int -> Maybe ByteString) -> Vector (Maybe ByteString) forall a b. (a -> b) -> a -> b $ \Int i -> if Vector Word8 valid Vector Word8 -> Int -> Word8 forall a. Unbox a => Vector a -> Int -> a VU.! Int i Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 1 then ByteString -> Maybe ByteString forall a. a -> Maybe a Just (Vector ByteString vec Vector ByteString -> Int -> ByteString forall a. Vector a -> Int -> a V.! Int i) else Maybe ByteString forall a. Maybe a Nothing samples :: Vector (Maybe ByteString) samples = Int -> Vector (Maybe ByteString) -> Vector (Maybe ByteString) forall a. 
Int -> Vector a -> Vector a V.take Int sampleN Vector (Maybe ByteString) asMaybeFull assumption :: ParsingAssumption assumption = [Char] -> Vector (Maybe ByteString) -> ParsingAssumption makeParsingAssumptionBS [Char] dfmt Vector (Maybe ByteString) samples in case ParsingAssumption assumption of ParsingAssumption IntAssumption -> [Char] -> Vector (Maybe ByteString) -> Column handleBSInt [Char] dfmt Vector (Maybe ByteString) asMaybeFull ParsingAssumption DoubleAssumption -> Vector (Maybe ByteString) -> Column handleBSDouble Vector (Maybe ByteString) asMaybeFull ParsingAssumption BoolAssumption -> Vector (Maybe ByteString) -> Column handleBSBool Vector (Maybe ByteString) asMaybeFull ParsingAssumption DateAssumption -> [Char] -> Vector (Maybe ByteString) -> Column handleBSDate [Char] dfmt Vector (Maybe ByteString) asMaybeFull ParsingAssumption TextAssumption -> Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybeFull ParsingAssumption NoAssumption -> [Char] -> Vector (Maybe ByteString) -> Column handleBSNo [Char] dfmt Vector (Maybe ByteString) asMaybeFull makeParsingAssumptionBS :: String -> V.Vector (Maybe BS.ByteString) -> ParsingAssumption makeParsingAssumptionBS :: [Char] -> Vector (Maybe ByteString) -> ParsingAssumption makeParsingAssumptionBS [Char] dfmt Vector (Maybe ByteString) asMaybe | (Maybe ByteString -> Bool) -> Vector (Maybe ByteString) -> Bool forall a. (a -> Bool) -> Vector a -> Bool V.all (Maybe ByteString -> Maybe ByteString -> Bool forall a. Eq a => a -> a -> Bool == Maybe ByteString forall a. Maybe a Nothing) Vector (Maybe ByteString) asMaybe = ParsingAssumption NoAssumption | Vector (Maybe ByteString) -> Vector (Maybe Bool) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Bool) asMaybeBool = ParsingAssumption BoolAssumption | Vector (Maybe ByteString) -> Vector (Maybe Int) -> Bool forall a b. 
Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Int) asMaybeInt Bool -> Bool -> Bool && Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble = ParsingAssumption IntAssumption | Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble = ParsingAssumption DoubleAssumption | Vector (Maybe ByteString) -> Vector (Maybe Day) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Day) asMaybeDate = ParsingAssumption DateAssumption | Bool otherwise = ParsingAssumption TextAssumption where asMaybeBool :: Vector (Maybe Bool) asMaybeBool = (Maybe ByteString -> Maybe Bool) -> Vector (Maybe ByteString) -> Vector (Maybe Bool) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Bool) -> Maybe Bool forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= ByteString -> Maybe Bool readByteStringBool) Vector (Maybe ByteString) asMaybe asMaybeInt :: Vector (Maybe Int) asMaybeInt = (Maybe ByteString -> Maybe Int) -> Vector (Maybe ByteString) -> Vector (Maybe Int) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Int) -> Maybe Int forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Int ByteString -> Maybe Int readByteStringInt) Vector (Maybe ByteString) asMaybe asMaybeDouble :: Vector (Maybe Double) asMaybeDouble = (Maybe ByteString -> Maybe Double) -> Vector (Maybe ByteString) -> Vector (Maybe Double) forall a b. 
(a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Double) -> Maybe Double forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Double ByteString -> Maybe Double readByteStringDouble) Vector (Maybe ByteString) asMaybe asMaybeDate :: Vector (Maybe Day) asMaybeDate = (Maybe ByteString -> Maybe Day) -> Vector (Maybe ByteString) -> Vector (Maybe Day) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Day) -> Maybe Day forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= [Char] -> ByteString -> Maybe Day readByteStringDate [Char] dfmt) Vector (Maybe ByteString) asMaybe handleBSBool :: V.Vector (Maybe BS.ByteString) -> Column handleBSBool :: Vector (Maybe ByteString) -> Column handleBSBool Vector (Maybe ByteString) asMaybe | Bool parsableAsBool = Column -> (Vector Bool -> Column) -> Maybe (Vector Bool) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Bool) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Bool) asMaybeBool) Vector Bool -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Bool) -> Maybe (Vector Bool) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Bool) asMaybeBool) | Bool otherwise = Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybe where asMaybeBool :: Vector (Maybe Bool) asMaybeBool = (Maybe ByteString -> Maybe Bool) -> Vector (Maybe ByteString) -> Vector (Maybe Bool) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Bool) -> Maybe Bool forall a b. 
Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= ByteString -> Maybe Bool readByteStringBool) Vector (Maybe ByteString) asMaybe parsableAsBool :: Bool parsableAsBool = Vector (Maybe ByteString) -> Vector (Maybe Bool) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Bool) asMaybeBool handleBSInt :: String -> V.Vector (Maybe BS.ByteString) -> Column handleBSInt :: [Char] -> Vector (Maybe ByteString) -> Column handleBSInt [Char] dfmt Vector (Maybe ByteString) asMaybe | Bool parsableAsInt = Column -> (Vector Int -> Column) -> Maybe (Vector Int) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Int) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Int) asMaybeInt) Vector Int -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Int) -> Maybe (Vector Int) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Int) asMaybeInt) | Bool parsableAsDouble = Column -> (Vector Double -> Column) -> Maybe (Vector Double) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Double) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Double) asMaybeDouble) Vector Double -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Double) -> Maybe (Vector Double) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. 
Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Double) asMaybeDouble) | Bool otherwise = Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybe where asMaybeInt :: Vector (Maybe Int) asMaybeInt = (Maybe ByteString -> Maybe Int) -> Vector (Maybe ByteString) -> Vector (Maybe Int) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Int) -> Maybe Int forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Int ByteString -> Maybe Int readByteStringInt) Vector (Maybe ByteString) asMaybe asMaybeDouble :: Vector (Maybe Double) asMaybeDouble = (Maybe ByteString -> Maybe Double) -> Vector (Maybe ByteString) -> Vector (Maybe Double) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Double) -> Maybe Double forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Double ByteString -> Maybe Double readByteStringDouble) Vector (Maybe ByteString) asMaybe parsableAsInt :: Bool parsableAsInt = Vector (Maybe ByteString) -> Vector (Maybe Int) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Int) asMaybeInt Bool -> Bool -> Bool && Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble parsableAsDouble :: Bool parsableAsDouble = Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. 
Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble handleBSDouble :: V.Vector (Maybe BS.ByteString) -> Column handleBSDouble :: Vector (Maybe ByteString) -> Column handleBSDouble Vector (Maybe ByteString) asMaybe | Bool parsableAsDouble = Column -> (Vector Double -> Column) -> Maybe (Vector Double) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Double) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Double) asMaybeDouble) Vector Double -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Double) -> Maybe (Vector Double) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Double) asMaybeDouble) | Bool otherwise = Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybe where asMaybeDouble :: Vector (Maybe Double) asMaybeDouble = (Maybe ByteString -> Maybe Double) -> Vector (Maybe ByteString) -> Vector (Maybe Double) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Double) -> Maybe Double forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Double ByteString -> Maybe Double readByteStringDouble) Vector (Maybe ByteString) asMaybe parsableAsDouble :: Bool parsableAsDouble = Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. 
Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble handleBSDate :: String -> V.Vector (Maybe BS.ByteString) -> Column handleBSDate :: [Char] -> Vector (Maybe ByteString) -> Column handleBSDate [Char] dfmt Vector (Maybe ByteString) asMaybe | Bool parsableAsDate = Column -> (Vector Day -> Column) -> Maybe (Vector Day) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Day) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Day) asMaybeDate) Vector Day -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Day) -> Maybe (Vector Day) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Day) asMaybeDate) | Bool otherwise = Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybe where asMaybeDate :: Vector (Maybe Day) asMaybeDate = (Maybe ByteString -> Maybe Day) -> Vector (Maybe ByteString) -> Vector (Maybe Day) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Day) -> Maybe Day forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= [Char] -> ByteString -> Maybe Day readByteStringDate [Char] dfmt) Vector (Maybe ByteString) asMaybe parsableAsDate :: Bool parsableAsDate = Vector (Maybe ByteString) -> Vector (Maybe Day) -> Bool forall a b. 
Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Day) asMaybeDate handleBSText :: V.Vector (Maybe BS.ByteString) -> Column handleBSText :: Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybe = let asMaybeText :: Vector (Maybe Text) asMaybeText = (Maybe ByteString -> Maybe Text) -> Vector (Maybe ByteString) -> Vector (Maybe Text) forall a b. (a -> b) -> Vector a -> Vector b V.map ((ByteString -> Text) -> Maybe ByteString -> Maybe Text forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ByteString -> Text TE.decodeUtf8Lenient) Vector (Maybe ByteString) asMaybe in Column -> (Vector Text -> Column) -> Maybe (Vector Text) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Text) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Text) asMaybeText) Vector Text -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Text) -> Maybe (Vector Text) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Text) asMaybeText) handleBSNo :: String -> V.Vector (Maybe BS.ByteString) -> Column handleBSNo :: [Char] -> Vector (Maybe ByteString) -> Column handleBSNo [Char] dfmt Vector (Maybe ByteString) asMaybe | (Maybe ByteString -> Bool) -> Vector (Maybe ByteString) -> Bool forall a. (a -> Bool) -> Vector a -> Bool V.all (Maybe ByteString -> Maybe ByteString -> Bool forall a. Eq a => a -> a -> Bool == Maybe ByteString forall a. Maybe a Nothing) Vector (Maybe ByteString) asMaybe = Vector (Maybe Text) -> Column forall a. 
(Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector ((Maybe ByteString -> Maybe Text) -> Vector (Maybe ByteString) -> Vector (Maybe Text) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe Text -> Maybe ByteString -> Maybe Text forall a b. a -> b -> a const (Maybe Text forall a. Maybe a Nothing :: Maybe T.Text)) Vector (Maybe ByteString) asMaybe) | Bool parsableAsBool = Column -> (Vector Bool -> Column) -> Maybe (Vector Bool) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Bool) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Bool) asMaybeBool) Vector Bool -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Bool) -> Maybe (Vector Bool) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Bool) asMaybeBool) | Bool parsableAsInt = Column -> (Vector Int -> Column) -> Maybe (Vector Int) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Int) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Int) asMaybeInt) Vector Int -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Int) -> Maybe (Vector Int) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Int) asMaybeInt) | Bool parsableAsDouble = Column -> (Vector Double -> Column) -> Maybe (Vector Double) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Double) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Double) asMaybeDouble) Vector Double -> Column forall a. 
(Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Double) -> Maybe (Vector Double) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Double) asMaybeDouble) | Bool parsableAsDate = Column -> (Vector Day -> Column) -> Maybe (Vector Day) -> Column forall b a. b -> (a -> b) -> Maybe a -> b maybe (Vector (Maybe Day) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector Vector (Maybe Day) asMaybeDate) Vector Day -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column fromVector (Vector (Maybe Day) -> Maybe (Vector Day) forall (t :: * -> *) (f :: * -> *) a. (Traversable t, Applicative f) => t (f a) -> f (t a) forall (f :: * -> *) a. Applicative f => Vector (f a) -> f (Vector a) sequenceA Vector (Maybe Day) asMaybeDate) | Bool otherwise = Vector (Maybe ByteString) -> Column handleBSText Vector (Maybe ByteString) asMaybe where asMaybeBool :: Vector (Maybe Bool) asMaybeBool = (Maybe ByteString -> Maybe Bool) -> Vector (Maybe ByteString) -> Vector (Maybe Bool) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Bool) -> Maybe Bool forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= ByteString -> Maybe Bool readByteStringBool) Vector (Maybe ByteString) asMaybe asMaybeInt :: Vector (Maybe Int) asMaybeInt = (Maybe ByteString -> Maybe Int) -> Vector (Maybe ByteString) -> Vector (Maybe Int) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Int) -> Maybe Int forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. 
Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Int ByteString -> Maybe Int readByteStringInt) Vector (Maybe ByteString) asMaybe asMaybeDouble :: Vector (Maybe Double) asMaybeDouble = (Maybe ByteString -> Maybe Double) -> Vector (Maybe ByteString) -> Vector (Maybe Double) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Double) -> Maybe Double forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= HasCallStack => ByteString -> Maybe Double ByteString -> Maybe Double readByteStringDouble) Vector (Maybe ByteString) asMaybe asMaybeDate :: Vector (Maybe Day) asMaybeDate = (Maybe ByteString -> Maybe Day) -> Vector (Maybe ByteString) -> Vector (Maybe Day) forall a b. (a -> b) -> Vector a -> Vector b V.map (Maybe ByteString -> (ByteString -> Maybe Day) -> Maybe Day forall a b. Maybe a -> (a -> Maybe b) -> Maybe b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= [Char] -> ByteString -> Maybe Day readByteStringDate [Char] dfmt) Vector (Maybe ByteString) asMaybe parsableAsBool :: Bool parsableAsBool = Vector (Maybe ByteString) -> Vector (Maybe Bool) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Bool) asMaybeBool parsableAsInt :: Bool parsableAsInt = Vector (Maybe ByteString) -> Vector (Maybe Int) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Int) asMaybeInt Bool -> Bool -> Bool && Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble parsableAsDouble :: Bool parsableAsDouble = Vector (Maybe ByteString) -> Vector (Maybe Double) -> Bool forall a b. 
Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Double) asMaybeDouble parsableAsDate :: Bool parsableAsDate = Vector (Maybe ByteString) -> Vector (Maybe Day) -> Bool forall a b. Vector (Maybe a) -> Vector (Maybe b) -> Bool vecSameConstructor Vector (Maybe ByteString) asMaybe Vector (Maybe Day) asMaybeDate

{- | Build an 'OptionalColumn' from an unboxed value vector and a validity mask.

Position @i@ of the result is 'Nothing' when @valid VU.! i == 0@ and
@Just (vec VU.! i)@ otherwise. The result has the length of @vec@; @valid@ is
indexed with 'VU.!', so it must hold at least as many entries as @vec@.
-}
constructOptional ::
    (VU.Unbox a, Columnable a) => VU.Vector a -> VU.Vector Word8 -> IO Column
constructOptional vec valid = do
    let n = VU.length vec
        cell k
            | valid VU.! k == 0 = Nothing
            | otherwise = Just (vec VU.! k)
    out <- VM.new n
    -- '$!' evaluates the validity check at write time; the payload inside
    -- 'Just' stays lazy.
    forM_ [0 .. n - 1] $ \k -> VM.write out k $! cell k
    OptionalColumn <$> V.freeze out

{- | Build an 'OptionalColumn' of 'T.Text' from a boxed value vector and a
validity mask.

Position @i@ of the result is 'Nothing' when @valid VU.! i == 0@ and
@Just (vec V.! i)@ otherwise. The result has the length of @vec@; @valid@ is
indexed with 'VU.!', so it must hold at least as many entries as @vec@.
-}
constructOptionalBoxed :: V.Vector T.Text -> VU.Vector Word8 -> IO Column
constructOptionalBoxed vec valid = do
    let n = V.length vec
        cell k
            | valid VU.! k == 0 = Nothing
            | otherwise = Just (vec V.! k)
    out <- VM.new n
    -- '$!' evaluates the validity check at write time; the payload inside
    -- 'Just' stays lazy.
    forM_ [0 .. n - 1] $ \k -> VM.write out k $! cell k
    OptionalColumn <$> V.freeze out

-- | Write a 'DataFrame' to @filepath@ using @,@ as the field separator.
writeCsv :: FilePath -> DataFrame -> IO ()
writeCsv = writeSeparated ','

-- | Write a 'DataFrame' to @filepath@ using a tab as the field separator.
writeTsv :: FilePath -> DataFrame -> IO ()
writeTsv = writeSeparated '\t'

{- | Write a 'DataFrame' to a file as separator-delimited text.

The file is opened in 'WriteMode'. The first line holds the column names
ordered by their column index; each following line holds one row as rendered
by 'getRowAsText'. Fields on a line are joined with the separator @c@.

Fields are written verbatim: a value that itself contains the separator, a
double quote or a newline is not quoted or escaped.
-}
writeSeparated ::
    -- | Separator placed between fields
    Char ->
    -- | Path to write to
    FilePath ->
    DataFrame ->
    IO ()
writeSeparated c filepath df = withFile filepath WriteMode $ \handle -> do
    let (rows, _) = dataframeDimensions df
        -- Join with the requested separator; a literal "," here would make
        -- 'writeTsv' emit comma-separated output.
        delimiter = T.singleton c
        headers = map fst (L.sortOn snd (M.toList (columnIndices df)))
        putRecord = TIO.hPutStrLn handle . T.intercalate delimiter
    putRecord headers
    forM_ [0 .. rows - 1] $ \i -> putRecord (getRowAsText df i)

{- | Render row @i@ of a 'DataFrame' as one 'T.Text' per column, in column order.

Rendering rules, per column constructor:

* 'BoxedColumn' of 'T.Text': the text itself.
* 'BoxedColumn' of @Maybe Text@: the text, or @null@ for 'Nothing'.
* 'BoxedColumn' of any other @Maybe x@: the 'show' output with the leading
  @Just @ removed, or @null@ for 'Nothing'.
* 'BoxedColumn' of anything else, and 'UnboxedColumn': the 'show' output.
* 'OptionalColumn': the text (or 'show' output) of the value, or the empty
  text for 'Nothing'.

Calls 'error' when a column has no element at index @i@.
-}
getRowAsText :: DataFrame -> Int -> [T.Text]
getRowAsText df i = V.ifoldr go [] (columns df)
  where
    -- Column position -> column name; consulted only to build error messages.
    indexMap :: M.Map Int T.Text
    indexMap =
        M.fromList [(pos, name) | (name, pos) <- M.toList (columnIndices df)]

    -- Failure shared by every constructor when the column is shorter than @i@.
    shortColumn :: Int -> [T.Text]
    shortColumn k =
        error $
            "Column "
                ++ T.unpack (indexMap M.! k)
                ++ " has less items than "
                ++ "the other columns at index "
                ++ show i

    showText :: (Show b) => b -> T.Text
    showText = T.pack . show

    -- Turns the 'show' output of a 'Maybe' into its payload, or @null@.
    unwrapShown :: T.Text -> T.Text
    unwrapShown s
        | "Just " `T.isPrefixOf` s = T.drop (T.length "Just ") s
        | otherwise = "null"

    go :: Int -> Column -> [T.Text] -> [T.Text]
    go k (BoxedColumn (c :: V.Vector a)) acc = case c V.!? i of
        Nothing -> shortColumn k
        Just e -> textRep : acc
          where
            textRep :: T.Text
            textRep = case testEquality (typeRep @a) (typeRep @T.Text) of
                Just Refl -> e
                Nothing -> case typeRep @a of
                    -- Is the element type an application of 'Maybe'?
                    App t1 t2 -> case eqTypeRep t1 (typeRep @Maybe) of
                        Just HRefl -> case testEquality t2 (typeRep @T.Text) of
                            Just Refl -> fromMaybe "null" e
                            Nothing -> unwrapShown (showText e)
                        Nothing -> showText e
                    _ -> showText e
    go k (UnboxedColumn c) acc = case c VU.!? i of
        Nothing -> shortColumn k
        Just e -> showText e : acc
    go k (OptionalColumn (c :: V.Vector (Maybe a))) acc = case c V.!? i of
        Nothing -> shortColumn k
        Just e -> cell : acc
          where
            cell :: T.Text
            cell = case testEquality (typeRep @a) (typeRep @T.Text) of
                Just Refl -> fromMaybe T.empty e
                Nothing -> maybe T.empty showText e

{- | Remove one pair of enclosing double quotes.

When the text starts with @"@ and the remainder ends with @"@, the text
between them is returned; otherwise the input is returned unchanged.
-}
stripQuotes :: T.Text -> T.Text
stripQuotes txt =
    fromMaybe txt (T.stripPrefix "\"" txt >>= T.stripSuffix "\"")