Processing larger amounts of data, compression, serialisation and calling C.
An elaboration of the previous example: * Build a largish structure in Haskell * Compress it in memory * Serialise it to disk * Deserialise it * Decompress * Pass it to C * Display the result Pretty common pattern for low level stuff. We use zlib + lazy bytestrings for streaming decompression, and Data.Binary for the serialisation. We will use * Foreign.* to generate the data * Wrap it as a lazy bytestring * Data.Binary to serialise it * Code.Compression.Gzip to compress/uncompress * Pass it to C and make a simple FFI call on the result * Display the result Running: $ ghc -O2 A.hs --make $ time ./A Built table Compressed 25600000 bytes Compressed size 2231545 bytes (91.28%) Decompressed 25600000 bytes Calling into C ... -8.742278e-8 -0.6865875 -0.7207948 -0.1401903 0.63918984 0.7437966 0.27236375 -0.5763547 -0.75708854 -0.39026973 ./A 2.98s user 0.11s system 94% cpu 3.275 total The code: {-# OPTIONS -fglasgow-exts #-} -- -- Some imports -- import Foreign import Foreign.C.Types import Data.Int import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Base as S import qualified Data.ByteString as S import Data.Binary import Codec.Compression.GZip import System.IO import Text.Printf import Control.Monad ------------------------------------------------------------------------ -- Foreign Ptrs -- -- A simple wrapper type -- data Table = Table { floats :: ForeignPtr CFloat , ints :: ForeignPtr Int } -- Statically fixed sizes floatSize = 4800000 intSize = 1600000 totalBytes = sizeOf (undefined :: CFloat) * floatSize + sizeOf (undefined :: Int) * intSize -- -- Build a table populated with some defaults -- Float table filled with 'pi' , ints numbered consecutively -- newTable :: IO Table newTable = do fp <- S.mallocByteString (floatSize * sizeOf (undefined :: CFloat)) ip <- S.mallocByteString (intSize * sizeOf (undefined :: Int )) withForeignPtr fp $ \p -> forM_ [0..floatSize-1] $ \n -> pokeElemOff p n pi withForeignPtr ip $ \p -> forM_ [0..intSize-1] $ \n -> pokeElemOff p n n return (Table fp ip) ------------------------------------------------------------------------ -- Lazy ByteStrings -- -- Convert ForeignPtr a to and from a lazy ByteString -- toByteString :: Storable a => ForeignPtr a -> Int -> L.ByteString toByteString (fp :: ForeignPtr a) n = L.fromChunks . (:[]) $ S.fromForeignPtr (castForeignPtr fp) (n * sizeOf (undefined :: a)) -- -- Flatten a lazy bytestring back to a ForeignPtr. -- fromByteString :: Storable a => L.ByteString -> ForeignPtr a fromByteString lbs = castForeignPtr fp where (fp,_,n) = S.toForeignPtr . S.concat $ L.toChunks lbs ------------------------------------------------------------------------ -- GZip and Data.Binary -- -- Serialise a Table, compressing with gzip it as we go: -- instance Binary Table where put (Table f i) = do put . compress . toByteString f $ floatSize put . compress . toByteString i $ intSize get = do fs <- liftM decompress get is <- liftM decompress get -- check we read the correct amount: if L.length fs + L.length is == fromIntegral totalBytes then return $ Table (fromByteString fs) (fromByteString is) else error "Partial read" ------------------------------------------------------------------------ -- FFI -- -- Example call to process the data using C functions. -- rounded :: Int -> ForeignPtr CFloat -> IO [CFloat] rounded l fp = withForeignPtr fp $ \p -> go p where go p = forM [0..l-1] $ \n -> do v <- peekElemOff p n return $ c_tanhf (c_sinf (v + fromIntegral n)) -- A random C function to use: foreign import ccall unsafe "math.h sinf" c_sinf :: CFloat -> CFloat foreign import ccall unsafe "math.h tanhf" c_tanhf :: CFloat -> CFloat ------------------------------------------------------------------------ -- -- Now glue it all together -- main = do table <- newTable putStrLn "Built table" -- write the data to disk, compressed with gzip as we go. encodeFile "/tmp/table.gz" table printf "Compressed %d bytes\n" totalBytes -- how good was the compression? h <- openFile "/tmp/table.gz" ReadMode n <- hFileSize h hClose h printf "Compressed size %d bytes (%0.2f%%)\n" n (100 - (fromIntegral n/fromIntegral totalBytes*100) :: Double) -- load it back in, decompressing on the fly table' <- decodeFile "/tmp/table.gz" printf "Decompressed %d bytes\n" totalBytes -- now process the floats with C printf "Calling into C ...\n" ps <- rounded 10 (floats table') forM_ ps print _______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe