zip-stream-0.2.2.0/Codec/0000755000000000000000000000000013104361317013146 5ustar0000000000000000zip-stream-0.2.2.0/Codec/Archive/0000755000000000000000000000000013104361317014527 5ustar0000000000000000zip-stream-0.2.2.0/Codec/Archive/Zip/0000755000000000000000000000000013104411667015275 5ustar0000000000000000zip-stream-0.2.2.0/Codec/Archive/Zip/Conduit/0000755000000000000000000000000014336716710016707 5ustar0000000000000000zip-stream-0.2.2.0/cmd/0000755000000000000000000000000014016251300012665 5ustar0000000000000000zip-stream-0.2.2.0/tests/0000755000000000000000000000000014336716710013304 5ustar0000000000000000zip-stream-0.2.2.0/Codec/Archive/Zip/Conduit/Types.hs0000644000000000000000000000575014016251300020336 0ustar0000000000000000module Codec.Archive.Zip.Conduit.Types where import Control.Exception (Exception(..)) import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C import Data.Conduit.Binary (sourceLbs) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) import qualified Data.Text as T import Data.Time.LocalTime (LocalTime) import Data.Typeable (Typeable) import Data.Word (Word32, Word64) -- |Errors thrown during zip file processing newtype ZipError = ZipError String deriving (Show, Typeable) instance IsString ZipError where fromString = ZipError instance Exception ZipError where displayException (ZipError e) = "ZipError: " ++ e -- |Summary information at the end of a zip stream. data ZipInfo = ZipInfo { zipComment :: ByteString } deriving (Eq, Show) -- |(The beginning of) a single entry in a zip stream, which may be any file or directory. -- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that. data ZipEntry = ZipEntry { zipEntryName :: Either T.Text ByteString -- ^File name (in posix format, no leading slashes), either UTF-8 encoded text or raw bytes (CP437), with a trailing slash for directories , zipEntryTime :: LocalTime -- ^Modification time , zipEntrySize :: Maybe Word64 -- ^Size of file data (if known); checked on zipping and also used as hint to enable zip64. Disables compression for known 0-byte files. , zipEntryExternalAttributes :: Maybe Word32 -- ^Host-dependent attributes, often MS-DOS directory attribute byte (only supported when zipping) } deriving (Eq, Show) -- |The data contents for a 'ZipEntry'. For empty entries (e.g., directories), use 'mempty'. data ZipData m = ZipDataByteString BSL.ByteString -- ^A known ByteString, which will be fully evaluated (not streamed) | ZipDataSource (C.ConduitM () ByteString m ()) -- ^A byte stream producer, streamed (and compressed) directly into the zip instance Monad m => Semigroup (ZipData m) where ZipDataByteString a <> ZipDataByteString b = ZipDataByteString $ mappend a b a <> b = ZipDataSource $ mappend (sourceZipData a) (sourceZipData b) instance Monad m => Monoid (ZipData m) where mempty = ZipDataByteString BSL.empty mappend = (<>) -- |Normalize any 'ZipData' to a simple source sourceZipData :: Monad m => ZipData m -> C.ConduitM () ByteString m () sourceZipData (ZipDataByteString b) = sourceLbs b sourceZipData (ZipDataSource s) = s -- |Convert between unpacked (as 'Codec.Archive.Zip.Conduit.UnZip.unZipStream' produces) and packed (as 'Codec.Archive.Zip.Conduit.Zip.zipStream' consumes) representations. -- This is mainly for testing purposes, or if you really want to re-zip a stream on the fly for some reason. -- Note that each 'ZipData' must be consumed completely before the next entry can be produced. -- packZipEntries :: C.Conduit (Either ZipEntry BS.ByteString) m (ZipEntry, ZipData m) zip-stream-0.2.2.0/Codec/Archive/Zip/Conduit/UnZip.hs0000644000000000000000000002246114016251330020300 0ustar0000000000000000-- |Stream the extraction of a zip file, e.g., as it's being downloaded. {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RankNTypes #-} module Codec.Archive.Zip.Conduit.UnZip ( unZipStream , ZipEntry(..) , ZipInfo(..) ) where import Control.Applicative ((<|>), empty) import Control.Monad (when, unless, guard) import Control.Monad.Catch (MonadThrow) import Control.Monad.Primitive (PrimMonad) import qualified Data.Binary.Get as G import Data.Bits ((.&.), testBit, clearBit, shiftL, shiftR) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.Combinators as CC import Data.Conduit.Serialization.Binary (sinkGet) import qualified Data.Conduit.Zlib as CZ import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) import Data.Word (Word16, Word32, Word64) import Codec.Archive.Zip.Conduit.Types import Codec.Archive.Zip.Conduit.Internal data Header m = FileHeader { fileDecompress :: C.ConduitM BS.ByteString BS.ByteString m () , fileEntry :: !ZipEntry , fileCRC :: !Word32 , fileCSize :: !Word64 , fileZip64 :: !Bool } | EndOfCentralDirectory { endInfo :: ZipInfo } data ExtField = ExtField { extZip64 :: Bool , extZip64USize , extZip64CSize :: Word64 } {- ExtUnix { extUnixATime , extUnixMTime :: UTCTime , extUnixUID , extUnixGID :: Word16 , extUnixData :: BS.ByteString } -} pass :: (MonadThrow m, Integral n) => n -> C.ConduitM BS.ByteString BS.ByteString m () pass 0 = return () pass n = C.await >>= maybe (zipError $ "EOF in file data, expecting " ++ show ni ++ " more bytes") (\b -> let n' = ni - toInteger (BS.length b) in if n' < 0 then do let (b', r) = BS.splitAt (fromIntegral n) b C.yield b' C.leftover r else do C.yield b pass n') where ni = toInteger n foldGet :: (a -> G.Get a) -> a -> G.Get a foldGet g z = do e <- G.isEmpty if e then return z else g z >>= foldGet g fromDOSTime :: Word16 -> Word16 -> LocalTime fromDOSTime time date = LocalTime (fromGregorian (fromIntegral $ date `shiftR` 9 + 1980) (fromIntegral $ date `shiftR` 5 .&. 0x0f) (fromIntegral $ date .&. 0x1f)) (TimeOfDay (fromIntegral $ time `shiftR` 11) (fromIntegral $ time `shiftR` 5 .&. 0x3f) (fromIntegral $ time `shiftL` 1 .&. 0x3f)) -- |Stream process a zip file, producing a sequence of entry headers and data blocks. -- For example, this might produce: @Left (ZipEntry "directory\/" ...), Left (ZipEntry "directory\/file.txt" ...), Right "hello w", Right "orld!\\n", Left ...@ -- The final result is summary information taken from the end of the zip file. -- No state is maintained during processing, and, in particular, any information in the central directory is discarded. -- -- This only supports a limited number of zip file features, including deflate compression and zip64. -- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand. -- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case. -- Any errors are thrown in the underlying monad (as 'ZipError's or 'Data.Conduit.Serialization.Binary.ParseError'). unZipStream :: ( MonadThrow m , PrimMonad m ) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo unZipStream = next where next = do -- local header, or start central directory h <- sinkGet $ do sig <- G.getWord32le case sig of 0x04034b50 -> fileHeader _ -> centralBody sig case h of FileHeader{..} -> do C.yield $ Left fileEntry r <- C.mapOutput Right $ case zipEntrySize fileEntry of Nothing -> do -- unknown size (csize, (size, crc)) <- inputSize fileDecompress `C.fuseBoth` sizeCRC -- traceM $ "csize=" ++ show csize ++ " size=" ++ show size ++ " crc=" ++ show crc -- required data description sinkGet $ dataDesc h { fileCSize = csize , fileCRC = crc , fileEntry = fileEntry { zipEntrySize = Just size } } Just usize -> do -- known size (size, crc) <- pass fileCSize C..| (fileDecompress >> CC.sinkNull) C..| sizeCRC -- traceM $ "size=" ++ show size ++ "," ++ show (zipEntrySize fileEntry) ++ " crc=" ++ show crc ++ "," ++ show fileCRC -- optional data description (possibly ambiguous!) sinkGet $ (guard =<< dataDesc h) <|> return () return (size == usize && crc == fileCRC) unless r $ zipError $ either T.unpack BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed" next EndOfCentralDirectory{..} -> do return endInfo dataDesc h = -- this takes a bit of flexibility to account for the various cases (do -- with signature sig <- G.getWord32le guard (sig == 0x08074b50) dataDescBody h) <|> dataDescBody h -- without signature dataDescBody FileHeader{..} = do crc <- G.getWord32le let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le csiz <- getSize usiz <- getSize -- traceM $ "crc=" ++ show crc ++ "," ++ show fileCRC ++ " csiz=" ++ show csiz ++ "," ++ show fileCSize ++ " usiz=" ++ show usiz ++ "," ++ show (zipEntrySize fileEntry) return $ crc == fileCRC && csiz == fileCSize && (usiz ==) `all` zipEntrySize fileEntry dataDescBody _ = empty central = G.getWord32le >>= centralBody centralBody 0x02014b50 = centralHeader >> central centralBody 0x06064b50 = zip64EndDirectory >> central centralBody 0x07064b50 = G.skip 16 >> central centralBody 0x06054b50 = EndOfCentralDirectory <$> endDirectory centralBody sig = fail $ "Unknown header signature: " ++ show sig fileHeader = do ver <- G.getWord8 _os <- G.getWord8 -- OS Version (could require 0 = DOS, but we ignore ext attrs altogether) when (ver > zipVersion) $ fail $ "Unsupported version: " ++ show ver gpf <- G.getWord16le -- when (gpf .&. complement (bit 1 .|. bit 2 .|. bit 3) /= 0) $ fail $ "Unsupported flags: " ++ show gpf when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 `clearBit` 11 /= 0) $ fail $ "Unsupported flags: " ++ show gpf comp <- G.getWord16le dcomp <- case comp of 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" | otherwise -> return idConduit 8 -> return $ CZ.decompress deflateWindowBits _ -> fail $ "Unsupported compression method: " ++ show comp time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le crc <- G.getWord32le csiz <- G.getWord32le usiz <- G.getWord32le nlen <- fromIntegral <$> G.getWord16le elen <- fromIntegral <$> G.getWord16le name <- G.getByteString nlen let getExt ext = do t <- G.getWord16le z <- fromIntegral <$> G.getWord16le G.isolate z $ case t of 0x0001 -> do -- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF" usiz' <- if usiz == maxBound32 then G.getWord64le else return $ extZip64USize ext csiz' <- if csiz == maxBound32 then G.getWord64le else return $ extZip64CSize ext return ext { extZip64 = True , extZip64USize = usiz' , extZip64CSize = csiz' } {- 0x000d -> do atim <- G.getWord32le mtim <- G.getWord32le uid <- G.getWord16le gid <- G.getWord16le dat <- G.getByteString $ z - 12 return ExtUnix { extUnixATime = posixSecondsToUTCTime atim , extUnixMTime = posixSecondsToUTCTime mtim , extUnixUID = uid , extUnixGID = gid , extUnixData = dat } -} _ -> ext <$ G.skip z ExtField{..} <- G.isolate elen $ foldGet getExt ExtField { extZip64 = False , extZip64USize = fromIntegral usiz , extZip64CSize = fromIntegral csiz } return FileHeader { fileEntry = ZipEntry { zipEntryName = if testBit gpf 11 then Left (TE.decodeUtf8 name) else Right name , zipEntryTime = time , zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize , zipEntryExternalAttributes = Nothing } , fileDecompress = dcomp , fileCSize = extZip64CSize , fileCRC = crc , fileZip64 = extZip64 } centralHeader = do -- ignore everything G.skip 24 nlen <- fromIntegral <$> G.getWord16le elen <- fromIntegral <$> G.getWord16le clen <- fromIntegral <$> G.getWord16le G.skip $ 12 + nlen + elen + clen zip64EndDirectory = do len <- G.getWord64le G.skip $ fromIntegral len -- would not expect to overflow... endDirectory = do G.skip 16 clen <- fromIntegral <$> G.getWord16le comm <- G.getByteString clen return ZipInfo { zipComment = comm } zip-stream-0.2.2.0/Codec/Archive/Zip/Conduit/Zip.hs0000644000000000000000000002553114336716710020013 0ustar0000000000000000-- |Stream the creation of a zip file, e.g., as it's being uploaded. {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE BangPatterns #-} module Codec.Archive.Zip.Conduit.Zip ( zipStream , ZipOptions(..) , ZipInfo(..) , defaultZipOptions , ZipEntry(..) , ZipData(..) , zipFileData ) where import qualified Codec.Compression.Zlib.Raw as Z import Control.Arrow ((&&&), (+++), left) import Control.DeepSeq (force) import Control.Monad (when) import Control.Monad.Catch (MonadThrow) import Control.Monad.Primitive (PrimMonad) import Control.Monad.State.Strict (StateT, get) import Control.Monad.Trans.Resource (MonadResource) import qualified Data.Binary.Put as P import Data.Bits (bit, shiftL, shiftR, (.|.)) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Combinators as CC import Data.Conduit.Lift (stateC, execStateC) import Data.Conduit.Serialization.Binary (sourcePut) import qualified Data.Conduit.Zlib as CZ import Data.Digest.CRC32 (crc32) import Data.Either (isLeft) import Data.Maybe (fromMaybe, fromJust) import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian) import Data.Word (Word16, Word32, Word64) import Codec.Archive.Zip.Conduit.Types import Codec.Archive.Zip.Conduit.Internal -- |Options controlling zip file parameters and features data ZipOptions = ZipOptions { zipOpt64 :: !Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize') , zipOptCompressLevel :: !Int -- ^Compress zipped files (0 = store only, 1 = minimal, 9 = best; non-zero improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package) , zipOptInfo :: !ZipInfo -- ^Other parameters to store in the zip file } defaultZipOptions :: ZipOptions defaultZipOptions = ZipOptions { zipOpt64 = False , zipOptCompressLevel = -1 , zipOptInfo = ZipInfo { zipComment = BS.empty } } infixr 7 ?* (?*) :: Num a => Bool -> a -> a True ?* x = x False ?* _ = 0 -- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CC.sourceFile'@). zipFileData :: MonadResource m => FilePath -> ZipData m zipFileData = ZipDataSource . CC.sourceFile zipData :: Monad m => ZipData m -> Either (C.ConduitM () BS.ByteString m ()) BSL.ByteString zipData (ZipDataByteString b) = Right b zipData (ZipDataSource s) = Left s dataSize :: Either a BSL.ByteString -> Maybe Word64 dataSize (Left _) = Nothing dataSize (Right b) = Just $ fromIntegral $ BSL.length b toDOSTime :: LocalTime -> (Word16, Word16) toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins secs)) = ( fromIntegral hour `shiftL` 11 .|. fromIntegral mins `shiftL` 5 .|. truncate secs `shiftR` 1 , fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day ) countOutput :: Monad m => C.ConduitM i BS.ByteString m () -> C.ConduitM i BS.ByteString (StateT Word64 m) () countOutput c = stateC $ \s -> (,) () . (s +) <$> outputSize c output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) () output = countOutput . sourcePut maxBound16 :: Integral n => n maxBound16 = fromIntegral (maxBound :: Word16) data CommonFileHeaderInfo = CommonFileHeaderInfo { cfhiIsStreamingEntry :: !Bool , cfhiHasUtf8Filename :: !Bool , cfhiIsCompressed :: !Bool , cfhiTime :: !Word16 , cfhiDate :: !Word16 } deriving (Eq, Ord, Show) putCommonFileHeaderPart :: CommonFileHeaderInfo -> P.PutM () putCommonFileHeaderPart CommonFileHeaderInfo{..} = do P.putWord16le $ cfhiIsStreamingEntry ?* bit 3 .|. cfhiHasUtf8Filename ?* bit 11 P.putWord16le $ cfhiIsCompressed ?* 8 P.putWord16le $ cfhiTime P.putWord16le $ cfhiDate -- | This is retained in memory until the end of the archive is written. -- -- To avoid space leaks, this should contain only strict data. data CentralDirectoryInfo = CentralDirectoryInfo { cdiOff :: !Word64 , cdiZ64 :: !Bool , cdiCommonFileHeaderInfo :: !CommonFileHeaderInfo , cdiCrc :: !Word32 , cdiUsz :: !Word64 , cdiName :: !BSC.ByteString , cdiCsz :: !Word64 , cdiZipEntryExternalAttributes :: !(Maybe Word32) -- lazy Maybe must be e.g. via `force` at creation } deriving (Eq, Ord, Show) putCentralDirectory :: CentralDirectoryInfo -> P.PutM () putCentralDirectory CentralDirectoryInfo{..} = do -- central directory let o64 = cdiOff >= maxBound32 l64 = cdiZ64 ?* 16 + o64 ?* 8 a64 = cdiZ64 || o64 P.putWord32le 0x02014b50 P.putWord8 zipVersion P.putWord8 osVersion P.putWord8 $ if a64 then 45 else 20 P.putWord8 osVersion putCommonFileHeaderPart cdiCommonFileHeaderInfo P.putWord32le cdiCrc P.putWord32le $ if cdiZ64 then maxBound32 else fromIntegral cdiCsz P.putWord32le $ if cdiZ64 then maxBound32 else fromIntegral cdiUsz P.putWord16le $ fromIntegral (BS.length cdiName) P.putWord16le $ a64 ?* (4 + l64) P.putWord16le 0 -- comment length P.putWord16le 0 -- disk number P.putWord16le 0 -- internal file attributes P.putWord32le $ fromMaybe 0 cdiZipEntryExternalAttributes P.putWord32le $ if o64 then maxBound32 else fromIntegral cdiOff P.putByteString cdiName when a64 $ do P.putWord16le 0x0001 P.putWord16le l64 when cdiZ64 $ do P.putWord64le cdiUsz P.putWord64le cdiCsz when o64 $ P.putWord64le cdiOff -- |Stream produce a zip file, reading a sequence of entries with data. -- Although file data is never kept in memory (beyond a single 'ZipDataByteString'), the format of zip files requires producing a final directory of entries at the end of the file, consuming an additional ~100 bytes of state per entry during streaming. -- The final result is the total size of the zip file. -- -- Depending on options, the resulting zip file should be compatible with most unzipping applications. -- Any errors are thrown in the underlying monad (as 'ZipError's). zipStream :: ( MonadThrow m , PrimMonad m ) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64 zipStream ZipOptions{..} = execStateC 0 $ do (cnt, cdir) <- next 0 (return ()) cdoff <- get output cdir eoff <- get endDirectory cdoff (eoff - cdoff) cnt where next cnt dir = C.await >>= maybe (return (cnt, dir)) (\e -> do d <- entry e next (succ cnt) $ dir >> d) entry (ZipEntry{..}, zipData -> dat) = do let usiz = dataSize dat sdat = left (\x -> C.toProducer x C..| sizeCRC) dat cfhiIsCompressed = zipOptCompressLevel /= 0 && all (0 /=) usiz && all (0 /=) zipEntrySize cfhiIsStreamingEntry = isLeft dat compressPlainBs = Z.compressWith Z.defaultCompressParams { Z.compressLevel = if zipOptCompressLevel == -1 then Z.defaultCompression else Z.compressionLevel zipOptCompressLevel } (cdat, csiz) | cfhiIsCompressed = ( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits)) +++ compressPlainBs) sdat , dataSize cdat) | otherwise = (left (fmap (id &&& fst)) sdat, usiz) cdiZ64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize) (maxBound32 <) (max <$> usiz <*> csiz) cfhiHasUtf8Filename = isLeft zipEntryName cdiName = either TE.encodeUtf8 id zipEntryName namelen = BS.length cdiName (cfhiTime, cfhiDate) = toDOSTime zipEntryTime mcrc = either (const Nothing) (Just . crc32) dat !cdiCommonFileHeaderInfo = CommonFileHeaderInfo{..} when (namelen > maxBound16) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": entry name too long" cdiOff <- get output $ do P.putWord32le 0x04034b50 P.putWord8 $ if cdiZ64 then 45 else 20 P.putWord8 osVersion putCommonFileHeaderPart cdiCommonFileHeaderInfo P.putWord32le $ fromMaybe 0 mcrc P.putWord32le $ if cdiZ64 then maxBound32 else maybe 0 fromIntegral csiz P.putWord32le $ if cdiZ64 then maxBound32 else maybe 0 fromIntegral usiz P.putWord16le $ fromIntegral namelen P.putWord16le $ cdiZ64 ?* 20 P.putByteString cdiName when cdiZ64 $ do P.putWord16le 0x0001 P.putWord16le 16 P.putWord64le $ fromMaybe 0 usiz P.putWord64le $ fromMaybe 0 csiz let outsz c = stateC $ \(!o) -> (id &&& (o +) . snd) <$> c ((cdiUsz, cdiCrc), cdiCsz) <- either (\cd -> do r@((usz, crc), csz) <- outsz cd -- write compressed data when (not cdiZ64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" output $ do P.putWord32le 0x08074b50 P.putWord32le crc let putsz | cdiZ64 = P.putWord64le | otherwise = P.putWord32le . fromIntegral putsz csz putsz usz return r) (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) cdat when (any (cdiUsz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" let !centralDirectoryInfo = CentralDirectoryInfo { cdiZipEntryExternalAttributes = force zipEntryExternalAttributes , .. } return $ putCentralDirectory centralDirectoryInfo endDirectory cdoff cdlen cnt = do let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16 when z64 $ output $ do P.putWord32le 0x06064b50 -- zip64 end P.putWord64le 44 -- length of this record P.putWord8 zipVersion P.putWord8 osVersion P.putWord8 45 P.putWord8 osVersion P.putWord32le 0 -- disk P.putWord32le 0 -- central disk P.putWord64le cnt P.putWord64le cnt P.putWord64le cdlen P.putWord64le cdoff P.putWord32le 0x07064b50 -- locator: P.putWord32le 0 -- central disk P.putWord64le $ cdoff + cdlen P.putWord32le 1 -- total disks let comment = zipComment zipOptInfo commlen = BS.length comment when (commlen > maxBound16) $ zipError "comment too long" output $ do P.putWord32le 0x06054b50 -- end P.putWord16le 0 -- disk P.putWord16le 0 -- central disk P.putWord16le $ fromIntegral $ min maxBound16 cnt P.putWord16le $ fromIntegral $ min maxBound16 cnt P.putWord32le $ fromIntegral $ min maxBound32 cdlen P.putWord32le $ fromIntegral $ min maxBound32 cdoff P.putWord16le $ fromIntegral commlen P.putByteString comment zip-stream-0.2.2.0/Codec/Archive/Zip/Conduit/Internal.hs0000644000000000000000000000445214016251330021007 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} module Codec.Archive.Zip.Conduit.Internal ( osVersion, zipVersion , zipError , idConduit , sizeCRC , outputSize , inputSize , maxBound32 , deflateWindowBits ) where import Codec.Compression.Zlib.Raw (WindowBits(..)) import Control.Monad.Catch (MonadThrow, throwM) import qualified Data.ByteString as BS import qualified Data.Conduit as C import qualified Data.Conduit.Internal as CI import Data.Digest.CRC32 (crc32Update) import Data.Word (Word8, Word32, Word64) import Codec.Archive.Zip.Conduit.Types -- | The version of this zip program, really just rough indicator of compatibility zipVersion :: Word8 zipVersion = 48 -- | The OS this implementation tries to be compatible to osVersion :: Word8 osVersion = 0 -- DOS zipError :: MonadThrow m => String -> m a zipError = throwM . ZipError idConduit :: Monad m => C.ConduitT a a m () idConduit = C.awaitForever C.yield passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitT b b m a passthroughFold f !z = C.await >>= maybe (return z) (\x -> do C.yield x passthroughFold f (f z x)) sizeCRC :: Monad m => C.ConduitT BS.ByteString BS.ByteString m (Word64, Word32) sizeCRC = passthroughFold (\(!l, !c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0) sizeC :: Monad m => C.ConduitT BS.ByteString BS.ByteString m Word64 sizeC = passthroughFold (\l b -> l + fromIntegral (BS.length b)) 0 -- fst <$> sizeCRC outputSize :: Monad m => C.ConduitT i BS.ByteString m () -> C.ConduitT i BS.ByteString m Word64 outputSize = (C..| sizeC) inputSize :: Monad m => C.ConduitT BS.ByteString o m () -> C.ConduitT BS.ByteString o m Word64 -- inputSize = fuseUpstream sizeC -- won't work because we need to deal with leftovers properly inputSize (CI.ConduitT src) = CI.ConduitT $ \rest -> let go n (CI.Done ()) = rest n go n (CI.PipeM m) = CI.PipeM $ go n <$> m go n (CI.Leftover p b) = CI.Leftover (go (n - fromIntegral (BS.length b)) p) b go n (CI.HaveOutput p o) = CI.HaveOutput (go n p) o go n (CI.NeedInput p q) = CI.NeedInput (\b -> go (n + fromIntegral (BS.length b)) (p b)) (go n . q) in go 0 (src CI.Done) maxBound32 :: Integral n => n maxBound32 = fromIntegral (maxBound :: Word32) deflateWindowBits :: WindowBits deflateWindowBits = WindowBits (-15) zip-stream-0.2.2.0/cmd/zip.hs0000644000000000000000000000625713267666210014056 0ustar0000000000000000{-# LANGUAGE CPP #-} import Control.Arrow ((+++)) import Control.Monad (filterM, void) import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.Trans.Resource (MonadResource, runResourceT) import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB import Data.List (foldl') import qualified Data.Text as T import Data.Time.LocalTime (utcToLocalTime, utc) import qualified System.Console.GetOpt as Opt import System.Directory (doesDirectoryExist, getModificationTime #if MIN_VERSION_directory(1,2,6) #if MIN_VERSION_directory(1,3,0) , pathIsSymbolicLink #else , isSymbolicLink #endif , listDirectory #else , getDirectoryContents #endif ) import System.Environment (getProgName, getArgs) import System.Exit (exitFailure) import System.FilePath.Posix (()) -- zip files only want forward slashes import System.IO (stdout, hPutStrLn, stderr) import Codec.Archive.Zip.Conduit.Zip opts :: [Opt.OptDescr (ZipOptions -> ZipOptions)] opts = [ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = read l }) "LEVEL") "set compression level for files (0-9)" , Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = 0 })) "don't compress files (-z0)" , Opt.Option "e" ["zip64"] (Opt.NoArg (\o -> o{ zipOpt64 = True })) "enable zip64 support for files over 4GB" , Opt.Option "c" ["comment"] (Opt.ReqArg (\c o -> o{ zipOptInfo = (zipOptInfo o){ zipComment = BSC.pack c }}) "TEXT") "set zip comment" ] generate :: (MonadIO m, MonadResource m) => [FilePath] -> C.ConduitM () (ZipEntry, ZipData m) m () generate (p:paths) = do t <- liftIO $ getModificationTime p let e = ZipEntry { zipEntryName = Right $ BSC.pack $ dropWhile ('/' ==) p , zipEntryTime = utcToLocalTime utc t -- FIXME: timezone , zipEntrySize = Nothing , zipEntryExternalAttributes = Nothing } isd <- liftIO $ doesDirectoryExist p if isd then do dl <- liftIO $ #if MIN_VERSION_directory(1,2,6) filterM (fmap not . #if MIN_VERSION_directory(1,3,0) pathIsSymbolicLink #else isSymbolicLink #endif ) . map (p ) =<< listDirectory p #else filter (`notElem` [".",".."]) . map (p ) <$> getDirectoryContents p #endif C.yield (e{ zipEntryName = (`T.snoc` '/') +++ (`BSC.snoc` '/') $ zipEntryName e, zipEntrySize = Just 0 }, mempty) generate $ dl ++ paths else do C.yield (e, zipFileData p) generate paths generate [] = return () main :: IO () main = do prog <- getProgName args <- getArgs (opt, paths) <- case Opt.getOpt Opt.Permute opts args of (ol, paths@(_:_), []) -> return (foldl' (flip ($)) defaultZipOptions ol, paths) (_, _, err) -> do mapM_ (hPutStrLn stderr) err hPutStrLn stderr $ Opt.usageInfo ("Usage: " ++ prog ++ " [OPTION...] PATH ...\nWrite a zip file to stdout containing the given files or directories (recursively).") opts exitFailure runResourceT $ C.runConduit $ generate paths C..| void (zipStream opt) C..| CB.sinkHandle stdout zip-stream-0.2.2.0/cmd/unzip.hs0000644000000000000000000000443014016251300014367 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE RecordWildCards #-} import Control.Monad (when, unless) import Control.Monad.IO.Class (liftIO) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB import qualified Data.Text as T import qualified Data.Text.IO as TIO import Data.Time.LocalTime (localTimeToUTC, utc) import Data.Void (Void) import System.Directory (createDirectoryIfMissing #if MIN_VERSION_directory(1,2,3) , setModificationTime #endif ) import System.Environment (getProgName, getArgs) import System.Exit (exitFailure) import System.FilePath.Posix (takeDirectory) -- zip files only use forward slashes import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr) import Codec.Archive.Zip.Conduit.UnZip extract :: C.ConduitM (Either ZipEntry BS.ByteString) Void IO () extract = C.awaitForever start where start (Left ZipEntry{..}) = do liftIO $ either TIO.putStrLn BSC.putStrLn zipEntryName liftIO $ createDirectoryIfMissing True (takeDirectory name) if either T.last BSC.last zipEntryName == '/' then when ((0 /=) `any` zipEntrySize) $ fail $ name ++ ": non-empty directory" else do -- C.bracketP h <- liftIO $ openFile name WriteMode mapM_ (liftIO . hSetFileSize h . toInteger) zipEntrySize write C..| CB.sinkHandle h liftIO $ hClose h #if MIN_VERSION_directory(1,2,3) liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone #endif where name = either (T.unpack . T.dropWhile ('/' ==)) (BSC.unpack . BSC.dropWhile ('/' ==)) zipEntryName start (Right _) = fail "Unexpected leading or directory data contents" write = C.await >>= maybe (return ()) block block (Right b) = C.yield b >> write block a = C.leftover a main :: IO () main = do prog <- getProgName args <- getArgs unless (null args) $ do hPutStrLn stderr $ "Usage: " ++ prog ++ "\nRead a zip file from stdin and extract it in the current directory." exitFailure ZipInfo{..} <- C.runConduit $ CB.sourceHandle stdin C..| C.fuseUpstream unZipStream extract BSC.putStrLn zipComment zip-stream-0.2.2.0/tests/Main.hs0000644000000000000000000000445014336716710014527 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} module Main (main) where import Control.Monad (when, void) import Control.Monad.IO.Class (liftIO) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C import Data.Conduit.Combinators (sinkNull) import Data.Foldable (for_) import qualified Data.Text as T import Data.Time.LocalTime (utc, utcToLocalTime) import Data.Time.Clock.POSIX (posixSecondsToUTCTime) import GHC.Stats (getRTSStats, RTSStats(..), GCDetails(..)) import System.Mem (performMajorGC) import Test.Hspec (hspec, describe, it) import Codec.Archive.Zip.Conduit.Zip main :: IO () main = hspec $ do describe "zipping" $ do it "ZipDataByteString streams in constant memory" $ do C.runConduitRes $ (do -- Stream 1000 * 4 MiB = 4 GiB for_ [(1::Int)..1024] $ \i -> do -- `bs` needs to depend on loop variable `i`, otherwise GHC may hoist -- it out of the loop ("floating"), making the memory constant -- even for incorrect implementations, thus making the test useless. let !bs = BS.replicate (4 * 1024 * 1024) (fromIntegral i) -- 4 MiB C.yield ( ZipEntry { zipEntryName = Left ("file-" <> T.pack (show i) <> ".bin") , zipEntryTime = utcToLocalTime utc (posixSecondsToUTCTime 0) , zipEntrySize = Nothing , zipEntryExternalAttributes = Nothing } , ZipDataByteString (BSL.fromStrict bs) -- `copy` to avoid sharing ) liftIO $ do -- GC every 40 MB to make it easy to observe constant memory. when (i `mod` 10 == 0) performMajorGC RTSStats{ gc = GCDetails{ gcdetails_live_bytes } } <- getRTSStats when (gcdetails_live_bytes > 3 * 1024 * 1024 * 1024) $ do -- 3 GiB error $ "Memory usage too high (" ++ show gcdetails_live_bytes ++ " B), probably streaming is not constant-memory" ) C..| void (zipStream defaultZipOptions{ zipOptCompressLevel = 0 }) C..| sinkNull :: IO () zip-stream-0.2.2.0/LICENSE0000644000000000000000000000275613104360702013145 0ustar0000000000000000Copyright Dylan Simon (c) 2017 All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Dylan Simon nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.zip-stream-0.2.2.0/Setup.hs0000644000000000000000000000005613104360702013563 0ustar0000000000000000import Distribution.Simple main = defaultMain zip-stream-0.2.2.0/zip-stream.cabal0000644000000000000000000000412614336717420015223 0ustar0000000000000000name: zip-stream version: 0.2.2.0 synopsis: ZIP archive streaming using conduits description: Process (extract and create) zip files as streams (e.g., over the network), accessing contained files without having to write the zip file to disk (unlike zip-conduit). license: BSD3 license-file: LICENSE author: Dylan Simon maintainer: dylan@dylex.net copyright: 2017 category: Codec, Conduit build-type: Simple cabal-version: >=1.10 source-repository head type: git location: https://github.com/dylex/zip-stream library exposed-modules: Codec.Archive.Zip.Conduit.Types Codec.Archive.Zip.Conduit.UnZip Codec.Archive.Zip.Conduit.Zip other-modules: Codec.Archive.Zip.Conduit.Internal default-language: Haskell2010 ghc-options: -Wall build-depends: base >= 4.9 && < 5, binary >= 0.7.2, binary-conduit, bytestring, conduit >= 1.3, conduit-extra, deepseq, digest, exceptions, mtl, primitive, resourcet, text, time, transformers-base, zlib executable unzip-stream main-is: unzip.hs hs-source-dirs: cmd default-language: Haskell2010 ghc-options: -Wall build-depends: base >=4.8 && <5, bytestring, conduit, conduit-extra, directory, filepath, text, time, transformers, zip-stream executable zip-stream main-is: zip.hs hs-source-dirs: cmd default-language: Haskell2010 ghc-options: -Wall build-depends: base >=4.8 && <5, bytestring, conduit, conduit-extra, directory, filepath, resourcet, text, time, transformers, zip-stream test-suite tests type: exitcode-stdio-1.0 hs-source-dirs: tests default-language: Haskell2010 main-is: Main.hs -- `-T` is needed for https://hackage.haskell.org/package/base-4.17.0.0/docs/GHC-Stats.html ghc-options: -Wall -threaded -with-rtsopts=-T build-depends: base , zip-stream , bytestring , conduit , hspec , text , time