diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml index 128f6eb..e7d3500 100644 --- a/.github/workflows/haskell-ci.yml +++ b/.github/workflows/haskell-ci.yml @@ -63,16 +63,6 @@ jobs: compilerVersion: 8.8.4 setup-method: hvr-ppa allow-failure: false - - compiler: ghc-8.6.5 - compilerKind: ghc - compilerVersion: 8.6.5 - setup-method: hvr-ppa - allow-failure: false - - compiler: ghc-8.4.4 - compilerKind: ghc - compilerVersion: 8.4.4 - setup-method: hvr-ppa - allow-failure: false fail-fast: false steps: - name: apt @@ -211,7 +201,7 @@ jobs: echo " ghc-options: -Werror=missing-methods" >> cabal.project cat >> cabal.project <> cabal.project.local + $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(bytestring|directory|htar|tar|unix|filepath)$/; }' >> cabal.project.local cat cabal.project cat cabal.project.local - name: dump install plan diff --git a/Codec/Archive/Tar.hs b/Codec/Archive/Tar.hs index 4e5448c..1dbcdfb 100644 --- a/Codec/Archive/Tar.hs +++ b/Codec/Archive/Tar.hs @@ -164,7 +164,6 @@ module Codec.Archive.Tar ( FormatError(..), ) where -import Codec.Archive.Tar.Check import Codec.Archive.Tar.Entry import Codec.Archive.Tar.Index (hSeekEndEntryOffset) import Codec.Archive.Tar.LongNames (decodeLongNames, encodeLongNames, DecodeLongNamesError(..)) @@ -174,12 +173,13 @@ import Codec.Archive.Tar.Types (unfoldEntries, foldlEntries, foldEntries, mapEnt import Codec.Archive.Tar.Unpack (unpack, unpackAndCheck) import Codec.Archive.Tar.Write (write) -import Control.Applicative ((<|>)) -import Control.Exception (Exception, throw, catch, SomeException(..)) import qualified Data.ByteString.Lazy as BL -import System.IO (withFile, IOMode(..)) +import System.IO (IOMode(..)) import Prelude hiding (read) +import System.OsPath (OsPath) +import qualified System.File.OsPath as OSP + -- | Create a new @\".tar\"@ file from a directory of files. -- -- It is equivalent to calling the standard @tar@ program like so: @@ -213,11 +213,11 @@ import Prelude hiding (read) -- -- * @rwxr-xr-x@ for directories -- -create :: FilePath -- ^ Path of the \".tar\" file to write. - -> FilePath -- ^ Base directory - -> [FilePath] -- ^ Files and directories to archive, relative to base dir +create :: OsPath -- ^ Path of the \".tar\" file to write. + -> OsPath -- ^ Base directory + -> [OsPath] -- ^ Files and directories to archive, relative to base dir -> IO () -create tar base paths = BL.writeFile tar . write =<< pack base paths +create tar base paths = OSP.writeFile tar . write =<< pack base paths -- | Extract all the files contained in a @\".tar\"@ file. -- @@ -249,10 +249,10 @@ create tar base paths = BL.writeFile tar . write =<< pack base paths -- containing entries that point outside of the tarball (either absolute paths -- or relative paths) will be caught and an exception will be thrown. -- -extract :: FilePath -- ^ Destination directory - -> FilePath -- ^ Tarball +extract :: OsPath -- ^ Destination directory + -> OsPath -- ^ Tarball -> IO () -extract dir tar = unpack dir . read =<< BL.readFile tar +extract dir tar = unpack dir . read =<< OSP.readFile tar -- | Append new entries to a @\".tar\"@ file from a directory of files. -- @@ -260,11 +260,11 @@ extract dir tar = unpack dir . read =<< BL.readFile tar -- end of an existing tar file. Or if the file does not already exists then -- it behaves the same as 'create'. -- -append :: FilePath -- ^ Path of the \".tar\" file to write. - -> FilePath -- ^ Base directory - -> [FilePath] -- ^ Files and directories to archive, relative to base dir +append :: OsPath -- ^ Path of the \".tar\" file to write. + -> OsPath -- ^ Base directory + -> [OsPath] -- ^ Files and directories to archive, relative to base dir -> IO () append tar base paths = - withFile tar ReadWriteMode $ \hnd -> do + OSP.withFile tar ReadWriteMode $ \hnd -> do _ <- hSeekEndEntryOffset hnd Nothing BL.hPut hnd . write =<< pack base paths diff --git a/Codec/Archive/Tar/Check/Internal.hs b/Codec/Archive/Tar/Check/Internal.hs index b603846..26005e1 100644 --- a/Codec/Archive/Tar/Check/Internal.hs +++ b/Codec/Archive/Tar/Check/Internal.hs @@ -3,6 +3,8 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE MultiWayIf #-} {-# OPTIONS_GHC -Wno-orphans #-} {-# OPTIONS_HADDOCK hide #-} ----------------------------------------------------------------------------- @@ -40,15 +42,19 @@ module Codec.Archive.Tar.Check.Internal ( import Codec.Archive.Tar.LongNames import Codec.Archive.Tar.Types import Control.Applicative ((<|>)) -import qualified Data.ByteString.Lazy.Char8 as Char8 -import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import Control.Exception (Exception(..)) -import qualified System.FilePath as FilePath.Native - ( splitDirectories, isAbsolute, isValid, (), takeDirectory, hasDrive ) -import qualified System.FilePath.Windows as FilePath.Windows -import qualified System.FilePath.Posix as FilePath.Posix +import System.OsPath (OsPath) +import System.OsPath.Posix (PosixPath) +import qualified System.OsPath as OSP +import qualified System.OsPath.Posix as PFP +import qualified System.OsPath.Windows as WFP + +import System.OsString.Posix (pstr) +import System.OsString (osstr) +import qualified System.OsString.Posix as PS +import qualified System.OsString.Windows as WS -------------------------- @@ -78,57 +84,79 @@ import qualified System.FilePath.Posix as FilePath.Posix -- such as exhaustion of file handlers. checkSecurity :: Entries e - -> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) FileNameError) + -> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) FileNameError) checkSecurity = checkEntries checkEntrySecurity . decodeLongNames -- | Worker of 'Codec.Archive.Tar.Check.checkSecurity'. -- -- @since 0.6.0.0 -checkEntrySecurity :: GenEntry FilePath FilePath -> Maybe FileNameError +checkEntrySecurity :: GenEntry PosixPath PosixPath -> Maybe FileNameError checkEntrySecurity e = check (entryTarPath e) <|> case entryContent e of HardLink link -> check link SymbolicLink link -> - check (FilePath.Posix.takeDirectory (entryTarPath e) FilePath.Posix. link) + check (PFP.takeDirectory (entryTarPath e) PFP. link) _ -> Nothing where + checkPosix :: PosixPath -> Maybe FileNameError checkPosix name - | FilePath.Posix.isAbsolute name - = Just $ AbsoluteFileName name - | not (FilePath.Posix.isValid name) - = Just $ InvalidFileName name - | not (isInsideBaseDir (FilePath.Posix.splitDirectories name)) - = Just $ UnsafeLinkTarget name - | otherwise = Nothing - - checkNative (fromFilePathToNative -> name) - | FilePath.Native.isAbsolute name || FilePath.Native.hasDrive name + | PFP.isAbsolute name = Just $ AbsoluteFileName name - | not (FilePath.Native.isValid name) + | not (PFP.isValid name) = Just $ InvalidFileName name - | not (isInsideBaseDir (FilePath.Native.splitDirectories name)) + | not (isInsideBaseDir (PFP.splitDirectories name)) = Just $ UnsafeLinkTarget name | otherwise = Nothing - check name = checkPosix name <|> checkNative (fromFilePathToNative name) - -isInsideBaseDir :: [FilePath] -> Bool + checkNative :: PosixPath -> Maybe FileNameError + checkNative name' + | OSP.isAbsolute name || OSP.hasDrive name + = Just $ AbsoluteFileName name' + | not (OSP.isValid name) + = Just $ InvalidFileName name' + | not (isInsideBaseDir' (OSP.splitDirectories name)) + = Just $ UnsafeLinkTarget name' + | otherwise + = Nothing + where + name = fromPosixPath name' + + check name = checkPosix name <|> checkNative name + +isInsideBaseDir :: [PosixPath] -> Bool isInsideBaseDir = go 0 where - go :: Word -> [FilePath] -> Bool + go :: Word -> [PosixPath] -> Bool + go !_ [] = True + go 0 (x : _) + | x == [pstr|..|] = False + go lvl (x : xs) + | x == [pstr|..|] = go (lvl - 1) xs + go lvl (x : xs) + | x == [pstr|.|] = go lvl xs + go lvl (_ : xs) = go (lvl + 1) xs + +isInsideBaseDir' :: [OsPath] -> Bool +isInsideBaseDir' = go 0 + where + go :: Word -> [OsPath] -> Bool go !_ [] = True - go 0 (".." : _) = False - go lvl (".." : xs) = go (lvl - 1) xs - go lvl ("." : xs) = go lvl xs + go 0 (x : _) + | x == [osstr|..|] = False + go lvl (x : xs) + | x == [osstr|..|] = go (lvl - 1) xs + go lvl (x : xs) + | x == [osstr|.|] = go lvl xs go lvl (_ : xs) = go (lvl + 1) xs -- | Errors arising from tar file names being in some way invalid or dangerous data FileNameError - = InvalidFileName FilePath - | AbsoluteFileName FilePath - | UnsafeLinkTarget FilePath + = InvalidFileName PosixPath + | AbsoluteFileName PosixPath + | UnsafeLinkTarget PosixPath + | FileNameDecodingFailure PosixPath -- ^ @since 0.6.0.0 deriving (Typeable) @@ -142,6 +170,7 @@ showFileNameError mb_plat err = case err of InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path UnsafeLinkTarget path -> "Unsafe" ++ plat ++ " link target in tar archive: " ++ show path + FileNameDecodingFailure path -> "Decoding failure of path " ++ show path where plat = maybe "" (' ':) mb_plat @@ -167,9 +196,9 @@ showFileNameError mb_plat err = case err of -- Not only it is faster, but also alleviates issues with lazy I/O -- such as exhaustion of file handlers. checkTarbomb - :: FilePath + :: PosixPath -> Entries e - -> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) TarBombError) + -> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) TarBombError) checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir) . decodeLongNames @@ -177,7 +206,7 @@ checkTarbomb expectedTopDir -- | Worker of 'checkTarbomb'. -- -- @since 0.6.0.0 -checkEntryTarbomb :: FilePath -> GenEntry FilePath linkTarget -> Maybe TarBombError +checkEntryTarbomb :: PosixPath -> GenEntry PosixPath linkTarget -> Maybe TarBombError checkEntryTarbomb expectedTopDir entry = do case entryContent entry of -- Global extended header aka XGLTYPE aka pax_global_header @@ -186,7 +215,7 @@ checkEntryTarbomb expectedTopDir entry = do -- Extended header referring to the next file in the archive aka XHDTYPE OtherEntryType 'x' _ _ -> Nothing _ -> - case FilePath.Posix.splitDirectories (entryTarPath entry) of + case PFP.splitDirectories (entryTarPath entry) of (topDir:_) | topDir == expectedTopDir -> Nothing _ -> Just $ TarBombError expectedTopDir (entryTarPath entry) @@ -194,10 +223,10 @@ checkEntryTarbomb expectedTopDir entry = do -- files outside of the intended directory. data TarBombError = TarBombError - FilePath -- ^ Path inside archive. + PosixPath -- ^ Path inside archive. -- -- @since 0.6.0.0 - FilePath -- ^ Expected top directory. + PosixPath -- ^ Expected top directory. deriving (Typeable) instance Exception TarBombError @@ -236,13 +265,13 @@ instance Show TarBombError where -- such as exhaustion of file handlers. checkPortability :: Entries e - -> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) PortabilityError) + -> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) PortabilityError) checkPortability = checkEntries checkEntryPortability . decodeLongNames -- | Worker of 'checkPortability'. -- -- @since 0.6.0.0 -checkEntryPortability :: GenEntry FilePath linkTarget -> Maybe PortabilityError +checkEntryPortability :: GenEntry PosixPath linkTarget -> Maybe PortabilityError checkEntryPortability entry | entryFormat entry `elem` [V7Format, GnuFormat] = Just $ NonPortableFormat (entryFormat entry) @@ -250,29 +279,30 @@ checkEntryPortability entry | not (portableFileType (entryContent entry)) = Just NonPortableFileType - | not (all portableChar posixPath) + | not (PS.all portableChar posixPath) = Just $ NonPortableEntryNameChar posixPath - | not (FilePath.Posix.isValid posixPath) + | not (PFP.isValid posixPath) = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) - | not (FilePath.Windows.isValid windowsPath) - = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) + | not (WFP.isValid windowsPath) + = Just $ NonPortableFileName "windows" (InvalidFileName posixPath) - | FilePath.Posix.isAbsolute posixPath + | PFP.isAbsolute posixPath = Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath) - | FilePath.Windows.isAbsolute windowsPath - = Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath) + | WFP.isAbsolute windowsPath + = Just $ NonPortableFileName "windows" (AbsoluteFileName posixPath) - | any (=="..") (FilePath.Posix.splitDirectories posixPath) + | any (== [PS.pstr|..|]) (PFP.splitDirectories posixPath) = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) - | any (=="..") (FilePath.Windows.splitDirectories windowsPath) - = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) + | any (== [WS.pstr|..|]) (WFP.splitDirectories windowsPath) + = Just $ NonPortableFileName "windows" (InvalidFileName posixPath) - | otherwise = Nothing + | otherwise + = Nothing where - posixPath = entryTarPath entry - windowsPath = fromFilePathToWindowsPath posixPath + posixPath = entryTarPath entry + windowsPath = toWindowsPath posixPath portableFileType ftype = case ftype of NormalFile {} -> True @@ -281,14 +311,15 @@ checkEntryPortability entry Directory -> True _ -> False - portableChar c = c <= '\127' + portableChar c = PS.toChar c <= '\127' -- | Portability problems in a tar archive data PortabilityError = NonPortableFormat Format | NonPortableFileType - | NonPortableEntryNameChar FilePath + | NonPortableEntryNameChar PosixPath | NonPortableFileName PortabilityPlatform FileNameError + | NonPortableDecodingFailure PosixPath deriving (Typeable) -- | The name of a platform that portability issues arise from @@ -306,6 +337,8 @@ instance Show PortabilityError where = "Non-portable character in archive entry name: " ++ show posixPath show (NonPortableFileName platform err) = showFileNameError (Just platform) err + show (NonPortableDecodingFailure posixPath) + = "Decoding failure of path " ++ show posixPath -------------------------- -- Utils diff --git a/Codec/Archive/Tar/Index/IntTrie.hs b/Codec/Archive/Tar/Index/IntTrie.hs index 1d8d162..8d09633 100644 --- a/Codec/Archive/Tar/Index/IntTrie.hs +++ b/Codec/Archive/Tar/Index/IntTrie.hs @@ -42,10 +42,7 @@ import Data.Array.IArray ((!)) import qualified Data.Bits as Bits import Data.Word (Word32) import Data.Bits -import Data.Monoid (Monoid(..)) -import Data.Monoid ((<>)) import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Unsafe as BS import Data.ByteString.Builder as BS import Control.Exception (assert) @@ -53,8 +50,7 @@ import qualified Data.Map.Strict as Map import qualified Data.IntMap.Strict as IntMap import Data.IntMap.Strict (IntMap) -import Data.List hiding (lookup, insert) -import Data.Function (on) +import qualified Data.List as L -- | A compact mapping from sequences of nats to nats. -- @@ -224,7 +220,7 @@ freshTrieNode (k:ks) v = TrieNode (freshTrie k ks v) inserts :: [([Key], Value)] -> IntTrieBuilder -> IntTrieBuilder -inserts kvs t = foldl' (\t' (ks, v) -> insert ks v t') t kvs +inserts kvs t = L.foldl' (\t' (ks, v) -> insert ks v t') t kvs finalise :: IntTrieBuilder -> IntTrie finalise trie = diff --git a/Codec/Archive/Tar/Index/Internal.hs b/Codec/Archive/Tar/Index/Internal.hs index b81e9f9..d64e658 100644 --- a/Codec/Archive/Tar/Index/Internal.hs +++ b/Codec/Archive/Tar/Index/Internal.hs @@ -66,26 +66,27 @@ import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder) import Codec.Archive.Tar.PackAscii -import qualified System.FilePath.Posix as FilePath -import Data.Monoid (Monoid(..)) -import Data.Monoid ((<>)) import Data.Word import Data.Int import Data.Bits -import qualified Data.Array.Unboxed as A import Prelude hiding (lookup) import System.IO import Control.Exception (assert, throwIO) import Control.DeepSeq import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Unsafe as BS import Data.ByteString.Builder as BS import Data.ByteString.Builder.Extra as BS (toLazyByteStringWith, untrimmedStrategy) +import System.OsPath.Posix (PosixPath) +import qualified System.OsPath.Posix as PFP + +import qualified System.OsString.Posix as PS + + -- | An index of the entries in a tar file. -- -- This index type is designed to be quite compact and suitable to store either @@ -129,7 +130,7 @@ instance NFData TarIndex where -- cheaper if you don't look at them. -- data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset - | TarDir [(FilePath, TarIndexEntry)] + | TarDir [(PosixPath, TarIndexEntry)] deriving (Show, Typeable) @@ -155,7 +156,7 @@ type TarEntryOffset = Word32 -- -- * 'hReadEntryHeader' to read just the file metadata (e.g. its length); -- -lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry +lookup :: TarIndex -> PosixPath -> Maybe TarIndexEntry lookup (TarIndex pathTable pathTrie _) path = do fpath <- toComponentIds pathTable path tentry <- IntTrie.lookup pathTrie $ map pathComponentIdToKey fpath @@ -167,32 +168,31 @@ lookup (TarIndex pathTable pathTrie _) path = do | (key, entry) <- entries ] -toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId] +toComponentIds :: StringTable PathComponentId -> PosixPath -> Maybe [PathComponentId] toComponentIds table = lookupComponents [] - . filter (/= BS.Char8.singleton '.') + . fmap posixToByteString + . filter (/= (PS.singleton $ PS.unsafeFromChar '.')) . splitDirectories - . posixToByteString - . toPosixString where lookupComponents cs' [] = Just (reverse cs') lookupComponents cs' (c:cs) = case StringTable.lookup table c of Nothing -> Nothing Just cid -> lookupComponents (cid:cs') cs -fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath -fromComponentId table = fromPosixString . byteToPosixString . StringTable.index table +fromComponentId :: StringTable PathComponentId -> PathComponentId -> PosixPath +fromComponentId table = byteToPosixString . StringTable.index table -- | All the files in the index with their corresponding 'TarEntryOffset's. -- -- Note that the files are in no special order. If you intend to read all or -- most files then is is recommended to sort by the 'TarEntryOffset'. -- -toList :: TarIndex -> [(FilePath, TarEntryOffset)] +toList :: TarIndex -> [(PosixPath, TarEntryOffset)] toList (TarIndex pathTable pathTrie _) = [ (path, IntTrie.unValue off) | (cids, off) <- IntTrie.toList pathTrie - , let path = FilePath.joinPath (map (fromComponentId pathTable . keyToPathComponentId) cids) ] + , let path = PFP.joinPath (map (fromComponentId pathTable . keyToPathComponentId) cids) ] -- | Build a 'TarIndex' from a sequence of tar 'Entries'. The 'Entries' are @@ -229,7 +229,7 @@ addNextEntry entry (IndexBuilder stbl itrie nextOffset) = (nextEntryOffset entry nextOffset) where !entrypath = splitTarPath (entryTarPath entry) - (stbl', cids) = StringTable.inserts entrypath stbl + (stbl', cids) = StringTable.inserts (posixToByteString <$> entrypath) stbl itrie' = IntTrie.insert (map pathComponentIdToKey cids) (IntTrie.Value nextOffset) itrie -- | Use this function if you want to skip some entries and not add them to the @@ -283,17 +283,18 @@ nextEntryOffset entry offset = blocks :: Int64 -> TarEntryOffset blocks size = fromIntegral (1 + (size - 1) `div` 512) -type FilePathBS = BS.ByteString -splitTarPath :: TarPath -> [FilePathBS] +splitTarPath :: TarPath -> [PosixPath] splitTarPath (TarPath name prefix) = - splitDirectories (posixToByteString prefix) ++ splitDirectories (posixToByteString name) + splitDirectories prefix ++ splitDirectories name -splitDirectories :: FilePathBS -> [FilePathBS] +splitDirectories :: PosixPath -> [PosixPath] splitDirectories bs = - case BS.Char8.split '/' bs of - c:cs | BS.null c -> BS.Char8.singleton '/' : filter (not . BS.null) cs - cs -> filter (not . BS.null) cs + case PS.split sep bs of + c:cs | PS.null c -> PS.singleton sep : filter (not . PS.null) cs + cs -> filter (not . PS.null) cs + where + sep = PS.unsafeFromChar '/' ------------------------- diff --git a/Codec/Archive/Tar/Index/StringTable.hs b/Codec/Archive/Tar/Index/StringTable.hs index 5ef3a58..c2a47c6 100644 --- a/Codec/Archive/Tar/Index/StringTable.hs +++ b/Codec/Archive/Tar/Index/StringTable.hs @@ -27,13 +27,10 @@ module Codec.Archive.Tar.Index.StringTable ( import Data.Typeable (Typeable) import Prelude hiding (lookup, id) -import Data.List hiding (lookup, insert) -import Data.Function (on) +import qualified Data.List as L import Data.Word (Word32) import Data.Int (Int32) import Data.Bits -import Data.Monoid (Monoid(..)) -import Data.Monoid ((<>)) import Control.Exception (assert) import qualified Data.Array.Unboxed as A @@ -42,7 +39,6 @@ import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) import qualified Data.ByteString as BS import qualified Data.ByteString.Unsafe as BS -import qualified Data.ByteString.Lazy as LBS import Data.ByteString.Builder as BS import Data.ByteString.Builder.Extra as BS (byteStringCopy) @@ -96,7 +92,7 @@ index (StringTable bs offsets _ids ixs) = -- in the construction. -- construct :: Enum id => [BS.ByteString] -> StringTable id -construct = finalise . foldl' (\tbl s -> fst (insert s tbl)) empty +construct = finalise . L.foldl' (\tbl s -> fst (insert s tbl)) empty data StringTableBuilder id = StringTableBuilder @@ -116,7 +112,7 @@ insert str builder@(StringTableBuilder smap nextid) = in (StringTableBuilder smap' (nextid+1), id) inserts :: Enum id => [BS.ByteString] -> StringTableBuilder id -> (StringTableBuilder id, [id]) -inserts bss builder = mapAccumL (flip insert) builder bss +inserts bss builder = L.mapAccumL (flip insert) builder bss finalise :: Enum id => StringTableBuilder id -> StringTable id finalise (StringTableBuilder smap _) = diff --git a/Codec/Archive/Tar/LongNames.hs b/Codec/Archive/Tar/LongNames.hs index e349990..1a268c1 100644 --- a/Codec/Archive/Tar/LongNames.hs +++ b/Codec/Archive/Tar/LongNames.hs @@ -13,9 +13,10 @@ import Codec.Archive.Tar.Types import Control.Exception import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as BL -import "os-string" System.OsString.Posix (PosixString, PosixChar) import qualified "os-string" System.OsString.Posix as PS +import System.OsPath.Posix (PosixPath) + -- | Errors raised by 'decodeLongNames'. -- -- @since 0.6.0.0 @@ -38,7 +39,7 @@ instance Exception DecodeLongNamesError -- -- @since 0.6.0.0 encodeLongNames - :: GenEntry FilePath FilePath + :: GenEntry PosixPath PosixPath -> [Entry] encodeLongNames e = maybe id (:) mEntry $ maybe id (:) mEntry' [e''] where @@ -46,16 +47,16 @@ encodeLongNames e = maybe id (:) mEntry $ maybe id (:) mEntry' [e''] (mEntry', e'') = encodeTarPath e' encodeTarPath - :: GenEntry FilePath linkTarget + :: GenEntry PosixPath linkTarget -> (Maybe (GenEntry TarPath whatever), GenEntry TarPath linkTarget) -- ^ (LongLink entry, actual entry) -encodeTarPath e = case toTarPath' (entryTarPath e) of +encodeTarPath e = case splitLongPath (entryTarPath e) of FileNameEmpty -> (Nothing, e { entryTarPath = TarPath mempty mempty }) FileNameOK tarPath -> (Nothing, e { entryTarPath = tarPath }) FileNameTooLong tarPath -> (Just $ longLinkEntry $ entryTarPath e, e { entryTarPath = tarPath }) encodeLinkTarget - :: GenEntry tarPath FilePath + :: GenEntry tarPath PosixPath -> (Maybe (GenEntry TarPath LinkTarget), GenEntry tarPath LinkTarget) -- ^ (LongLink symlink entry, actual entry) encodeLinkTarget e = case entryContent e of @@ -71,9 +72,9 @@ encodeLinkTarget e = case entryContent e of OtherEntryType x y z -> (Nothing, e { entryContent = OtherEntryType x y z }) encodeLinkPath - :: FilePath + :: PosixPath -> (Maybe (GenEntry TarPath LinkTarget), LinkTarget) -encodeLinkPath lnk = case toTarPath' lnk of +encodeLinkPath lnk = case splitLongPath lnk of FileNameEmpty -> (Nothing, LinkTarget mempty) FileNameOK (TarPath name prefix) | PS.null prefix -> (Nothing, LinkTarget name) @@ -91,10 +92,10 @@ encodeLinkPath lnk = case toTarPath' lnk of -- @since 0.6.0.0 decodeLongNames :: Entries e - -> GenEntries FilePath FilePath (Either e DecodeLongNamesError) + -> GenEntries PosixPath PosixPath (Either e DecodeLongNamesError) decodeLongNames = go Nothing Nothing where - go :: Maybe FilePath -> Maybe FilePath -> Entries e -> GenEntries FilePath FilePath (Either e DecodeLongNamesError) + go :: Maybe PosixPath -> Maybe PosixPath -> Entries e -> GenEntries PosixPath PosixPath (Either e DecodeLongNamesError) go _ _ (Fail err) = Fail (Left err) go _ _ Done = Done @@ -137,17 +138,16 @@ decodeLongNames = go Nothing Nothing _ -> Fail $ Right NoLinkEntryAfterTypeKEntry -otherEntryPayloadToFilePath :: BL.ByteString -> FilePath -otherEntryPayloadToFilePath = - fromPosixString . byteToPosixString . B.takeWhile (/= '\0') . BL.toStrict +otherEntryPayloadToFilePath :: BL.ByteString -> PosixPath +otherEntryPayloadToFilePath = byteToPosixString . B.takeWhile (/= '\0') . BL.toStrict -castEntry :: Entry -> GenEntry FilePath FilePath +castEntry :: Entry -> GenEntry PosixPath PosixPath castEntry e = e { entryTarPath = fromTarPathToPosixPath (entryTarPath e) , entryContent = castEntryContent (entryContent e) } -castEntryContent :: EntryContent -> GenEntryContent FilePath +castEntryContent :: EntryContent -> GenEntryContent PosixPath castEntryContent = \case NormalFile x y -> NormalFile x y Directory -> Directory diff --git a/Codec/Archive/Tar/Pack.hs b/Codec/Archive/Tar/Pack.hs index 3ee08d5..333533f 100644 --- a/Codec/Archive/Tar/Pack.hs +++ b/Codec/Archive/Tar/Pack.hs @@ -3,6 +3,7 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE QuasiQuotes #-} {-# OPTIONS_HADDOCK hide #-} ----------------------------------------------------------------------------- -- | @@ -29,27 +30,27 @@ module Codec.Archive.Tar.Pack ( import Codec.Archive.Tar.LongNames import Codec.Archive.Tar.Types -import Control.Monad (join, when, forM, (>=>)) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import Data.Foldable -import System.FilePath - ( () ) -import qualified System.FilePath as FilePath.Native - ( addTrailingPathSeparator, hasTrailingPathSeparator, splitDirectories ) -import System.Directory +import System.Directory.OsPath ( listDirectory, doesDirectoryExist, getModificationTime , pathIsSymbolicLink, getSymbolicLinkTarget , Permissions(..), getPermissions, getFileSize ) -import Data.Time.Clock - ( UTCTime ) import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds ) import System.IO - ( IOMode(ReadMode), openBinaryFile, hFileSize ) + ( IOMode(ReadMode), hFileSize ) import System.IO.Unsafe (unsafeInterleaveIO) import Control.Exception (throwIO, SomeException) -import Codec.Archive.Tar.Check.Internal (checkEntrySecurity) + +import System.OsPath (OsPath) +import System.OsPath.Posix (PosixPath) + +import qualified System.File.OsPath as OSP +import qualified System.OsPath as OSP +import qualified System.OsString as OS + -- | Creates a tar archive from a list of directory or files. Any directories -- specified will have their contents included recursively. Paths in the @@ -65,8 +66,8 @@ import Codec.Archive.Tar.Check.Internal (checkEntrySecurity) -- Do not change their contents before the output of 'pack' was consumed in full. -- pack - :: FilePath -- ^ Base directory - -> [FilePath] -- ^ Files and directories to pack, relative to the base dir + :: OsPath -- ^ Base directory + -> [OsPath] -- ^ Files and directories to pack, relative to the base dir -> IO [Entry] pack = packAndCheck (const Nothing) @@ -77,9 +78,9 @@ pack = packAndCheck (const Nothing) -- -- @since 0.6.0.0 packAndCheck - :: (GenEntry FilePath FilePath -> Maybe SomeException) - -> FilePath -- ^ Base directory - -> [FilePath] -- ^ Files and directories to pack, relative to the base dir + :: (GenEntry PosixPath PosixPath -> Maybe SomeException) + -> OsPath -- ^ Base directory + -> [OsPath] -- ^ Files and directories to pack, relative to the base dir -> IO [Entry] packAndCheck secCB baseDir relpaths = do paths <- preparePaths baseDir relpaths @@ -87,34 +88,36 @@ packAndCheck secCB baseDir relpaths = do traverse_ (maybe (pure ()) throwIO . secCB) entries pure $ concatMap encodeLongNames entries -preparePaths :: FilePath -> [FilePath] -> IO [FilePath] +preparePaths :: OsPath -> [OsPath] -> IO [OsPath] preparePaths baseDir = fmap concat . interleave . map go where + go :: OsPath -> IO [OsPath] go relpath = do - let abspath = baseDir relpath + let abspath = baseDir OSP. relpath isDir <- doesDirectoryExist abspath isSymlink <- pathIsSymbolicLink abspath if isDir && not isSymlink then do entries <- getDirectoryContentsRecursive abspath - let entries' = map (relpath ) entries - return $ if null relpath + let entries' = map (relpath OSP.) entries + return $ if OS.null relpath then entries' - else FilePath.Native.addTrailingPathSeparator relpath : entries' + else OSP.addTrailingPathSeparator relpath : entries' else return [relpath] -- | Pack paths while accounting for overlong filepaths. packPaths - :: FilePath - -> [FilePath] - -> IO [GenEntry FilePath FilePath] -packPaths baseDir paths = interleave $ flip map paths $ \relpath -> do - let isDir = FilePath.Native.hasTrailingPathSeparator abspath - abspath = baseDir relpath + :: OsPath + -> [OsPath] + -> IO [GenEntry PosixPath PosixPath] +packPaths baseDir paths = interleave $ flip map paths $ \relpath' -> do + let isDir = OSP.hasTrailingPathSeparator abspath + abspath = baseDir OSP. relpath' isSymlink <- pathIsSymbolicLink abspath let mkEntry | isSymlink = packSymlinkEntry | isDir = packDirectoryEntry | otherwise = packFileEntry + let relpath = toFSPosixPath' relpath' mkEntry abspath relpath interleave :: [IO a] -> IO [a] @@ -135,8 +138,8 @@ interleave = unsafeInterleaveIO . go -- * The file contents is read lazily. -- packFileEntry - :: FilePath -- ^ Full path to find the file on the local disk - -> tarPath -- ^ Path to use for the tar 'GenEntry' in the archive + :: OsPath -- ^ Full path to find the file on the local disk + -> tarPath -- ^ Path to use for the tar 'Entry' in the archive -> IO (GenEntry tarPath linkTarget) packFileEntry filepath tarpath = do mtime <- getModTime filepath @@ -148,10 +151,10 @@ packFileEntry filepath tarpath = do -- If file is short enough, just read it strictly -- so that no file handle dangles around indefinitely. then do - cnt <- B.readFile filepath + cnt <- OSP.readFile' filepath pure (BL.fromStrict cnt, fromIntegral $ B.length cnt) else do - hndl <- openBinaryFile filepath ReadMode + hndl <- OSP.openBinaryFile filepath ReadMode -- File size could have changed between measuring approxSize -- and here. Measuring again. sz <- hFileSize hndl @@ -175,8 +178,8 @@ packFileEntry filepath tarpath = do -- Directory ownership and detailed permissions are not preserved. -- packDirectoryEntry - :: FilePath -- ^ Full path to find the file on the local disk - -> tarPath -- ^ Path to use for the tar 'GenEntry' in the archive + :: OsPath -- ^ Full path to find the file on the local disk + -> tarPath -- ^ Path to use for the tar 'Entry' in the archive -> IO (GenEntry tarPath linkTarget) packDirectoryEntry filepath tarpath = do mtime <- getModTime filepath @@ -190,11 +193,12 @@ packDirectoryEntry filepath tarpath = do -- -- @since 0.6.0.0 packSymlinkEntry - :: FilePath -- ^ Full path to find the file on the local disk - -> tarPath -- ^ Path to use for the tar 'GenEntry' in the archive - -> IO (GenEntry tarPath FilePath) + :: OsPath -- ^ Full path to find the file on the local disk + -> tarPath -- ^ Path to use for the tar 'Entry' in the archive + -> IO (GenEntry tarPath PosixPath) packSymlinkEntry filepath tarpath = do - linkTarget <- getSymbolicLinkTarget filepath + linkTarget' <- getSymbolicLinkTarget filepath + let linkTarget = toFSPosixPath' linkTarget' pure $ symlinkEntry tarpath linkTarget -- | This is a utility function, much like 'listDirectory'. The @@ -215,14 +219,14 @@ packSymlinkEntry filepath tarpath = do -- If the source directory structure changes before the result is used in full, -- the behaviour is undefined. -- -getDirectoryContentsRecursive :: FilePath -> IO [FilePath] +getDirectoryContentsRecursive :: OsPath -> IO [OsPath] getDirectoryContentsRecursive dir0 = - fmap (drop 1) (recurseDirectories dir0 [""]) + fmap tail (recurseDirectories dir0 [[OS.osstr||]]) -recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath] +recurseDirectories :: OsPath -> [OsPath] -> IO [OsPath] recurseDirectories _ [] = return [] recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do - (files, dirs') <- collect [] [] =<< listDirectory (base dir) + (files, dirs') <- collect [] [] =<< listDirectory (base OSP. dir) files' <- recurseDirectories base (dirs' ++ dirs) return (dir : files ++ files') @@ -230,15 +234,15 @@ recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do where collect files dirs' [] = return (reverse files, reverse dirs') collect files dirs' (entry:entries) = do - let dirEntry = dir entry - dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry - isDirectory <- doesDirectoryExist (base dirEntry) - isSymlink <- pathIsSymbolicLink (base dirEntry) + let dirEntry = dir OSP. entry + dirEntry' = OSP.addTrailingPathSeparator dirEntry + isDirectory <- doesDirectoryExist (base OSP. dirEntry) + isSymlink <- pathIsSymbolicLink (base OSP. dirEntry) if isDirectory && not isSymlink then collect files (dirEntry':dirs') entries else collect (dirEntry:files) dirs' entries -getModTime :: FilePath -> IO EpochTime +getModTime :: OsPath -> IO EpochTime getModTime path = do -- The directory package switched to the new time package t <- getModificationTime path diff --git a/Codec/Archive/Tar/PackAscii.hs b/Codec/Archive/Tar/PackAscii.hs index 3538a56..e3dfedd 100644 --- a/Codec/Archive/Tar/PackAscii.hs +++ b/Codec/Archive/Tar/PackAscii.hs @@ -2,25 +2,15 @@ {-# OPTIONS_HADDOCK hide #-} module Codec.Archive.Tar.PackAscii - ( toPosixString - , fromPosixString - , posixToByteString + ( posixToByteString , byteToPosixString ) where import Data.ByteString (ByteString) import qualified Data.ByteString.Short as Sh -import System.IO.Unsafe (unsafePerformIO) import "os-string" System.OsString.Posix (PosixString) -import qualified "os-string" System.OsString.Posix as PS import qualified "os-string" System.OsString.Internal.Types as PS -toPosixString :: FilePath -> PosixString -toPosixString = unsafePerformIO . PS.encodeFS - -fromPosixString :: PosixString -> FilePath -fromPosixString = unsafePerformIO . PS.decodeFS - posixToByteString :: PosixString -> ByteString posixToByteString = Sh.fromShort . PS.getPosixString diff --git a/Codec/Archive/Tar/Read.hs b/Codec/Archive/Tar/Read.hs index 8a11365..95fd0c3 100644 --- a/Codec/Archive/Tar/Read.hs +++ b/Codec/Archive/Tar/Read.hs @@ -22,7 +22,6 @@ module Codec.Archive.Tar.Read import Codec.Archive.Tar.PackAscii import Codec.Archive.Tar.Types -import Data.Char (ord) import Data.Int (Int64) import Data.Bits (Bits(shiftL, (.&.), complement)) import Control.Exception (Exception(..)) @@ -34,14 +33,14 @@ import Control.Monad.Trans.State.Lazy import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 -import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Lazy as LBS -import System.IO.Unsafe (unsafePerformIO) -import "os-string" System.OsString.Posix (PosixString, PosixChar) -import qualified "os-string" System.OsString.Posix as PS import Prelude hiding (read) +import "os-string" System.OsString.Internal.Types (PosixString(..)) + +import qualified "os-string" System.OsString.Posix as PS + -- | Errors that can be encountered when parsing a Tar archive. data FormatError = TruncatedArchive @@ -130,12 +129,12 @@ getEntryStreaming getN getAll = do let content = LBS.take size paddedContent pure $ Right $ Just $ Entry { - entryTarPath = TarPath (byteToPosixString name) (byteToPosixString prefix), + entryTarPath = TarPath name prefix, entryContent = case typecode of '\0' -> NormalFile content size '0' -> NormalFile content size - '1' -> HardLink (LinkTarget $ byteToPosixString linkname) - '2' -> SymbolicLink (LinkTarget $ byteToPosixString linkname) + '1' -> HardLink (LinkTarget linkname) + '2' -> SymbolicLink (LinkTarget linkname) _ | format == V7Format -> OtherEntryType typecode content size '3' -> CharacterDevice devmajor devminor @@ -145,15 +144,14 @@ getEntryStreaming getN getAll = do '7' -> NormalFile content size _ -> OtherEntryType typecode content size, entryPermissions = mode, - entryOwnership = Ownership (BS.Char8.unpack uname) - (BS.Char8.unpack gname) uid gid, + entryOwnership = Ownership uname gname uid gid, entryTime = mtime, entryFormat = format } parseHeader :: LBS.ByteString - -> Either FormatError (BS.ByteString, Permissions, Int, Int, Int64, EpochTime, Char, BS.ByteString, Format, BS.ByteString, BS.ByteString, DevMajor, DevMinor, BS.ByteString) + -> Either FormatError (PosixString, Permissions, Int, Int, Int64, EpochTime, Char, PosixString, Format, PosixString, PosixString, DevMajor, DevMinor, PosixString) parseHeader header' = do case (chksum_, format_ magic) of (Right chksum, _ ) | correctChecksum header chksum -> return () @@ -254,8 +252,8 @@ getByte off bs = BS.Char8.index bs off getChars :: Int -> Int -> BS.ByteString -> BS.ByteString getChars = getBytes -getString :: Int -> Int -> BS.ByteString -> BS.ByteString -getString off len = BS.copy . BS.Char8.takeWhile (/='\0') . getBytes off len +getString :: Int -> Int -> BS.ByteString -> PS.PosixString +getString off len = PS.takeWhile (/= PS.unsafeFromChar '\0') . byteToPosixString . getBytes off len {-# SPECIALISE readOct :: BS.ByteString -> Maybe Int #-} {-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-} diff --git a/Codec/Archive/Tar/Types.hs b/Codec/Archive/Tar/Types.hs index 02810a9..1d6417e 100644 --- a/Codec/Archive/Tar/Types.hs +++ b/Codec/Archive/Tar/Types.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} @@ -53,18 +55,22 @@ module Codec.Archive.Tar.Types ( TarPath(..), toTarPath, toTarPath', + splitLongPath, ToTarPathResult(..), fromTarPath, fromTarPathToPosixPath, fromTarPathToWindowsPath, - fromFilePathToNative, LinkTarget(..), toLinkTarget, fromLinkTarget, fromLinkTargetToPosixPath, fromLinkTargetToWindowsPath, - fromFilePathToWindowsPath, + + toFSPosixPath, + toFSPosixPath', + toWindowsPath, + fromPosixPath, GenEntries(..), Entries, @@ -78,28 +84,33 @@ module Codec.Archive.Tar.Types ( import Data.Int (Int64) import Data.List.NonEmpty (NonEmpty(..)) -import Data.Monoid (Monoid(..)) import Data.Semigroup as Sem import Data.Typeable -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import Control.DeepSeq import Control.Exception (Exception, displayException) -import qualified System.FilePath as FilePath.Native - ( joinPath, splitDirectories, addTrailingPathSeparator, hasTrailingPathSeparator, pathSeparator, isAbsolute, hasTrailingPathSeparator ) -import qualified System.FilePath.Posix as FilePath.Posix - ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator - , addTrailingPathSeparator, pathSeparator ) -import qualified System.FilePath.Windows as FilePath.Windows - ( joinPath, addTrailingPathSeparator, pathSeparator ) +import GHC.Stack (HasCallStack) + import System.Posix.Types ( FileMode ) -import "os-string" System.OsString.Posix (PosixString, PosixChar) -import qualified "os-string" System.OsString.Posix as PS -import Codec.Archive.Tar.PackAscii +import System.IO.Unsafe (unsafePerformIO) +import System.OsString.Posix (pstr) +import qualified System.OsString.Posix as Posix +import System.OsString.Internal.Types (OsString(..), PosixString(..)) +import qualified System.OsString.Posix as PS +import qualified System.OsString.Windows as WS + +import System.OsPath (OsPath) +import System.OsPath.Windows (WindowsPath) +import System.OsPath.Posix (PosixPath) +import qualified System.OsPath as OSP +import qualified System.OsPath.Posix as PFP +import qualified System.OsPath.Windows as WFP + +import qualified Data.ByteString.Short as SBS + -- | File size in bytes. type FileSize = Int64 @@ -151,16 +162,16 @@ data GenEntry tarPath linkTarget = Entry { -- type Entry = GenEntry TarPath LinkTarget --- | Low-level function to get a native 'FilePath' of the file or directory +-- | Low-level function to get a native 'OsPath of the file or directory -- within the archive, not accounting for long names. It's likely -- that you want to apply 'Codec.Archive.Tar.decodeLongNames' -- and use 'Codec.Archive.Tar.Entry.entryTarPath' afterwards instead of 'entryPath'. -- -entryPath :: GenEntry TarPath linkTarget -> FilePath +entryPath :: GenEntry TarPath linkTarget -> OsPath entryPath = fromTarPath . entryTarPath -- | Polymorphic content of a tar archive entry. High-level interfaces --- commonly work with 'GenEntryContent' 'FilePath', +-- commonly work with 'GenEntryContent' 'OsPath', -- while low-level ones use 'GenEntryContent' 'LinkTarget'. -- -- Portable archives should contain only 'NormalFile' and 'Directory'. @@ -187,10 +198,10 @@ type EntryContent = GenEntryContent LinkTarget -- | Ownership information for 'GenEntry'. data Ownership = Ownership { -- | The owner user name. Should be set to @\"\"@ if unknown. - ownerName :: String, + ownerName :: PosixString, -- | The owner group name. Should be set to @\"\"@ if unknown. - groupName :: String, + groupName :: PosixString, -- | Numeric owner user id. Should be set to @0@ if unknown. ownerId :: {-# UNPACK #-} !Int, @@ -268,7 +279,7 @@ simpleEntry tarpath content = Entry { Directory -> directoryPermissions SymbolicLink _ -> symbolicLinkPermission _ -> ordinaryFilePermissions, - entryOwnership = Ownership "" "" 0 0, + entryOwnership = Ownership PS.empty PS.empty 0 0, entryTime = 0, entryFormat = UstarFormat } @@ -300,12 +311,12 @@ symlinkEntry name targetLink = -- See [What exactly is the GNU tar ././@LongLink "trick"?](https://stackoverflow.com/questions/2078778/what-exactly-is-the-gnu-tar-longlink-trick) -- -- @since 0.6.0.0 -longLinkEntry :: FilePath -> GenEntry TarPath linkTarget -longLinkEntry tarpath = Entry { - entryTarPath = TarPath [PS.pstr|././@LongLink|] mempty, - entryContent = OtherEntryType 'L' (LBS.fromStrict $ posixToByteString $ toPosixString tarpath) (fromIntegral $ length tarpath), +longLinkEntry :: PosixPath -> GenEntry TarPath linkTarget +longLinkEntry (PosixString tarpath) = Entry { + entryTarPath = TarPath [pstr|././@LongLink|] PS.empty, + entryContent = OtherEntryType 'L' (LBS.fromStrict . SBS.fromShort $ tarpath) (fromIntegral $ SBS.length tarpath), entryPermissions = ordinaryFilePermissions, - entryOwnership = Ownership "" "" 0 0, + entryOwnership = Ownership PS.empty PS.empty 0 0, entryTime = 0, entryFormat = GnuFormat } @@ -317,12 +328,12 @@ longLinkEntry tarpath = Entry { -- data with truncated 'Codec.Archive.Tar.Entry.entryTarPath'. -- -- @since 0.6.0.0 -longSymLinkEntry :: FilePath -> GenEntry TarPath linkTarget -longSymLinkEntry linkTarget = Entry { - entryTarPath = TarPath [PS.pstr|././@LongLink|] mempty, - entryContent = OtherEntryType 'K' (LBS.fromStrict $ posixToByteString $ toPosixString $ linkTarget) (fromIntegral $ length linkTarget), +longSymLinkEntry :: PosixPath -> GenEntry TarPath linkTarget +longSymLinkEntry (PosixString linkTarget) = Entry { + entryTarPath = TarPath [pstr|././@LongLink|] PS.empty, + entryContent = OtherEntryType 'K' (LBS.fromStrict . SBS.fromShort $ linkTarget) (fromIntegral $ SBS.length linkTarget), entryPermissions = ordinaryFilePermissions, - entryOwnership = Ownership "" "" 0 0, + entryOwnership = Ownership PS.empty PS.empty 0 0, entryTime = 0, entryFormat = GnuFormat } @@ -375,7 +386,7 @@ instance NFData TarPath where instance Show TarPath where show = show . fromTarPath --- | Convert a 'TarPath' to a native 'FilePath'. +-- | Convert a 'TarPath' to a native 'OsPath'. -- -- The native 'FilePath' will use the native directory separator but it is not -- otherwise checked for validity or sanity. In particular: @@ -388,10 +399,14 @@ instance Show TarPath where -- responsibility to check for these conditions -- (e.g., using 'Codec.Archive.Tar.Check.checkEntrySecurity'). -- -fromTarPath :: TarPath -> FilePath -fromTarPath = fromPosixString . fromTarPathInternal (PS.unsafeFromChar FilePath.Native.pathSeparator) +fromTarPath :: TarPath -> OsPath +#if defined(mingw32_HOST_OS) +fromTarPath = OsString . fromTarPathToWindowsPath +#else +fromTarPath = OsString . fromTarPathToPosixPath +#endif --- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'. +-- | Convert a 'TarPath' to a Unix\/Posix 'OsPath'. -- -- The difference compared to 'fromTarPath' is that it always returns a Unix -- style path irrespective of the current operating system. @@ -399,10 +414,13 @@ fromTarPath = fromPosixString . fromTarPathInternal (PS.unsafeFromChar FilePath. -- This is useful to check how a 'TarPath' would be interpreted on a specific -- operating system, eg to perform portability checks. -- -fromTarPathToPosixPath :: TarPath -> FilePath -fromTarPathToPosixPath = fromPosixString . fromTarPathInternal (PS.unsafeFromChar FilePath.Posix.pathSeparator) +fromTarPathToPosixPath :: TarPath -> PosixPath +fromTarPathToPosixPath (TarPath name prefix) + | PS.null prefix = name + | PS.null name = prefix + | otherwise = prefix <> PS.cons PFP.pathSeparator name --- | Convert a 'TarPath' to a Windows 'FilePath'. +-- | Convert a 'TarPath' to a Windows 'OsPath'. -- -- The only difference compared to 'fromTarPath' is that it always returns a -- Windows style path irrespective of the current operating system. @@ -410,62 +428,82 @@ fromTarPathToPosixPath = fromPosixString . fromTarPathInternal (PS.unsafeFromCha -- This is useful to check how a 'TarPath' would be interpreted on a specific -- operating system, eg to perform portability checks. -- -fromTarPathToWindowsPath :: TarPath -> FilePath -fromTarPathToWindowsPath = fromPosixString . fromTarPathInternal (PS.unsafeFromChar FilePath.Windows.pathSeparator) - -fromTarPathInternal :: PosixChar -> TarPath -> PosixString -fromTarPathInternal sep = go - where - posixSep = PS.unsafeFromChar FilePath.Posix.pathSeparator - adjustSeps = if sep == posixSep then id else - PS.map $ \c -> if c == posixSep then sep else c - go (TarPath name prefix) - | PS.null prefix = adjustSeps name - | PS.null name = adjustSeps prefix - | otherwise = adjustSeps prefix <> PS.cons sep (adjustSeps name) -{-# INLINE fromTarPathInternal #-} - --- | Convert a native 'FilePath' to a 'TarPath'. --- --- The conversion may fail if the 'FilePath' is empty or too long. +fromTarPathToWindowsPath :: HasCallStack => TarPath -> WindowsPath +fromTarPathToWindowsPath tarPath = + let posix = fromTarPathToPosixPath tarPath + in toWindowsPath posix + +-- | We assume UTF-8 on posix and filesystem encoding on windows. +toWindowsPath :: HasCallStack => PosixPath -> WindowsPath +toWindowsPath posix = + let str = unsafePerformIO $ PFP.decodeUtf posix + win = unsafePerformIO $ WFP.encodeFS str + in WS.map (\c -> if WFP.isPathSeparator c then WFP.pathSeparator else c) win + + +-- | We assume filesystem encoding on windows and UTF-8 on posix. +toFSPosixPath :: HasCallStack => WindowsPath -> PosixPath +toFSPosixPath win = + let str = unsafePerformIO $ WFP.decodeFS win + posix = Posix.unsafeEncodeUtf str + in PS.map (\c -> if PFP.isPathSeparator c then PFP.pathSeparator else c) posix + +-- | We assume filesystem encoding on windows and UTF-8 on posix. +toFSPosixPath' :: HasCallStack => OsPath -> PosixPath +#if defined(mingw32_HOST_OS) +toFSPosixPath' (OsString ws) = toFSPosixPath ws +#else +toFSPosixPath' (OsString ps) = ps +#endif + +-- | We assume UTF-8 on posix and filesystem encoding on windows. +fromPosixPath :: HasCallStack => PosixPath -> OsPath +#if defined(mingw32_HOST_OS) +fromPosixPath ps = OsPath $ toWindowsPath ps +#else +fromPosixPath = OsString +#endif + + +-- | Convert a native 'OsPath' to a 'TarPath'. +-- +-- The conversion may fail if the 'OsPath' is empty or too long. +-- Use 'toTarPath'' for a structured output. toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for -- directories a 'TarPath' must always use a trailing @\/@. - -> FilePath + -> OsPath -> Either String TarPath toTarPath isDir path = case toTarPath' path' of - FileNameEmpty -> Left "File name empty" - FileNameOK tarPath -> Right tarPath - FileNameTooLong{} -> Left "File name too long" + FileNameEmpty -> Left "File name empty" + (FileNameOK tarPath) -> Right tarPath + (FileNameTooLong{}) -> Left "File name too long" where - path' = if isDir && not (FilePath.Native.hasTrailingPathSeparator path) - then path <> [FilePath.Native.pathSeparator] + path' = if isDir && not (OSP.hasTrailingPathSeparator path) + then path <> OSP.pack [OSP.pathSeparator] else path --- | Convert a native 'FilePath' to a 'TarPath'. +-- | Convert a native 'OsPath' to a 'TarPath'. -- Directory paths must always have a trailing @\/@, this is not checked. -- -- @since 0.6.0.0 toTarPath' - :: FilePath + :: HasCallStack + => OsPath -> ToTarPathResult -toTarPath' - = splitLongPath - . (if nativeSep == posixSep then id else adjustSeps) - where - nativeSep = FilePath.Native.pathSeparator - posixSep = FilePath.Posix.pathSeparator - adjustSeps = map $ \c -> if c == nativeSep then posixSep else c +toTarPath' osp' = + let posix = toFSPosixPath' osp' + in splitLongPath posix -- | Return type of 'toTarPath''. -- -- @since 0.6.0.0 data ToTarPathResult = FileNameEmpty - -- ^ 'FilePath' was empty, but 'TarPath' must be non-empty. + -- ^ 'OsPath' was empty, but 'TarPath' must be non-empty. | FileNameOK TarPath -- ^ All good, this is just a normal 'TarPath'. | FileNameTooLong TarPath - -- ^ 'FilePath' was longer than 255 characters, 'TarPath' contains + -- ^ 'OsPath' was longer than 255 characters, 'TarPath' contains -- a truncated part only. An actual entry must be preceded by -- 'longLinkEntry'. @@ -475,104 +513,83 @@ data ToTarPathResult -- The strategy is this: take the name-directory components in reverse order -- and try to fit as many components into the 100 long name area as possible. -- If all the remaining components fit in the 155 name area then we win. -splitLongPath :: FilePath -> ToTarPathResult -splitLongPath path = case reverse (FilePath.Posix.splitPath path) of +splitLongPath :: PosixPath -> ToTarPathResult +splitLongPath path = case reverse (PFP.splitPath path) of [] -> FileNameEmpty c : cs -> case packName nameMax (c :| cs) of - Nothing -> FileNameTooLong $ TarPath (toPosixString $ take 100 path) mempty - Just (name, []) -> FileNameOK $! TarPath (toPosixString name) mempty + Nothing -> FileNameTooLong $ TarPath (PS.take 100 path) PS.empty + Just (name, []) -> FileNameOK $! TarPath name PS.empty Just (name, first:rest) -> case packName prefixMax remainder of - Nothing -> FileNameTooLong $ TarPath (toPosixString $ take 100 path) mempty - Just (_ , _:_) -> FileNameTooLong $ TarPath (toPosixString $ take 100 path) mempty - Just (prefix, []) -> FileNameOK $! TarPath (toPosixString name) (toPosixString prefix) + Nothing -> FileNameTooLong $ TarPath (PS.take 100 path) PS.empty + Just (_ , _:_) -> FileNameTooLong $ TarPath (PS.take 100 path) PS.empty + Just (prefix, []) -> FileNameOK $! TarPath name prefix where -- drop the '/' between the name and prefix: - remainder = init first :| rest + remainder = PS.init first :| rest where nameMax, prefixMax :: Int nameMax = 100 prefixMax = 155 - packName :: Int -> NonEmpty FilePath -> Maybe (FilePath, [FilePath]) + packName :: Int -> NonEmpty PosixPath -> Maybe (PosixPath, [PosixPath]) packName maxLen (c :| cs) | n > maxLen = Nothing | otherwise = Just (packName' maxLen n [c] cs) - where n = length c + where n = PS.length c - packName' :: Int -> Int -> [FilePath] -> [FilePath] -> (FilePath, [FilePath]) + packName' :: Int -> Int -> [PosixPath] -> [PosixPath] -> (PosixPath, [PosixPath]) packName' maxLen n ok (c:cs) | n' <= maxLen = packName' maxLen n' (c:ok) cs - where n' = n + length c - packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs) + where n' = n + PS.length c + packName' _ _ ok cs = (PFP.joinPath ok, cs) -- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and -- 'HardLink' entry types. -- -newtype LinkTarget = LinkTarget PosixString +newtype LinkTarget = LinkTarget PosixPath deriving (Eq, Ord, Show) instance NFData LinkTarget where rnf (LinkTarget bs) = rnf bs --- | Convert a native 'FilePath' to a tar 'LinkTarget'. +-- | Convert a native 'OsPath' to a tar 'LinkTarget'. -- string is longer than 100 characters or if it contains non-portable -- characters. -toLinkTarget :: FilePath -> Maybe LinkTarget -toLinkTarget path - | length path <= 100 = do - target <- toLinkTarget' path - Just $! LinkTarget (toPosixString target) - | otherwise = Nothing - -data LinkTargetException = IsAbsolute FilePath - | TooLong FilePath +toLinkTarget :: HasCallStack => OsPath -> Either LinkTargetException LinkTarget +toLinkTarget osPath = + let path = toFSPosixPath' osPath + in if | PFP.isAbsolute path -> Left (IsAbsolute osPath) + | PS.length path <= 100 -> Right $ LinkTarget path + | otherwise -> Left (TooLong osPath) + +data LinkTargetException = IsAbsolute OsPath + | TooLong OsPath deriving (Show,Typeable) instance Exception LinkTargetException where - displayException (IsAbsolute fp) = "Link target \"" <> fp <> "\" is unexpectedly absolute" + displayException (IsAbsolute fp) = "Link target \"" <> show fp <> "\" is unexpectedly absolute" displayException (TooLong _) = "The link target is too long" --- | Convert a native 'FilePath' to a unix filepath suitable for --- using as 'LinkTarget'. Does not error if longer than 100 characters. -toLinkTarget' :: FilePath -> Maybe FilePath -toLinkTarget' path - | FilePath.Native.isAbsolute path = Nothing - | otherwise = Just $ adjustDirectory $ FilePath.Posix.joinPath $ FilePath.Native.splitDirectories path - where - adjustDirectory | FilePath.Native.hasTrailingPathSeparator path - = FilePath.Posix.addTrailingPathSeparator - | otherwise = id - -- | Convert a tar 'LinkTarget' to a native 'FilePath'. -fromLinkTarget :: LinkTarget -> FilePath -fromLinkTarget (LinkTarget pathbs) = fromFilePathToNative $ fromPosixString pathbs +fromLinkTarget :: HasCallStack => LinkTarget -> OsPath +#if defined(mingw32_HOST_OS) +fromLinkTarget linkTarget = + OsString $ fromLinkTargetToWindowsPath linkTarget +#else +fromLinkTarget linkTarget = + OsString $ fromLinkTargetToPosixPath linkTarget +#endif -- | Convert a tar 'LinkTarget' to a Unix\/POSIX 'FilePath' (@\'/\'@ path separators). -fromLinkTargetToPosixPath :: LinkTarget -> FilePath -fromLinkTargetToPosixPath (LinkTarget pathbs) = fromPosixString pathbs +fromLinkTargetToPosixPath :: LinkTarget -> PosixPath +fromLinkTargetToPosixPath (LinkTarget pathbs) = pathbs -- | Convert a tar 'LinkTarget' to a Windows 'FilePath' (@\'\\\\\'@ path separators). -fromLinkTargetToWindowsPath :: LinkTarget -> FilePath -fromLinkTargetToWindowsPath (LinkTarget pathbs) = - fromFilePathToWindowsPath $ fromPosixString pathbs - --- | Convert a unix FilePath to a native 'FilePath'. -fromFilePathToNative :: FilePath -> FilePath -fromFilePathToNative = - fromFilePathInternal FilePath.Posix.pathSeparator FilePath.Native.pathSeparator - --- | Convert a unix FilePath to a Windows 'FilePath'. -fromFilePathToWindowsPath :: FilePath -> FilePath -fromFilePathToWindowsPath = - fromFilePathInternal FilePath.Posix.pathSeparator FilePath.Windows.pathSeparator - -fromFilePathInternal :: Char -> Char -> FilePath -> FilePath -fromFilePathInternal fromSep toSep = adjustSeps - where - adjustSeps = if fromSep == toSep then id else - map $ \c -> if c == fromSep then toSep else c -{-# INLINE fromFilePathInternal #-} +fromLinkTargetToWindowsPath :: HasCallStack => LinkTarget -> WindowsPath +fromLinkTargetToWindowsPath (LinkTarget posix) = toWindowsPath posix + + -- -- * Entries type diff --git a/Codec/Archive/Tar/Unpack.hs b/Codec/Archive/Tar/Unpack.hs index 7ff458c..8fdd9cd 100644 --- a/Codec/Archive/Tar/Unpack.hs +++ b/Codec/Archive/Tar/Unpack.hs @@ -2,6 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE QuasiQuotes #-} {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} {-# OPTIONS_HADDOCK hide #-} @@ -31,14 +32,8 @@ import Codec.Archive.Tar.LongNames import Data.Bits ( testBit ) import Data.List (partition, nub) -import Data.Maybe ( fromMaybe ) -import qualified Data.ByteString.Char8 as Char8 import qualified Data.ByteString.Lazy as BS -import System.FilePath - ( () ) -import qualified System.FilePath as FilePath.Native - ( takeDirectory ) -import System.Directory +import System.Directory.OsPath ( createDirectoryIfMissing, copyFile, setPermissions, @@ -54,16 +49,25 @@ import System.Directory setOwnerSearchable ) import Control.Exception ( Exception, throwIO, handle ) -import System.IO ( stderr, hPutStr ) -import System.IO.Error ( ioeGetErrorType, isPermissionError ) +import System.IO.Error ( ioeGetErrorType ) import GHC.IO (unsafeInterleaveIO) import Data.Foldable (traverse_) -import GHC.IO.Exception (IOErrorType(InappropriateType, IllegalOperation, PermissionDenied, InvalidArgument)) +import GHC.IO.Exception (IOErrorType(IllegalOperation, PermissionDenied, InvalidArgument)) import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) import Control.Exception as Exception ( catch, SomeException(..) ) +import System.OsPath (OsPath) +import System.OsPath.Posix (PosixPath) + +import qualified System.OsPath as OSP +import qualified System.File.OsPath as OSP + +import qualified System.OsString as OS +import qualified System.OsString.Posix as PS + + -- | Create local files and directories based on the entries of a tar archive. -- -- This is a portable implementation of unpacking suitable for portable @@ -84,7 +88,7 @@ import Control.Exception as Exception -- unpack :: Exception e - => FilePath + => OsPath -- ^ Base directory -> Entries e -- ^ Entries to upack @@ -103,9 +107,9 @@ unpack = unpackAndCheck (fmap SomeException . checkEntrySecurity) -- @since 0.6.0.0 unpackAndCheck :: Exception e - => (GenEntry FilePath FilePath -> Maybe SomeException) + => (GenEntry PosixPath PosixPath -> Maybe SomeException) -- ^ Checks to run on each entry before unpacking - -> FilePath + -> OsPath -- ^ Base directory -> Entries e -- ^ Entries to upack @@ -123,11 +127,11 @@ unpackAndCheck secCB baseDir entries = do -- files all over the place. unpackEntries :: Exception e - => [(FilePath, FilePath, Bool)] + => [(PosixPath, PosixPath, Bool)] -- ^ links (path, link, isHardLink) - -> GenEntries FilePath FilePath (Either e DecodeLongNamesError) + -> GenEntries PosixPath PosixPath (Either e DecodeLongNamesError) -- ^ entries - -> IO [(FilePath, FilePath, Bool)] + -> IO [(PosixPath, PosixPath, Bool)] unpackEntries _ (Fail err) = either throwIO throwIO err unpackEntries links Done = return links unpackEntries links (Next entry es) = do @@ -154,42 +158,49 @@ unpackAndCheck secCB baseDir entries = do BlockDevice{} -> unpackEntries links es NamedPipe -> unpackEntries links es - extractFile permissions (fromFilePathToNative -> path) content mtime = do + extractFile :: Permissions -> PosixPath -> BS.ByteString -> EpochTime -> IO () + extractFile permissions path' content mtime = do + let path = fromPosixPath path' + let absDir = baseDir OSP. OSP.takeDirectory path + let absPath = baseDir OSP. path + -- Note that tar archives do not make sure each directory is created -- before files they contain, indeed we may have to create several -- levels of directory. createDirectoryIfMissing True absDir - BS.writeFile absPath content + OSP.writeFile absPath content setOwnerPermissions absPath permissions setModTime absPath mtime - where - absDir = baseDir FilePath.Native.takeDirectory path - absPath = baseDir path - extractDir (fromFilePathToNative -> path) mtime = do + extractDir :: PosixPath -> EpochTime -> IO () + extractDir path' mtime = do + let path = fromPosixPath path' + let absPath = baseDir OSP. path createDirectoryIfMissing True absPath setModTime absPath mtime - where - absPath = baseDir path - saveLink isHardLink (fromFilePathToNative -> path) (fromFilePathToNative -> link) links - = seq (length path) - $ seq (length link) + saveLink :: Bool -> PosixPath -> PosixPath -> [(PosixPath, PosixPath, Bool)] -> [(PosixPath, PosixPath, Bool)] + saveLink isHardLink path link links + = seq (PS.length path) + $ seq (PS.length link) $ (path, link, isHardLink):links -- for hardlinks, we just copy - handleHardLinks = mapM_ $ \(relPath, relLinkTarget, _) -> - let absPath = baseDir relPath + handleHardLinks :: [(PosixPath, PosixPath, Bool)] -> IO () + handleHardLinks = mapM_ $ \(relPath', relLinkTarget', _) -> do + let relPath = fromPosixPath relPath' + let relLinkTarget = fromPosixPath relLinkTarget' + let absPath = baseDir OSP. relPath -- hard links link targets are always "absolute" paths in -- the context of the tar root - absTarget = baseDir relLinkTarget + absTarget = baseDir OSP. relLinkTarget -- we don't expect races here, since we should be the -- only process unpacking the tar archive and writing to -- the destination - in doesDirectoryExist absTarget >>= \case - True -> copyDirectoryRecursive absTarget absPath - False -> copyFile absTarget absPath + doesDirectoryExist absTarget >>= \case + True -> copyDirectoryRecursive absTarget absPath + False -> copyFile absTarget absPath -- For symlinks, we first try to recreate them and if that fails -- with 'IllegalOperation', 'PermissionDenied' or 'InvalidArgument', @@ -197,19 +208,22 @@ unpackAndCheck secCB baseDir entries = do -- This error handling isn't too fine grained and maybe should be -- platform specific, but this way it might catch erros on unix even on -- FAT32 fuse mounted volumes. - handleSymlinks = mapM_ $ \(relPath, relLinkTarget, _) -> - let absPath = baseDir relPath + handleSymlinks :: [(PosixPath, PosixPath, Bool)] -> IO () + handleSymlinks = mapM_ $ \(relPath', relLinkTarget', _) -> do + let relPath = fromPosixPath relPath' + let relLinkTarget = fromPosixPath relLinkTarget' + let absPath = baseDir OSP. relPath -- hard links link targets are always "absolute" paths in -- the context of the tar root - absTarget = FilePath.Native.takeDirectory absPath relLinkTarget + absTarget = OSP.takeDirectory absPath OSP. relLinkTarget -- we don't expect races here, since we should be the -- only process unpacking the tar archive and writing to -- the destination - in doesDirectoryExist absTarget >>= \case - True -> handleSymlinkError (copyDirectoryRecursive absTarget absPath) - $ createDirectoryLink relLinkTarget absPath - False -> handleSymlinkError (copyFile absTarget absPath) - $ createFileLink relLinkTarget absPath + doesDirectoryExist absTarget >>= \case + True -> handleSymlinkError (copyDirectoryRecursive absTarget absPath) + $ createDirectoryLink relLinkTarget absPath + False -> handleSymlinkError (copyFile absTarget absPath) + $ createFileLink relLinkTarget absPath where handleSymlinkError action = @@ -223,7 +237,7 @@ unpackAndCheck secCB baseDir entries = do -- | Recursively copy the contents of one directory to another path. -- -- This is a rip-off of Cabal library. -copyDirectoryRecursive :: FilePath -> FilePath -> IO () +copyDirectoryRecursive :: OsPath -> OsPath -> IO () copyDirectoryRecursive srcDir destDir = do srcFiles <- getDirectoryContentsRecursive srcDir copyFilesWith copyFile destDir [ (srcDir, f) @@ -231,17 +245,17 @@ copyDirectoryRecursive srcDir destDir = do where -- | Common implementation of 'copyFiles', 'installOrdinaryFiles', -- 'installExecutableFiles' and 'installMaybeExecutableFiles'. - copyFilesWith :: (FilePath -> FilePath -> IO ()) - -> FilePath -> [(FilePath, FilePath)] -> IO () + copyFilesWith :: (OsPath -> OsPath -> IO ()) + -> OsPath -> [(OsPath, OsPath)] -> IO () copyFilesWith doCopy targetDir srcFiles = do -- Create parent directories for everything - let dirs = map (targetDir ) . nub . map (FilePath.Native.takeDirectory . snd) $ srcFiles + let dirs = map (targetDir OSP.) . nub . map (OSP.takeDirectory . snd) $ srcFiles traverse_ (createDirectoryIfMissing True) dirs -- Copy all the files - sequence_ [ let src = srcBase srcFile - dest = targetDir srcFile + sequence_ [ let src = srcBase OSP. srcFile + dest = targetDir OSP. srcFile in doCopy src dest | (srcBase, srcFile) <- srcFiles ] @@ -251,13 +265,13 @@ copyDirectoryRecursive srcDir destDir = do -- parent directories. The list is generated lazily so is not well defined if -- the source directory structure changes before the list is used. -- - getDirectoryContentsRecursive :: FilePath -> IO [FilePath] - getDirectoryContentsRecursive topdir = recurseDirectories [""] + getDirectoryContentsRecursive :: OsPath -> IO [OsPath] + getDirectoryContentsRecursive topdir = recurseDirectories [[OS.osstr||]] where - recurseDirectories :: [FilePath] -> IO [FilePath] + recurseDirectories :: [OsPath] -> IO [OsPath] recurseDirectories [] = return [] recurseDirectories (dir:dirs) = unsafeInterleaveIO $ do - (files, dirs') <- collect [] [] =<< listDirectory (topdir dir) + (files, dirs') <- collect [] [] =<< listDirectory (topdir OSP. dir) files' <- recurseDirectories (dirs' ++ dirs) return (files ++ files') @@ -265,13 +279,13 @@ copyDirectoryRecursive srcDir destDir = do collect files dirs' [] = return (reverse files ,reverse dirs') collect files dirs' (entry:entries) = do - let dirEntry = dir entry - isDirectory <- doesDirectoryExist (topdir dirEntry) + let dirEntry = dir OSP. entry + isDirectory <- doesDirectoryExist (topdir OSP. dirEntry) if isDirectory then collect files (dirEntry:dirs') entries else collect (dirEntry:files) dirs' entries -setModTime :: FilePath -> EpochTime -> IO () +setModTime :: OsPath -> EpochTime -> IO () setModTime path t = setModificationTime path (posixSecondsToUTCTime (fromIntegral t)) `Exception.catch` \e -> case ioeGetErrorType e of @@ -281,7 +295,7 @@ setModTime path t = InvalidArgument -> return () _ -> throwIO e -setOwnerPermissions :: FilePath -> Permissions -> IO () +setOwnerPermissions :: OsPath -> Permissions -> IO () setOwnerPermissions path permissions = setPermissions path ownerPermissions where diff --git a/Codec/Archive/Tar/Write.hs b/Codec/Archive/Tar/Write.hs index 3f5e9f1..ab598ae 100644 --- a/Codec/Archive/Tar/Write.hs +++ b/Codec/Archive/Tar/Write.hs @@ -18,18 +18,18 @@ import Codec.Archive.Tar.PackAscii import Codec.Archive.Tar.Types import Data.Bits -import Data.Char (chr,ord) +import Data.Char (chr) import Data.Int -import Data.List (foldl') -import Data.Monoid (mempty) import Numeric (showOct) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS -import qualified Data.ByteString.Lazy.Char8 as LBS.Char8 -import "os-string" System.OsString.Posix (PosixString) -import qualified "os-string" System.OsString.Posix as PS +import Data.ByteString.Internal (c2w) + +import qualified System.OsString.Posix as PS + + -- | Create the external representation of a tar archive by serialising a list -- of tar entries. @@ -60,16 +60,16 @@ putEntry entry = case entryContent entry of where paddingSize = fromIntegral (negate size `mod` 512) putHeader :: Entry -> LBS.ByteString -putHeader entry = - LBS.Char8.pack - $ take 148 block - ++ putOct 7 checksum - ++ ' ' : drop 156 block +putHeader entry = LBS.fromStrict $ + BS.take 148 block + <> putOct 7 checksum + <> BS.Char8.singleton ' ' + <> BS.drop 156 block where block = putHeaderNoChkSum entry - checksum = foldl' (\x y -> x + ord y) 0 block + checksum = BS.foldl' (\x y -> x + fromIntegral y) (0 :: Int) block -putHeaderNoChkSum :: Entry -> String +putHeaderNoChkSum :: Entry -> BS.ByteString putHeaderNoChkSum Entry { entryTarPath = TarPath name prefix, entryContent = content, @@ -79,40 +79,40 @@ putHeaderNoChkSum Entry { entryFormat = format } = - concat - [ putPosixString 100 name + BS.concat + [ putPString 100 name , putOct 8 permissions , putOct 8 $ ownerId ownership , putOct 8 $ groupId ownership , numField 12 contentSize , putOct 12 modTime - , replicate 8 ' ' -- dummy checksum - , putChar8 typeCode - , putPosixString 100 linkTarget - ] ++ + , BS.replicate 8 (c2w ' ') -- dummy checksum + , putChar8' typeCode + , putPString 100 linkTarget + ] <> case format of - V7Format -> - replicate 255 '\NUL' - UstarFormat -> concat - [ putBString 8 ustarMagic - , putString 32 $ ownerName ownership - , putString 32 $ groupName ownership - , putOct 8 deviceMajor - , putOct 8 deviceMinor - , putPosixString 155 prefix - , replicate 12 '\NUL' - ] - GnuFormat -> concat - [ putBString 8 gnuMagic - , putString 32 $ ownerName ownership - , putString 32 $ groupName ownership - , putGnuDev 8 deviceMajor - , putGnuDev 8 deviceMinor - , putPosixString 155 prefix - , replicate 12 '\NUL' - ] + V7Format -> + BS.replicate 255 0 + UstarFormat -> BS.concat + [ putBString 8 ustarMagic + , putPString 32 $ ownerName ownership + , putPString 32 $ groupName ownership + , putOct 8 deviceMajor + , putOct 8 deviceMinor + , putPString 155 prefix + , BS.replicate 12 0 + ] + GnuFormat -> BS.concat + [ putBString 8 gnuMagic + , putPString 32 $ ownerName ownership + , putPString 32 $ groupName ownership + , putGnuDev 8 deviceMajor + , putGnuDev 8 deviceMinor + , putPString 155 prefix + , BS.replicate 12 0 + ] where - numField :: FieldWidth -> Int64 -> String + numField :: FieldWidth -> Int64 -> BS.ByteString numField w n | n >= 0 && n < 1 `shiftL` (3 * (w - 1)) = putOct w n @@ -133,7 +133,7 @@ putHeaderNoChkSum Entry { putGnuDev w n = case content of CharacterDevice _ _ -> putOct w n BlockDevice _ _ -> putOct w n - _ -> replicate w '\NUL' + _ -> BS.replicate w 0 ustarMagic, gnuMagic :: BS.ByteString ustarMagic = BS.Char8.pack "ustar\NUL00" @@ -143,27 +143,25 @@ gnuMagic = BS.Char8.pack "ustar \NUL" type FieldWidth = Int -putBString :: FieldWidth -> BS.ByteString -> String -putBString n s = BS.Char8.unpack (BS.take n s) ++ replicate (n - BS.length s) '\NUL' +putBString :: FieldWidth -> BS.ByteString -> BS.ByteString +putBString n s = BS.take n s <> BS.replicate (n - BS.length s) 0 -putPosixString :: FieldWidth -> PosixString -> String -putPosixString n s = fromPosixString (PS.take n s) ++ replicate (n - PS.length s) '\NUL' +putPString :: FieldWidth -> PS.PosixString -> BS.ByteString +putPString n s = (posixToByteString $ PS.take n s) <> BS.replicate (n - PS.length s) 0 -putString :: FieldWidth -> String -> String -putString n s = take n s ++ replicate (n - length s) '\NUL' - -{-# SPECIALISE putLarge :: FieldWidth -> Int64 -> String #-} -putLarge :: (Bits a, Integral a) => FieldWidth -> a -> String -putLarge n0 x0 = '\x80' : reverse (go (n0-1) x0) +{-# SPECIALISE putLarge :: FieldWidth -> Int64 -> BS.ByteString #-} +putLarge :: (Bits a, Integral a) => FieldWidth -> a -> BS.ByteString +putLarge n0 x0 = BS.Char8.pack $ '\x80' : reverse (go (n0-1) x0) where go 0 _ = [] go n x = chr (fromIntegral (x .&. 0xff)) : go (n-1) (x `shiftR` 8) -putOct :: (Integral a, Show a) => FieldWidth -> a -> String +putOct :: (Integral a, Show a) => FieldWidth -> a -> BS.ByteString putOct n x = - let octStr = take (n-1) $ showOct x "" - in replicate (n - length octStr - 1) '0' - ++ octStr - ++ putChar8 '\NUL' + let octStr = BS.Char8.pack $ take (n-1) $ showOct x "" + in BS.replicate (n - BS.length octStr - 1) (c2w '0') + <> octStr + <> BS.singleton 0 + +putChar8' :: Char -> BS.ByteString +putChar8' c = BS.Char8.pack [c] -putChar8 :: Char -> String -putChar8 c = [c] diff --git a/bench/Main.hs b/bench/Main.hs index fc7ea4d..36f2258 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -10,6 +10,8 @@ import System.Directory import System.Environment import System.IO.Temp +import qualified System.OsPath as OSP + import Test.Tasty.Bench main = defaultMain benchmarks @@ -29,7 +31,9 @@ benchmarks = bench "index rebuild" (nf (TarIndex.finalise . TarIndex.unfinalise) entries) , env loadTarEntries $ \entries -> - bench "unpack" (nfIO $ withSystemTempDirectory "tar-bench" $ \baseDir -> Tar.unpack baseDir entries) + bench "unpack" (nfIO $ withSystemTempDirectory "tar-bench" $ \baseDir' -> do + baseDir <- OSP.encodeFS baseDir' + Tar.unpack baseDir entries) ] loadTarFile :: IO BS.ByteString diff --git a/cabal.project b/cabal.project index cbde73f..0329868 100644 --- a/cabal.project +++ b/cabal.project @@ -1 +1,7 @@ packages: . htar + +if (os(windows)) + source-repository-package + type: git + location: https://github.com/haskell/win32.git + tag: 86e2737e1c2a668168ba8497c932a058c5c9a600 diff --git a/htar/htar.cabal b/htar/htar.cabal index dde578c..70c1650 100644 --- a/htar/htar.cabal +++ b/htar/htar.cabal @@ -27,12 +27,12 @@ executable htar ghc-options: -Wall -rtsopts build-depends: base >= 4.9 && < 5, - time >= 1.1, - directory >= 1.0, - filepath >= 1.0, bytestring >= 0.9, - tar >= 0.4.2, - zlib >= 0.4 && < 0.7, bzlib >= 0.4 && < 0.7, - time >= 1.5 + directory >= 1.3.8.0 && < 1.4, + filepath >= 1.5 &&< 1.6, + tar >= 0.4.2, + time >= 1.1, + time >= 1.5, + zlib >= 0.4 && < 0.7 diff --git a/htar/htar.hs b/htar/htar.hs index d12c2f1..5b2df3d 100644 --- a/htar/htar.hs +++ b/htar/htar.hs @@ -19,17 +19,24 @@ import System.IO (hPutStrLn, stderr) import Data.Time (formatTime, defaultTimeLocale) import Data.Time.Clock.POSIX (posixSecondsToUTCTime) +import System.OsPath.Posix (PosixPath) +import qualified System.OsPath as OSP + + main :: IO () main = do (opts, files) <- parseOptions =<< getArgs main' opts files main' :: Options -> [FilePath] -> IO () -main' (Options { optFile = file, - optDir = dir, +main' (Options { optFile = file', + optDir = dir', optAction = action, optCompression = compression, - optVerbosity = verbosity }) files = + optVerbosity = verbosity }) files' = do + file <- OSP.encodeFS file' + dir <- OSP.encodeFS dir' + files <- mapM OSP.encodeFS files' case action of NoAction -> die ["No action given. Specify one of -c, -t or -x."] Help -> printUsage @@ -39,13 +46,13 @@ main' (Options { optFile = file, List -> printEntries . Tar.read . decompress compression =<< input Append | compression /= None -> die ["Append cannot be used together with compression."] - | file == "-" + | file' == "-" -> die ["Append must be used on a file, not stdin/stdout."] | otherwise -> Tar.append file dir files where - input = if file == "-" then BS.getContents else BS.readFile file - output = if file == "-" then BS.putStr else BS.writeFile file + input = if file' == "-" then BS.getContents else BS.readFile file' + output = if file' == "-" then BS.putStr else BS.writeFile file' printEntries :: Tar.Entries Tar.FormatError -> IO () printEntries = Tar.foldEntries (\entry rest -> printEntry entry >> rest) @@ -72,16 +79,16 @@ data Verbosity = Verbose | Concise ------------------------ -- List archive contents -entryInfo :: Verbosity -> Tar.GenEntry FilePath FilePath -> String +entryInfo :: Verbosity -> Tar.GenEntry PosixPath PosixPath -> String entryInfo Verbose = detailedInfo -entryInfo Concise = Tar.entryTarPath +entryInfo Concise = show . Tar.entryTarPath -detailedInfo :: Tar.GenEntry FilePath FilePath -> String +detailedInfo :: Tar.GenEntry PosixPath PosixPath -> String detailedInfo entry = unwords [ typeCode : permissions , justify 19 (owner ++ '/' : group) size , time - , name ++ link ] + , show name ++ link ] where typeCode = case Tar.entryContent entry of Tar.HardLink _ -> 'h' @@ -107,7 +114,7 @@ detailedInfo entry = group = nameOrID groupName groupId (Tar.Ownership ownerName groupName ownerId groupId) = Tar.entryOwnership entry - nameOrID n i = if null n then show i else n + nameOrID n i = if n == mempty then show i else show n size = case Tar.entryContent entry of Tar.NormalFile _ fileSize -> show fileSize _ -> "0" @@ -115,8 +122,8 @@ detailedInfo entry = time = formatEpochTime "%Y-%m-%d %H:%M" (Tar.entryTime entry) name = Tar.entryTarPath entry link = case Tar.entryContent entry of - Tar.HardLink l -> " link to " ++ l - Tar.SymbolicLink l -> " -> " ++ l + Tar.HardLink l -> " link to " ++ show l + Tar.SymbolicLink l -> " -> " ++ show l _ -> "" justify :: Int -> String -> String -> String @@ -214,3 +221,4 @@ die errs = do mapM_ (\e -> hPutStrLn stderr $ "htar: " ++ e) errs hPutStrLn stderr "Try `htar --help' for more information." exitFailure + diff --git a/tar.cabal b/tar.cabal index e320334..c1546e1 100644 --- a/tar.cabal +++ b/tar.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: tar -version: 0.6.0.0 +version: 0.7.0.0 license: BSD-3-Clause license-file: LICENSE author: Duncan Coutts @@ -27,10 +27,11 @@ extra-source-files: test/data/long-filepath.tar test/data/long-symlink.tar test/data/symlink.tar + test/data/unicode.tar extra-doc-files: changelog.md README.md tested-with: GHC==9.8.1, GHC==9.6.3, GHC==9.4.8, GHC==9.2.8, GHC==9.0.2, - GHC==8.10.7, GHC==8.8.4, GHC==8.6.5, GHC==8.4.4 + GHC==8.10.7, GHC==8.8.4 source-repository head type: git @@ -48,13 +49,15 @@ library library tar-internal default-language: Haskell2010 - build-depends: base >= 4.11 && < 5, + build-depends: base >=4.12.0.0 && < 5, array < 0.6, bytestring >= 0.10 && < 0.13, containers >= 0.2 && < 0.8, deepseq >= 1.1 && < 1.6, - directory >= 1.3.1 && < 1.4, - filepath < 1.6, + directory >= 1.3.8.2 && < 1.4, + exceptions, + filepath >= 1.5 &&< 1.6, + file-io >= 0.1.0.2 &&< 0.2, os-string >= 2.0 && < 2.1, time < 1.13, transformers < 0.7, @@ -89,14 +92,17 @@ library tar-internal test-suite properties type: exitcode-stdio-1.0 default-language: Haskell2010 - build-depends: base < 5, + build-depends: base >=4.12.0.0 && < 5, array, bytestring >= 0.10, containers, deepseq, - directory >= 1.2, + directory, + exceptions, file-embed, filepath, + file-io, + os-string, QuickCheck == 2.*, tar-internal, tasty >= 0.10 && <1.6, @@ -132,11 +138,11 @@ benchmark bench default-language: Haskell2010 hs-source-dirs: bench main-is: Main.hs - build-depends: base < 5, + build-depends: base >=4.12.0.0 && < 5, tar, bytestring >= 0.10, filepath, - directory >= 1.2, + directory, array, containers, deepseq, diff --git a/test/Codec/Archive/Tar/Index/IntTrie/Tests.hs b/test/Codec/Archive/Tar/Index/IntTrie/Tests.hs index 9325c3d..b776291 100644 --- a/test/Codec/Archive/Tar/Index/IntTrie/Tests.hs +++ b/test/Codec/Archive/Tar/Index/IntTrie/Tests.hs @@ -19,9 +19,8 @@ import Prelude hiding (lookup) import Codec.Archive.Tar.Index.IntTrie import qualified Data.Array.Unboxed as A -import Data.Char import Data.Function (on) -import Data.List hiding (lookup, insert) +import qualified Data.List as L import Data.Word (Word32) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS @@ -32,15 +31,11 @@ import Data.ByteString.Lazy.Builder as BS #endif #if MIN_VERSION_containers(0,5,0) import qualified Data.IntMap.Strict as IntMap -import Data.IntMap.Strict (IntMap) #else import qualified Data.IntMap as IntMap -import Data.IntMap (IntMap) #endif import Test.QuickCheck -import Control.Applicative ((<$>), (<*>)) -import Data.Bits import Data.Int -- Example mapping: @@ -183,7 +178,7 @@ prop_completions paths = [ case l of Entry v -> mkleaf k v Completions kls' -> mknode k (convertCompletions kls') - | (k, l) <- sortBy (compare `on` fst) kls ] + | (k, l) <- L.sortBy (compare `on` fst) kls ] prop_lookup_mono :: ValidPaths -> Property @@ -194,8 +189,8 @@ prop_completions_mono (ValidPaths paths) = prop_completions paths prop_construct_toList :: ValidPaths -> Property prop_construct_toList (ValidPaths paths) = - sortBy (compare `on` fst) (toList (construct paths)) - === sortBy (compare `on` fst) paths + L.sortBy (compare `on` fst) (toList (construct paths)) + === L.sortBy (compare `on` fst) paths prop_finalise_unfinalise :: ValidPaths -> Property prop_finalise_unfinalise (ValidPaths paths) = @@ -249,4 +244,4 @@ instance Arbitrary ValidPaths where nonEmpty = all (not . null . fst) isPrefixOfOther :: [Key] -> [Key] -> Bool -isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a +isPrefixOfOther a b = a `L.isPrefixOf` b || b `L.isPrefixOf` a diff --git a/test/Codec/Archive/Tar/Index/StringTable/Tests.hs b/test/Codec/Archive/Tar/Index/StringTable/Tests.hs index 353b06b..51907bd 100644 --- a/test/Codec/Archive/Tar/Index/StringTable/Tests.hs +++ b/test/Codec/Archive/Tar/Index/StringTable/Tests.hs @@ -12,13 +12,12 @@ import Prelude hiding (lookup) import Codec.Archive.Tar.Index.StringTable import Test.Tasty.QuickCheck -import Data.List hiding (lookup, insert) +import qualified Data.List as L import qualified Data.Array.Unboxed as A import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS #if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder) import Data.ByteString.Builder as BS -import Data.ByteString.Builder.Extra as BS (byteStringCopy) #else import Data.ByteString.Lazy.Builder as BS import Data.ByteString.Lazy.Builder.Extras as BS (byteStringCopy) @@ -56,7 +55,7 @@ prop_finalise_unfinalise strs = builder === unfinalise (finalise builder) where builder :: StringTableBuilder Int - builder = foldl' (\tbl s -> fst (insert s tbl)) empty strs + builder = L.foldl' (\tbl s -> fst (insert s tbl)) empty strs prop_serialise_deserialise :: [BS.ByteString] -> Property prop_serialise_deserialise strs = diff --git a/test/Codec/Archive/Tar/Index/Tests.hs b/test/Codec/Archive/Tar/Index/Tests.hs index a34e664..2d909f7 100644 --- a/test/Codec/Archive/Tar/Index/Tests.hs +++ b/test/Codec/Archive/Tar/Index/Tests.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- -- | @@ -22,41 +23,43 @@ module Codec.Archive.Tar.Index.Tests ( prop_finalise_unfinalise, ) where -import Codec.Archive.Tar (GenEntries(..), Entries, GenEntry, Entry, GenEntryContent(..)) -import Codec.Archive.Tar.Index.Internal (TarIndexEntry(..), TarIndex(..), IndexBuilder, TarEntryOffset(..)) +import Codec.Archive.Tar.PackAscii +import Codec.Archive.Tar (GenEntries(..), Entries, Entry, GenEntryContent(..)) +import Codec.Archive.Tar.Index.Internal (TarIndexEntry(..), TarIndex(..), IndexBuilder, TarEntryOffset) import qualified Codec.Archive.Tar.Index.Internal as Tar import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie import qualified Codec.Archive.Tar.Index.IntTrie.Tests as IntTrie -import qualified Codec.Archive.Tar.Index.StringTable as StringTable import qualified Codec.Archive.Tar.Index.StringTable.Tests as StringTable import qualified Codec.Archive.Tar.Types as Tar import qualified Codec.Archive.Tar.Write as Tar import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import Data.Int -#if (MIN_VERSION_base(4,5,0)) -import Data.Monoid ((<>)) -#endif import qualified System.FilePath.Posix as FilePath import System.IO import Prelude hiding (lookup) import qualified Prelude import Test.QuickCheck -import Test.QuickCheck.Property (ioProperty) -import Control.Applicative ((<$>), (<*>)) import Control.Monad (unless) -import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf) +import Data.List (nub, sort) import Data.Maybe -import Data.Function (on) import Control.Exception (SomeException, try, throwIO) #ifdef MIN_VERSION_bytestring_handle import qualified Data.ByteString.Handle as HBS #endif +import System.OsString.Internal.Types (OsString(..)) +import qualified System.OsString.Posix as PS + +import qualified System.OsPath as OSP +import System.OsPath (OsPath, osp) +import System.OsPath.Posix (PosixPath, pstr) +import qualified System.OsPath.Posix as PFP + + -- Not quite the properties of a finite mapping because we also have lookups -- that result in completions. @@ -70,9 +73,9 @@ prop_lookup (ValidPaths paths) (NonEmptyFilePath p) = _ -> property False where index = construct paths - completions = [ head (FilePath.splitDirectories completion) + completions = [ head (PFP.splitDirectories completion) | (path,_) <- paths - , completion <- maybeToList $ stripPrefix (p ++ "/") path ] + , completion <- maybeToList $ PS.stripPrefix (p <> PS.singleton PFP.pathSeparator) path ] prop_toList :: ValidPaths -> Property prop_toList (ValidPaths paths) = @@ -91,8 +94,7 @@ prop_valid (ValidPaths paths) = where index@(TarIndex pathTable _ _) = construct paths - pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst) - paths + pathbits = concatMap (fmap posixToByteString . PFP.splitDirectories . fst) paths intpaths :: [([IntTrie.Key], IntTrie.Value)] intpaths = [ (map (\(Tar.PathComponentId n) -> IntTrie.Key (fromIntegral n)) cids, IntTrie.Value offset) | (path, (_size, offset)) <- paths @@ -116,13 +118,13 @@ prop_serialiseSize (ValidPaths paths) = where index = construct paths -newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show +newtype NonEmptyFilePath = NonEmptyFilePath PosixPath deriving Show instance Arbitrary NonEmptyFilePath where - arbitrary = NonEmptyFilePath . FilePath.joinPath + arbitrary = NonEmptyFilePath . fromJust . PFP.encodeUtf . FilePath.joinPath <$> listOf1 (elements ["a", "b", "c", "d"]) -newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show +newtype ValidPaths = ValidPaths [(PosixPath, (Int64, TarEntryOffset))] deriving Show instance Arbitrary ValidPaths where arbitrary = do @@ -131,7 +133,7 @@ instance Arbitrary ValidPaths where let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes return (ValidPaths (zip paths (zip sizes offsets))) where - arbitraryPath = FilePath.joinPath + arbitraryPath = fromJust . PFP.encodeUtf . FilePath.joinPath <$> listOf1 (elements ["a", "b", "c", "d"]) makeNoPrefix [] = [] makeNoPrefix (k:ks) @@ -139,13 +141,13 @@ instance Arbitrary ValidPaths where = k : makeNoPrefix ks | otherwise = makeNoPrefix ks - isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a + isPrefixOfOther a b = a `PS.isPrefixOf` b || b `PS.isPrefixOf` a blocks :: Int64 -> TarEntryOffset blocks size = fromIntegral (1 + ((size - 1) `div` 512)) -- Helper for bulk construction. -construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex +construct :: [(PosixPath, (Int64, TarEntryOffset))] -> TarIndex construct = either (const undefined) id . Tar.build @@ -153,24 +155,24 @@ construct = example0 :: Entries () example0 = - testEntry "foo-1.0/foo-1.0.cabal" 1500 -- at block 0 - `Next` testEntry "foo-1.0/LICENSE" 2000 -- at block 4 - `Next` testEntry "foo-1.0/Data/Foo.hs" 1000 -- at block 9 + testEntry [pstr|foo-1.0/foo-1.0.cabal|] 1500 -- at block 0 + `Next` testEntry [pstr|foo-1.0/LICENSE|] 2000 -- at block 4 + `Next` testEntry [pstr|foo-1.0/Data/Foo.hs|] 1000 -- at block 9 `Next` Done example1 :: Entries () example1 = - Next (testEntry "./" 1500) Done <> example0 + Next (testEntry [pstr|./|] 1500) Done <> example0 -testEntry :: FilePath -> Int64 -> Entry +testEntry :: PosixPath -> Int64 -> Entry testEntry name size = Tar.simpleEntry path (NormalFile mempty size) where - Right path = Tar.toTarPath False name + Right path = Tar.toTarPath False (OsString name) -- | Simple tar archive containing regular files only data SimpleTarArchive = SimpleTarArchive { simpleTarEntries :: Tar.Entries () - , simpleTarRaw :: [(FilePath, LBS.ByteString)] + , simpleTarRaw :: [(OsPath, LBS.ByteString)] , simpleTarBS :: LBS.ByteString } @@ -219,16 +221,16 @@ instance Arbitrary SimpleTarArchive where , simpleTarBS = Tar.write entries } where - mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)] + mkRaw :: Int -> Gen [(OsPath, LBS.ByteString)] mkRaw 0 = return [] mkRaw n = do -- Pick a size around 0, 1, or 2 block boundaries sz <- sized $ \n -> elements (take n fileSizes) bs <- LBS.pack `fmap` vectorOf sz arbitrary es <- mkRaw (n - 1) - return $ ("file" ++ show n, bs) : es + return $ ([osp|file|] <> fromJust (OSP.encodeUtf (show n)), bs) : es - mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry] + mkList :: [(OsPath, LBS.ByteString)] -> [Tar.Entry] mkList [] = [] mkList ((fp, bs):es) = entry : mkList es where diff --git a/test/Codec/Archive/Tar/Pack/Tests.hs b/test/Codec/Archive/Tar/Pack/Tests.hs index a8fd353..e1f9122 100644 --- a/test/Codec/Archive/Tar/Pack/Tests.hs +++ b/test/Codec/Archive/Tar/Pack/Tests.hs @@ -8,29 +8,36 @@ module Codec.Archive.Tar.Pack.Tests , unit_roundtrip_symlink , unit_roundtrip_long_symlink , unit_roundtrip_long_filepath + , unit_roundtrip_unicode ) where +import Data.Maybe import Control.DeepSeq -import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as B8 import Data.Char import Data.FileEmbed import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Pack as Pack -import Codec.Archive.Tar.Types (GenEntries(..), Entries, simpleEntry, toTarPath, GenEntry (entryTarPath)) +import Codec.Archive.Tar.Types (GenEntries(..), Entries, GenEntry (entryTarPath), toFSPosixPath') import qualified Codec.Archive.Tar.Unpack as Unpack -import qualified Codec.Archive.Tar.Write as Write import Control.Exception -import Data.List.NonEmpty (NonEmpty(..)) import GHC.IO.Encoding -import System.Directory +import System.Directory.OsPath import System.FilePath -import qualified System.FilePath.Posix as Posix import qualified System.Info import System.IO.Temp import System.IO.Unsafe import Test.Tasty.QuickCheck +import qualified System.OsString as OS + +import System.OsPath (OsPath) +import qualified System.OsPath as OSP +import qualified System.File.OsPath as OSP +import qualified System.OsString.Posix as PS +import qualified System.OsPath.Posix as PFP + supportsUnicode :: Bool supportsUnicode = unsafePerformIO $ do -- Normally getFileSystemEncoding returns a Unicode encoding, @@ -46,64 +53,67 @@ supportsUnicode = unsafePerformIO $ do -- pack and unpack; read back and compare results. prop_roundtrip :: [String] -> String -> Property prop_roundtrip xss cnt - | x : xs <- filter (not . null) $ map mkFilePath xss - = ioProperty $ withSystemTempDirectory "tar-test" $ \baseDir -> do + | x : xs <- filter (not . OS.null) $ map mkFilePath xss + = ioProperty $ withSystemTempDirectory "tar-test" $ \baseDir' -> do + baseDir <- OSP.encodeFS baseDir' file : dirs <- pure $ trimUpToMaxPathLength baseDir (x : xs) - let relDir = joinPath dirs - absDir = baseDir relDir - relFile = relDir file - absFile = absDir file - errMsg = "relDir = " ++ relDir ++ - "\nabsDir = " ++ absDir ++ - "\nrelFile = " ++ relFile ++ - "\nabsFile = " ++ absFile + let relDir = OSP.joinPath dirs + absDir = baseDir OSP. relDir + relFile = relDir OSP. file + absFile = absDir OSP. file + errMsg = "relDir = " ++ fromJust (OSP.decodeUtf relDir) ++ + "\nabsDir = " ++ fromJust (OSP.decodeUtf absDir) ++ + "\nrelFile = " ++ fromJust (OSP.decodeUtf relFile) ++ + "\nabsFile = " ++ fromJust (OSP.decodeUtf absFile) -- Not all filesystems allow paths to contain arbitrary Unicode. -- E. g., at the moment of writing Apple FS does not support characters -- introduced in Unicode 15.0. canCreateDirectory <- try (createDirectoryIfMissing True absDir) case canCreateDirectory of - Left (e :: IOException) -> discard + Left (_ :: IOException) -> discard Right () -> do - canWriteFile <- try (writeFile absFile cnt) + canWriteFile <- try (OSP.writeFile absFile $ B8.pack cnt) case canWriteFile of - Left (e :: IOException) -> discard + Left (_ :: IOException) -> discard Right () -> counterexample errMsg <$> do -- Forcing the result, otherwise lazy IO misbehaves. !entries <- Pack.pack baseDir [relFile] >>= evaluate . force let fileNames - = map (map (\c -> if c == Posix.pathSeparator then pathSeparator else c)) + = map (PS.map (\c -> if c == PFP.pathSeparator then PFP.pathSeparator else c)) $ Tar.foldEntries ((:) . entryTarPath) [] undefined -- decodeLongNames produces FilePath with POSIX path separators $ Tar.decodeLongNames $ foldr Next Done entries - if [relFile] /= fileNames then pure ([relFile] === fileNames) else do + let relFile' = toFSPosixPath' relFile + if [relFile'] /= fileNames then pure ([relFile'] === fileNames) else do -- Try hard to clean up removeFile absFile - writeFile absFile "" + OSP.writeFile absFile "" case dirs of [] -> pure () - d : _ -> removeDirectoryRecursive (baseDir d) + d : _ -> removeDirectoryRecursive (baseDir OSP. d) -- Unpack back Unpack.unpack baseDir (foldr Next Done entries :: Entries IOException) exist <- doesFileExist absFile if exist then do - cnt' <- readFile absFile >>= evaluate . force - pure $ cnt === cnt' + cnt' <- OSP.readFile absFile >>= evaluate . force + pure $ B8.pack cnt === cnt' else do -- Forcing the result, otherwise lazy IO misbehaves. recFiles <- Pack.getDirectoryContentsRecursive baseDir >>= evaluate . force - pure $ counterexample ("File " ++ absFile ++ " does not exist; instead found\n" ++ unlines recFiles) False + pure $ counterexample ("File " ++ fromJust (OSP.decodeUtf absFile) + ++ " does not exist; instead found\n" ++ unlines (fmap (fromJust . OSP.decodeUtf) recFiles)) False | otherwise = discard -mkFilePath :: String -> FilePath -mkFilePath xs = makeValid $ filter isGood $ +mkFilePath :: String -> OsPath +mkFilePath xs = fromJust $ OSP.encodeUtf $ makeValid $ filter isGood $ map (if supportsUnicode then id else chr . (`mod` 128) . ord) xs where isGood c @@ -112,24 +122,15 @@ mkFilePath xs = makeValid $ filter isGood $ && generalCategory c /= Surrogate && (supportsUnicode || isAscii c) -trimUpToMaxPathLength :: FilePath -> [FilePath] -> [FilePath] -trimUpToMaxPathLength baseDir = go (maxPathLength - utf8Length baseDir - 1) +trimUpToMaxPathLength :: OsPath -> [OsPath] -> [OsPath] +trimUpToMaxPathLength baseDir = go (maxPathLength - OS.length baseDir - 1) where - go :: Int -> [FilePath] -> [FilePath] - go cnt [] = [] + go :: Int -> [OsPath] -> [OsPath] + go _ [] = [] go cnt (x : xs) - | cnt < 4 = [] - | cnt <= utf8Length x = [take (cnt `quot` 4) x] - | otherwise = x : go (cnt - utf8Length x - 1) xs - -utf8Length :: String -> Int -utf8Length = sum . map charLength - where - charLength c - | c < chr 0x80 = 1 - | c < chr 0x800 = 2 - | c < chr 0x10000 = 3 - | otherwise = 4 + | cnt <= 0 = [] + | cnt <= OS.length x = [OS.take cnt x] + | otherwise = x : go (cnt - OS.length x - 1) xs maxPathLength :: Int maxPathLength = case System.Info.os of @@ -153,3 +154,10 @@ unit_roundtrip_long_symlink = let tar :: BL.ByteString = BL.fromStrict $(embedFile "test/data/long-symlink.tar") entries = Tar.foldEntries (:) [] (const []) (Tar.read tar) in Tar.write entries === tar + +unit_roundtrip_unicode :: Property +unit_roundtrip_unicode = + let tar :: BL.ByteString = BL.fromStrict $(embedFile "test/data/unicode.tar") + entries = Tar.foldEntries (:) [] (const []) (Tar.read tar) + in Tar.write entries === tar + diff --git a/test/Codec/Archive/Tar/Tests.hs b/test/Codec/Archive/Tar/Tests.hs index 000e6a3..e74859d 100644 --- a/test/Codec/Archive/Tar/Tests.hs +++ b/test/Codec/Archive/Tar/Tests.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- -- | @@ -25,6 +26,8 @@ import qualified Data.ByteString.Lazy as BL import Prelude hiding (read) import Test.Tasty.QuickCheck +import System.OsPath (osp) + prop_write_read_ustar :: [Entry] -> Property prop_write_read_ustar entries = foldr Next Done entries' === read (write entries') @@ -47,7 +50,7 @@ prop_large_filesize :: Word -> Property prop_large_filesize n = sz === sz' where sz = fromIntegral $ n * 1024 * 1024 * 128 - Right fn = toTarPath False "Large.file" + Right fn = toTarPath False [osp|Large.file|] entry = simpleEntry fn (NormalFile (BL.replicate sz 42) sz) -- Trim the tail so it does not blow up RAM tar = BL.take 2048 $ write [entry] diff --git a/test/Codec/Archive/Tar/Types/Tests.hs b/test/Codec/Archive/Tar/Types/Tests.hs index f193163..13e38b6 100644 --- a/test/Codec/Archive/Tar/Types/Tests.hs +++ b/test/Codec/Archive/Tar/Types/Tests.hs @@ -9,6 +9,8 @@ -- ----------------------------------------------------------------------------- {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE CPP #-} module Codec.Archive.Tar.Types.Tests ( limitToV7FormatCompat @@ -17,25 +19,26 @@ module Codec.Archive.Tar.Types.Tests , prop_fromTarPathToWindowsPath ) where -import Codec.Archive.Tar.PackAscii import Codec.Archive.Tar.Types +import GHC.Stack (HasCallStack) + +import Data.Maybe import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS -import qualified System.FilePath as FilePath.Native - ( joinPath, splitDirectories, addTrailingPathSeparator ) -import qualified System.FilePath.Posix as FilePath.Posix - ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator - , addTrailingPathSeparator ) -import qualified System.FilePath.Windows as FilePath.Windows - ( joinPath, splitDirectories, addTrailingPathSeparator ) - import Test.QuickCheck -import Control.Applicative ((<$>), (<*>), pure) import Data.Word (Word16) +import System.OsString.Internal.Types (OsString(..)) +import qualified System.OsString.Posix as PS + +import System.OsPath (OsPath) +import System.OsPath.Windows (WindowsPath) +import System.OsPath.Posix (PosixPath) +import qualified System.OsPath as OSP +import qualified System.OsPath.Posix as PFP + prop_fromTarPath :: TarPath -> Property prop_fromTarPath tp = fromTarPath tp === fromTarPathRef tp @@ -45,38 +48,24 @@ prop_fromTarPathToPosixPath tp = fromTarPathToPosixPath tp === fromTarPathToPosi prop_fromTarPathToWindowsPath :: TarPath -> Property prop_fromTarPathToWindowsPath tp = fromTarPathToWindowsPath tp === fromTarPathToWindowsPathRef tp -fromTarPathRef :: TarPath -> FilePath -fromTarPathRef (TarPath namebs prefixbs) = adjustDirectory $ - FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix - ++ FilePath.Posix.splitDirectories name - where - name = BS.Char8.unpack $ posixToByteString namebs - prefix = BS.Char8.unpack $ posixToByteString prefixbs - adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name - = FilePath.Native.addTrailingPathSeparator - | otherwise = id - -fromTarPathToPosixPathRef :: TarPath -> FilePath -fromTarPathToPosixPathRef (TarPath namebs prefixbs) = adjustDirectory $ - FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix - ++ FilePath.Posix.splitDirectories name - where - name = BS.Char8.unpack $ posixToByteString namebs - prefix = BS.Char8.unpack $ posixToByteString prefixbs - adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name - = FilePath.Posix.addTrailingPathSeparator - | otherwise = id - -fromTarPathToWindowsPathRef :: TarPath -> FilePath -fromTarPathToWindowsPathRef (TarPath namebs prefixbs) = adjustDirectory $ - FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix - ++ FilePath.Posix.splitDirectories name - where - name = BS.Char8.unpack $ posixToByteString namebs - prefix = BS.Char8.unpack $ posixToByteString prefixbs - adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name - = FilePath.Windows.addTrailingPathSeparator - | otherwise = id +fromTarPathRef :: TarPath -> OsPath +#if defined(mingw32_HOST_OS) +fromTarPathRef = OsString . fromTarPathToWindowsPathRef +#else +fromTarPathRef = OsString . fromTarPathToPosixPathRef +#endif + +fromTarPathToWindowsPathRef :: HasCallStack => TarPath -> WindowsPath +fromTarPathToWindowsPathRef tarPath = + let posix = fromTarPathToPosixPathRef tarPath + in toWindowsPath posix + +fromTarPathToPosixPathRef :: TarPath -> PosixPath +fromTarPathToPosixPathRef (TarPath name prefix) + | PS.null prefix = name + | PS.null name = prefix + | otherwise = prefix <> PS.cons PFP.pathSeparator name + instance (Arbitrary tarPath, Arbitrary linkTarget) => Arbitrary (GenEntry tarPath linkTarget) where arbitrary = do @@ -101,29 +90,33 @@ instance (Arbitrary tarPath, Arbitrary linkTarget) => Arbitrary (GenEntry tarPat instance Arbitrary TarPath where arbitrary = either error id . toTarPath False - . FilePath.Posix.joinPath + . OSP.joinPath + . fmap (fromJust . OSP.encodeUtf) <$> listOf1ToN (255 `div` 5) (elements (map (replicate 4) "abcd")) shrink = map (either error id . toTarPath False) - . map FilePath.Posix.joinPath + . map OSP.joinPath . filter (not . null) . shrinkList shrinkNothing - . FilePath.Posix.splitPath + . OSP.splitPath + . OsString . fromTarPathToPosixPath instance Arbitrary LinkTarget where - arbitrary = maybe (error "link target too large") id + arbitrary = either (const $ error "link target too large") id . toLinkTarget - . FilePath.Native.joinPath + . OSP.joinPath + . fmap (fromJust . OSP.encodeUtf) <$> listOf1ToN (100 `div` 5) (elements (map (replicate 4) "abcd")) - shrink = map (maybe (error "link target too large") id . toLinkTarget) - . map FilePath.Posix.joinPath + shrink = map (either (const $ error "link target too large") id . toLinkTarget) + . map OSP.joinPath . filter (not . null) . shrinkList shrinkNothing - . FilePath.Posix.splitPath + . OSP.splitPath + . OsString . fromLinkTargetToPosixPath @@ -174,12 +167,20 @@ instance Arbitrary BS.ByteString where arbitrary = fmap BS.pack arbitrary shrink = map BS.pack . shrink . BS.unpack +instance Arbitrary PS.PosixString where + arbitrary = fmap PS.pack arbitrary + shrink = map PS.pack . shrink . PS.unpack + +instance Arbitrary PS.PosixChar where + arbitrary = PS.unsafeFromChar <$> arbitrary + shrink = map PS.unsafeFromChar . shrink . PS.toChar + instance Arbitrary Ownership where arbitrary = Ownership <$> name <*> name <*> idno <*> idno where -- restrict user/group to posix ^[a-z][-a-z0-9]{0,30}$ - name = do + name = fromJust . PFP.encodeUtf <$> do first <- choose ('a', 'z') rest <- listOf0ToN 30 (oneof [choose ('a', 'z'), choose ('0', '9'), pure '-']) return $ first : rest @@ -216,8 +217,8 @@ limitToV7FormatCompat entry@Entry { entryFormat = V7Format } = other -> other, entryOwnership = (entryOwnership entry) { - groupName = "", - ownerName = "" + groupName = mempty, + ownerName = mempty }, entryTarPath = let TarPath name _prefix = entryTarPath entry diff --git a/test/Codec/Archive/Tar/Unpack/Tests.hs b/test/Codec/Archive/Tar/Unpack/Tests.hs index ece3c0f..4e26a6e 100644 --- a/test/Codec/Archive/Tar/Unpack/Tests.hs +++ b/test/Codec/Archive/Tar/Unpack/Tests.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE OverloadedStrings #-} module Codec.Archive.Tar.Unpack.Tests @@ -7,21 +8,21 @@ module Codec.Archive.Tar.Unpack.Tests import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Types as Tar import Codec.Archive.Tar.Types (GenEntries(..), Entries, GenEntry(..)) -import qualified Codec.Archive.Tar.Unpack as Unpack import Control.Exception import Data.Time.Clock import Data.Time.Clock.System -import System.Directory -import System.FilePath +import System.Directory.OsPath import System.IO.Temp import Test.Tasty.QuickCheck +import qualified System.OsPath as OSP case_modtime_1970 :: Property -case_modtime_1970 = ioProperty $ withSystemTempDirectory "tar-test" $ \baseDir -> do - let filename = "foo" +case_modtime_1970 = ioProperty $ withSystemTempDirectory "tar-test" $ \baseDir' -> do + baseDir <- OSP.encodeFS baseDir' + let filename = [OSP.osp|foo|] Right tarPath <- pure $ Tar.toTarPath False filename let entry = (Tar.fileEntry tarPath "bar") { entryTime = 0 } entries = Next entry Done :: Entries IOException Tar.unpack baseDir entries - modTime <- getModificationTime (baseDir filename) + modTime <- getModificationTime (baseDir OSP. filename) pure $ modTime === UTCTime systemEpochDay 0 diff --git a/test/Properties.hs b/test/Properties.hs index ba99810..1912333 100644 --- a/test/Properties.hs +++ b/test/Properties.hs @@ -67,6 +67,7 @@ main = adjustOption (\(QuickCheckMaxRatio n) -> QuickCheckMaxRatio (max n 100)) $ testProperty "roundtrip" Pack.prop_roundtrip, testProperty "symlink" Pack.unit_roundtrip_symlink, + testProperty "unicode" Pack.unit_roundtrip_unicode, testProperty "long filepath" Pack.unit_roundtrip_long_filepath, testProperty "long symlink" Pack.unit_roundtrip_long_symlink ] diff --git a/test/data/unicode.tar b/test/data/unicode.tar new file mode 100644 index 0000000..04c6519 Binary files /dev/null and b/test/data/unicode.tar differ