Skip to content

Commit

Permalink
Implement Unicode support by utilizing PosixString and friends
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Jan 6, 2024
1 parent b684654 commit f366d67
Show file tree
Hide file tree
Showing 20 changed files with 686 additions and 487 deletions.
27 changes: 15 additions & 12 deletions Codec/Archive/Tar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,12 @@ import Codec.Archive.Tar.Write (write)
import Control.Applicative ((<|>))
import Control.Exception (Exception, throw, catch, SomeException(..))
import qualified Data.ByteString.Lazy as BL
import System.IO (withFile, IOMode(..))
import System.IO (IOMode(..))
import Prelude hiding (read)

import System.OsPath (OsPath)
import qualified System.File.OsPath as OSP

-- | Create a new @\".tar\"@ file from a directory of files.
--
-- It is equivalent to calling the standard @tar@ program like so:
Expand Down Expand Up @@ -213,11 +216,11 @@ import Prelude hiding (read)
--
-- * @rwxr-xr-x@ for directories
--
create :: FilePath -- ^ Path of the \".tar\" file to write.
-> FilePath -- ^ Base directory
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
create :: OsPath -- ^ Path of the \".tar\" file to write.
-> OsPath -- ^ Base directory
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
-> IO ()
create tar base paths = BL.writeFile tar . write =<< pack base paths
create tar base paths = OSP.writeFile tar . write =<< pack base paths

-- | Extract all the files contained in a @\".tar\"@ file.
--
Expand Down Expand Up @@ -249,22 +252,22 @@ create tar base paths = BL.writeFile tar . write =<< pack base paths
-- containing entries that point outside of the tarball (either absolute paths
-- or relative paths) will be caught and an exception will be thrown.
--
extract :: FilePath -- ^ Destination directory
-> FilePath -- ^ Tarball
extract :: OsPath -- ^ Destination directory
-> OsPath -- ^ Tarball
-> IO ()
extract dir tar = unpack dir . read =<< BL.readFile tar
extract dir tar = unpack dir . read =<< OSP.readFile tar

-- | Append new entries to a @\".tar\"@ file from a directory of files.
--
-- This is much like 'create', except that all the entries are added to the
-- end of an existing tar file. Or if the file does not already exists then
-- it behaves the same as 'create'.
--
append :: FilePath -- ^ Path of the \".tar\" file to write.
-> FilePath -- ^ Base directory
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
append :: OsPath -- ^ Path of the \".tar\" file to write.
-> OsPath -- ^ Base directory
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
-> IO ()
append tar base paths =
withFile tar ReadWriteMode $ \hnd -> do
OSP.withFile tar ReadWriteMode $ \hnd -> do
_ <- hSeekEndEntryOffset hnd Nothing
BL.hPut hnd . write =<< pack base paths
153 changes: 97 additions & 56 deletions Codec/Archive/Tar/Check/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE MultiWayIf #-}
{-# OPTIONS_GHC -Wno-orphans #-}
{-# OPTIONS_HADDOCK hide #-}
-----------------------------------------------------------------------------
Expand Down Expand Up @@ -50,6 +52,17 @@ import qualified System.FilePath as FilePath.Native
import qualified System.FilePath.Windows as FilePath.Windows
import qualified System.FilePath.Posix as FilePath.Posix

import System.OsPath (OsPath)
import System.OsPath.Posix (PosixPath)
import qualified System.OsPath as OSP
import qualified System.OsPath.Posix as PFP
import qualified System.OsPath.Windows as WFP

import System.OsString.Posix (pstr)
import System.OsString (osstr)
import qualified System.OsString.Posix as PS
import qualified System.OsString.Windows as WS


--------------------------
-- Security
Expand Down Expand Up @@ -78,57 +91,79 @@ import qualified System.FilePath.Posix as FilePath.Posix
-- such as exhaustion of file handlers.
checkSecurity
:: Entries e
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) FileNameError)
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) FileNameError)
checkSecurity = checkEntries checkEntrySecurity . decodeLongNames

-- | Worker of 'Codec.Archive.Tar.Check.checkSecurity'.
--
-- @since 0.6.0.0
checkEntrySecurity :: GenEntry FilePath FilePath -> Maybe FileNameError
checkEntrySecurity :: GenEntry PosixPath PosixPath -> Maybe FileNameError
checkEntrySecurity e =
check (entryTarPath e) <|>
case entryContent e of
HardLink link ->
check link
SymbolicLink link ->
check (FilePath.Posix.takeDirectory (entryTarPath e) FilePath.Posix.</> link)
check (PFP.takeDirectory (entryTarPath e) PFP.</> link)
_ -> Nothing
where
checkPosix :: PosixPath -> Maybe FileNameError
checkPosix name
| FilePath.Posix.isAbsolute name
= Just $ AbsoluteFileName name
| not (FilePath.Posix.isValid name)
= Just $ InvalidFileName name
| not (isInsideBaseDir (FilePath.Posix.splitDirectories name))
= Just $ UnsafeLinkTarget name
| otherwise = Nothing

checkNative (fromFilePathToNative -> name)
| FilePath.Native.isAbsolute name || FilePath.Native.hasDrive name
| PFP.isAbsolute name
= Just $ AbsoluteFileName name
| not (FilePath.Native.isValid name)
| not (PFP.isValid name)
= Just $ InvalidFileName name
| not (isInsideBaseDir (FilePath.Native.splitDirectories name))
| not (isInsideBaseDir (PFP.splitDirectories name))
= Just $ UnsafeLinkTarget name
| otherwise = Nothing

check name = checkPosix name <|> checkNative (fromFilePathToNative name)

isInsideBaseDir :: [FilePath] -> Bool
checkNative :: PosixPath -> Maybe FileNameError
checkNative name'
| (Just name) <- fromPosixPath name' =
if | OSP.isAbsolute name || OSP.hasDrive name
-> Just $ AbsoluteFileName name'
| not (OSP.isValid name)
-> Just $ InvalidFileName name'
| not (isInsideBaseDir' (OSP.splitDirectories name))
-> Just $ UnsafeLinkTarget name'
| otherwise
-> Nothing
| otherwise = Just $ FileNameDecodingFailure name'

check name = checkPosix name <|> checkNative name

isInsideBaseDir :: [PosixPath] -> Bool
isInsideBaseDir = go 0
where
go :: Word -> [FilePath] -> Bool
go :: Word -> [PosixPath] -> Bool
go !_ [] = True
go 0 (x : _)
| x == [pstr|..|] = False
go lvl (x : xs)
| x == [pstr|..|] = go (lvl - 1) xs
go lvl (x : xs)
| x == [pstr|.|] = go lvl xs
go lvl (_ : xs) = go (lvl + 1) xs

isInsideBaseDir' :: [OsPath] -> Bool
isInsideBaseDir' = go 0
where
go :: Word -> [OsPath] -> Bool
go !_ [] = True
go 0 (".." : _) = False
go lvl (".." : xs) = go (lvl - 1) xs
go lvl ("." : xs) = go lvl xs
go 0 (x : _)
| x == [osstr|..|] = False
go lvl (x : xs)
| x == [osstr|..|] = go (lvl - 1) xs
go lvl (x : xs)
| x == [osstr|.|] = go lvl xs
go lvl (_ : xs) = go (lvl + 1) xs

-- | Errors arising from tar file names being in some way invalid or dangerous
data FileNameError
= InvalidFileName FilePath
| AbsoluteFileName FilePath
| UnsafeLinkTarget FilePath
= InvalidFileName PosixPath
| AbsoluteFileName PosixPath
| UnsafeLinkTarget PosixPath
| FileNameDecodingFailure PosixPath
-- ^ @since 0.6.0.0
deriving (Typeable)

Expand All @@ -142,6 +177,7 @@ showFileNameError mb_plat err = case err of
InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path
AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path
UnsafeLinkTarget path -> "Unsafe" ++ plat ++ " link target in tar archive: " ++ show path
FileNameDecodingFailure path -> "Decoding failure of path " ++ show path
where plat = maybe "" (' ':) mb_plat


Expand All @@ -167,17 +203,17 @@ showFileNameError mb_plat err = case err of
-- Not only it is faster, but also alleviates issues with lazy I/O
-- such as exhaustion of file handlers.
checkTarbomb
:: FilePath
:: PosixPath
-> Entries e
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) TarBombError)
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) TarBombError)
checkTarbomb expectedTopDir
= checkEntries (checkEntryTarbomb expectedTopDir)
. decodeLongNames

-- | Worker of 'checkTarbomb'.
--
-- @since 0.6.0.0
checkEntryTarbomb :: FilePath -> GenEntry FilePath linkTarget -> Maybe TarBombError
checkEntryTarbomb :: PosixPath -> GenEntry PosixPath linkTarget -> Maybe TarBombError
checkEntryTarbomb expectedTopDir entry = do
case entryContent entry of
-- Global extended header aka XGLTYPE aka pax_global_header
Expand All @@ -186,18 +222,18 @@ checkEntryTarbomb expectedTopDir entry = do
-- Extended header referring to the next file in the archive aka XHDTYPE
OtherEntryType 'x' _ _ -> Nothing
_ ->
case FilePath.Posix.splitDirectories (entryTarPath entry) of
case PFP.splitDirectories (entryTarPath entry) of
(topDir:_) | topDir == expectedTopDir -> Nothing
_ -> Just $ TarBombError expectedTopDir (entryTarPath entry)

-- | An error that occurs if a tar file is a \"tar bomb\" that would extract
-- files outside of the intended directory.
data TarBombError
= TarBombError
FilePath -- ^ Path inside archive.
PosixPath -- ^ Path inside archive.
--
-- @since 0.6.0.0
FilePath -- ^ Expected top directory.
PosixPath -- ^ Expected top directory.
deriving (Typeable)

instance Exception TarBombError
Expand Down Expand Up @@ -236,43 +272,45 @@ instance Show TarBombError where
-- such as exhaustion of file handlers.
checkPortability
:: Entries e
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) PortabilityError)
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) PortabilityError)
checkPortability = checkEntries checkEntryPortability . decodeLongNames

-- | Worker of 'checkPortability'.
--
-- @since 0.6.0.0
checkEntryPortability :: GenEntry FilePath linkTarget -> Maybe PortabilityError
checkEntryPortability :: GenEntry PosixPath linkTarget -> Maybe PortabilityError
checkEntryPortability entry
| entryFormat entry `elem` [V7Format, GnuFormat]
= Just $ NonPortableFormat (entryFormat entry)
| (Just windowsPath) <- toWindowsPath posixPath =
if | entryFormat entry `elem` [V7Format, GnuFormat]
-> Just $ NonPortableFormat (entryFormat entry)

| not (portableFileType (entryContent entry))
= Just NonPortableFileType
| not (portableFileType (entryContent entry))
-> Just NonPortableFileType

| not (all portableChar posixPath)
= Just $ NonPortableEntryNameChar posixPath
| not (PS.all portableChar posixPath)
-> Just $ NonPortableEntryNameChar posixPath

| not (FilePath.Posix.isValid posixPath)
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
| not (FilePath.Windows.isValid windowsPath)
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
| not (PFP.isValid posixPath)
-> Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
| not (WFP.isValid windowsPath)
-> Just $ NonPortableFileName "windows" (InvalidFileName posixPath)

| FilePath.Posix.isAbsolute posixPath
= Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath)
| FilePath.Windows.isAbsolute windowsPath
= Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath)
| PFP.isAbsolute posixPath
-> Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath)
| WFP.isAbsolute windowsPath
-> Just $ NonPortableFileName "windows" (AbsoluteFileName posixPath)

| any (=="..") (FilePath.Posix.splitDirectories posixPath)
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
| any (=="..") (FilePath.Windows.splitDirectories windowsPath)
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
| any (== [PS.pstr|..|]) (PFP.splitDirectories posixPath)
-> Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
| any (== [WS.pstr|..|]) (WFP.splitDirectories windowsPath)
-> Just $ NonPortableFileName "windows" (InvalidFileName posixPath)

| otherwise = Nothing
| otherwise
-> Nothing
| otherwise = Just $ NonPortableDecodingFailure posixPath

where
posixPath = entryTarPath entry
windowsPath = fromFilePathToWindowsPath posixPath
posixPath = entryTarPath entry

portableFileType ftype = case ftype of
NormalFile {} -> True
Expand All @@ -281,14 +319,15 @@ checkEntryPortability entry
Directory -> True
_ -> False

portableChar c = c <= '\127'
portableChar c = PS.toChar c <= '\127'

-- | Portability problems in a tar archive
data PortabilityError
= NonPortableFormat Format
| NonPortableFileType
| NonPortableEntryNameChar FilePath
| NonPortableEntryNameChar PosixPath
| NonPortableFileName PortabilityPlatform FileNameError
| NonPortableDecodingFailure PosixPath
deriving (Typeable)

-- | The name of a platform that portability issues arise from
Expand All @@ -306,6 +345,8 @@ instance Show PortabilityError where
= "Non-portable character in archive entry name: " ++ show posixPath
show (NonPortableFileName platform err)
= showFileNameError (Just platform) err
show (NonPortableDecodingFailure posixPath)
= "Decoding failure of path " ++ show posixPath

--------------------------
-- Utils
Expand Down
Loading

0 comments on commit f366d67

Please sign in to comment.