Skip to content

Commit

Permalink
Merge pull request haskell#10112 from mpickering/wip/10110
Browse files Browse the repository at this point in the history
perf: Group together packages by repo when verifying tarballs
  • Loading branch information
mergify[bot] authored Jun 16, 2024
2 parents 3169b87 + 7d46115 commit e1f73a4
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 52 deletions.
1 change: 1 addition & 0 deletions .hlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
- ignore: {name: "Use unwords"} # 8 hints
- ignore: {name: "Use void"} # 22 hints
- ignore: {name: "Use when"} # 1 hint
- ignore: {name: "Use uncurry"} # 1 hint

- arguments:
- --ignore-glob=Cabal-syntax/src/Distribution/Fields/Lexer.hs
Expand Down
97 changes: 62 additions & 35 deletions cabal-install/src/Distribution/Client/FetchUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ module Distribution.Client.FetchUtils
-- ** specifically for repo packages
, checkRepoTarballFetched
, fetchRepoTarball
, verifyFetchedTarball
, verifyFetchedTarballs

-- ** fetching packages asynchronously
, asyncFetchPackages
Expand Down Expand Up @@ -98,6 +98,7 @@ import System.IO
, openTempFile
)

import Control.Monad (forM)
import Distribution.Client.Errors
import qualified Hackage.Security.Client as Sec
import qualified Hackage.Security.Util.Checked as Sec
Expand Down Expand Up @@ -152,40 +153,66 @@ checkRepoTarballFetched repo pkgid = do
then return (Just file)
else return Nothing

verifyFetchedTarball :: Verbosity -> RepoContext -> Repo -> PackageId -> IO Bool
verifyFetchedTarball verbosity repoCtxt repo pkgid =
let file = packageFile repo pkgid
handleError :: IO Bool -> IO Bool
handleError act = do
res <- Safe.try act
case res of
Left e -> warn verbosity ("Error verifying fetched tarball " ++ file ++ ", will redownload: " ++ show (e :: SomeException)) >> pure False
Right b -> pure b
in handleError $ do
exists <- doesFileExist file
if not exists
then return True -- if the file does not exist, it vacuously passes validation, since it will be downloaded as necessary with what we will then check is a valid hash.
else case repo of
-- a secure repo has hashes we can compare against to confirm this is the correct file.
RepoSecure{} ->
repoContextWithSecureRepo repoCtxt repo $ \repoSecure ->
Sec.withIndex repoSecure $ \callbacks ->
let warnAndFail s = warn verbosity ("Fetched tarball " ++ file ++ " does not match server, will redownload: " ++ s) >> return False
in -- the do block in parens is due to dealing with the checked exceptions mechanism.
( do
fileInfo <- Sec.indexLookupFileInfo callbacks pkgid
sz <- Sec.FileLength . fromInteger <$> getFileSize file
if sz /= Sec.fileInfoLength (Sec.trusted fileInfo)
then warnAndFail "file length mismatch"
else do
res <- Sec.compareTrustedFileInfo (Sec.trusted fileInfo) <$> Sec.computeFileInfo (Sec.Path file :: Sec.Path Sec.Absolute)
if res
then pure True
else warnAndFail "file hash mismatch"
)
`Sec.catchChecked` (\(e :: Sec.InvalidPackageException) -> warnAndFail (show e))
`Sec.catchChecked` (\(e :: Sec.VerificationError) -> warnAndFail (show e))
_ -> pure True
verifyFetchedTarballs
:: Verbosity
-> RepoContext
-> Repo
-> [PackageId]
-> IO
( [ Either
(Repo, PackageId) -- Verified
(Repo, PackageId) -- unverified)
]
)
verifyFetchedTarballs verbosity repoCtxt repo pkgids =
-- Establish the context once per repo (see #10110), this codepath is important
-- to be fast as it can happen when no other building happens.
let establishContext k =
case repo of
RepoSecure{} ->
repoContextWithSecureRepo repoCtxt repo $ \repoSecure ->
Sec.withIndex repoSecure $ \callbacks -> k (Just callbacks)
_ -> k Nothing
in do
establishContext $ \mCallbacks ->
forM pkgids $ \pkgid -> do
let file = packageFile repo pkgid
res <- verifyFetchedTarball verbosity file mCallbacks pkgid
return $ if res then Left (repo, pkgid) else Right (repo, pkgid)

verifyFetchedTarball :: Verbosity -> FilePath -> Maybe Sec.IndexCallbacks -> PackageId -> IO Bool
verifyFetchedTarball verbosity file mCallbacks pkgid =
let
handleError :: IO Bool -> IO Bool
handleError act = do
res <- Safe.try act
case res of
Left e -> warn verbosity ("Error verifying fetched tarball " ++ file ++ ", will redownload: " ++ show (e :: SomeException)) >> pure False
Right b -> pure b
in
handleError $ do
exists <- doesFileExist file
if not exists
then return True -- if the file does not exist, it vacuously passes validation, since it will be downloaded as necessary with what we will then check is a valid hash.
else case mCallbacks of
-- a secure repo has hashes we can compare against to confirm this is the correct file.
Just callbacks ->
let warnAndFail s = warn verbosity ("Fetched tarball " ++ file ++ " does not match server, will redownload: " ++ s) >> return False
in -- the do block in parens is due to dealing with the checked exceptions mechanism.
( do
fileInfo <- Sec.indexLookupFileInfo callbacks pkgid
sz <- Sec.FileLength . fromInteger <$> getFileSize file
if sz /= Sec.fileInfoLength (Sec.trusted fileInfo)
then warnAndFail "file length mismatch"
else do
res <- Sec.compareTrustedFileInfo (Sec.trusted fileInfo) <$> Sec.computeFileInfo (Sec.Path file :: Sec.Path Sec.Absolute)
if res
then pure True
else warnAndFail "file hash mismatch"
)
`Sec.catchChecked` (\(e :: Sec.InvalidPackageException) -> warnAndFail (show e))
`Sec.catchChecked` (\(e :: Sec.VerificationError) -> warnAndFail (show e))
_ -> pure True

-- | Fetch a package if we don't have it already.
fetchPackage
Expand Down
38 changes: 21 additions & 17 deletions cabal-install/src/Distribution/Client/ProjectPlanning.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ import Distribution.Client.SetupWrapper
import Distribution.Client.Store
import Distribution.Client.Targets (userToPackageConstraint)
import Distribution.Client.Types
import Distribution.Client.Utils (incVersion)
import Distribution.Client.Utils (concatMapM, incVersion)

import qualified Distribution.Client.BuildReports.Storage as BuildReports
import qualified Distribution.Client.IndexUtils as IndexUtils
Expand Down Expand Up @@ -206,7 +206,7 @@ import qualified Distribution.Solver.Types.ComponentDeps as CD
import qualified Distribution.Compat.Graph as Graph

import Control.Exception (assert)
import Control.Monad (forM, sequence)
import Control.Monad (sequence)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.State as State (State, execState, runState, state)
import Data.Foldable (fold)
Expand Down Expand Up @@ -1069,25 +1069,29 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
-- Tarballs from repositories, either where the repository provides
-- hashes as part of the repo metadata, or where we will have to
-- download and hash the tarball.
repoTarballPkgsWithMetadataUnvalidated :: [(PackageId, Repo)]
repoTarballPkgsWithoutMetadata :: [(PackageId, Repo)]
repoTarballPkgsWithMetadataUnvalidated :: [(Repo, [PackageId])]
repoTarballPkgsWithoutMetadata :: [(Repo, PackageId)]
( repoTarballPkgsWithMetadataUnvalidated
, repoTarballPkgsWithoutMetadata
) =
partitionEithers
[ case repo of
RepoSecure{} -> Left (pkgid, repo)
_ -> Right (pkgid, repo)
RepoSecure{} -> Left (repo, [pkgid])
_ -> Right (repo, pkgid)
| (pkgid, RepoTarballPackage repo _ _) <- allPkgLocations
]

-- Group up the unvalidated packages by repo so we only read the remote
-- index once per repo (see #10110). The packages are ungrouped here and then regrouped
-- below, it would be better in future to refactor this whole code path so that we don't
-- repeatedly group and ungroup.
repoTarballPkgsWithMetadataUnvalidatedMap = Map.fromListWith (++) repoTarballPkgsWithMetadataUnvalidated

(repoTarballPkgsWithMetadata, repoTarballPkgsToDownloadWithMeta) <- fmap partitionEithers $
liftIO $
withRepoCtx $ \repoctx -> forM repoTarballPkgsWithMetadataUnvalidated $
\x@(pkg, repo) ->
verifyFetchedTarball verbosity repoctx repo pkg >>= \b -> case b of
True -> return $ Left x
False -> return $ Right x
withRepoCtx $ \repoctx -> flip concatMapM (Map.toList repoTarballPkgsWithMetadataUnvalidatedMap) $
\(repo, pkgids) ->
verifyFetchedTarballs verbosity repoctx repo pkgids

-- For tarballs from repos that do not have hashes available we now have
-- to check if the packages were downloaded already.
Expand All @@ -1101,9 +1105,9 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
[ do
mtarball <- checkRepoTarballFetched repo pkgid
case mtarball of
Nothing -> return (Left (pkgid, repo))
Nothing -> return (Left (repo, pkgid))
Just tarball -> return (Right (pkgid, tarball))
| (pkgid, repo) <- repoTarballPkgsWithoutMetadata
| (repo, pkgid) <- repoTarballPkgsWithoutMetadata
]

let repoTarballPkgsToDownload = repoTarballPkgsToDownloadWithMeta ++ repoTarballPkgsToDownloadWithNoMeta
Expand Down Expand Up @@ -1139,9 +1143,9 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
| pkgid <- pkgids
]
| (repo, pkgids) <-
map (\grp@((_, repo) :| _) -> (repo, map fst (NE.toList grp)))
. NE.groupBy ((==) `on` (remoteRepoName . repoRemote . snd))
. sortBy (compare `on` (remoteRepoName . repoRemote . snd))
map (\grp@((repo, _) :| _) -> (repo, map snd (NE.toList grp)))
. NE.groupBy ((==) `on` (remoteRepoName . repoRemote . fst))
. sortBy (compare `on` (remoteRepoName . repoRemote . fst))
$ repoTarballPkgsWithMetadata
]

Expand All @@ -1153,7 +1157,7 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
[ do
tarball <- fetchRepoTarball verbosity repoctx repo pkgid
return (pkgid, tarball)
| (pkgid, repo) <- repoTarballPkgsToDownload
| (repo, pkgid) <- repoTarballPkgsToDownload
]

return
Expand Down
1 change: 1 addition & 0 deletions cabal-install/src/Distribution/Client/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ module Distribution.Client.Utils
, listFilesInside
, safeRead
, hasElem
, concatMapM
, occursOnlyOrBefore
, giveRTSWarning
) where
Expand Down

0 comments on commit e1f73a4

Please sign in to comment.