From 79428827b80255884d46fc422ed709e2f5427e57 Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Sat, 2 Nov 2024 20:23:39 -0400 Subject: [PATCH] feat: add `text` option for `buildFile*` utilities (#5924) Adds an optional `text` argument to the `fetchFile*` and `buildFile*` definitions that can be used to hash built files as text files (with normalized line endings) instead of as binary files (the previous default). Separately, this change also significantly expands the documentation in the `Lake.Build.Trace` module and preforms minor touchups of some build job signatures. --- src/lake/Lake/Build/Common.lean | 65 ++++++++++++------- src/lake/Lake/Build/Job.lean | 14 ++-- src/lake/Lake/Build/Trace.lean | 110 ++++++++++++++++++++------------ 3 files changed, 119 insertions(+), 70 deletions(-) diff --git a/src/lake/Lake/Build/Common.lean b/src/lake/Lake/Build/Common.lean index e5d723cf07a6..4917f5ad181a 100644 --- a/src/lake/Lake/Build/Common.lean +++ b/src/lake/Lake/Build/Common.lean @@ -161,15 +161,17 @@ def clearFileHash (file : FilePath) : IO Unit := do /-- Fetches the hash of a file that may already be cached in a `.hash` file. -If the `.hash` file does not exist or hash files are not trusted -(e.g., with `--rehash`), creates it with a newly computed hash. +If hash files are not trusted (e.g., with `--rehash`) or the `.hash` file does +not exist, it will be created with a newly computed hash. + +If `text := true`, `file` is hashed as a text file rather than a binary file. -/ -def fetchFileHash (file : FilePath) : JobM Hash := do +def fetchFileHash (file : FilePath) (text := false) : JobM Hash := do let hashFile := FilePath.mk <| file.toString ++ ".hash" if (← getTrustHash) then if let some hash ← Hash.load? hashFile then return hash - let hash ← computeHash file + let hash ← computeFileHash file text createParentDirs hashFile IO.FS.writeFile hashFile hash.toString return hash @@ -177,9 +179,11 @@ def fetchFileHash (file : FilePath) : JobM Hash := do /-- Fetches the trace of a file that may have already have its hash cached in a `.hash` file. If no such `.hash` file exists, recomputes and creates it. + +If `text := true`, `file` is hashed as text file rather than a binary file. -/ -def fetchFileTrace (file : FilePath) : JobM BuildTrace := do - return .mk (← fetchFileHash file) (← getMTime file) +def fetchFileTrace (file : FilePath) (text := false) : JobM BuildTrace := do + return .mk (← fetchFileHash file text) (← getMTime file) /-- Builds `file` using `build` unless it already exists and `depTrace` matches @@ -191,66 +195,83 @@ a `.log.json` file and replayed from there if the build is skipped. For example, given `file := "foo.c"`, compares `depTrace` with that in `foo.c.trace` with the hash cached in `foo.c.hash` and the log cached in `foo.c.trace`. + +If `text := true`, `file` is hashed as a text file rather than a binary file. -/ def buildFileUnlessUpToDate - (file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) + (file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) (text := false) : JobM BuildTrace := do let traceFile := FilePath.mk <| file.toString ++ ".trace" buildUnlessUpToDate file depTrace traceFile do build clearFileHash file - fetchFileTrace file + fetchFileTrace file text /-- Build `file` using `build` after `dep` completes if the dependency's trace (and/or `extraDepTrace`) has changed. + +If `text := true`, `file` is handled as a text file rather than a binary file. -/ @[inline] def buildFileAfterDep (file : FilePath) (dep : BuildJob α) (build : α → JobM PUnit) - (extraDepTrace : JobM _ := pure BuildTrace.nil) + (extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false) : SpawnM (BuildJob FilePath) := dep.bindSync fun depInfo depTrace => do let depTrace := depTrace.mix (← extraDepTrace) - let trace ← buildFileUnlessUpToDate file depTrace <| build depInfo + let trace ← buildFileUnlessUpToDate file depTrace (build depInfo) text return (file, trace) -/-- Build `file` using `build` after `deps` have built if any of their traces change. -/ +/-- +Build `file` using `build` after `deps` have built if any of their traces change. + +If `text := true`, `file` is handled as a text file rather than a binary file. +-/ @[inline] def buildFileAfterDepList (file : FilePath) (deps : List (BuildJob α)) (build : List α → JobM PUnit) - (extraDepTrace : JobM _ := pure BuildTrace.nil) + (extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false) : SpawnM (BuildJob FilePath) := do - buildFileAfterDep file (← BuildJob.collectList deps) build extraDepTrace + buildFileAfterDep file (.collectList deps) build extraDepTrace text + +/-- +Build `file` using `build` after `deps` have built if any of their traces change. -/-- Build `file` using `build` after `deps` have built if any of their traces change. -/ +If `text := true`, `file` is handled as a text file rather than a binary file. +-/ @[inline] def buildFileAfterDepArray (file : FilePath) (deps : Array (BuildJob α)) (build : Array α → JobM PUnit) - (extraDepTrace : JobM _ := pure BuildTrace.nil) + (extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false) : SpawnM (BuildJob FilePath) := do - buildFileAfterDep file (← BuildJob.collectArray deps) build extraDepTrace + buildFileAfterDep file (.collectArray deps) build extraDepTrace text /-! ## Common Builds -/ /-- A build job for binary file that is expected to already exist (e.g., a data blob). -Any byte difference in the file will trigger a rebuild of dependents. + +Any byte difference in a binary file will trigger a rebuild of its dependents. -/ def inputBinFile (path : FilePath) : SpawnM (BuildJob FilePath) := Job.async <| (path, ·) <$> computeTrace path /-- A build job for text file that is expected to already exist (e.g., a source file). -Normalizes line endings (converts CRLF to LF) to produce platform-independent traces. + +Text file traces have normalized line endings to avoid unnecessary rebuilds across platforms. -/ def inputTextFile (path : FilePath) : SpawnM (BuildJob FilePath) := Job.async <| (path, ·) <$> computeTrace (TextFilePath.mk path) /-- -A build job for file that is expected to already exist. +A build job for file that is expected to already exist (e.g., a data blob or source file). -**Deprecated:** Use either `inputTextFile` or `inputBinFile`. -`inputTextFile` normalizes line endings to produce platform-independent traces. +If `text := true`, the file is handled as a text file rather than a binary file. +Any byte difference in a binary file will trigger a rebuild of its dependents. +In contrast, text file traces have normalized line endings to avoid unnecessary +rebuilds across platforms. -/ -@[deprecated (since := "2024-06-08")] abbrev inputFile := @inputBinFile +@[inline] def inputFile (path : FilePath) (text : Bool) : SpawnM (BuildJob FilePath) := + if text then inputTextFile path else inputBinFile path /-- Build an object file from a source file job using `compiler`. The invocation is: diff --git a/src/lake/Lake/Build/Job.lean b/src/lake/Lake/Build/Job.lean index 6c909644b8f2..4bc441c18e05 100644 --- a/src/lake/Lake/Build/Job.lean +++ b/src/lake/Lake/Build/Job.lean @@ -298,11 +298,11 @@ instance : Functor BuildJob where @[inline] protected def wait? (self : BuildJob α) : BaseIO (Option α) := (·.map (·.1)) <$> self.toJob.wait? -def add (t1 : BuildJob α) (t2 : BuildJob β) : BuildJob α := - mk <| t1.toJob.zipWith (fun a _ => a) t2.toJob +def add (self : BuildJob α) (other : BuildJob β) : BuildJob α := + mk <| self.toJob.zipWith (fun a _ => a) other.toJob -def mix (t1 : BuildJob α) (t2 : BuildJob β) : BuildJob Unit := - mk <| t1.toJob.zipWith (fun (_,t) (_,t') => ((), mixTrace t t')) t2.toJob +def mix (self : BuildJob α) (other : BuildJob β) : BuildJob Unit := + mk <| self.toJob.zipWith (fun (_,t) (_,t') => ((), mixTrace t t')) other.toJob def mixList (jobs : List (BuildJob α)) : Id (BuildJob Unit) := ofJob $ jobs.foldr (·.toJob.zipWith (fun (_,t') t => mixTrace t t') ·) (pure nilTrace) @@ -311,12 +311,12 @@ def mixArray (jobs : Array (BuildJob α)) : Id (BuildJob Unit) := ofJob $ jobs.foldl (·.zipWith (fun t (_,t') => mixTrace t t') ·.toJob) (pure nilTrace) def zipWith - (f : α → β → γ) (t1 : BuildJob α) (t2 : BuildJob β) + (f : α → β → γ) (self : BuildJob α) (other : BuildJob β) : BuildJob γ := - mk <| t1.toJob.zipWith (fun (a,t) (b,t') => (f a b, mixTrace t t')) t2.toJob + mk <| self.toJob.zipWith (fun (a,t) (b,t') => (f a b, mixTrace t t')) other.toJob def collectList (jobs : List (BuildJob α)) : Id (BuildJob (List α)) := return jobs.foldr (zipWith List.cons) (pure []) def collectArray (jobs : Array (BuildJob α)) : Id (BuildJob (Array α)) := - return jobs.foldl (zipWith Array.push) (pure #[]) + return jobs.foldl (zipWith Array.push) (pure (Array.mkEmpty jobs.size)) diff --git a/src/lake/Lake/Build/Trace.lean b/src/lake/Lake/Build/Trace.lean index a8323e961620..9a198679a64d 100644 --- a/src/lake/Lake/Build/Trace.lean +++ b/src/lake/Lake/Build/Trace.lean @@ -6,12 +6,20 @@ Authors: Mac Malone import Lake.Util.IO import Lean.Data.Json +/-! # Lake Traces + +This module defines Lake traces and associated utilities. +Traces are used to determine whether a Lake build artifact is *dirty* +(needs to be rebuilt) or is already *up-to-date*. +The primary type is `Lake.BuildTrace`. +-/ + open System Lean namespace Lake -------------------------------------------------------------------------------- -/-! # Utilities -/ +/-! ## Utilities -/ -------------------------------------------------------------------------------- class CheckExists.{u} (i : Type u) where @@ -24,60 +32,63 @@ instance : CheckExists FilePath where checkExists := FilePath.pathExists -------------------------------------------------------------------------------- -/-! # Trace Abstraction -/ +/-! ## Trace Abstraction -/ -------------------------------------------------------------------------------- -class ComputeTrace.{u,v,w} (i : Type u) (m : outParam $ Type v → Type w) (t : Type v) where - /-- Compute the trace of some target info using information from the monadic context. -/ - computeTrace : i → m t +class ComputeTrace (α : Type u) (m : outParam $ Type v → Type w) (τ : Type v) where + /-- Compute the trace of an object in its preferred monad. -/ + computeTrace : α → m τ -@[inline] def computeTrace [ComputeTrace i m t] [MonadLiftT m n] (info : i) : n t := - liftM <| ComputeTrace.computeTrace info +/-- Compute the trace of an object in a supporting monad. -/ +@[inline] def computeTrace [ComputeTrace α m τ] [MonadLiftT m n] (a : α) : n τ := + liftM <| ComputeTrace.computeTrace a -class NilTrace.{u} (t : Type u) where +class NilTrace.{u} (α : Type u) where /-- The nil trace. Should not unduly clash with a proper trace. -/ - nilTrace : t + nilTrace : α export NilTrace (nilTrace) -instance inhabitedOfNilTrace [NilTrace t] : Inhabited t := ⟨nilTrace⟩ +instance inhabitedOfNilTrace [NilTrace α] : Inhabited α := ⟨nilTrace⟩ -class MixTrace.{u} (t : Type u) where +class MixTrace.{u} (α : Type u) where /-- Combine two traces. The result should be dirty if either of the inputs is dirty. -/ - mixTrace : t → t → t + mixTrace : α → α → α export MixTrace (mixTrace) section -variable [MixTrace t] [NilTrace t] +variable [MixTrace τ] [NilTrace τ] -def mixTraceList (traces : List t) : t := +/- Combine a `List` of traces (left-to-right). -/ +def mixTraceList (traces : List τ) : τ := traces.foldl mixTrace nilTrace -def mixTraceArray (traces : Array t) : t := +/- Combine an `Array` of traces (left-to-right). -/ +def mixTraceArray (traces : Array τ) : τ := traces.foldl mixTrace nilTrace -variable [ComputeTrace i m t] +variable [ComputeTrace α m τ] -@[specialize] def computeListTrace [MonadLiftT m n] [Monad n] (artifacts : List i) : n t := - artifacts.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace +/- Compute the trace of each element of a `List` and combine them (left-to-right). -/ +@[inline] def computeListTrace [MonadLiftT m n] [Monad n] (as : List α) : n τ := + as.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace -instance [Monad m] : ComputeTrace (List i) m t := ⟨computeListTrace⟩ +instance [Monad m] : ComputeTrace (List α) m τ := ⟨computeListTrace⟩ -@[specialize] def computeArrayTrace [MonadLiftT m n] [Monad n] (artifacts : Array i) : n t := - artifacts.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace +/- Compute the trace of each element of an `Array` and combine them (left-to-right). -/ +@[inline] def computeArrayTrace [MonadLiftT m n] [Monad n] (as : Array α) : n τ := + as.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace -instance [Monad m] : ComputeTrace (Array i) m t := ⟨computeArrayTrace⟩ +instance [Monad m] : ComputeTrace (Array α) m τ := ⟨computeArrayTrace⟩ end -------------------------------------------------------------------------------- -/-! # Hash Trace -/ +/-! ## Hash Trace -/ -------------------------------------------------------------------------------- -/-- -A content hash. -TODO: Use a secure hash rather than the builtin Lean hash function. --/ +/-- A content hash. -/ +-- TODO: Use a secure hash rather than the builtin Lean hash function. structure Hash where val : UInt64 deriving BEq, DecidableEq, Repr @@ -127,51 +138,66 @@ instance : FromJson Hash := ⟨Hash.fromJson?⟩ end Hash class ComputeHash (α : Type u) (m : outParam $ Type → Type v) where + /-- Compute the hash of an object in its preferred monad. -/ computeHash : α → m Hash instance [ComputeHash α m] : ComputeTrace α m Hash := ⟨ComputeHash.computeHash⟩ +/-- Compute the hash of object `a` in a pure context. -/ @[inline] def pureHash [ComputeHash α Id] (a : α) : Hash := ComputeHash.computeHash a +/-- Compute the hash an object in an supporting monad. -/ @[inline] def computeHash [ComputeHash α m] [MonadLiftT m n] (a : α) : n Hash := liftM <| ComputeHash.computeHash a instance : ComputeHash String Id := ⟨Hash.ofString⟩ -def computeFileHash (file : FilePath) : IO Hash := +/-- +Compute the hash of a binary file. +Binary files are equivalent only if they are byte identical. +-/ +def computeBinFileHash (file : FilePath) : IO Hash := Hash.ofByteArray <$> IO.FS.readBinFile file -instance : ComputeHash FilePath IO := ⟨computeFileHash⟩ +instance : ComputeHash FilePath IO := ⟨computeBinFileHash⟩ +/-- +Compute the hash of a text file. +Normalizes `\r\n` sequences to `\n` for cross-platform compatibility. +-/ def computeTextFileHash (file : FilePath) : IO Hash := do let text ← IO.FS.readFile file let text := text.crlfToLf return Hash.ofString text /-- - A wrapper around `FilePath` that adjusts its `ComputeHash` implementation - to normalize `\r\n` sequences to `\n` for cross-platform compatibility. -/ +A wrapper around `FilePath` that adjusts its `ComputeHash` implementation +to normalize `\r\n` sequences to `\n` for cross-platform compatibility. +-/ structure TextFilePath where path : FilePath instance : Coe TextFilePath FilePath := ⟨(·.path)⟩ +instance : ComputeHash TextFilePath IO := ⟨(computeTextFileHash ·)⟩ -instance : ComputeHash TextFilePath IO where - computeHash file := computeTextFileHash file +/-- Compute the hash of a file. If `text := true`, normalize line endings. -/ +@[inline] def computeFileHash (file : FilePath) (text := false) : IO Hash := + if text then computeTextFileHash file else computeBinFileHash file -@[specialize] def computeArrayHash [ComputeHash α m] [Monad m] (xs : Array α) : m Hash := - xs.foldlM (fun h a => return h.mix (← computeHash a)) .nil +/-- Compute the hash of each element of an array and combine them (left-to-right). -/ +@[inline] def computeArrayHash [ComputeHash α m] [Monad m] (as : Array α) : m Hash := + computeArrayTrace as instance [ComputeHash α m] [Monad m] : ComputeHash (Array α) m := ⟨computeArrayHash⟩ -------------------------------------------------------------------------------- -/-! # Modification Time (MTime) Trace -/ +/-! ## Modification Time (MTime) Trace -/ -------------------------------------------------------------------------------- open IO.FS (SystemTime) -/-- A modification time. -/ +/-- A modification time (e.g., of a file). -/ def MTime := SystemTime namespace MTime @@ -192,12 +218,14 @@ instance : MixTrace MTime := ⟨max⟩ end MTime -class GetMTime (α) where +class GetMTime (α : Type u) where + /-- Return the modification time of an object. -/ getMTime : α → IO MTime export GetMTime (getMTime) instance [GetMTime α] : ComputeTrace α IO MTime := ⟨getMTime⟩ +/-- Return the modification time of a file recorded by the OS. -/ @[inline] def getFileMTime (file : FilePath) : IO MTime := return (← file.metadata).modified @@ -216,7 +244,7 @@ That is, check if the info is newer than `self`. | .error _ => return false -------------------------------------------------------------------------------- -/-! # Lake Build Trace (Hash + MTIme) -/ +/-! ## Lake Build Trace -/ -------------------------------------------------------------------------------- /-- Trace used for common Lake targets. Combines `Hash` and `MTime`. -/ @@ -242,10 +270,10 @@ def nil : BuildTrace := instance : NilTrace BuildTrace := ⟨nil⟩ -@[specialize] def compute [ComputeHash i m] [MonadLiftT m IO] [GetMTime i] (info : i) : IO BuildTrace := +@[specialize] def compute [ComputeHash α m] [MonadLiftT m IO] [GetMTime α] (info : α) : IO BuildTrace := return mk (← computeHash info) (← getMTime info) -instance [ComputeHash i m] [MonadLiftT m IO] [GetMTime i] : ComputeTrace i IO BuildTrace := ⟨compute⟩ +instance [ComputeHash α m] [MonadLiftT m IO] [GetMTime α] : ComputeTrace α IO BuildTrace := ⟨compute⟩ def mix (t1 t2 : BuildTrace) : BuildTrace := mk (Hash.mix t1.hash t2.hash) (max t1.mtime t2.mtime)