Skip to content

Commit

Permalink
refactor(fingerprint): Track the intent for each use of UnitHash (#…
Browse files Browse the repository at this point in the history
…14826)

### What does this PR try to resolve?

We have
- `-C metadata`
- `-C extra-filename`
- Uniquifying build scripts
- Uniquifying scraped documentation

Currently, these are all the same value. For #8716, we might want to
have `-C metadata` and `-C extra-filename` hash different parts of the
`Unit`. I figured that this change helps to clarify intent so that even
if we don't change the definitions, this could still be worth it.

The last two I'm tracking as a `unit_id`.  As we evolve the first two
hashes, we can decide which would be a better fit for backing the
`unit_id`.
For scraping,  I could have treated this as
`-C extra-filename` but then we'd have a lot of `.expect()`s.

I moved the accessors from `CompilationFiles` to `MetaInfo` so we could
closely associate the documentation, API, and implementation. Making
`CompilationFiles::c_metadata` the main entry point that gets documented
would be weird because the hashing is associated with `MetaInfo`
(umbrella type, was private, now `Metadata`) or `Metadata` (agnostic of
each use, now `UnitHash`)

### How should we test and review this PR?

### Additional information
  • Loading branch information
weihanglo authored Nov 16, 2024
2 parents 9c3c48f + 1f4eeb4 commit 69e5959
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 85 deletions.
95 changes: 53 additions & 42 deletions src/cargo/core/compiler/build_runner/compilation_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ use crate::util::{self, CargoResult, StableHasher};
/// use the same rustc version.
const METADATA_VERSION: u8 = 2;

/// Uniquely identify a [`Unit`] under specific circumstances, see [`Metadata`] for more.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
pub struct UnitHash(u64);

impl fmt::Display for UnitHash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:016x}", self.0)
}
}

impl fmt::Debug for UnitHash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "UnitHash({:016x})", self.0)
}
}

/// The `Metadata` is a hash used to make unique file names for each unit in a
/// build. It is also used for symbol mangling.
///
Expand Down Expand Up @@ -68,30 +84,27 @@ const METADATA_VERSION: u8 = 2;
///
/// Note that the `Fingerprint` is in charge of tracking everything needed to determine if a
/// rebuild is needed.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
pub struct Metadata(u64);
#[derive(Copy, Clone, Debug)]
pub struct Metadata {
meta_hash: UnitHash,
use_extra_filename: bool,
}

impl fmt::Display for Metadata {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:016x}", self.0)
impl Metadata {
/// A hash to identify a given [`Unit`] in the build graph
pub fn unit_id(&self) -> UnitHash {
self.meta_hash
}
}

impl fmt::Debug for Metadata {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Metadata({:016x})", self.0)
/// A hash to add to symbol naming through `-C metadata`
pub fn c_metadata(&self) -> UnitHash {
self.meta_hash
}
}

/// Information about the metadata hashes used for a `Unit`.
struct MetaInfo {
/// The symbol hash to use.
meta_hash: Metadata,
/// Whether or not the `-C extra-filename` flag is used to generate unique
/// output filenames for this `Unit`.
///
/// If this is `true`, the `meta_hash` is used for the filename.
use_extra_filename: bool,
/// A hash to add to file names through `-C extra-filename`
pub fn c_extra_filename(&self) -> Option<UnitHash> {
self.use_extra_filename.then_some(self.meta_hash)
}
}

/// Collection of information about the files emitted by the compiler, and the
Expand All @@ -108,7 +121,7 @@ pub struct CompilationFiles<'a, 'gctx> {
roots: Vec<Unit>,
ws: &'a Workspace<'gctx>,
/// Metadata hash to use for each unit.
metas: HashMap<Unit, MetaInfo>,
metas: HashMap<Unit, Metadata>,
/// For each Unit, a list all files produced.
outputs: HashMap<Unit, LazyCell<Arc<Vec<OutputFile>>>>,
}
Expand Down Expand Up @@ -177,13 +190,7 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
///
/// [`fingerprint`]: super::super::fingerprint#fingerprints-and-metadata
pub fn metadata(&self, unit: &Unit) -> Metadata {
self.metas[unit].meta_hash
}

/// Returns whether or not `-C extra-filename` is used to extend the
/// output filenames to make them unique.
pub fn use_extra_filename(&self, unit: &Unit) -> bool {
self.metas[unit].use_extra_filename
self.metas[unit]
}

/// Gets the short hash based only on the `PackageId`.
Expand Down Expand Up @@ -224,9 +231,9 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
/// taken in those cases!
fn pkg_dir(&self, unit: &Unit) -> String {
let name = unit.pkg.package_id().name();
let meta = &self.metas[unit];
if meta.use_extra_filename {
format!("{}-{}", name, meta.meta_hash)
let meta = self.metas[unit];
if let Some(c_extra_filename) = meta.c_extra_filename() {
format!("{}-{}", name, c_extra_filename)
} else {
format!("{}-{}", name, self.target_short_hash(unit))
}
Expand Down Expand Up @@ -467,7 +474,11 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
// The file name needs to be stable across Cargo sessions.
// This originally used unit.buildkey(), but that isn't stable,
// so we use metadata instead (prefixed with name for debugging).
let file_name = format!("{}-{}.examples", unit.pkg.name(), self.metadata(unit));
let file_name = format!(
"{}-{}.examples",
unit.pkg.name(),
self.metadata(unit).unit_id()
);
let path = self.deps_dir(unit).join(file_name);
vec![OutputFile {
path,
Expand Down Expand Up @@ -523,8 +534,8 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
// Convert FileType to OutputFile.
let mut outputs = Vec::new();
for file_type in file_types {
let meta = &self.metas[unit];
let meta_opt = meta.use_extra_filename.then(|| meta.meta_hash.to_string());
let meta = self.metas[unit];
let meta_opt = meta.c_extra_filename().map(|h| h.to_string());
let path = out_dir.join(file_type.output_filename(&unit.target, meta_opt.as_deref()));

// If, the `different_binary_name` feature is enabled, the name of the hardlink will
Expand Down Expand Up @@ -558,8 +569,8 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
fn metadata_of<'a>(
unit: &Unit,
build_runner: &BuildRunner<'_, '_>,
metas: &'a mut HashMap<Unit, MetaInfo>,
) -> &'a MetaInfo {
metas: &'a mut HashMap<Unit, Metadata>,
) -> &'a Metadata {
if !metas.contains_key(unit) {
let meta = compute_metadata(unit, build_runner, metas);
metas.insert(unit.clone(), meta);
Expand All @@ -574,8 +585,8 @@ fn metadata_of<'a>(
fn compute_metadata(
unit: &Unit,
build_runner: &BuildRunner<'_, '_>,
metas: &mut HashMap<Unit, MetaInfo>,
) -> MetaInfo {
metas: &mut HashMap<Unit, Metadata>,
) -> Metadata {
let bcx = &build_runner.bcx;
let mut hasher = StableHasher::new();

Expand Down Expand Up @@ -669,9 +680,9 @@ fn compute_metadata(
target_configs_are_different.hash(&mut hasher);
}

MetaInfo {
meta_hash: Metadata(hasher.finish()),
use_extra_filename: should_use_metadata(bcx, unit),
Metadata {
meta_hash: UnitHash(hasher.finish()),
use_extra_filename: use_extra_filename(bcx, unit),
}
}

Expand Down Expand Up @@ -717,8 +728,8 @@ fn hash_rustc_version(bcx: &BuildContext<'_, '_>, hasher: &mut StableHasher, uni
// between different backends without recompiling.
}

/// Returns whether or not this unit should use a metadata hash.
fn should_use_metadata(bcx: &BuildContext<'_, '_>, unit: &Unit) -> bool {
/// Returns whether or not this unit should use a hash in the filename to make it unique.
fn use_extra_filename(bcx: &BuildContext<'_, '_>, unit: &Unit) -> bool {
if unit.mode.is_doc_test() || unit.mode.is_doc() {
// Doc tests do not have metadata.
return false;
Expand Down
10 changes: 5 additions & 5 deletions src/cargo/core/compiler/build_runner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use super::{

mod compilation_files;
use self::compilation_files::CompilationFiles;
pub use self::compilation_files::{Metadata, OutputFile};
pub use self::compilation_files::{Metadata, OutputFile, UnitHash};

/// Collection of all the stuff that is needed to perform a build.
///
Expand Down Expand Up @@ -86,7 +86,7 @@ pub struct BuildRunner<'a, 'gctx> {
/// Set of metadata of Docscrape units that fail before completion, e.g.
/// because the target has a type error. This is in an Arc<Mutex<..>>
/// because it is continuously updated as the job progresses.
pub failed_scrape_units: Arc<Mutex<HashSet<Metadata>>>,
pub failed_scrape_units: Arc<Mutex<HashSet<UnitHash>>>,
}

impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
Expand Down Expand Up @@ -435,15 +435,15 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
/// the given unit.
///
/// If the package does not have a build script, this returns None.
pub fn find_build_script_metadata(&self, unit: &Unit) -> Option<Metadata> {
pub fn find_build_script_metadata(&self, unit: &Unit) -> Option<UnitHash> {
let script_unit = self.find_build_script_unit(unit)?;
Some(self.get_run_build_script_metadata(&script_unit))
}

/// Returns the metadata hash for a `RunCustomBuild` unit.
pub fn get_run_build_script_metadata(&self, unit: &Unit) -> Metadata {
pub fn get_run_build_script_metadata(&self, unit: &Unit) -> UnitHash {
assert!(unit.mode.is_run_custom_build());
self.files().metadata(unit)
self.files().metadata(unit).unit_id()
}

pub fn is_primary_package(&self, unit: &Unit) -> bool {
Expand Down
14 changes: 7 additions & 7 deletions src/cargo/core/compiler/compilation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use cargo_util::{paths, ProcessBuilder};

use crate::core::compiler::apply_env_config;
use crate::core::compiler::BuildContext;
use crate::core::compiler::{CompileKind, Metadata, Unit};
use crate::core::compiler::{CompileKind, Unit, UnitHash};
use crate::core::Package;
use crate::util::{context, CargoResult, GlobalContext};

Expand Down Expand Up @@ -45,7 +45,7 @@ pub struct Doctest {
/// The script metadata, if this unit's package has a build script.
///
/// This is used for indexing [`Compilation::extra_env`].
pub script_meta: Option<Metadata>,
pub script_meta: Option<UnitHash>,

/// Environment variables to set in the rustdoc process.
pub env: HashMap<String, OsString>,
Expand All @@ -61,7 +61,7 @@ pub struct UnitOutput {
/// The script metadata, if this unit's package has a build script.
///
/// This is used for indexing [`Compilation::extra_env`].
pub script_meta: Option<Metadata>,
pub script_meta: Option<UnitHash>,
}

/// A structure returning the result of a compilation.
Expand Down Expand Up @@ -101,7 +101,7 @@ pub struct Compilation<'gctx> {
///
/// The key is the build script metadata for uniquely identifying the
/// `RunCustomBuild` unit that generated these env vars.
pub extra_env: HashMap<Metadata, Vec<(String, String)>>,
pub extra_env: HashMap<UnitHash, Vec<(String, String)>>,

/// Libraries to test with rustdoc.
pub to_doc_test: Vec<Doctest>,
Expand Down Expand Up @@ -197,7 +197,7 @@ impl<'gctx> Compilation<'gctx> {
pub fn rustdoc_process(
&self,
unit: &Unit,
script_meta: Option<Metadata>,
script_meta: Option<UnitHash>,
) -> CargoResult<ProcessBuilder> {
let mut rustdoc = ProcessBuilder::new(&*self.gctx.rustdoc()?);
if self.gctx.extra_verbose() {
Expand Down Expand Up @@ -256,7 +256,7 @@ impl<'gctx> Compilation<'gctx> {
cmd: T,
kind: CompileKind,
pkg: &Package,
script_meta: Option<Metadata>,
script_meta: Option<UnitHash>,
) -> CargoResult<ProcessBuilder> {
let builder = if let Some((runner, args)) = self.target_runner(kind) {
let mut builder = ProcessBuilder::new(runner);
Expand Down Expand Up @@ -285,7 +285,7 @@ impl<'gctx> Compilation<'gctx> {
&self,
mut cmd: ProcessBuilder,
pkg: &Package,
script_meta: Option<Metadata>,
script_meta: Option<UnitHash>,
kind: CompileKind,
tool_kind: ToolKind,
) -> CargoResult<ProcessBuilder> {
Expand Down
24 changes: 12 additions & 12 deletions src/cargo/core/compiler/custom_build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
use super::{fingerprint, BuildRunner, Job, Unit, Work};
use crate::core::compiler::artifact;
use crate::core::compiler::build_runner::Metadata;
use crate::core::compiler::build_runner::UnitHash;
use crate::core::compiler::fingerprint::DirtyReason;
use crate::core::compiler::job_queue::JobState;
use crate::core::{profiles::ProfileRoot, PackageId, Target};
Expand Down Expand Up @@ -111,13 +111,13 @@ pub struct BuildOutput {
/// inserted during `build_map`. The rest of the entries are added
/// immediately after each build script runs.
///
/// The `Metadata` is the unique metadata hash for the `RunCustomBuild` Unit of
/// The [`UnitHash`] is the unique metadata hash for the `RunCustomBuild` Unit of
/// the package. It needs a unique key, since the build script can be run
/// multiple times with different profiles or features. We can't embed a
/// `Unit` because this structure needs to be shareable between threads.
#[derive(Default)]
pub struct BuildScriptOutputs {
outputs: HashMap<Metadata, BuildOutput>,
outputs: HashMap<UnitHash, BuildOutput>,
}

/// Linking information for a `Unit`.
Expand All @@ -141,9 +141,9 @@ pub struct BuildScripts {
/// usage here doesn't blow up too much.
///
/// For more information, see #2354.
pub to_link: Vec<(PackageId, Metadata)>,
pub to_link: Vec<(PackageId, UnitHash)>,
/// This is only used while constructing `to_link` to avoid duplicates.
seen_to_link: HashSet<(PackageId, Metadata)>,
seen_to_link: HashSet<(PackageId, UnitHash)>,
/// Host-only dependencies that have build scripts. Each element is an
/// index into `BuildScriptOutputs`.
///
Expand All @@ -152,7 +152,7 @@ pub struct BuildScripts {
/// Any `BuildOutput::library_paths` path relative to `target` will be
/// added to `LD_LIBRARY_PATH` so that the compiler can find any dynamic
/// libraries a build script may have generated.
pub plugins: BTreeSet<(PackageId, Metadata)>,
pub plugins: BTreeSet<(PackageId, UnitHash)>,
}

/// Dependency information as declared by a build script that might trigger
Expand Down Expand Up @@ -649,7 +649,7 @@ fn build_work(build_runner: &mut BuildRunner<'_, '_>, unit: &Unit) -> CargoResul
fn insert_log_messages_in_build_outputs(
build_script_outputs: Arc<Mutex<BuildScriptOutputs>>,
id: PackageId,
metadata_hash: Metadata,
metadata_hash: UnitHash,
log_messages: Vec<LogMessage>,
) {
let build_output_with_only_log_messages = BuildOutput {
Expand Down Expand Up @@ -1245,7 +1245,7 @@ pub fn build_map(build_runner: &mut BuildRunner<'_, '_>) -> CargoResult<()> {

// When adding an entry to 'to_link' we only actually push it on if the
// script hasn't seen it yet (e.g., we don't push on duplicates).
fn add_to_link(scripts: &mut BuildScripts, pkg: PackageId, metadata: Metadata) {
fn add_to_link(scripts: &mut BuildScripts, pkg: PackageId, metadata: UnitHash) {
if scripts.seen_to_link.insert((pkg, metadata)) {
scripts.to_link.push((pkg, metadata));
}
Expand Down Expand Up @@ -1297,7 +1297,7 @@ fn prev_build_output(

impl BuildScriptOutputs {
/// Inserts a new entry into the map.
fn insert(&mut self, pkg_id: PackageId, metadata: Metadata, parsed_output: BuildOutput) {
fn insert(&mut self, pkg_id: PackageId, metadata: UnitHash, parsed_output: BuildOutput) {
match self.outputs.entry(metadata) {
Entry::Vacant(entry) => {
entry.insert(parsed_output);
Expand All @@ -1314,17 +1314,17 @@ impl BuildScriptOutputs {
}

/// Returns `true` if the given key already exists.
fn contains_key(&self, metadata: Metadata) -> bool {
fn contains_key(&self, metadata: UnitHash) -> bool {
self.outputs.contains_key(&metadata)
}

/// Gets the build output for the given key.
pub fn get(&self, meta: Metadata) -> Option<&BuildOutput> {
pub fn get(&self, meta: UnitHash) -> Option<&BuildOutput> {
self.outputs.get(&meta)
}

/// Returns an iterator over all entries.
pub fn iter(&self) -> impl Iterator<Item = (&Metadata, &BuildOutput)> {
pub fn iter(&self) -> impl Iterator<Item = (&UnitHash, &BuildOutput)> {
self.outputs.iter()
}
}
2 changes: 1 addition & 1 deletion src/cargo/core/compiler/job_queue/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ impl<'gctx> DrainState<'gctx> {
.failed_scrape_units
.lock()
.unwrap()
.insert(build_runner.files().metadata(&unit));
.insert(build_runner.files().metadata(&unit).unit_id());
self.queue.finish(&unit, &artifact);
}
Err(error) => {
Expand Down
Loading

0 comments on commit 69e5959

Please sign in to comment.