diff --git a/Cargo.lock b/Cargo.lock index e501321056..8c326318c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1665,6 +1665,7 @@ dependencies = [ "hex", "insta", "itertools 0.12.1", + "jj-lib-proc-macros", "maplit", "num_cpus", "once_cell", @@ -1695,6 +1696,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "jj-lib-proc-macros" +version = "0.14.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "jobserver" version = "0.1.27" @@ -2213,9 +2223,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] diff --git a/Cargo.toml b/Cargo.toml index 29d4701a07..98aea82f07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ cargo-features = [] [workspace] resolver = "2" -members = ["cli", "lib", "lib/testutils", "lib/gen-protos"] +members = ["cli", "lib", "lib/gen-protos", "lib/proc-macros", "lib/testutils"] [workspace.package] version = "0.14.0" @@ -66,8 +66,10 @@ pest = "2.7.7" pest_derive = "2.7.7" pollster = "0.3.0" pretty_assertions = "1.4.0" +proc-macro2 = "1.0.78" prost = "0.12.3" prost-build = "0.12.3" +quote = "1.0.35" rand = "0.8.5" rand_chacha = "0.3.1" rayon = "1.8.1" @@ -85,6 +87,7 @@ smallvec = { version = "1.13.0", features = [ "union", ] } strsim = "0.11.0" +syn = "2.0.48" tempfile = "3.10.0" test-case = "3.3.1" textwrap = "0.16.0" @@ -110,6 +113,7 @@ zstd = "0.12.4" # their own (alphabetically sorted) block jj-lib = { path = "lib", version = "0.14.0" } +jj-lib-proc-macros = { path = "lib/proc-macros", version = "0.14.0" } testutils = { path = "lib/testutils" } # Insta suggests compiling these packages in opt mode for faster testing. diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 7a480cd05e..5e5a400496 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -37,6 +37,7 @@ gix = { workspace = true } glob = { workspace = true } hex = { workspace = true } itertools = { workspace = true } +jj-lib-proc-macros = { workspace = true } maplit = { workspace = true } once_cell = { workspace = true } pest = { workspace = true } diff --git a/lib/proc-macros/Cargo.toml b/lib/proc-macros/Cargo.toml new file mode 100644 index 0000000000..3b526c3396 --- /dev/null +++ b/lib/proc-macros/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "jj-lib-proc-macros" +publish = false + +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = { workspace=true } +quote = { workspace=true } +syn = { workspace=true } diff --git a/lib/proc-macros/src/content_hash.rs b/lib/proc-macros/src/content_hash.rs new file mode 100644 index 0000000000..1e3a55e86b --- /dev/null +++ b/lib/proc-macros/src/content_hash.rs @@ -0,0 +1,37 @@ +use proc_macro2::TokenStream; +use quote::{quote, quote_spanned}; +use syn::spanned::Spanned; +use syn::{Data, Fields, Index}; + +pub fn generate_hash_impl(data: &Data) -> TokenStream { + match *data { + Data::Struct(ref data) => match data.fields { + Fields::Named(ref fields) => { + let hash_statements = fields.named.iter().map(|f| { + let field_name = &f.ident; + quote_spanned! {f.span()=> + ::jj_lib::content_hash::ContentHash::hash(&self.#field_name, state); + } + }); + quote! 
{ + #(#hash_statements)* + } + } + Fields::Unnamed(ref fields) => { + let hash_statements = fields.unnamed.iter().enumerate().map(|(i, f)| { + let index = Index::from(i); + quote_spanned! {f.span() => + ::jj_lib::content_hash::ContentHash::hash(&self.#index, state); + } + }); + quote! { + #(#hash_statements)* + } + } + Fields::Unit => { + quote! {} + } + }, + _ => unimplemented!("ContentHash can only be derived for structs."), + } +} diff --git a/lib/proc-macros/src/lib.rs b/lib/proc-macros/src/lib.rs new file mode 100644 index 0000000000..8e8c128413 --- /dev/null +++ b/lib/proc-macros/src/lib.rs @@ -0,0 +1,30 @@ +mod content_hash; + +extern crate proc_macro; + +use quote::quote; +use syn::{parse_macro_input, DeriveInput}; + +/// Derives the `ContentHash` trait for a struct by calling `ContentHash::hash` +/// on each of the struct members in the order that they're declared. All +/// members of the struct must implement the `ContentHash` trait. +#[proc_macro_derive(ContentHash)] +pub fn derive_content_hash(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DeriveInput); + + // The name of the struct. + let name = &input.ident; + + // Generate an expression to hash each of the fields in the struct. + let hash_impl = content_hash::generate_hash_impl(&input.data); + + let expanded = quote! { + #[automatically_derived] + impl ::jj_lib::content_hash::ContentHash for #name { + fn hash(&self, state: &mut impl ::jj_lib::content_hash::DigestUpdate) { + #hash_impl + } + } + }; + expanded.into() +} diff --git a/lib/src/backend.rs b/lib/src/backend.rs index edc81b3185..cf8f21309d 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -25,7 +25,7 @@ use std::vec::Vec; use async_trait::async_trait; use thiserror::Error; -use crate::content_hash::ContentHash; +use crate::content_hash::{ContentHash, DigestUpdate}; use crate::index::Index; use crate::merge::Merge; use crate::object_id::{id_type, ObjectId}; @@ -111,7 +111,7 @@ impl PartialEq for MergedTreeId { impl Eq for MergedTreeId {} impl ContentHash for MergedTreeId { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { match self { MergedTreeId::Legacy(tree_id) => { state.update(&0u32.to_le_bytes()); @@ -247,7 +247,7 @@ impl TreeValue { } impl ContentHash for TreeValue { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { use TreeValue::*; match self { File { id, executable } => { diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index 9f9dfc3734..39f7723be4 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -1,7 +1,11 @@ //! Portable, stable hashing suitable for identifying values use blake2::Blake2b512; +// Re-export DigestUpdate so that the ContentHash proc macro can be used in +// external crates without directly depending on the digest crate. +pub use digest::Update as DigestUpdate; use itertools::Itertools as _; +pub use jj_lib_proc_macros::ContentHash; /// Portable, stable hashing suitable for identifying values /// @@ -10,9 +14,11 @@ use itertools::Itertools as _; /// order their elements according to their `Ord` implementation. Enums should /// hash a 32-bit little-endian encoding of the ordinal number of the enum /// variant, then the variant's fields in lexical order. +/// +/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`. 
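For illustration, here is a hand-written equivalent of what `#[derive(ContentHash)]` is expected to generate, based on `derive_content_hash` and `generate_hash_impl` above. The `Example` struct and its fields are hypothetical and not part of this change; the real expansion uses fully qualified `::jj_lib::content_hash::` paths and an `#[automatically_derived]` attribute, simplified here with a `use` import.

```rust
use jj_lib::content_hash::{ContentHash, DigestUpdate};

// Hypothetical struct, for illustration only; not part of this diff.
struct Example {
    name: String,
    count: i64,
}

// Sketch of what the derive should generate for `Example`: one
// `ContentHash::hash` call per field, in declaration order, matching the
// `quote_spanned!` blocks in `generate_hash_impl` above.
impl ContentHash for Example {
    fn hash(&self, state: &mut impl DigestUpdate) {
        ContentHash::hash(&self.name, state);
        ContentHash::hash(&self.count, state);
    }
}
```

Because fields are hashed in declaration order, reordering a struct's fields changes the resulting content hash, just as it does with the existing `content_hash!` macro.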
pub trait ContentHash { /// Update the hasher state with this object's content - fn hash(&self, state: &mut impl digest::Update); + fn hash(&self, state: &mut impl DigestUpdate); } /// The 512-bit BLAKE2b content hash @@ -24,36 +30,36 @@ pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output ContentHash for [T] { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { state.update(&(self.len() as u64).to_le_bytes()); for x in self { x.hash(state); @@ -62,19 +68,19 @@ impl ContentHash for [T] { } impl ContentHash for Vec { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { self.as_slice().hash(state) } } impl ContentHash for String { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { self.as_bytes().hash(state); } } impl ContentHash for Option { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { match self { None => state.update(&0u32.to_le_bytes()), Some(x) => { @@ -90,7 +96,7 @@ where K: ContentHash + Ord, V: ContentHash, { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { state.update(&(self.len() as u64).to_le_bytes()); let mut kv = self.iter().collect_vec(); kv.sort_unstable_by_key(|&(k, _)| k); @@ -105,7 +111,7 @@ impl ContentHash for std::collections::HashSet where K: ContentHash + Ord, { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { state.update(&(self.len() as u64).to_le_bytes()); for k in self.iter().sorted() { k.hash(state); @@ -118,7 +124,7 @@ where K: ContentHash, V: ContentHash, { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { state.update(&(self.len() as u64).to_le_bytes()); for (k, v) in self.iter() { k.hash(state); @@ -231,6 +237,31 @@ mod tests { ); } + // This will be removed once all uses of content_hash! are replaced by the + // derive version. + #[test] + fn derive_is_equivalent_to_macro() { + content_hash! { + struct FooMacro { x: Vec>, y: i64} + } + + #[derive(ContentHash)] + struct FooDerive { + x: Vec>, + y: i64, + } + + let foo_macro = FooMacro { + x: vec![None, Some(42)], + y: 17, + }; + let foo_derive = FooDerive { + x: vec![None, Some(42)], + y: 17, + }; + assert_eq!(hash(&foo_macro), hash(&foo_derive)); + } + fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output { blake2b_hash(x) } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 2340a3bb1a..a469da3e97 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -18,6 +18,13 @@ #![deny(unused_must_use)] #![forbid(unsafe_code)] +// Needed so that proc macros can be used inside jj_lib and by external crates +// that depend on it. 
+// See: +// - https://github.com/rust-lang/rust/issues/54647#issuecomment-432015102 +// - https://github.com/rust-lang/rust/issues/54363 +extern crate self as jj_lib; + #[macro_use] pub mod content_hash; diff --git a/lib/src/merge.rs b/lib/src/merge.rs index 435102bce7..b5d66a9152 100644 --- a/lib/src/merge.rs +++ b/lib/src/merge.rs @@ -29,7 +29,7 @@ use smallvec::{smallvec_inline, SmallVec}; use crate::backend; use crate::backend::{BackendError, FileId, TreeId, TreeValue}; -use crate::content_hash::ContentHash; +use crate::content_hash::{ContentHash, DigestUpdate}; use crate::object_id::ObjectId; use crate::repo_path::RepoPath; use crate::store::Store; @@ -457,7 +457,7 @@ impl Merge> { } impl ContentHash for Merge { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { self.values.hash(state) } } diff --git a/lib/src/op_store.rs b/lib/src/op_store.rs index 750f996a71..302289bb71 100644 --- a/lib/src/op_store.rs +++ b/lib/src/op_store.rs @@ -25,7 +25,7 @@ use once_cell::sync::Lazy; use thiserror::Error; use crate::backend::{CommitId, MillisSinceEpoch, Timestamp}; -use crate::content_hash::ContentHash; +use crate::content_hash::{ContentHash, DigestUpdate}; use crate::merge::Merge; use crate::object_id::{id_type, HexPrefix, ObjectId, PrefixResolution}; @@ -213,7 +213,7 @@ pub enum RemoteRefState { } impl ContentHash for RemoteRefState { - fn hash(&self, state: &mut impl digest::Update) { + fn hash(&self, state: &mut impl DigestUpdate) { match self { RemoteRefState::New => state.update(&0u32.to_le_bytes()), RemoteRefState::Tracking => state.update(&1u32.to_le_bytes()),
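Since `generate_hash_impl` hits `unimplemented!` for anything other than a struct, enums such as `MergedTreeId`, `TreeValue`, and `RemoteRefState` keep hand-written impls like the ones updated above. A minimal sketch of that convention, using a hypothetical `Visibility` enum that is not part of this change: hash a 32-bit little-endian ordinal for the variant, then the variant's fields, as documented on the `ContentHash` trait.

```rust
use jj_lib::content_hash::{ContentHash, DigestUpdate};

// Hypothetical enum, for illustration only; not part of this diff.
enum Visibility {
    Hidden,
    Visible { since: i64 },
}

impl ContentHash for Visibility {
    fn hash(&self, state: &mut impl DigestUpdate) {
        match self {
            // Variant ordinal as a 32-bit little-endian value, then the
            // variant's fields, mirroring the `RemoteRefState` impl above.
            Visibility::Hidden => state.update(&0u32.to_le_bytes()),
            Visibility::Visible { since } => {
                state.update(&1u32.to_le_bytes());
                since.hash(state);
            }
        }
    }
}
```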