Skip to content

Commit

Permalink
Implement a procedural macro to derive the ContentHash trait for structs
Browse files Browse the repository at this point in the history
This is a no-op in terms of function, but provides a nicer way to derive the
ContentHash trait for structs using the `#[derive(ContentHash)]` syntax used
for other traits such as `Debug`.

This commit only adds the macro. A subsequent commit will replace uses of
`content_hash!{}` with `#[derive(ContentHash)]`.

The new macro generates nice error messages, just like the old macro, although
they are slightly different.

Before:
```
error[E0277]: the trait bound `NotImplemented: content_hash::ContentHash` is not satisfied
   --> lib/src/content_hash.rs:242:30
    |
242 |             struct Broken{t: NotImplemented}
    |                              ^^^^^^^^^^^^^^ the trait `content_hash::ContentHash` is not implemented for `NotImplemented`
    |
    = help: the following other types implement trait `content_hash::ContentHash`:
              bool
              i32
              i64
              u8
              std::collections::HashMap<K, V>
              BTreeMap<K, V>
              std::collections::HashSet<K>
              content_hash::tests::test_struct_sanity::Foo
            and 38 others
```

After:
```
error[E0277]: the trait bound `NotImplemented: content_hash::ContentHash` is not satisfied
   --> lib/src/content_hash.rs:247:13
    |
247 |             t: NotImplemented,
    |             ^ the trait `content_hash::ContentHash` is not implemented for `NotImplemented`
    |
    = help: the following other types implement trait `content_hash::ContentHash`:
              bool
              i32
              i64
              u8
              std::collections::HashMap<K, V>
              BTreeMap<K, V>
              std::collections::HashSet<K>
              content_hash::tests::test_struct_sanity::Foo
            and 38 others

For more information about this error, try `rustc --explain E0277`.
error: could not compile `jj-lib` (lib test) due to 2 previous errors
```

This commit does two things to make proc macros re-exported by jj_lib useable
by deps:

1. jj_lib needs to be able refer to itself as `jj_lib` which it does
   by adding an `extern crate self as jj_lib` declaration.

2. jj_lib::content_hash needs to re-export the `digest::Update` type so that
   users of jj_lib can use the `#[derive(ContentHash)]` proc macro without
   directly depending on the digest crate. This is done by re-exporting it
   as `DigestUpdate`.


#3054
  • Loading branch information
emesterhazy committed Feb 16, 2024
1 parent a80d018 commit a21079b
Show file tree
Hide file tree
Showing 11 changed files with 160 additions and 25 deletions.
14 changes: 12 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cargo-features = []

[workspace]
resolver = "2"
members = ["cli", "lib", "lib/testutils", "lib/gen-protos"]
members = ["cli", "lib", "lib/gen-protos", "lib/proc-macros", "lib/testutils"]

[workspace.package]
version = "0.14.0"
Expand Down Expand Up @@ -66,8 +66,10 @@ pest = "2.7.7"
pest_derive = "2.7.7"
pollster = "0.3.0"
pretty_assertions = "1.4.0"
proc-macro2 = "1.0.78"
prost = "0.12.3"
prost-build = "0.12.3"
quote = "1.0.35"
rand = "0.8.5"
rand_chacha = "0.3.1"
rayon = "1.8.1"
Expand All @@ -85,6 +87,7 @@ smallvec = { version = "1.13.0", features = [
"union",
] }
strsim = "0.11.0"
syn = "2.0.48"
tempfile = "3.10.0"
test-case = "3.3.1"
textwrap = "0.16.0"
Expand All @@ -110,6 +113,7 @@ zstd = "0.12.4"
# their own (alphabetically sorted) block

jj-lib = { path = "lib", version = "0.14.0" }
jj-lib-proc-macros = { path = "lib/proc-macros", version = "0.14.0" }
testutils = { path = "lib/testutils" }

# Insta suggests compiling these packages in opt mode for faster testing.
Expand Down
1 change: 1 addition & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ gix = { workspace = true }
glob = { workspace = true }
hex = { workspace = true }
itertools = { workspace = true }
jj-lib-proc-macros = { workspace = true }
maplit = { workspace = true }
once_cell = { workspace = true }
pest = { workspace = true }
Expand Down
15 changes: 15 additions & 0 deletions lib/proc-macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[package]
name = "jj-lib-proc-macros"
publish = false

version = { workspace = true }
edition = { workspace = true }
license = { workspace = true }

[lib]
proc-macro = true

[dependencies]
proc-macro2 = { workspace=true }
quote = { workspace=true }
syn = { workspace=true }
37 changes: 37 additions & 0 deletions lib/proc-macros/src/content_hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use proc_macro2::TokenStream;
use quote::{quote, quote_spanned};
use syn::spanned::Spanned;
use syn::{Data, Fields, Index};

pub fn generate_hash_impl(data: &Data) -> TokenStream {
match *data {
Data::Struct(ref data) => match data.fields {
Fields::Named(ref fields) => {
let hash_statements = fields.named.iter().map(|f| {
let field_name = &f.ident;
quote_spanned! {f.span()=>
::jj_lib::content_hash::ContentHash::hash(&self.#field_name, state);
}
});
quote! {
#(#hash_statements)*
}
}
Fields::Unnamed(ref fields) => {
let hash_statements = fields.unnamed.iter().enumerate().map(|(i, f)| {
let index = Index::from(i);
quote_spanned! {f.span() =>
::jj_lib::content_hash::ContentHash::hash(&self.#index, state);
}
});
quote! {
#(#hash_statements)*
}
}
Fields::Unit => {
quote! {}
}
},
_ => unimplemented!("ContentHash can only be derived for structs."),
}
}
30 changes: 30 additions & 0 deletions lib/proc-macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
mod content_hash;

extern crate proc_macro;

use quote::quote;
use syn::{parse_macro_input, DeriveInput};

/// Derives the `ContentHash` trait for a struct by calling `ContentHash::hash`
/// on each of the struct members in the order that they're declared. All
/// members of the struct must implement the `ContentHash` trait.
#[proc_macro_derive(ContentHash)]
pub fn derive_content_hash(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let input = parse_macro_input!(input as DeriveInput);

// The name of the struct.
let name = &input.ident;

// Generate an expression to hash each of the fields in the struct.
let hash_impl = content_hash::generate_hash_impl(&input.data);

let expanded = quote! {
#[automatically_derived]
impl ::jj_lib::content_hash::ContentHash for #name {
fn hash(&self, state: &mut impl ::jj_lib::content_hash::DigestUpdate) {
#hash_impl
}
}
};
expanded.into()
}
6 changes: 3 additions & 3 deletions lib/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use std::vec::Vec;
use async_trait::async_trait;
use thiserror::Error;

use crate::content_hash::ContentHash;
use crate::content_hash::{ContentHash, DigestUpdate};
use crate::index::Index;
use crate::merge::Merge;
use crate::object_id::{id_type, ObjectId};
Expand Down Expand Up @@ -111,7 +111,7 @@ impl PartialEq for MergedTreeId {
impl Eq for MergedTreeId {}

impl ContentHash for MergedTreeId {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
match self {
MergedTreeId::Legacy(tree_id) => {
state.update(&0u32.to_le_bytes());
Expand Down Expand Up @@ -247,7 +247,7 @@ impl TreeValue {
}

impl ContentHash for TreeValue {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
use TreeValue::*;
match self {
File { id, executable } => {
Expand Down
61 changes: 46 additions & 15 deletions lib/src/content_hash.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
//! Portable, stable hashing suitable for identifying values
use blake2::Blake2b512;
// Re-export DigestUpdate so that the ContentHash proc macro can be used in
// external crates without directly depending on the digest crate.
pub use digest::Update as DigestUpdate;
use itertools::Itertools as _;
pub use jj_lib_proc_macros::ContentHash;

/// Portable, stable hashing suitable for identifying values
///
Expand All @@ -10,9 +14,11 @@ use itertools::Itertools as _;
/// order their elements according to their `Ord` implementation. Enums should
/// hash a 32-bit little-endian encoding of the ordinal number of the enum
/// variant, then the variant's fields in lexical order.
///
/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`.
pub trait ContentHash {
/// Update the hasher state with this object's content
fn hash(&self, state: &mut impl digest::Update);
fn hash(&self, state: &mut impl DigestUpdate);
}

/// The 512-bit BLAKE2b content hash
Expand All @@ -24,48 +30,48 @@ pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b51
}

impl ContentHash for () {
fn hash(&self, _: &mut impl digest::Update) {}
fn hash(&self, _: &mut impl DigestUpdate) {}
}

impl ContentHash for bool {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
u8::from(*self).hash(state);
}
}

impl ContentHash for u8 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&[*self]);
}
}

impl ContentHash for u32 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}

impl ContentHash for i32 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}

impl ContentHash for u64 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}

impl ContentHash for i64 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}

// TODO: Specialize for [u8] once specialization exists
impl<T: ContentHash> ContentHash for [T] {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
for x in self {
x.hash(state);
Expand All @@ -74,19 +80,19 @@ impl<T: ContentHash> ContentHash for [T] {
}

impl<T: ContentHash> ContentHash for Vec<T> {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
self.as_slice().hash(state)
}
}

impl ContentHash for String {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
self.as_bytes().hash(state);
}
}

impl<T: ContentHash> ContentHash for Option<T> {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
match self {
None => state.update(&0u32.to_le_bytes()),
Some(x) => {
Expand All @@ -102,7 +108,7 @@ where
K: ContentHash + Ord,
V: ContentHash,
{
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
let mut kv = self.iter().collect_vec();
kv.sort_unstable_by_key(|&(k, _)| k);
Expand All @@ -117,7 +123,7 @@ impl<K> ContentHash for std::collections::HashSet<K>
where
K: ContentHash + Ord,
{
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
for k in self.iter().sorted() {
k.hash(state);
Expand All @@ -130,7 +136,7 @@ where
K: ContentHash,
V: ContentHash,
{
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
for (k, v) in self.iter() {
k.hash(state);
Expand Down Expand Up @@ -243,6 +249,31 @@ mod tests {
);
}

// This will be removed once all uses of content_hash! are replaced by the
// derive version.
#[test]
fn derive_is_equivalent_to_macro() {
content_hash! {
struct FooMacro { x: Vec<Option<i32>>, y: i64}
}

#[derive(ContentHash)]
struct FooDerive {
x: Vec<Option<i32>>,
y: i64,
}

let foo_macro = FooMacro {
x: vec![None, Some(42)],
y: 17,
};
let foo_derive = FooDerive {
x: vec![None, Some(42)],
y: 17,
};
assert_eq!(hash(&foo_macro), hash(&foo_derive));
}

fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
blake2b_hash(x)
}
Expand Down
7 changes: 7 additions & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
#![deny(unused_must_use)]
#![forbid(unsafe_code)]

// Needed so that proc macros can be used inside jj_lib and by external crates
// that depend on it.
// See:
// - https://github.com/rust-lang/rust/issues/54647#issuecomment-432015102
// - https://github.com/rust-lang/rust/issues/54363
extern crate self as jj_lib;

#[macro_use]
pub mod content_hash;

Expand Down
4 changes: 2 additions & 2 deletions lib/src/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use smallvec::{smallvec_inline, SmallVec};

use crate::backend;
use crate::backend::{BackendError, FileId, TreeId, TreeValue};
use crate::content_hash::ContentHash;
use crate::content_hash::{ContentHash, DigestUpdate};
use crate::object_id::ObjectId;
use crate::repo_path::RepoPath;
use crate::store::Store;
Expand Down Expand Up @@ -457,7 +457,7 @@ impl<T> Merge<Merge<T>> {
}

impl<T: ContentHash> ContentHash for Merge<T> {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
self.values.hash(state)
}
}
Expand Down
Loading

0 comments on commit a21079b

Please sign in to comment.