From f8a40a30a60e408f2fe571fe3b9e8d5ba10eda62 Mon Sep 17 00:00:00 2001 From: dploch Date: Fri, 12 Apr 2024 22:07:18 -0400 Subject: [PATCH] bluesky: copy tracing API --- cli/examples/custom-backend/main.rs | 13 +++++- lib/src/backend.rs | 51 +++++++++++++++++++++++- lib/src/commit_builder.rs | 1 + lib/src/git_backend.rs | 26 ++++++++++-- lib/src/local_backend.rs | 61 ++++++++++++++++++++++++++--- lib/src/protos/local_store.proto | 11 ++++++ lib/src/protos/local_store.rs | 18 +++++++++ lib/src/repo_path.rs | 2 +- lib/testutils/src/lib.rs | 1 + lib/testutils/src/test_backend.rs | 14 ++++++- 10 files changed, 183 insertions(+), 15 deletions(-) diff --git a/cli/examples/custom-backend/main.rs b/cli/examples/custom-backend/main.rs index beacdf8537..dd2921773b 100644 --- a/cli/examples/custom-backend/main.rs +++ b/cli/examples/custom-backend/main.rs @@ -23,12 +23,12 @@ use jj_cli::command_error::CommandError; use jj_cli::ui::Ui; use jj_lib::backend::{ Backend, BackendInitError, BackendLoadError, BackendResult, ChangeId, Commit, CommitId, - Conflict, ConflictId, FileId, SigningFn, SymlinkId, Tree, TreeId, + Conflict, ConflictId, CopyTrace, FileId, SigningFn, SymlinkId, Tree, TreeId, }; use jj_lib::git_backend::GitBackend; use jj_lib::index::Index; use jj_lib::repo::StoreFactories; -use jj_lib::repo_path::RepoPath; +use jj_lib::repo_path::{RepoPath, RepoPathBuf}; use jj_lib::settings::UserSettings; use jj_lib::signing::Signer; use jj_lib::workspace::{Workspace, WorkspaceInitError}; @@ -174,6 +174,15 @@ impl Backend for JitBackend { self.inner.write_commit(contents, sign_with) } + fn copy_trace( + &self, + paths: &[RepoPathBuf], + head: &CommitId, + root: &CommitId, + ) -> BackendResult<Box<dyn Iterator<Item = BackendResult<CopyTrace>> + '_>> { + self.inner.copy_trace(paths, head, root) + } + fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> { self.inner.gc(index, keep_newer) } diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 1b88e9a60f..c771910e14 100644 --- a/lib/src/backend.rs +++ 
b/lib/src/backend.rs @@ -15,7 +15,7 @@ #![allow(missing_docs)] use std::any::Any; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::fmt::Debug; use std::io::Read; use std::time::SystemTime; @@ -27,7 +27,7 @@ use crate::content_hash::ContentHash; use crate::index::Index; use crate::merge::Merge; use crate::object_id::{id_type, ObjectId}; -use crate::repo_path::{RepoPath, RepoPathComponent, RepoPathComponentBuf}; +use crate::repo_path::{RepoPath, RepoPathBuf, RepoPathComponent, RepoPathComponentBuf}; use crate::signing::SignResult; id_type!( @@ -125,11 +125,43 @@ impl MergedTreeId { } } +/// An optionally versioned copy source. +#[derive(ContentHash, Debug, PartialEq, Eq, Clone)] +pub struct CopySource { + /// The path the target was copied from. + pub path: RepoPathBuf, + /// The specific version the target was copied from. If unspecified, the + /// copy comes from the parent commit of the target file version. + pub commit_id: Option<CommitId>, +} + +/// A singular copy event in a specific file's history. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct CopyTrace { + /// The file that was copied into. + pub target: RepoPathBuf, + /// The commit where the file was copied. + pub commit_id: CommitId, + /// The source of the copy. + pub source: CopySource, +} + +/// Map from target -> source +/// +/// This can be set on write to explicitly record copies in the backend. The +/// backend may discard these silently if it does not support explicit copy +/// tracking (e.g. git, which tracks copies only implicitly). Backends which +/// do support explicit copy tracking may provide this information on read, +/// but are not required to. Callers should always use `copy_trace()` to get +/// copy info in order to support backends which don't explicitly store it. 
+pub type CopySources = HashMap<RepoPathBuf, CopySource>; + #[derive(ContentHash, Debug, PartialEq, Eq, Clone)] pub struct Commit { pub parents: Vec<CommitId>, pub predecessors: Vec<CommitId>, pub root_tree: MergedTreeId, + pub copy_sources: Option<CopySources>, pub change_id: ChangeId, pub description: String, pub author: Signature, @@ -327,6 +359,7 @@ pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Comm parents: vec![], predecessors: vec![], root_tree: MergedTreeId::Legacy(empty_tree_id), + copy_sources: None, change_id: root_change_id, description: String::new(), author: signature.clone(), @@ -404,6 +437,20 @@ pub trait Backend: Send + Sync + Debug { sign_with: Option<&mut SigningFn>, ) -> BackendResult<(CommitId, Commit)>; + /// Trace copy events for a set of files in a specific range of commits, in + /// reverse topological order. + /// + /// Performs transitive tracing if the backend supports it. Thus, the + /// returned iterator may emit copy traces for files not in `paths`, because + /// they were transitively copied into `paths` later on in the revset + /// topology (earlier in the iterator). + fn copy_trace( + &self, + paths: &[RepoPathBuf], + head: &CommitId, + root: &CommitId, + ) -> BackendResult<Box<dyn Iterator<Item = BackendResult<CopyTrace>> + '_>>; + /// Perform garbage collection. /// /// All commits found in the `index` won't be removed. 
In addition to that, diff --git a/lib/src/commit_builder.rs b/lib/src/commit_builder.rs index cef5b9789b..0b73c4f09e 100644 --- a/lib/src/commit_builder.rs +++ b/lib/src/commit_builder.rs @@ -47,6 +47,7 @@ impl CommitBuilder<'_> { parents, predecessors: vec![], root_tree: tree_id, + copy_sources: None, change_id, description: String::new(), author: signature.clone(), diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index fb21dd6497..fce7c34aca 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -34,16 +34,16 @@ use thiserror::Error; use crate::backend::{ make_root_commit, Backend, BackendError, BackendInitError, BackendLoadError, BackendResult, - ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MergedTreeId, - MillisSinceEpoch, SecureSig, Signature, SigningFn, SymlinkId, Timestamp, Tree, TreeId, - TreeValue, + ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, CopyTrace, FileId, + MergedTreeId, MillisSinceEpoch, SecureSig, Signature, SigningFn, SymlinkId, Timestamp, Tree, + TreeId, TreeValue, }; use crate::file_util::{IoResultExt as _, PathError}; use crate::index::Index; use crate::lock::FileLock; use crate::merge::{Merge, MergeBuilder}; use crate::object_id::ObjectId; -use crate::repo_path::{RepoPath, RepoPathComponentBuf}; +use crate::repo_path::{RepoPath, RepoPathBuf, RepoPathComponentBuf}; use crate::settings::UserSettings; use crate::stacked_table::{ MutableTable, ReadonlyTable, TableSegment, TableStore, TableStoreError, @@ -516,6 +516,7 @@ fn commit_from_git_without_root_parent( root_tree, change_id, description, + copy_sources: None, author, committer, secure_sig, @@ -1209,6 +1210,18 @@ impl Backend for GitBackend { Ok((id, contents)) } + fn copy_trace( + &self, + _paths: &[RepoPathBuf], + _head: &CommitId, + _root: &CommitId, + ) -> BackendResult<Box<dyn Iterator<Item = BackendResult<CopyTrace>> + '_>> { + // TODO: Implement copy tracing for git repos + Err(BackendError::Unsupported( + "Git backend does not support copy tracing".to_string(), 
+ )) + } + #[tracing::instrument(skip(self, index))] fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> { let git_repo = self.lock_git_repo(); @@ -1656,6 +1669,7 @@ mod tests { root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), change_id: ChangeId::from_hex("abc123"), description: "".to_string(), + copy_sources: None, author: create_signature(), committer: create_signature(), secure_sig: None, @@ -1734,6 +1748,7 @@ mod tests { root_tree: MergedTreeId::Merge(root_tree.clone()), change_id: ChangeId::from_hex("abc123"), description: "".to_string(), + copy_sources: None, author: create_signature(), committer: create_signature(), secure_sig: None, @@ -1817,6 +1832,7 @@ mod tests { root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), change_id: ChangeId::new(vec![]), description: "initial".to_string(), + copy_sources: None, author: signature.clone(), committer: signature, secure_sig: None, @@ -1894,6 +1910,7 @@ mod tests { root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), change_id: ChangeId::new(vec![]), description: "initial".to_string(), + copy_sources: None, author: create_signature(), committer: create_signature(), secure_sig: None, @@ -1935,6 +1952,7 @@ mod tests { root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), change_id: ChangeId::new(vec![]), description: "initial".to_string(), + copy_sources: None, author: create_signature(), committer: create_signature(), secure_sig: None, diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs index 7fa09d147b..7cce593ee4 100644 --- a/lib/src/local_backend.rs +++ b/lib/src/local_backend.rs @@ -28,16 +28,16 @@ use prost::Message; use tempfile::NamedTempFile; use crate::backend::{ - make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict, - ConflictId, ConflictTerm, FileId, MergedTreeId, MillisSinceEpoch, SecureSig, Signature, - SigningFn, SymlinkId, Timestamp, Tree, TreeId, TreeValue, + self, 
make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, + Conflict, ConflictId, ConflictTerm, CopyTrace, FileId, MergedTreeId, MillisSinceEpoch, + SecureSig, Signature, SigningFn, SymlinkId, Timestamp, Tree, TreeId, TreeValue, }; use crate::content_hash::blake2b_hash; use crate::file_util::persist_content_addressed_temp_file; use crate::index::Index; use crate::merge::MergeBuilder; use crate::object_id::ObjectId; -use crate::repo_path::{RepoPath, RepoPathComponentBuf}; +use crate::repo_path::{RepoPath, RepoPathBuf, RepoPathComponentBuf}; const COMMIT_ID_LENGTH: usize = 64; const CHANGE_ID_LENGTH: usize = 16; @@ -92,7 +92,9 @@ impl LocalBackend { pub fn load(store_path: &Path) -> Self { let root_commit_id = CommitId::from_bytes(&[0; COMMIT_ID_LENGTH]); let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]); - let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310"); + let empty_tree_id = TreeId::from_hex( + "482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310", + ); LocalBackend { path: store_path.to_path_buf(), root_commit_id, @@ -301,11 +303,57 @@ impl Backend for LocalBackend { Ok((id, commit)) } + fn copy_trace( + &self, + _paths: &[RepoPathBuf], + _head: &CommitId, + _root: &CommitId, + ) -> BackendResult<Box<dyn Iterator<Item = BackendResult<CopyTrace>> + '_>> { + // TODO: Implement a rev walk from head->root and emit copy trace events. 
+ Err(BackendError::Unsupported( + "Local copy tracing not implemented".to_string(), + )) + } + fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> { Ok(()) } } +fn copy_sources_to_proto( + copy_sources: &backend::CopySources, +) -> crate::protos::local_store::CopySources { + let mut records = vec![]; + for (target_path, copy_source) in copy_sources { + records.push(crate::protos::local_store::CopyRecord { + target: target_path.as_internal_file_string().to_owned(), + source: copy_source.path.as_internal_file_string().to_owned(), + commit_id: copy_source.commit_id.as_ref().map(|id| id.to_bytes()), + }); + } + + crate::protos::local_store::CopySources { records } +} + +fn copy_sources_from_proto( + copy_sources: crate::protos::local_store::CopySources, +) -> backend::CopySources { + let mut out = backend::CopySources::new(); + for record in copy_sources.records { + let target_path = RepoPathBuf::from_internal_string(record.target); + let source_path = RepoPathBuf::from_internal_string(record.source); + let source_id = record.commit_id.map(CommitId::new); + out.insert( + target_path, + backend::CopySource { + path: source_path, + commit_id: source_id, + }, + ); + } + out +} + #[allow(unknown_lints)] // XXX FIXME (aseipp): nightly bogons; re-test this occasionally #[allow(clippy::assigning_clones)] pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit { @@ -327,6 +375,7 @@ pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit { } proto.change_id = commit.change_id.to_bytes(); proto.description = commit.description.clone(); + proto.copy_sources = commit.copy_sources.as_ref().map(copy_sources_to_proto); proto.author = Some(signature_to_proto(&commit.author)); proto.committer = Some(signature_to_proto(&commit.committer)); proto @@ -356,6 +405,7 @@ fn commit_from_proto(mut proto: crate::protos::local_store::Commit) -> Commit { root_tree, change_id, description: proto.description, + copy_sources: 
proto.copy_sources.map(copy_sources_from_proto), author: signature_from_proto(proto.author.unwrap_or_default()), committer: signature_from_proto(proto.committer.unwrap_or_default()), secure_sig, @@ -512,6 +562,7 @@ mod tests { root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()), change_id: ChangeId::from_hex("abc123"), description: "".to_string(), + copy_sources: None, author: create_signature(), committer: create_signature(), secure_sig: None, diff --git a/lib/src/protos/local_store.proto b/lib/src/protos/local_store.proto index d804cc8762..250a5030cc 100644 --- a/lib/src/protos/local_store.proto +++ b/lib/src/protos/local_store.proto @@ -39,6 +39,16 @@ message Tree { repeated Entry entries = 1; } +message CopySources { + repeated CopyRecord records = 1; +} + +message CopyRecord { + string target = 1; + string source = 2; + optional bytes commit_id = 3; +} + message Commit { repeated bytes parents = 1; repeated bytes predecessors = 2; @@ -48,6 +58,7 @@ message Commit { bool uses_tree_conflict_format = 8; bytes change_id = 4; string description = 5; + optional CopySources copy_sources = 10; message Timestamp { int64 millis_since_epoch = 1; diff --git a/lib/src/protos/local_store.rs b/lib/src/protos/local_store.rs index 5740530825..2214df590e 100644 --- a/lib/src/protos/local_store.rs +++ b/lib/src/protos/local_store.rs @@ -47,6 +47,22 @@ pub mod tree { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct CopySources { + #[prost(message, repeated, tag = "1")] + pub records: ::prost::alloc::vec::Vec<CopyRecord>, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CopyRecord { + #[prost(string, tag = "1")] + pub target: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub source: ::prost::alloc::string::String, + #[prost(bytes = "vec", optional, tag = "3")] + pub commit_id: ::core::option::Option<::prost::alloc::vec::Vec<u8>>, +} 
+#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct Commit { #[prost(bytes = "vec", repeated, tag = "1")] pub parents: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>, @@ -62,6 +78,8 @@ pub struct Commit { pub change_id: ::prost::alloc::vec::Vec<u8>, #[prost(string, tag = "5")] pub description: ::prost::alloc::string::String, + #[prost(message, optional, tag = "10")] + pub copy_sources: ::core::option::Option<CopySources>, #[prost(message, optional, tag = "6")] pub author: ::core::option::Option<commit::Signature>, #[prost(message, optional, tag = "7")] diff --git a/lib/src/repo_path.rs b/lib/src/repo_path.rs index 6a8716ddb3..05ef9668ba 100644 --- a/lib/src/repo_path.rs +++ b/lib/src/repo_path.rs @@ -169,7 +169,7 @@ impl DoubleEndedIterator for RepoPathComponentsIter<'_> { impl FusedIterator for RepoPathComponentsIter<'_> {} /// Owned repository path. -#[derive(Clone, Eq, Hash, PartialEq)] +#[derive(Clone, ContentHash, Eq, Hash, PartialEq)] pub struct RepoPathBuf { // Don't add more fields. Eq, Hash, and Ord must be compatible with the // borrowed RepoPath type. 
diff --git a/lib/testutils/src/lib.rs b/lib/testutils/src/lib.rs index fdd871aa5f..6cd0b9fdcc 100644 --- a/lib/testutils/src/lib.rs +++ b/lib/testutils/src/lib.rs @@ -359,6 +359,7 @@ pub fn commit_with_tree(store: &Arc<Store>, tree_id: MergedTreeId) -> Commit { root_tree: tree_id, change_id: ChangeId::from_hex("abcd"), description: "description".to_string(), + copy_sources: None, author: signature.clone(), committer: signature, secure_sig: None, diff --git a/lib/testutils/src/test_backend.rs b/lib/testutils/src/test_backend.rs index 91ece531d8..25abec29bb 100644 --- a/lib/testutils/src/test_backend.rs +++ b/lib/testutils/src/test_backend.rs @@ -23,7 +23,7 @@ use std::time::SystemTime; use async_trait::async_trait; use jj_lib::backend::{ make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict, - ConflictId, FileId, SecureSig, SigningFn, SymlinkId, Tree, TreeId, + ConflictId, CopyTrace, FileId, SecureSig, SigningFn, SymlinkId, Tree, TreeId, }; use jj_lib::index::Index; use jj_lib::object_id::ObjectId; @@ -300,6 +300,18 @@ impl Backend for TestBackend { Ok((id, contents)) } + fn copy_trace( + &self, + _paths: &[RepoPathBuf], + _head: &CommitId, + _root: &CommitId, + ) -> BackendResult<Box<dyn Iterator<Item = BackendResult<CopyTrace>> + '_>> { + // TODO: Implement a rev walk which emits copy trace events. + Err(BackendError::Unsupported( + "TestBackend does not support copy tracing".to_string(), + )) + } + fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> { Ok(()) }