From 5dd2cfa82214e3749895b585d28891d7ea703d92 Mon Sep 17 00:00:00 2001 From: Waleed Khan Date: Tue, 29 Aug 2023 23:13:35 +0200 Subject: [PATCH] merge_tools: create builtin diff editor --- Cargo.lock | 61 +++- Cargo.toml | 1 + cli/Cargo.toml | 1 + cli/src/merge_tools/builtin.rs | 575 +++++++++++++++++++++++++++++++++ cli/src/merge_tools/mod.rs | 11 +- lib/src/backend.rs | 12 + 6 files changed, 659 insertions(+), 2 deletions(-) create mode 100644 cli/src/merge_tools/builtin.rs diff --git a/Cargo.lock b/Cargo.lock index 6a316350ee..3886a58462 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -269,6 +269,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "cassowary" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + [[package]] name = "cast" version = "0.3.0" @@ -517,6 +523,22 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" +dependencies = [ + "bitflags 1.3.2", + "crossterm_winapi", + "libc", + "mio", + "parking_lot", + "signal-hook", + "signal-hook-mio", + "winapi", +] + [[package]] name = "crossterm" version = "0.26.1" @@ -1004,7 +1026,7 @@ dependencies = [ "clap_mangen", "config", "criterion", - "crossterm", + "crossterm 0.26.1", "dirs", "esl01-renderdag", "git2", @@ -1021,6 +1043,7 @@ dependencies = [ "pest_derive", "regex", "rpassword", + "scm-record", "serde", "slab", "tempfile", @@ -1834,6 +1857,23 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scm-record" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de5add99102ba58391e9c1e0c2ee5410654d8ffe7605a393db863a403cb5a2d" +dependencies = [ + "cassowary", + "crossterm 0.26.1", + "num-traits", + "serde", + "serde_json", + "thiserror", + "tracing", + "tui", + "unicode-width", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2321,6 +2361,19 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tui" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccdd26cbd674007e649a272da4475fb666d3aa0ad0531da7136db6fab0e5bad1" +dependencies = [ + "bitflags 1.3.2", + "cassowary", + "crossterm 0.25.0", + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "typenum" version = "1.16.0" @@ -2360,6 +2413,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/Cargo.toml b/Cargo.toml index 11876cc596..37f627889e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,7 @@ smallvec = { version = "1.11.0", features = [ "const_new", "union", ] } +scm-record = "0.1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.105" slab = "0.4.9" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index d3bc2d10fe..ef548500b2 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -49,6 +49,7 @@ pest = { workspace = true } pest_derive = { workspace = true } regex = { workspace = true } rpassword = { workspace = true } +scm-record = { workspace = true } serde = { workspace = true } slab = { workspace = true } tempfile = { workspace = true } diff --git a/cli/src/merge_tools/builtin.rs b/cli/src/merge_tools/builtin.rs new file mode 100644 index 0000000000..8fb3c515ae --- /dev/null +++ b/cli/src/merge_tools/builtin.rs @@ -0,0 +1,575 @@ +use std::borrow::Cow; +use std::path::Path; +use std::sync::Arc; + +use itertools::Itertools; +use jj_lib::backend::{BackendError, FileId, MergedTreeId, ObjectId, TreeValue}; +use jj_lib::diff::{find_line_ranges, Diff, DiffHunk}; +use jj_lib::matchers::EverythingMatcher; +use jj_lib::merge::Merge; +use jj_lib::merged_tree::{MergedTree, MergedTreeBuilder}; +use jj_lib::repo_path::RepoPath; +use jj_lib::store::Store; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum BuiltinToolError { + #[error("Failed to record changes: {0}")] + Record(#[from] scm_record::RecordError), + #[error(transparent)] + ReadFileBackend(BackendError), + #[error("Failed to read file {path:?} with ID {id:?}: {source}")] + ReadFileIo { + path: RepoPath, + id: FileId, + source: std::io::Error, + }, + #[error(transparent)] + ReadSymlink(BackendError), + #[error("Failed to decode UTF-8 text for item {item} (this should not happen): {source}")] + DecodeUtf8 { + source: std::str::Utf8Error, + item: &'static str, + }, + #[error("Rendering {item} {id} is unimplemented for the builtin difftool/mergetool")] + Unimplemented { item: &'static str, id: String }, + #[error("Backend error: {0:?}")] + BackendError(#[from] jj_lib::backend::BackendError), +} + +#[derive(Clone, Debug)] +enum FileContents { + Absent, + Text { + contents: String, + hash: Option, + num_bytes: u64, + }, + Binary { + hash: Option, + num_bytes: u64, + }, +} + +/// Information about a file that was read from disk. Note that the file may not +/// have existed, in which case its contents will be marked as absent. +#[derive(Clone, Debug)] +pub struct FileInfo { + file_mode: scm_record::FileMode, + contents: FileContents, +} + +/// File modes according to the Git file mode conventions. used for display +/// purposes and equality comparison. +/// +/// TODO: let `scm-record` accept strings instead of numbers for file modes? Or +/// figure out some other way to represent file mode changes in a jj-compatible +/// manner? +mod mode { + pub const NORMAL: usize = 0o100644; + pub const EXECUTABLE: usize = 0o100755; + pub const SYMLINK: usize = 0o120000; +} + +fn describe_binary(hash: Option<&str>, num_bytes: u64) -> String { + match hash { + Some(hash) => { + format!("{hash} ({num_bytes}B)") + } + None => format!("({num_bytes}B)"), + } +} + +fn buf_to_file_contents(hash: Option, buf: Vec) -> FileContents { + let num_bytes: u64 = buf.len().try_into().unwrap(); + let text = if buf.contains(&0) { + None + } else { + String::from_utf8(buf).ok() + }; + match text { + Some(text) => FileContents::Text { + contents: text, + hash, + num_bytes, + }, + None => FileContents::Binary { hash, num_bytes }, + } +} + +fn read_file_contents( + store: &Store, + tree: &MergedTree, + path: &RepoPath, +) -> Result { + match tree.path_value(path).into_resolved() { + Ok(None) => Ok(FileInfo { + file_mode: scm_record::FileMode::absent(), + contents: FileContents::Absent, + }), + + Ok(Some(TreeValue::File { id, executable })) => { + let mut reader = store + .read_file(path, &id) + .map_err(BuiltinToolError::ReadFileBackend)?; + let mut buf = Vec::new(); + reader + .read_to_end(&mut buf) + .map_err(|err| BuiltinToolError::ReadFileIo { + path: path.clone(), + id: id.clone(), + source: err, + })?; + + let file_mode = if executable { + scm_record::FileMode(mode::EXECUTABLE) + } else { + scm_record::FileMode(mode::NORMAL) + }; + let contents = buf_to_file_contents(Some(id.hex()), buf); + Ok(FileInfo { + file_mode, + contents, + }) + } + + Ok(Some(TreeValue::Symlink(id))) => { + let value = store + .read_symlink(path, &id) + .map_err(BuiltinToolError::ReadSymlink)?; + let file_mode = scm_record::FileMode(mode::SYMLINK); + let num_bytes = value.len().try_into().unwrap(); + Ok(FileInfo { + file_mode, + contents: FileContents::Text { + contents: value, + hash: Some(id.hex()), + num_bytes, + }, + }) + } + + Ok(Some(TreeValue::Tree(tree_id))) => { + unreachable!("list of changed files included a tree: {tree_id:?}"); + } + Ok(Some(TreeValue::GitSubmodule(id))) => Err(BuiltinToolError::Unimplemented { + item: "git submodule", + id: id.hex(), + }), + Ok(Some(TreeValue::Conflict(id))) => { + unreachable!("list of changed files included a conflict: {id:?}"); + } + Err(merge) => { + unreachable!("list of changed files included a merge: {merge:?}"); + } + } +} + +fn make_section_changed_lines( + contents: &str, + change_type: scm_record::ChangeType, +) -> Vec> { + contents + .split_inclusive('\n') + .map(|line| scm_record::SectionChangedLine { + is_checked: false, + change_type, + line: Cow::Owned(line.to_owned()), + }) + .collect() +} + +fn make_diff_sections( + left_contents: &str, + right_contents: &str, +) -> Result>, BuiltinToolError> { + let diff = Diff::for_tokenizer( + &[left_contents.as_bytes(), right_contents.as_bytes()], + &find_line_ranges, + ); + let mut sections = Vec::new(); + for hunk in diff.hunks() { + match hunk { + DiffHunk::Matching(text) => { + let text = + std::str::from_utf8(text).map_err(|err| BuiltinToolError::DecodeUtf8 { + source: err, + item: "matching text in diff hunk", + })?; + sections.push(scm_record::Section::Unchanged { + lines: text + .split_inclusive('\n') + .map(|line| Cow::Owned(line.to_owned())) + .collect(), + }) + } + DiffHunk::Different(sides) => { + assert_eq!(sides.len(), 2, "only two inputs were provided to the diff"); + let left_side = + std::str::from_utf8(sides[0]).map_err(|err| BuiltinToolError::DecodeUtf8 { + source: err, + item: "left side of diff hunk", + })?; + let right_side = + std::str::from_utf8(sides[1]).map_err(|err| BuiltinToolError::DecodeUtf8 { + source: err, + item: "right side of diff hunk", + })?; + sections.push(scm_record::Section::Changed { + lines: [ + make_section_changed_lines(left_side, scm_record::ChangeType::Removed), + make_section_changed_lines(right_side, scm_record::ChangeType::Added), + ] + .concat(), + }) + } + } + } + Ok(sections) +} + +pub fn make_diff_files( + store: &Arc, + left_tree: &MergedTree, + right_tree: &MergedTree, + changed_files: &[RepoPath], +) -> Result>, BuiltinToolError> { + let mut files = Vec::new(); + for changed_path in changed_files { + let FileInfo { + file_mode: left_file_mode, + contents: left_contents, + } = read_file_contents(store, left_tree, changed_path)?; + let FileInfo { + file_mode: right_file_mode, + contents: right_contents, + } = read_file_contents(store, right_tree, changed_path)?; + + let mut sections = Vec::new(); + if left_file_mode != right_file_mode + && left_file_mode != scm_record::FileMode::absent() + && right_file_mode != scm_record::FileMode::absent() + { + sections.push(scm_record::Section::FileMode { + is_checked: false, + before: left_file_mode, + after: right_file_mode, + }); + } + + match (left_contents, right_contents) { + (FileContents::Absent, FileContents::Absent) => {} + ( + FileContents::Absent, + FileContents::Text { + contents, + hash: _, + num_bytes: _, + }, + ) => sections.push(scm_record::Section::Changed { + lines: make_section_changed_lines(&contents, scm_record::ChangeType::Added), + }), + + (FileContents::Absent, FileContents::Binary { hash, num_bytes }) => { + sections.push(scm_record::Section::Binary { + is_checked: false, + old_description: None, + new_description: Some(Cow::Owned(describe_binary(hash.as_deref(), num_bytes))), + }) + } + + ( + FileContents::Text { + contents, + hash: _, + num_bytes: _, + }, + FileContents::Absent, + ) => sections.push(scm_record::Section::Changed { + lines: make_section_changed_lines(&contents, scm_record::ChangeType::Removed), + }), + + ( + FileContents::Text { + contents: old_contents, + hash: _, + num_bytes: _, + }, + FileContents::Text { + contents: new_contents, + hash: _, + num_bytes: _, + }, + ) => { + sections.extend(make_diff_sections(&old_contents, &new_contents)?); + } + + ( + FileContents::Text { + contents: _, + hash: old_hash, + num_bytes: old_num_bytes, + } + | FileContents::Binary { + hash: old_hash, + num_bytes: old_num_bytes, + }, + FileContents::Text { + contents: _, + hash: new_hash, + num_bytes: new_num_bytes, + } + | FileContents::Binary { + hash: new_hash, + num_bytes: new_num_bytes, + }, + ) => sections.push(scm_record::Section::Binary { + is_checked: false, + old_description: Some(Cow::Owned(describe_binary( + old_hash.as_deref(), + old_num_bytes, + ))), + new_description: Some(Cow::Owned(describe_binary( + new_hash.as_deref(), + new_num_bytes, + ))), + }), + + (FileContents::Binary { hash, num_bytes }, FileContents::Absent) => { + sections.push(scm_record::Section::Binary { + is_checked: false, + old_description: Some(Cow::Owned(describe_binary(hash.as_deref(), num_bytes))), + new_description: None, + }) + } + } + + files.push(scm_record::File { + old_path: None, + path: Cow::Owned(changed_path.to_fs_path(Path::new(""))), + file_mode: None, + sections, + }); + } + Ok(files) +} + +pub fn apply_diff_builtin( + store: Arc, + left_tree: &MergedTree, + right_tree: &MergedTree, + changed_files: Vec, + files: &[scm_record::File], +) -> Result { + let mut tree_builder = MergedTreeBuilder::new(left_tree.id().clone()); + assert_eq!( + changed_files.len(), + files.len(), + "result had a different number of files" + ); + for (path, file) in changed_files.into_iter().zip(files) { + let (selected, _unselected) = file.get_selected_contents(); + match selected { + scm_record::SelectedContents::Absent => { + tree_builder.set_or_remove(path, Merge::absent()); + } + scm_record::SelectedContents::Unchanged => { + // Do nothing. + } + scm_record::SelectedContents::Binary { + old_description: _, + new_description: _, + } => { + let value = right_tree.path_value(&path); + tree_builder.set_or_remove(path, value); + } + scm_record::SelectedContents::Present { contents } => { + let file_id = store.write_file(&path, &mut contents.as_bytes())?; + tree_builder.set_or_remove( + path, + Merge::normal(TreeValue::File { + id: file_id, + executable: file.get_file_mode() + == Some(scm_record::FileMode(mode::EXECUTABLE)), + }), + ) + } + } + } + + let tree_id = tree_builder.write_tree(left_tree.store())?; + Ok(tree_id) +} + +pub fn edit_diff_builtin( + left_tree: &MergedTree, + right_tree: &MergedTree, +) -> Result { + let store = left_tree.store().clone(); + let changed_files = left_tree + .diff(right_tree, &EverythingMatcher) + .map(|(path, _left, _right)| path) + .collect_vec(); + let files = make_diff_files(&store, left_tree, right_tree, &changed_files)?; + let recorder = scm_record::Recorder::new( + scm_record::RecordState { + is_read_only: false, + files, + }, + scm_record::EventSource::Crossterm, + ); + let result = recorder.run().map_err(BuiltinToolError::Record)?; + let tree_id = apply_diff_builtin(store, left_tree, right_tree, changed_files, &result.files) + .map_err(BuiltinToolError::BackendError)?; + Ok(tree_id) +} + +#[cfg(test)] +mod tests { + use jj_lib::repo::Repo; + use testutils::TestRepo; + + use super::*; + + #[test] + fn test_edit_diff_builtin() { + let test_repo = TestRepo::init(false); + let store = test_repo.repo.store(); + + let unused_path = RepoPath::from_internal_string("unused"); + let unchanged = RepoPath::from_internal_string("unchanged"); + let changed_path = RepoPath::from_internal_string("changed"); + let added_path = RepoPath::from_internal_string("added"); + let left_tree = testutils::create_tree( + &test_repo.repo, + &[ + (&unused_path, "unused\n"), + (&unchanged, "unchanged\n"), + (&changed_path, "line1\nline2\nline3\n"), + ], + ); + let right_tree = testutils::create_tree( + &test_repo.repo, + &[ + (&unused_path, "unused\n"), + (&unchanged, "unchanged\n"), + (&changed_path, "line1\nchanged1\nchanged2\nline3\nadded1\n"), + (&added_path, "added\n"), + ], + ); + + let changed_files = vec![unchanged.clone(), changed_path.clone(), added_path.clone()]; + let files = make_diff_files(store, &left_tree, &right_tree, &changed_files).unwrap(); + insta::assert_debug_snapshot!(files, @r###" + [ + File { + old_path: None, + path: "unchanged", + file_mode: None, + sections: [ + Unchanged { + lines: [ + "unchanged\n", + ], + }, + ], + }, + File { + old_path: None, + path: "changed", + file_mode: None, + sections: [ + Unchanged { + lines: [ + "line1\n", + ], + }, + Changed { + lines: [ + SectionChangedLine { + is_checked: false, + change_type: Removed, + line: "line2\n", + }, + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "changed1\n", + }, + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "changed2\n", + }, + ], + }, + Unchanged { + lines: [ + "line3\n", + ], + }, + Changed { + lines: [ + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "added1\n", + }, + ], + }, + ], + }, + File { + old_path: None, + path: "added", + file_mode: None, + sections: [ + Changed { + lines: [ + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "added\n", + }, + ], + }, + ], + }, + ] + "###); + + let no_changes_tree_id = apply_diff_builtin( + store.clone(), + &left_tree, + &right_tree, + changed_files.clone(), + &files, + ) + .unwrap(); + let no_changes_tree = store.get_root_tree(&no_changes_tree_id).unwrap(); + assert_eq!( + no_changes_tree.id(), + left_tree.id(), + "no-changes tree was different", + ); + + let mut files = files; + for file in files.iter_mut() { + file.toggle_all(); + } + let all_changes_tree_id = apply_diff_builtin( + store.clone(), + &left_tree, + &right_tree, + changed_files, + &files, + ) + .unwrap(); + let all_changes_tree = store.get_root_tree(&all_changes_tree_id).unwrap(); + assert_eq!( + all_changes_tree.id(), + right_tree.id(), + "all-changes tree was different", + ); + } +} diff --git a/cli/src/merge_tools/mod.rs b/cli/src/merge_tools/mod.rs index ad2af00be4..2a6544621c 100644 --- a/cli/src/merge_tools/mod.rs +++ b/cli/src/merge_tools/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod builtin; mod external; use std::sync::Arc; @@ -26,6 +27,7 @@ use jj_lib::settings::{ConfigResultExt as _, UserSettings}; use jj_lib::working_copy::SnapshotError; use thiserror::Error; +use self::builtin::{edit_diff_builtin, BuiltinToolError}; use self::external::{edit_diff_external, DiffCheckoutError, ExternalToolError}; pub use self::external::{generate_diff, ExternalMergeTool}; use crate::config::CommandNameAndArgs; @@ -33,6 +35,8 @@ use crate::ui::Ui; #[derive(Debug, Error)] pub enum DiffEditError { + #[error(transparent)] + InternalTool(#[from] Box), #[error(transparent)] ExternalTool(#[from] ExternalToolError), #[error(transparent)] @@ -53,6 +57,8 @@ pub enum DiffGenerateError { #[derive(Debug, Error)] pub enum ConflictResolveError { + #[error(transparent)] + InternalTool(#[from] Box), #[error(transparent)] ExternalTool(#[from] ExternalToolError), #[error("Couldn't find the path {0:?} in this revision")] @@ -125,7 +131,10 @@ pub fn edit_diff( // Start a diff editor on the two directories. let editor = get_diff_editor_from_settings(ui, settings)?; match editor { - MergeTool::Builtin => unimplemented!("run_mergetool with builtin mergetool"), + MergeTool::Builtin => { + let tree_id = edit_diff_builtin(left_tree, right_tree).map_err(Box::new)?; + Ok(tree_id) + } MergeTool::External(editor) => edit_diff_external( editor, left_tree, diff --git a/lib/src/backend.rs b/lib/src/backend.rs index edb800ec1e..153bb1cebe 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -289,6 +289,18 @@ pub enum TreeValue { Conflict(ConflictId), } +impl TreeValue { + pub fn hex(&self) -> String { + match self { + TreeValue::File { id, .. } => id.hex(), + TreeValue::Symlink(id) => id.hex(), + TreeValue::Tree(id) => id.hex(), + TreeValue::GitSubmodule(id) => id.hex(), + TreeValue::Conflict(id) => id.hex(), + } + } +} + impl ContentHash for TreeValue { fn hash(&self, state: &mut impl digest::Update) { use TreeValue::*;