diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d5f79609a..1a7d6bba2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,6 +63,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * The `file()` revset function now accepts fileset as argument. +* New `diff_contains()` revset function can be used to search diffs. + ### Fixed bugs * `jj diff --git` no longer shows the contents of binary files. diff --git a/docs/revsets.md b/docs/revsets.md index 56b7559277..ada858a24d 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -280,6 +280,16 @@ given [string pattern](#string-patterns). Some file patterns might need quoting because the `expression` must also be parsable as a revset. For example, `.` has to be quoted in `file(".")`. +* `diff_contains(text[, files])`: Commits containing diffs matching the given + `text` pattern line by line. + + The search paths can be narrowed by the `files` expression. All modified files + are scanned by default, but it is likely to change in future version to + respect the command line path arguments. + + For example, `diff_contains("TODO", "src")` will search revisions where "TODO" + is added to or removed from files under "src". + * `conflict()`: Commits with conflicts. * `present(x)`: Same as `x`, but evaluated to `none()` if any of the commits diff --git a/lib/src/default_index/revset_engine.rs b/lib/src/default_index/revset_engine.rs index eec315e2c1..c4d68bb67d 100644 --- a/lib/src/default_index/revset_engine.rs +++ b/lib/src/default_index/revset_engine.rs @@ -20,14 +20,17 @@ use std::collections::{BTreeSet, BinaryHeap, HashSet}; use std::ops::Range; use std::rc::Rc; use std::sync::Arc; -use std::{fmt, iter}; +use std::{fmt, iter, str}; +use futures::StreamExt as _; use itertools::Itertools; +use pollster::FutureExt as _; use super::rev_walk::{EagerRevWalk, PeekableRevWalk, RevWalk, RevWalkBuilder}; use super::revset_graph_iterator::RevsetGraphWalk; -use crate::backend::{BackendResult, ChangeId, CommitId, MillisSinceEpoch}; +use crate::backend::{BackendError, BackendResult, ChangeId, CommitId, MillisSinceEpoch}; use crate::commit::Commit; +use crate::conflicts::{materialized_diff_stream, MaterializedTreeValue}; use crate::default_index::{AsCompositeIndex, CompositeIndex, IndexPosition}; use crate::graph::GraphEdge; use crate::matchers::{Matcher, Visit}; @@ -37,6 +40,7 @@ use crate::revset::{ RevsetFilterPredicate, GENERATION_RANGE_FULL, }; use crate::store::Store; +use crate::str_util::StringPattern; use crate::{rewrite, union_find}; type BoxedPredicateFn<'a> = Box bool + 'a>; @@ -1078,6 +1082,16 @@ fn build_predicate_fn( has_diff_from_parent(&store, index, &commit, matcher.as_ref()).unwrap() }) } + RevsetFilterPredicate::DiffContains { text, files } => { + let text_pattern = text.clone(); + let files_matcher: Rc = files.to_matcher().into(); + box_pure_predicate_fn(move |index, pos| { + let entry = index.entry_by_pos(pos); + let commit = store.get_commit(&entry.commit_id()).unwrap(); + matches_diff_from_parent(&store, index, &commit, &text_pattern, &*files_matcher) + .unwrap() + }) + } RevsetFilterPredicate::HasConflict => box_pure_predicate_fn(move |index, pos| { let entry = index.entry_by_pos(pos); let commit = store.get_commit(&entry.commit_id()).unwrap(); @@ -1115,6 +1129,75 @@ fn has_diff_from_parent( Ok(from_tree.diff(&to_tree, matcher).next().is_some()) } +fn matches_diff_from_parent( + store: &Arc, + index: &CompositeIndex, + commit: &Commit, + text_pattern: &StringPattern, + files_matcher: &dyn Matcher, +) -> BackendResult { + let parents: Vec<_> = commit.parents().try_collect()?; + let from_tree = rewrite::merge_commit_trees_without_repo(store, &index, &parents)?; + let to_tree = commit.tree()?; + let tree_diff = from_tree.diff_stream(&to_tree, files_matcher); + // Conflicts are compared in materialized form. Alternatively, conflict + // pairs can be compared one by one. #4062 + let mut diff_stream = materialized_diff_stream(store, tree_diff); + async { + while let Some((path, diff)) = diff_stream.next().await { + let (left_value, right_value) = diff?; + let left_content = to_file_content(&path, left_value)?; + let right_content = to_file_content(&path, right_value)?; + // Filter lines prior to comparison. This might produce inferior + // hunks due to lack of contexts, but is way faster than full diff. + let left_lines = match_lines(&left_content, text_pattern); + let right_lines = match_lines(&right_content, text_pattern); + if left_lines.ne(right_lines) { + return Ok(true); + } + } + Ok(false) + } + .block_on() +} + +fn match_lines<'a: 'b, 'b>( + text: &'a [u8], + pattern: &'b StringPattern, +) -> impl Iterator + 'b { + // The pattern is matched line by line so that it can be anchored to line + // start/end. For example, exact:"" will match blank lines. + text.split_inclusive(|b| *b == b'\n').filter(|line| { + let line = line.strip_suffix(b"\n").unwrap_or(line); + // TODO: add .matches_bytes() or .to_bytes_matcher() + str::from_utf8(line).map_or(false, |line| pattern.matches(line)) + }) +} + +fn to_file_content(path: &RepoPath, value: MaterializedTreeValue) -> BackendResult> { + match value { + MaterializedTreeValue::Absent => Ok(vec![]), + MaterializedTreeValue::AccessDenied(_) => Ok(vec![]), + MaterializedTreeValue::File { id, mut reader, .. } => { + let mut content = vec![]; + reader + .read_to_end(&mut content) + .map_err(|err| BackendError::ReadFile { + path: path.to_owned(), + id: id.clone(), + source: err.into(), + })?; + Ok(content) + } + MaterializedTreeValue::Symlink { id: _, target } => Ok(target.into_bytes()), + MaterializedTreeValue::GitSubmodule(_) => Ok(vec![]), + MaterializedTreeValue::Conflict { contents, .. } => Ok(contents), + MaterializedTreeValue::Tree(id) => { + panic!("Unexpected tree with id {id:?} in diff at path {path:?}"); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/lib/src/revset.rs b/lib/src/revset.rs index db4ba985f7..afee11cfef 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -134,6 +134,11 @@ pub enum RevsetFilterPredicate { Committer(StringPattern), /// Commits modifying the paths specified by the fileset. File(FilesetExpression), + /// Commits containing diffs matching the `text` pattern within the `files`. + DiffContains { + text: StringPattern, + files: FilesetExpression, + }, /// Commits with conflicts HasConflict, /// Custom predicates provided by extensions @@ -714,6 +719,26 @@ static BUILTIN_FUNCTION_MAP: Lazy> = Lazy: let expr = FilesetExpression::union_all(file_expressions); Ok(RevsetExpression::filter(RevsetFilterPredicate::File(expr))) }); + map.insert("diff_contains", |function, context| { + let ([text_arg], [files_opt_arg]) = function.expect_arguments()?; + let text = expect_string_pattern(text_arg)?; + let files = if let Some(files_arg) = files_opt_arg { + let ctx = context.workspace.as_ref().ok_or_else(|| { + RevsetParseError::with_span( + RevsetParseErrorKind::FsPathWithoutWorkspace, + files_arg.span, + ) + })?; + expect_fileset_expression(files_arg, ctx.path_converter)? + } else { + // TODO: defaults to CLI path arguments? + // https://github.com/martinvonz/jj/issues/2933#issuecomment-1925870731 + FilesetExpression::all() + }; + Ok(RevsetExpression::filter( + RevsetFilterPredicate::DiffContains { text, files }, + )) + }); map.insert("conflict", |function, _context| { function.expect_no_arguments()?; Ok(RevsetExpression::filter(RevsetFilterPredicate::HasConflict)) diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index 13f3d3e210..6add8b4d6c 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -3012,6 +3012,152 @@ fn test_evaluate_expression_file() { ); } +#[test] +fn test_evaluate_expression_diff_contains() { + let settings = testutils::user_settings(); + let test_workspace = TestWorkspace::init(&settings); + let repo = &test_workspace.repo; + + let mut tx = repo.start_transaction(&settings); + let mut_repo = tx.mut_repo(); + + let empty_clean_inserted_deleted = + RepoPath::from_internal_string("empty_clean_inserted_deleted"); + let blank_clean_inserted_clean = RepoPath::from_internal_string("blank_clean_inserted_clean"); + let noeol_modified_modified_clean = + RepoPath::from_internal_string("noeol_modified_modified_clean"); + let normal_inserted_modified_removed = + RepoPath::from_internal_string("normal_inserted_modified_removed"); + let tree1 = create_tree( + repo, + &[ + (empty_clean_inserted_deleted, ""), + (blank_clean_inserted_clean, "\n"), + (noeol_modified_modified_clean, "1"), + (normal_inserted_modified_removed, "1\n"), + ], + ); + let tree2 = create_tree( + repo, + &[ + (empty_clean_inserted_deleted, ""), + (blank_clean_inserted_clean, "\n"), + (noeol_modified_modified_clean, "2"), + (normal_inserted_modified_removed, "1\n2\n"), + ], + ); + let tree3 = create_tree( + repo, + &[ + (empty_clean_inserted_deleted, "3"), + (blank_clean_inserted_clean, "\n3\n"), + (noeol_modified_modified_clean, "2 3"), + (normal_inserted_modified_removed, "1 3\n2\n"), + ], + ); + let tree4 = create_tree( + repo, + &[ + (empty_clean_inserted_deleted, ""), + (blank_clean_inserted_clean, "\n3\n"), + (noeol_modified_modified_clean, "2 3"), + // normal_inserted_modified_removed + ], + ); + let commit1 = mut_repo + .new_commit( + &settings, + vec![repo.store().root_commit_id().clone()], + tree1.id(), + ) + .write() + .unwrap(); + let commit2 = mut_repo + .new_commit(&settings, vec![commit1.id().clone()], tree2.id()) + .write() + .unwrap(); + let commit3 = mut_repo + .new_commit(&settings, vec![commit2.id().clone()], tree3.id()) + .write() + .unwrap(); + let commit4 = mut_repo + .new_commit(&settings, vec![commit3.id().clone()], tree4.id()) + .write() + .unwrap(); + + let query = |revset_str: &str| { + resolve_commit_ids_in_workspace( + mut_repo, + revset_str, + &test_workspace.workspace, + Some(test_workspace.workspace.workspace_root()), + ) + }; + + // should match both inserted and deleted lines + assert_eq!( + query("diff_contains('2')"), + vec![ + commit4.id().clone(), + commit3.id().clone(), + commit2.id().clone(), + ] + ); + assert_eq!( + query("diff_contains('3')"), + vec![commit4.id().clone(), commit3.id().clone()] + ); + assert_eq!(query("diff_contains('2 3')"), vec![commit3.id().clone()]); + assert_eq!( + query("diff_contains('1 3')"), + vec![commit4.id().clone(), commit3.id().clone()] + ); + + // should match line with eol + assert_eq!( + query(&format!( + "diff_contains(exact:'1', {normal_inserted_modified_removed:?})", + )), + vec![commit3.id().clone(), commit1.id().clone()] + ); + + // should match line without eol + assert_eq!( + query(&format!( + "diff_contains(exact:'1', {noeol_modified_modified_clean:?})", + )), + vec![commit2.id().clone(), commit1.id().clone()] + ); + + // exact:'' should match blank line + assert_eq!( + query(&format!( + "diff_contains(exact:'', {empty_clean_inserted_deleted:?})", + )), + vec![] + ); + assert_eq!( + query(&format!( + "diff_contains(exact:'', {blank_clean_inserted_clean:?})", + )), + vec![commit1.id().clone()] + ); + + // '' should match anything but clean + assert_eq!( + query(&format!( + "diff_contains('', {empty_clean_inserted_deleted:?})", + )), + vec![commit4.id().clone(), commit3.id().clone()] + ); + assert_eq!( + query(&format!( + "diff_contains('', {blank_clean_inserted_clean:?})", + )), + vec![commit3.id().clone(), commit1.id().clone()] + ); +} + #[test] fn test_evaluate_expression_conflict() { let settings = testutils::user_settings();