Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revset: add diff_contains(text[, files]) to search diffs #4100

Merged
merged 4 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

* The `file()` revset function now accepts fileset as argument.

* New `diff_contains()` revset function can be used to search diffs.

### Fixed bugs

* `jj diff --git` no longer shows the contents of binary files.
Expand Down
32 changes: 4 additions & 28 deletions cli/src/diff_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ use std::ops::Range;
use std::path::{Path, PathBuf};
use std::{io, mem};

use futures::{try_join, Stream, StreamExt};
use futures::StreamExt;
use itertools::Itertools;
use jj_lib::backend::{BackendError, BackendResult, TreeValue};
use jj_lib::backend::{BackendError, TreeValue};
use jj_lib::commit::Commit;
use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue};
use jj_lib::conflicts::{materialized_diff_stream, MaterializedTreeValue};
use jj_lib::diff::{Diff, DiffHunk};
use jj_lib::files::DiffLine;
use jj_lib::matchers::Matcher;
use jj_lib::merge::MergedTreeValue;
use jj_lib::merged_tree::{MergedTree, TreeDiffStream};
use jj_lib::object_id::ObjectId;
use jj_lib::repo::Repo;
use jj_lib::repo_path::{RepoPath, RepoPathBuf, RepoPathUiConverter};
use jj_lib::repo_path::{RepoPath, RepoPathUiConverter};
use jj_lib::settings::{ConfigResultExt as _, UserSettings};
use jj_lib::store::Store;
yuja marked this conversation as resolved.
Show resolved Hide resolved
use jj_lib::{diff, files};
Expand Down Expand Up @@ -995,30 +995,6 @@ fn show_unified_diff_hunks(
Ok(())
}

fn materialized_diff_stream<'a>(
store: &'a Store,
tree_diff: TreeDiffStream<'a>,
) -> impl Stream<
Item = (
RepoPathBuf,
BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
),
> + 'a {
tree_diff
.map(|(path, diff)| async {
match diff {
Err(err) => (path, Err(err)),
Ok((before, after)) => {
let before_future = materialize_tree_value(store, &path, before);
let after_future = materialize_tree_value(store, &path, after);
let values = try_join!(before_future, after_future);
(path, values)
}
}
})
.buffered((store.concurrency() / 2).max(1))
}

pub fn show_git_diff(
formatter: &mut dyn Formatter,
store: &Store,
Expand Down
10 changes: 10 additions & 0 deletions docs/revsets.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,16 @@ given [string pattern](#string-patterns).
Some file patterns might need quoting because the `expression` must also be
parsable as a revset. For example, `.` has to be quoted in `file(".")`.

* `diff_contains(text[, files])`: Commits containing diffs matching the given
`text` pattern line by line.

The search paths can be narrowed by the `files` expression. All modified files
are scanned by default, but it is likely to change in future version to
respect the command line path arguments.

For example, `diff_contains("TODO", "src")` will search revisions where "TODO"
is added to or removed from files under "src".

* `conflict()`: Commits with conflicts.

* `present(x)`: Same as `x`, but evaluated to `none()` if any of the commits
Expand Down
29 changes: 27 additions & 2 deletions lib/src/conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
use std::io::{Read, Write};
use std::iter::zip;

use futures::{StreamExt, TryStreamExt};
use futures::{try_join, Stream, StreamExt, TryStreamExt};
use itertools::Itertools;
use regex::bytes::Regex;

Expand All @@ -26,7 +26,8 @@ use crate::diff::{Diff, DiffHunk};
use crate::files;
use crate::files::{ContentHunk, MergeResult};
use crate::merge::{Merge, MergeBuilder, MergedTreeValue};
use crate::repo_path::RepoPath;
use crate::merged_tree::TreeDiffStream;
use crate::repo_path::{RepoPath, RepoPathBuf};
use crate::store::Store;

const CONFLICT_START_LINE: &[u8] = b"<<<<<<<";
Expand Down Expand Up @@ -326,6 +327,30 @@ fn diff_size(hunks: &[DiffHunk]) -> usize {
.sum()
}

pub fn materialized_diff_stream<'a>(
store: &'a Store,
tree_diff: TreeDiffStream<'a>,
) -> impl Stream<
Item = (
RepoPathBuf,
BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
),
> + 'a {
tree_diff
.map(|(path, diff)| async {
match diff {
Err(err) => (path, Err(err)),
Ok((before, after)) => {
let before_future = materialize_tree_value(store, &path, before);
let after_future = materialize_tree_value(store, &path, after);
let values = try_join!(before_future, after_future);
(path, values)
}
}
})
.buffered((store.concurrency() / 2).max(1))
}

/// Parses conflict markers from a slice. Returns None if there were no valid
/// conflict markers. The caller has to provide the expected number of merge
/// sides (adds). Conflict markers that are otherwise valid will be considered
Expand Down
111 changes: 98 additions & 13 deletions lib/src/default_index/revset_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,18 @@ use std::collections::{BTreeSet, BinaryHeap, HashSet};
use std::ops::Range;
use std::rc::Rc;
use std::sync::Arc;
use std::{fmt, iter};
use std::{fmt, iter, str};

use futures::StreamExt as _;
use itertools::Itertools;
use pollster::FutureExt as _;

use super::rev_walk::{EagerRevWalk, PeekableRevWalk, RevWalk, RevWalkBuilder};
use super::revset_graph_iterator::RevsetGraphWalk;
use crate::backend::{ChangeId, CommitId, MillisSinceEpoch};
use crate::default_index::{AsCompositeIndex, CompositeIndex, IndexEntry, IndexPosition};
use crate::backend::{BackendError, BackendResult, ChangeId, CommitId, MillisSinceEpoch};
use crate::commit::Commit;
use crate::conflicts::{materialized_diff_stream, MaterializedTreeValue};
use crate::default_index::{AsCompositeIndex, CompositeIndex, IndexPosition};
use crate::graph::GraphEdge;
use crate::matchers::{Matcher, Visit};
use crate::repo_path::RepoPath;
Expand All @@ -36,6 +40,7 @@ use crate::revset::{
RevsetFilterPredicate, GENERATION_RANGE_FULL,
};
use crate::store::Store;
use crate::str_util::StringPattern;
use crate::{rewrite, union_find};

type BoxedPredicateFn<'a> = Box<dyn FnMut(&CompositeIndex, IndexPosition) -> bool + 'a>;
Expand Down Expand Up @@ -1033,6 +1038,7 @@ fn build_predicate_fn(
store: Arc<Store>,
predicate: &RevsetFilterPredicate,
) -> Box<dyn ToPredicateFn> {
// TODO: propagate BackendError
match predicate {
RevsetFilterPredicate::ParentCount(parent_count_range) => {
let parent_count_range = parent_count_range.clone();
Expand Down Expand Up @@ -1072,7 +1078,18 @@ fn build_predicate_fn(
let matcher: Rc<dyn Matcher> = expr.to_matcher().into();
box_pure_predicate_fn(move |index, pos| {
let entry = index.entry_by_pos(pos);
has_diff_from_parent(&store, index, &entry, matcher.as_ref())
let commit = store.get_commit(&entry.commit_id()).unwrap();
has_diff_from_parent(&store, index, &commit, matcher.as_ref()).unwrap()
})
}
RevsetFilterPredicate::DiffContains { text, files } => {
let text_pattern = text.clone();
let files_matcher: Rc<dyn Matcher> = files.to_matcher().into();
box_pure_predicate_fn(move |index, pos| {
let entry = index.entry_by_pos(pos);
let commit = store.get_commit(&entry.commit_id()).unwrap();
matches_diff_from_parent(&store, index, &commit, &text_pattern, &*files_matcher)
.unwrap()
})
}
RevsetFilterPredicate::HasConflict => box_pure_predicate_fn(move |index, pos| {
Expand All @@ -1094,23 +1111,91 @@ fn build_predicate_fn(
fn has_diff_from_parent(
store: &Arc<Store>,
index: &CompositeIndex,
entry: &IndexEntry<'_>,
commit: &Commit,
matcher: &dyn Matcher,
) -> bool {
let commit = store.get_commit(&entry.commit_id()).unwrap();
let parents: Vec<_> = commit.parents().try_collect().unwrap();
) -> BackendResult<bool> {
let parents: Vec<_> = commit.parents().try_collect()?;
if let [parent] = parents.as_slice() {
// Fast path: no need to load the root tree
let unchanged = commit.tree_id() == parent.tree_id();
if matcher.visit(RepoPath::root()) == Visit::AllRecursively {
return !unchanged;
return Ok(!unchanged);
} else if unchanged {
return false;
return Ok(false);
}
}
let from_tree = rewrite::merge_commit_trees_without_repo(store, &index, &parents)?;
let to_tree = commit.tree()?;
Ok(from_tree.diff(&to_tree, matcher).next().is_some())
}

fn matches_diff_from_parent(
store: &Arc<Store>,
index: &CompositeIndex,
commit: &Commit,
text_pattern: &StringPattern,
files_matcher: &dyn Matcher,
) -> BackendResult<bool> {
let parents: Vec<_> = commit.parents().try_collect()?;
let from_tree = rewrite::merge_commit_trees_without_repo(store, &index, &parents)?;
let to_tree = commit.tree()?;
let tree_diff = from_tree.diff_stream(&to_tree, files_matcher);
// Conflicts are compared in materialized form. Alternatively, conflict
// pairs can be compared one by one. #4062
let mut diff_stream = materialized_diff_stream(store, tree_diff);
async {
while let Some((path, diff)) = diff_stream.next().await {
let (left_value, right_value) = diff?;
let left_content = to_file_content(&path, left_value)?;
let right_content = to_file_content(&path, right_value)?;
// Filter lines prior to comparison. This might produce inferior
// hunks due to lack of contexts, but is way faster than full diff.
let left_lines = match_lines(&left_content, text_pattern);
let right_lines = match_lines(&right_content, text_pattern);
if left_lines.ne(right_lines) {
return Ok(true);
}
}
Ok(false)
}
.block_on()
}

fn match_lines<'a: 'b, 'b>(
text: &'a [u8],
pattern: &'b StringPattern,
) -> impl Iterator<Item = &'a [u8]> + 'b {
// The pattern is matched line by line so that it can be anchored to line
// start/end. For example, exact:"" will match blank lines.
text.split_inclusive(|b| *b == b'\n').filter(|line| {
let line = line.strip_suffix(b"\n").unwrap_or(line);
// TODO: add .matches_bytes() or .to_bytes_matcher()
str::from_utf8(line).map_or(false, |line| pattern.matches(line))
})
}

fn to_file_content(path: &RepoPath, value: MaterializedTreeValue) -> BackendResult<Vec<u8>> {
match value {
MaterializedTreeValue::Absent => Ok(vec![]),
MaterializedTreeValue::AccessDenied(_) => Ok(vec![]),
MaterializedTreeValue::File { id, mut reader, .. } => {
let mut content = vec![];
reader
.read_to_end(&mut content)
.map_err(|err| BackendError::ReadFile {
path: path.to_owned(),
id: id.clone(),
source: err.into(),
})?;
Ok(content)
}
MaterializedTreeValue::Symlink { id: _, target } => Ok(target.into_bytes()),
MaterializedTreeValue::GitSubmodule(_) => Ok(vec![]),
MaterializedTreeValue::Conflict { contents, .. } => Ok(contents),
MaterializedTreeValue::Tree(id) => {
panic!("Unexpected tree with id {id:?} in diff at path {path:?}");
}
}
let from_tree = rewrite::merge_commit_trees_without_repo(store, &index, &parents).unwrap();
let to_tree = commit.tree().unwrap();
from_tree.diff(&to_tree, matcher).next().is_some()
}

#[cfg(test)]
Expand Down
25 changes: 25 additions & 0 deletions lib/src/revset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ pub enum RevsetFilterPredicate {
Committer(StringPattern),
/// Commits modifying the paths specified by the fileset.
File(FilesetExpression),
/// Commits containing diffs matching the `text` pattern within the `files`.
DiffContains {
text: StringPattern,
files: FilesetExpression,
},
/// Commits with conflicts
HasConflict,
/// Custom predicates provided by extensions
Expand Down Expand Up @@ -714,6 +719,26 @@ static BUILTIN_FUNCTION_MAP: Lazy<HashMap<&'static str, RevsetFunction>> = Lazy:
let expr = FilesetExpression::union_all(file_expressions);
Ok(RevsetExpression::filter(RevsetFilterPredicate::File(expr)))
});
map.insert("diff_contains", |function, context| {
let ([text_arg], [files_opt_arg]) = function.expect_arguments()?;
let text = expect_string_pattern(text_arg)?;
let files = if let Some(files_arg) = files_opt_arg {
let ctx = context.workspace.as_ref().ok_or_else(|| {
RevsetParseError::with_span(
RevsetParseErrorKind::FsPathWithoutWorkspace,
files_arg.span,
)
})?;
expect_fileset_expression(files_arg, ctx.path_converter)?
} else {
// TODO: defaults to CLI path arguments?
// https://github.com/martinvonz/jj/issues/2933#issuecomment-1925870731
FilesetExpression::all()
};
Ok(RevsetExpression::filter(
RevsetFilterPredicate::DiffContains { text, files },
))
});
map.insert("conflict", |function, _context| {
function.expect_no_arguments()?;
Ok(RevsetExpression::filter(RevsetFilterPredicate::HasConflict))
Expand Down
Loading