Skip to content

Commit

Permalink
working_copy: on snapshot, warn new large files and continue
Browse files Browse the repository at this point in the history
I think this provides a better UX than refusing any operation due to large
files. Because untracked files won't be overwritten, it's usually safe to
continue operation ignoring the untracked files. One caveat is that new large
files can become tracked if files of the same name checked out. (see the test
case)

FWIW, the warning will be printed only once if watchman is enabled. If we use
the snapshot stats to print untracked paths in "jj status", this will be a
problem.

Closes #3616, #3912
  • Loading branch information
yuja committed Dec 11, 2024
1 parent 6a0ee04 commit 3de3e44
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 91 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

### New features

* `jj` command no longer fails due to new working-copy files larger than the
`snapshot.max-new-file-size` config option. It will print a warning and large
files will be left untracked.

### Fixed bugs

* The `$NO_COLOR` environment variable must now be non-empty to be respected.
Expand Down
71 changes: 69 additions & 2 deletions cli/src/cli_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ use clap::FromArgMatches;
use clap_complete::ArgValueCandidates;
use indexmap::IndexMap;
use indexmap::IndexSet;
use indoc::writedoc;
use itertools::Itertools;
use jj_lib::backend::BackendResult;
use jj_lib::backend::ChangeId;
Expand Down Expand Up @@ -113,6 +114,7 @@ use jj_lib::revset::RevsetWorkspaceContext;
use jj_lib::revset::SymbolResolverExtension;
use jj_lib::revset::UserRevsetExpression;
use jj_lib::rewrite::restore_tree;
use jj_lib::settings::HumanByteSize;
use jj_lib::settings::UserSettings;
use jj_lib::str_util::StringPattern;
use jj_lib::transaction::Transaction;
Expand All @@ -121,6 +123,8 @@ use jj_lib::working_copy;
use jj_lib::working_copy::CheckoutOptions;
use jj_lib::working_copy::CheckoutStats;
use jj_lib::working_copy::SnapshotOptions;
use jj_lib::working_copy::SnapshotStats;
use jj_lib::working_copy::UntrackedReason;
use jj_lib::working_copy::WorkingCopy;
use jj_lib::working_copy::WorkingCopyFactory;
use jj_lib::working_copy::WorkingCopyFreshness;
Expand Down Expand Up @@ -1806,8 +1810,7 @@ See https://martinvonz.github.io/jj/latest/working-copy/#stale-working-copy \
};
self.user_repo = ReadonlyUserRepo::new(repo);
let progress = crate::progress::snapshot_progress(ui);
// TODO: print stats
let (new_tree_id, _stats) = locked_ws
let (new_tree_id, stats) = locked_ws
.locked_wc()
.snapshot(&SnapshotOptions {
base_ignores,
Expand Down Expand Up @@ -1861,6 +1864,8 @@ See https://martinvonz.github.io/jj/latest/working-copy/#stale-working-copy \
locked_ws
.finish(self.user_repo.repo.op_id().clone())
.map_err(snapshot_command_error)?;
print_snapshot_stats(ui, &stats, &self.env.path_converter)
.map_err(snapshot_command_error)?;
Ok(())
}

Expand Down Expand Up @@ -2555,6 +2560,68 @@ pub fn print_conflicted_paths(
Ok(())
}

pub fn print_snapshot_stats(
ui: &Ui,
stats: &SnapshotStats,
path_converter: &RepoPathUiConverter,
) -> io::Result<()> {
// It might make sense to add files excluded by snapshot.auto-track to the
// untracked_paths, but they shouldn't be warned every time we do snapshot.
// These paths will have to be printed by "jj status" instead.
if !stats.untracked_paths.is_empty() {
writeln!(ui.warning_default(), "Refused to snapshot some files:")?;
let mut formatter = ui.stderr_formatter();
for (path, reason) in &stats.untracked_paths {
let ui_path = path_converter.format_file_path(path);
let message = match reason {
UntrackedReason::FileTooLarge { size, max_size } => {
// if the size difference is < 1KiB, then show exact bytes.
// otherwise, show in human-readable form; this avoids weird cases
// where a file is 400 bytes too large but the error says something
// like '1.0MiB, maximum size allowed is ~1.0MiB'
let size_diff = size - max_size;
if size_diff <= 1024 {
format!(
"{size_diff} bytes too large; the maximum size allowed is {max_size} \
bytes ({max_size_approx})",
max_size_approx = HumanByteSize(*max_size)
)
} else {
format!(
"{size}; the maximum size allowed is ~{max_size}",
size = HumanByteSize(*size),
max_size = HumanByteSize(*max_size)
)
}
}
};
writeln!(formatter, " {ui_path}: {message}")?;
}
}

if let Some(size) = stats
.untracked_paths
.values()
.map(|reason| match reason {
UntrackedReason::FileTooLarge { size, .. } => *size,
})
.max()
{
writedoc!(
ui.hint_default(),
r"
This is to prevent large files from being added by accident. You can fix this by:
- Adding the file to `.gitignore`
- Run `jj config set --repo snapshot.max-new-file-size {size}`
This will increase the maximum file size allowed for new files, in this repository only.
- Run `jj --config-toml 'snapshot.max-new-file-size={size}' st`
This will increase the maximum file size allowed for new files, for this command only.
"
)?;
}
Ok(())
}

pub fn print_checkout_stats(
ui: &Ui,
stats: CheckoutStats,
Expand Down
40 changes: 1 addition & 39 deletions cli/src/command_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,45 +370,7 @@ impl From<OpsetEvaluationError> for CommandError {

impl From<SnapshotError> for CommandError {
fn from(err: SnapshotError) -> Self {
match err {
SnapshotError::NewFileTooLarge {
path,
size,
max_size,
} => {
// if the size difference is < 1KiB, then show exact bytes.
// otherwise, show in human-readable form; this avoids weird cases
// where a file is 400 bytes too large but the error says something
// like '1.0MiB, maximum size allowed is ~1.0MiB'
let size_diff = size.0 - max_size.0;
let err_str = if size_diff <= 1024 {
format!(
"it is {} bytes too large; the maximum size allowed is {} bytes ({}).",
size_diff, max_size.0, max_size,
)
} else {
format!("it is {size}; the maximum size allowed is ~{max_size}.")
};

user_error(format!(
"Failed to snapshot the working copy\nThe file '{}' is too large to be \
snapshotted: {}",
path.display(),
err_str,
))
.hinted(format!(
"This is to prevent large files from being added by accident. You can fix \
this error by:
- Adding the file to `.gitignore`
- Run `jj config set --repo snapshot.max-new-file-size {}`
This will increase the maximum file size allowed for new files, in this repository only.
- Run `jj --config-toml 'snapshot.max-new-file-size={}' st`
This will increase the maximum file size allowed for new files, for this command only.",
size.0, size.0
))
}
err => internal_error_with_message("Failed to snapshot the working copy", err),
}
internal_error_with_message("Failed to snapshot the working copy", err)
}
}

Expand Down
5 changes: 3 additions & 2 deletions cli/src/commands/file/track.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::io::Write;
use jj_lib::working_copy::SnapshotOptions;
use tracing::instrument;

use crate::cli_util::print_snapshot_stats;
use crate::cli_util::CommandHelper;
use crate::command_error::CommandError;
use crate::ui::Ui;
Expand Down Expand Up @@ -52,8 +53,7 @@ pub(crate) fn cmd_file_track(
let mut tx = workspace_command.start_transaction().into_inner();
let base_ignores = workspace_command.base_ignores()?;
let (mut locked_ws, _wc_commit) = workspace_command.start_working_copy_mutation()?;
// TODO: print stats
let (_tree_id, _stats) = locked_ws.locked_wc().snapshot(&SnapshotOptions {
let (_tree_id, stats) = locked_ws.locked_wc().snapshot(&SnapshotOptions {
base_ignores,
fsmonitor_settings: command.settings().fsmonitor_settings()?,
progress: None,
Expand All @@ -67,5 +67,6 @@ pub(crate) fn cmd_file_track(
}
let repo = tx.commit("track paths")?;
locked_ws.finish(repo.op_id().clone())?;
print_snapshot_stats(ui, &stats, workspace_command.env().path_converter())?;
Ok(())
}
5 changes: 3 additions & 2 deletions cli/src/commands/file/untrack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use jj_lib::repo::Repo;
use jj_lib::working_copy::SnapshotOptions;
use tracing::instrument;

use crate::cli_util::print_snapshot_stats;
use crate::cli_util::CommandHelper;
use crate::command_error::user_error_with_hint;
use crate::command_error::CommandError;
Expand Down Expand Up @@ -76,8 +77,7 @@ pub(crate) fn cmd_file_untrack(
locked_ws.locked_wc().reset(&new_commit)?;
// Commit the working copy again so we can inform the user if paths couldn't be
// untracked because they're not ignored.
// TODO: print stats
let (wc_tree_id, _stats) = locked_ws.locked_wc().snapshot(&SnapshotOptions {
let (wc_tree_id, stats) = locked_ws.locked_wc().snapshot(&SnapshotOptions {
base_ignores,
fsmonitor_settings: command.settings().fsmonitor_settings()?,
progress: None,
Expand Down Expand Up @@ -118,5 +118,6 @@ Make sure they're ignored, then try again.",
}
let repo = tx.commit("untrack paths")?;
locked_ws.finish(repo.op_id().clone())?;
print_snapshot_stats(ui, &stats, workspace_command.env().path_converter())?;
Ok(())
}
81 changes: 67 additions & 14 deletions cli/tests/test_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,37 @@ fn test_snapshot_large_file() {
// test a small file using raw-integer-literal syntax, which is interpreted
// in bytes
test_env.add_config(r#"snapshot.max-new-file-size = 10"#);
std::fs::write(repo_path.join("empty"), "").unwrap();
std::fs::write(repo_path.join("large"), "a lot of text").unwrap();
let stderr = test_env.jj_cmd_failure(&repo_path, &["file", "list"]);
insta::assert_snapshot!(stderr, @r###"
Error: Failed to snapshot the working copy
The file '$TEST_ENV/repo/large' is too large to be snapshotted: it is 3 bytes too large; the maximum size allowed is 10 bytes (10.0B).
Hint: This is to prevent large files from being added by accident. You can fix this error by:
let (stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["file", "list"]);
insta::assert_snapshot!(stdout, @"empty");
insta::assert_snapshot!(stderr, @r"
Warning: Refused to snapshot some files:
large: 3 bytes too large; the maximum size allowed is 10 bytes (10.0B)
Hint: This is to prevent large files from being added by accident. You can fix this by:
- Adding the file to `.gitignore`
- Run `jj config set --repo snapshot.max-new-file-size 13`
This will increase the maximum file size allowed for new files, in this repository only.
- Run `jj --config-toml 'snapshot.max-new-file-size=13' st`
This will increase the maximum file size allowed for new files, for this command only.
"###);
");

// test with a larger file using 'KB' human-readable syntax
test_env.add_config(r#"snapshot.max-new-file-size = "10KB""#);
let big_string = vec![0; 1024 * 11];
std::fs::write(repo_path.join("large"), big_string).unwrap();
let stderr = test_env.jj_cmd_failure(&repo_path, &["file", "list"]);
insta::assert_snapshot!(stderr, @r###"
Error: Failed to snapshot the working copy
The file '$TEST_ENV/repo/large' is too large to be snapshotted: it is 1024 bytes too large; the maximum size allowed is 10240 bytes (10.0KiB).
Hint: This is to prevent large files from being added by accident. You can fix this error by:
let (stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["file", "list"]);
insta::assert_snapshot!(stdout, @"empty");
insta::assert_snapshot!(stderr, @r"
Warning: Refused to snapshot some files:
large: 1024 bytes too large; the maximum size allowed is 10240 bytes (10.0KiB)
Hint: This is to prevent large files from being added by accident. You can fix this by:
- Adding the file to `.gitignore`
- Run `jj config set --repo snapshot.max-new-file-size 11264`
This will increase the maximum file size allowed for new files, in this repository only.
- Run `jj --config-toml 'snapshot.max-new-file-size=11264' st`
This will increase the maximum file size allowed for new files, for this command only.
"###);
");

// test invalid configuration
let stderr = test_env.jj_cmd_failure(
Expand All @@ -71,8 +74,58 @@ fn test_snapshot_large_file() {

// No error if we disable auto-tracking of the path
test_env.add_config(r#"snapshot.auto-track = 'none()'"#);
let stdout = test_env.jj_cmd_success(&repo_path, &["file", "list"]);
insta::assert_snapshot!(stdout, @"");
let (stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["file", "list"]);
insta::assert_snapshot!(stdout, @"empty");
insta::assert_snapshot!(stderr, @"");
}

#[test]
fn test_snapshot_large_file_restore() {
let test_env = TestEnvironment::default();
test_env.jj_cmd_ok(test_env.env_root(), &["git", "init", "repo"]);
let repo_path = test_env.env_root().join("repo");
test_env.add_config("snapshot.max-new-file-size = 10");

test_env.jj_cmd_ok(&repo_path, &["describe", "-mcommitted"]);
std::fs::write(repo_path.join("file"), "small").unwrap();

// Write a large file in the working copy, restore it from a commit. The
// working-copy content shouldn't be overwritten.
test_env.jj_cmd_ok(&repo_path, &["new", "root()"]);
std::fs::write(repo_path.join("file"), "a lot of text").unwrap();
let (_stdout, stderr) =
test_env.jj_cmd_ok(&repo_path, &["restore", "--from=description(committed)"]);
insta::assert_snapshot!(stderr, @r"
Warning: Refused to snapshot some files:
file: 3 bytes too large; the maximum size allowed is 10 bytes (10.0B)
Hint: This is to prevent large files from being added by accident. You can fix this by:
- Adding the file to `.gitignore`
- Run `jj config set --repo snapshot.max-new-file-size 13`
This will increase the maximum file size allowed for new files, in this repository only.
- Run `jj --config-toml 'snapshot.max-new-file-size=13' st`
This will increase the maximum file size allowed for new files, for this command only.
Created kkmpptxz e3eb7e81 (no description set)
Working copy now at: kkmpptxz e3eb7e81 (no description set)
Parent commit : zzzzzzzz 00000000 (empty) (no description set)
Added 1 files, modified 0 files, removed 0 files
Warning: 1 of those updates were skipped because there were conflicting changes in the working copy.
Hint: Inspect the changes compared to the intended target with `jj diff --from e3eb7e819de5`.
Discard the conflicting changes with `jj restore --from e3eb7e819de5`.
");
insta::assert_snapshot!(
std::fs::read_to_string(repo_path.join("file")).unwrap(),
@"a lot of text");

// However, the next command will snapshot the large file because it is now
// tracked. TODO: Should we remember the untracked state?
let (stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["status"]);
insta::assert_snapshot!(stdout, @r"
Working copy changes:
A file
Working copy : kkmpptxz b75eed09 (no description set)
Parent commit: zzzzzzzz 00000000 (empty) (no description set)
");
insta::assert_snapshot!(stderr, @"");
}

#[test]
Expand Down
18 changes: 8 additions & 10 deletions lib/src/local_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ use crate::op_store::WorkspaceId;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponent;
use crate::settings::HumanByteSize;
use crate::store::Store;
use crate::tree::Tree;
use crate::working_copy::CheckoutError;
Expand Down Expand Up @@ -1097,7 +1096,6 @@ struct FileSnapshotter<'a> {
start_tracking_matcher: &'a dyn Matcher,
tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>,
file_states_tx: Sender<(RepoPathBuf, FileState)>,
#[allow(unused)] // TODO
untracked_paths_tx: Sender<(RepoPathBuf, UntrackedReason)>,
deleted_files_tx: Sender<RepoPathBuf>,
error: OnceLock<SnapshotError>,
Expand Down Expand Up @@ -1246,14 +1244,14 @@ impl FileSnapshotter<'_> {
err: err.into(),
})?;
if maybe_current_file_state.is_none() && metadata.len() > self.max_new_file_size {
// TODO: Maybe leave the file untracked instead
return Err(SnapshotError::NewFileTooLarge {
path: entry.path().clone(),
size: HumanByteSize(metadata.len()),
max_size: HumanByteSize(self.max_new_file_size),
});
}
if let Some(new_file_state) = file_state(&metadata) {
// Leave the large file untracked
let reason = UntrackedReason::FileTooLarge {
size: metadata.len(),
max_size: self.max_new_file_size,
};
self.untracked_paths_tx.send((path, reason)).ok();
Ok(None)
} else if let Some(new_file_state) = file_state(&metadata) {
self.process_present_file(
path,
&entry.path(),
Expand Down
12 changes: 0 additions & 12 deletions lib/src/working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ use crate::repo::RewriteRootCommit;
use crate::repo_path::InvalidRepoPathError;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::settings::HumanByteSize;
use crate::settings::UserSettings;
use crate::store::Store;

Expand Down Expand Up @@ -182,17 +181,6 @@ pub enum SnapshotError {
/// Reading or writing from the commit backend failed.
#[error(transparent)]
BackendError(#[from] BackendError),
/// A file was larger than the specified maximum file size for new
/// (previously untracked) files.
#[error("New file {path} of size ~{size} exceeds snapshot.max-new-file-size ({max_size})")]
NewFileTooLarge {
/// The path of the large file.
path: PathBuf,
/// The size of the large file.
size: HumanByteSize,
/// The maximum allowed size.
max_size: HumanByteSize,
},
/// Checking path with ignore patterns failed.
#[error(transparent)]
GitIgnoreError(#[from] GitIgnoreError),
Expand Down
Loading

0 comments on commit 3de3e44

Please sign in to comment.