From 4f861bdc93595e8d113135ebb4a713b52b526644 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Sat, 27 Jul 2024 20:25:25 +0200 Subject: [PATCH] Implement hacky variant of `jj purge` --- cli/examples/custom-working-copy/main.rs | 4 +- cli/src/cli_util.rs | 43 ++++++++---- cli/src/commands/mod.rs | 3 + cli/src/commands/purge.rs | 66 +++++++++++++++++ cli/src/commands/untrack.rs | 15 ++-- cli/tests/cli-reference@.md.snap | 16 +++++ lib/src/backend.rs | 9 ++- lib/src/local_working_copy.rs | 70 +++++++++++++++---- lib/src/working_copy.rs | 17 ++++- lib/tests/test_local_working_copy.rs | 7 +- .../test_local_working_copy_concurrent.rs | 3 +- lib/testutils/src/lib.rs | 11 +-- 12 files changed, 218 insertions(+), 46 deletions(-) create mode 100644 cli/src/commands/purge.rs diff --git a/cli/examples/custom-working-copy/main.rs b/cli/examples/custom-working-copy/main.rs index eaba135c69..442c68020d 100644 --- a/cli/examples/custom-working-copy/main.rs +++ b/cli/examples/custom-working-copy/main.rs @@ -20,7 +20,7 @@ use itertools::Itertools; use jj_cli::cli_util::{CliRunner, CommandHelper}; use jj_cli::command_error::CommandError; use jj_cli::ui::Ui; -use jj_lib::backend::{Backend, MergedTreeId}; +use jj_lib::backend::{Backend, MergedTreeId, SnapshotResult}; use jj_lib::commit::Commit; use jj_lib::git_backend::GitBackend; use jj_lib::local_working_copy::LocalWorkingCopy; @@ -222,7 +222,7 @@ impl LockedWorkingCopy for LockedConflictsWorkingCopy { self.inner.old_tree_id() } - fn snapshot(&mut self, mut options: SnapshotOptions) -> Result { + fn snapshot(&mut self, mut options: SnapshotOptions) -> Result { options.base_ignores = options.base_ignores.chain("", "/.conflicts".as_bytes())?; self.inner.snapshot(options) } diff --git a/cli/src/cli_util.rs b/cli/src/cli_util.rs index 0931b40c8e..96a977a719 100644 --- a/cli/src/cli_util.rs +++ b/cli/src/cli_util.rs @@ -42,6 +42,7 @@ use jj_lib::git_backend::GitBackend; use jj_lib::gitignore::{GitIgnoreError, GitIgnoreFile}; use jj_lib::hex_util::to_reverse_hex; use jj_lib::id_prefix::IdPrefixContext; +use jj_lib::local_working_copy::SnapshotStats; use jj_lib::matchers::Matcher; use jj_lib::merge::MergedTreeValue; use jj_lib::merged_tree::MergedTree; @@ -574,7 +575,7 @@ impl WorkspaceCommandHelper { /// Snapshot the working copy if allowed, and import Git refs if the working /// copy is collocated with Git. #[instrument(skip_all)] - pub fn maybe_snapshot(&mut self, ui: &mut Ui) -> Result<(), CommandError> { + pub fn maybe_snapshot(&mut self, ui: &mut Ui) -> Result { if self.may_update_working_copy { if self.working_copy_shared_with_git { self.import_git_head(ui)?; @@ -583,13 +584,26 @@ impl WorkspaceCommandHelper { // pointing to the new working-copy commit might not be exported. // In that situation, the ref would be conflicted anyway, so export // failure is okay. - self.snapshot_working_copy(ui)?; + let stats = self.snapshot_working_copy(ui)?; + if !stats.files_to_large().is_empty() { + writeln!( + ui.status(), + "Some files are too large to be snapshotted. They will be ignored." + )?; + writeln!( + ui.status(), + "Use `jj purge` to remove them from the working copy." + )?; + } + // import_git_refs() can rebase the working-copy commit. if self.working_copy_shared_with_git { self.import_git_refs(ui)?; } + Ok(stats) + } else { + Ok(SnapshotStats::default()) } - Ok(()) } /// Imports new HEAD from the colocated Git repo. @@ -1186,7 +1200,7 @@ impl WorkspaceCommandHelper { } #[instrument(skip_all)] - fn snapshot_working_copy(&mut self, ui: &mut Ui) -> Result<(), CommandError> { + fn snapshot_working_copy(&mut self, ui: &mut Ui) -> Result { let workspace_id = self.workspace_id().to_owned(); let get_wc_commit = |repo: &ReadonlyRepo| -> Result, _> { repo.view() @@ -1198,7 +1212,7 @@ impl WorkspaceCommandHelper { let Some(wc_commit) = get_wc_commit(&repo)? else { // If the workspace has been deleted, it's unclear what to do, so we just skip // committing the working copy. - return Ok(()); + return Ok(SnapshotStats::default()); }; let base_ignores = self.base_ignores()?; @@ -1210,12 +1224,13 @@ impl WorkspaceCommandHelper { Ok(WorkingCopyFreshness::Fresh) => (repo, wc_commit), Ok(WorkingCopyFreshness::Updated(wc_operation)) => { let repo = repo.reload_at(&wc_operation)?; - let wc_commit = if let Some(wc_commit) = get_wc_commit(&repo)? { - wc_commit - } else { - return Ok(()); // The workspace has been deleted (see - // above) - }; + let wc_commit = + if let Some(wc_commit) = get_wc_commit(&repo)? { + wc_commit + } else { + return Ok(SnapshotStats::default()); // The workspace has been deleted (see + // above) + }; (repo, wc_commit) } Ok(WorkingCopyFreshness::WorkingCopyStale) => { @@ -1249,13 +1264,15 @@ See https://github.com/martinvonz/jj/blob/main/docs/working-copy.md#stale-workin }; self.user_repo = ReadonlyUserRepo::new(repo); let progress = crate::progress::snapshot_progress(ui); - let new_tree_id = locked_ws.locked_wc().snapshot(SnapshotOptions { + let snapshot_result = locked_ws.locked_wc().snapshot(SnapshotOptions { base_ignores, fsmonitor_settings: self.settings.fsmonitor_settings()?, progress: progress.as_ref().map(|x| x as _), max_new_file_size: self.settings.max_new_file_size()?, })?; drop(progress); + + let new_tree_id = snapshot_result.tree_id; if new_tree_id != *wc_commit.tree_id() { let mut tx = start_repo_transaction(&self.user_repo.repo, &self.settings, &self.string_args); @@ -1284,7 +1301,7 @@ See https://github.com/martinvonz/jj/blob/main/docs/working-copy.md#stale-workin self.user_repo = ReadonlyUserRepo::new(tx.commit("snapshot working copy")); } locked_ws.finish(self.user_repo.repo.op_id().clone())?; - Ok(()) + Ok(snapshot_result.snapshot_stats) } fn update_working_copy( diff --git a/cli/src/commands/mod.rs b/cli/src/commands/mod.rs index 4498ed398c..9bd37852e4 100644 --- a/cli/src/commands/mod.rs +++ b/cli/src/commands/mod.rs @@ -40,6 +40,7 @@ mod obslog; mod operation; mod parallelize; mod prev; +mod purge; mod rebase; mod resolve; mod restore; @@ -123,6 +124,7 @@ enum Command { Operation(operation::OperationCommand), Parallelize(parallelize::ParallelizeArgs), Prev(prev::PrevArgs), + Purge(purge::PurgeArgs), Rebase(rebase::RebaseArgs), Resolve(resolve::ResolveArgs), Restore(restore::RestoreArgs), @@ -204,6 +206,7 @@ pub fn run_command(ui: &mut Ui, command_helper: &CommandHelper) -> Result<(), Co Command::Resolve(args) => resolve::cmd_resolve(ui, command_helper, args), Command::Restore(args) => restore::cmd_restore(ui, command_helper, args), Command::Revert(_args) => revert(), + Command::Purge(args) => purge::cmd_purge(ui, command_helper, args), Command::Root(args) => root::cmd_root(ui, command_helper, args), Command::Run(args) => run::cmd_run(ui, command_helper, args), Command::Show(args) => show::cmd_show(ui, command_helper, args), diff --git a/cli/src/commands/purge.rs b/cli/src/commands/purge.rs new file mode 100644 index 0000000000..0e93681eca --- /dev/null +++ b/cli/src/commands/purge.rs @@ -0,0 +1,66 @@ +// Copyright 2024 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fs; +use std::io::Write; + +use crate::cli_util::CommandHelper; +use crate::command_error::{CommandError, CommandErrorKind}; +use crate::ui::Ui; + +/// Removes files not tracked by Jujutsu +/// Note: snapshot won't be taken before purging, so there is no way to undo +/// this operation +#[derive(clap::Args, Clone, Debug)] +pub(crate) struct PurgeArgs { + /// Do actual removal of files, instead of just listing them + #[arg(short, long, default_value = "false")] + no_dry_run: bool, +} + +pub(crate) fn cmd_purge( + ui: &mut Ui, + command: &CommandHelper, + args: &PurgeArgs, +) -> Result<(), CommandError> { + let mut workspace_command = command.workspace_helper_no_snapshot(ui)?; + let snapshot = workspace_command.maybe_snapshot(ui)?; + + writeln!(ui.status(), "Purging files not tracked by Jujutsu")?; + let max_snapshot_size = snapshot.files_to_large().first().map(|x| x.max_size); + + if let Some(max_size) = max_snapshot_size { + writeln!(ui.status(), "Max allowed snapshot size: {}", max_size)?; + } + + for path in snapshot.files_to_large() { + writeln!( + ui.status(), + "File: {}, size: {}", + path.path.display(), + path.size + )?; + + if args.no_dry_run { + fs::remove_file(&path.path).map_err(|e| { + CommandError::new( + CommandErrorKind::Cli, + format!("failed to remove {}: {}", path.path.display(), e), + ) + })?; + } + } + + Ok(()) +} diff --git a/cli/src/commands/untrack.rs b/cli/src/commands/untrack.rs index 449724468f..8ab44fbec2 100644 --- a/cli/src/commands/untrack.rs +++ b/cli/src/commands/untrack.rs @@ -67,12 +67,15 @@ pub(crate) fn cmd_untrack( locked_ws.locked_wc().reset(&new_commit)?; // Commit the working copy again so we can inform the user if paths couldn't be // untracked because they're not ignored. - let wc_tree_id = locked_ws.locked_wc().snapshot(SnapshotOptions { - base_ignores, - fsmonitor_settings: command.settings().fsmonitor_settings()?, - progress: None, - max_new_file_size: command.settings().max_new_file_size()?, - })?; + let wc_tree_id = locked_ws + .locked_wc() + .snapshot(SnapshotOptions { + base_ignores, + fsmonitor_settings: command.settings().fsmonitor_settings()?, + progress: None, + max_new_file_size: command.settings().max_new_file_size()?, + })? + .tree_id; if wc_tree_id != *new_commit.tree_id() { let wc_tree = store.get_root_tree(&wc_tree_id)?; let added_back = wc_tree.entries_matching(matcher.as_ref()).collect_vec(); diff --git a/cli/tests/cli-reference@.md.snap b/cli/tests/cli-reference@.md.snap index b06a2f5e0f..9ef2164adc 100644 --- a/cli/tests/cli-reference@.md.snap +++ b/cli/tests/cli-reference@.md.snap @@ -68,6 +68,7 @@ This document contains the help content for the `jj` command-line program. * [`jj operation undo`↴](#jj-operation-undo) * [`jj parallelize`↴](#jj-parallelize) * [`jj prev`↴](#jj-prev) +* [`jj purge`↴](#jj-purge) * [`jj rebase`↴](#jj-rebase) * [`jj resolve`↴](#jj-resolve) * [`jj restore`↴](#jj-restore) @@ -132,6 +133,7 @@ To get started, see the tutorial at https://github.com/martinvonz/jj/blob/main/d * `operation` — Commands for working with the operation log * `parallelize` — Parallelize revisions by making them siblings * `prev` — Change the working copy revision relative to the parent revision +* `purge` — Removes files not tracked by Jujutsu Note: snapshot won't be taken before purging, so there is no way to undo this operation * `rebase` — Move revisions to different parent(s) * `resolve` — Resolve a conflicted file with an external merge tool * `restore` — Restore paths from another revision @@ -1528,6 +1530,20 @@ implied. +## `jj purge` + +Removes files not tracked by Jujutsu Note: snapshot won't be taken before purging, so there is no way to undo this operation + +**Usage:** `jj purge [OPTIONS]` + +###### **Options:** + +* `-n`, `--no-dry-run` — Do actual removal of files, instead of just listing them + + Default value: `false` + + + ## `jj rebase` Move revisions to different parent(s) diff --git a/lib/src/backend.rs b/lib/src/backend.rs index ccdad4ec89..c159d28e28 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -26,6 +26,7 @@ use thiserror::Error; use crate::content_hash::ContentHash; use crate::index::Index; +use crate::local_working_copy::SnapshotStats; use crate::merge::Merge; use crate::object_id::{id_type, ObjectId}; use crate::repo_path::{RepoPath, RepoPathBuf, RepoPathComponent, RepoPathComponentBuf}; @@ -88,6 +89,11 @@ pub struct SecureSig { pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult> + 'a; +pub struct SnapshotResult { + pub tree_id: MergedTreeId, + pub snapshot_stats: SnapshotStats, +} + /// Identifies a single legacy tree, which may have path-level conflicts, or a /// merge of multiple trees, where the individual trees do not have conflicts. // TODO(#1624): Delete this type at some point in the future, when we decide to drop @@ -241,7 +247,8 @@ pub enum BackendError { hash: String, source: Box, }, - #[error("Error when reading file content for file {} with id {}", path.as_internal_file_string(), id.hex())] + #[error("Error when reading file content for file {} with id {}", path.as_internal_file_string(), id.hex() + )] ReadFile { path: RepoPathBuf, id: FileId, diff --git a/lib/src/local_working_copy.rs b/lib/src/local_working_copy.rs index 7a52af951d..4cf3396d9e 100644 --- a/lib/src/local_working_copy.rs +++ b/lib/src/local_working_copy.rs @@ -41,8 +41,8 @@ use thiserror::Error; use tracing::{instrument, trace_span}; use crate::backend::{ - BackendError, BackendResult, FileId, MergedTreeId, MillisSinceEpoch, SymlinkId, TreeId, - TreeValue, + BackendError, BackendResult, FileId, MergedTreeId, MillisSinceEpoch, SnapshotResult, SymlinkId, + TreeId, TreeValue, }; use crate::commit::Commit; use crate::conflicts::{self, materialize_tree_value, MaterializedTreeValue}; @@ -64,8 +64,8 @@ use crate::settings::HumanByteSize; use crate::store::Store; use crate::tree::Tree; use crate::working_copy::{ - CheckoutError, CheckoutStats, LockedWorkingCopy, ResetError, SnapshotError, SnapshotOptions, - SnapshotProgress, WorkingCopy, WorkingCopyFactory, WorkingCopyStateError, + CheckoutError, CheckoutStats, LockedWorkingCopy, NewFileTooLarge, ResetError, SnapshotError, + SnapshotOptions, SnapshotProgress, WorkingCopy, WorkingCopyFactory, WorkingCopyStateError, }; #[cfg(unix)] @@ -758,7 +758,7 @@ impl TreeState { /// Look for changes to the working copy. If there are any changes, create /// a new tree from it and return it, and also update the dirstate on disk. #[instrument(skip_all)] - pub fn snapshot(&mut self, options: SnapshotOptions) -> Result { + pub fn snapshot(&mut self, options: SnapshotOptions) -> Result { let SnapshotOptions { base_ignores, fsmonitor_settings, @@ -783,13 +783,15 @@ impl TreeState { if matcher.visit(RepoPath::root()).is_nothing() { // No need to iterate file states to build empty deleted_files. self.watchman_clock = watchman_clock; - return Ok(is_dirty); + return Ok(SnapshotStats::with_status(is_dirty)); } let (tree_entries_tx, tree_entries_rx) = channel(); let (file_states_tx, file_states_rx) = channel(); let (present_files_tx, present_files_rx) = channel(); + let (files_to_big_tx, files_to_big_rx) = channel(); + trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> { let current_tree = self.current_tree()?; let directory_to_visit = DirectoryToVisit { @@ -807,6 +809,7 @@ impl TreeState { directory_to_visit, progress, max_new_file_size, + files_to_big_tx, ) })?; @@ -865,8 +868,13 @@ impl TreeState { let state_paths: HashSet<_> = file_states.paths().map(|path| path.to_owned()).collect(); assert_eq!(state_paths, tree_paths); } + let files_to_large: Vec<_> = files_to_big_rx.iter().collect(); + self.watchman_clock = watchman_clock; - Ok(is_dirty) + Ok(SnapshotStats { + success: is_dirty, + files_to_large, + }) } #[allow(clippy::too_many_arguments)] @@ -880,6 +888,7 @@ impl TreeState { directory_to_visit: DirectoryToVisit, progress: Option<&SnapshotProgress>, max_new_file_size: u64, + files_to_big: Sender, ) -> Result<(), SnapshotError> { let DirectoryToVisit { dir, @@ -989,6 +998,7 @@ impl TreeState { directory_to_visit, progress, max_new_file_size, + files_to_big.clone(), )?; } } else if matcher.matches(&path) { @@ -1008,11 +1018,13 @@ impl TreeState { })?; if maybe_current_file_state.is_none() && metadata.len() > max_new_file_size { - return Err(SnapshotError::NewFileTooLarge { - path: entry.path().clone(), - size: HumanByteSize(metadata.len()), - max_size: HumanByteSize(max_new_file_size), - }); + files_to_big + .send(NewFileTooLarge { + path: entry.path().clone(), + size: HumanByteSize(metadata.len()), + max_size: HumanByteSize(max_new_file_size), + }) + .ok(); } if let Some(new_file_state) = file_state(&metadata) { present_files_tx.send(path.clone()).ok(); @@ -1498,6 +1510,29 @@ impl TreeState { } } +#[derive(Default)] +pub struct SnapshotStats { + files_to_large: Vec, + success: bool, +} + +impl SnapshotStats { + fn with_status(success: bool) -> Self { + SnapshotStats { + files_to_large: Vec::new(), + success, + } + } + + pub fn success(&self) -> bool { + self.success + } + + pub fn files_to_large(&self) -> &[NewFileTooLarge] { + &self.files_to_large + } +} + fn checkout_error_for_stat_error(err: std::io::Error, path: &Path) -> CheckoutError { CheckoutError::Other { message: format!("Failed to stat file {}", path.display()), @@ -1789,7 +1824,7 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy { &self.old_tree_id } - fn snapshot(&mut self, options: SnapshotOptions) -> Result { + fn snapshot(&mut self, options: SnapshotOptions) -> Result { let tree_state = self .wc .tree_state_mut() @@ -1797,8 +1832,13 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy { message: "Failed to read the working copy state".to_string(), err: err.into(), })?; - self.tree_state_dirty |= tree_state.snapshot(options)?; - Ok(tree_state.current_tree_id().clone()) + let snapshot_stats = tree_state.snapshot(options)?; + self.tree_state_dirty |= snapshot_stats.success(); + + Ok(SnapshotResult { + snapshot_stats, + tree_id: tree_state.current_tree_id().clone(), + }) } fn check_out(&mut self, commit: &Commit) -> Result { diff --git a/lib/src/working_copy.rs b/lib/src/working_copy.rs index cb52c0e27d..d8e036460f 100644 --- a/lib/src/working_copy.rs +++ b/lib/src/working_copy.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use thiserror::Error; -use crate::backend::{BackendError, MergedTreeId}; +use crate::backend::{BackendError, MergedTreeId, SnapshotResult}; use crate::commit::Commit; use crate::fsmonitor::FsmonitorSettings; use crate::gitignore::{GitIgnoreError, GitIgnoreFile}; @@ -99,7 +99,7 @@ pub trait LockedWorkingCopy { fn old_tree_id(&self) -> &MergedTreeId; /// Snapshot the working copy and return the tree id. - fn snapshot(&mut self, options: SnapshotOptions) -> Result; + fn snapshot(&mut self, options: SnapshotOptions) -> Result; /// Check out the specified commit in the working copy. fn check_out(&mut self, commit: &Commit) -> Result; @@ -177,6 +177,19 @@ pub enum SnapshotError { }, } +#[derive(Debug, Error)] +/// A file was larger than the specified maximum file size for new +/// (previously untracked) files. +#[error("New file {path} of size ~{size} exceeds snapshot.max-new-file-size ({max_size})")] +pub struct NewFileTooLarge { + /// The path of the large file. + pub path: PathBuf, + /// The size of the large file. + pub size: HumanByteSize, + /// The maximum allowed size. + pub max_size: HumanByteSize, +} + /// Options used when snapshotting the working copy. Some of them may be ignored /// by some `WorkingCopy` implementations. pub struct SnapshotOptions<'a> { diff --git a/lib/tests/test_local_working_copy.rs b/lib/tests/test_local_working_copy.rs index c09b328387..74a6aeba36 100644 --- a/lib/tests/test_local_working_copy.rs +++ b/lib/tests/test_local_working_copy.rs @@ -735,7 +735,8 @@ fn test_snapshot_racy_timestamps() { let new_tree_id = locked_ws .locked_wc() .snapshot(SnapshotOptions::empty_for_test()) - .unwrap(); + .unwrap() + .tree_id; assert_ne!(new_tree_id, previous_tree_id); previous_tree_id = new_tree_id; } @@ -769,7 +770,8 @@ fn test_snapshot_special_file() { let tree_id = locked_ws .locked_wc() .snapshot(SnapshotOptions::empty_for_test()) - .unwrap(); + .unwrap() + .tree_id; locked_ws.finish(OperationId::from_hex("abc123")).unwrap(); let tree = store.get_root_tree(&tree_id).unwrap(); // Only the regular files should be in the tree @@ -1189,6 +1191,7 @@ fn test_fsmonitor() { ..SnapshotOptions::empty_for_test() }) .unwrap() + .tree_id }; { diff --git a/lib/tests/test_local_working_copy_concurrent.rs b/lib/tests/test_local_working_copy_concurrent.rs index 695e3f6e6a..8bb48981d7 100644 --- a/lib/tests/test_local_working_copy_concurrent.rs +++ b/lib/tests/test_local_working_copy_concurrent.rs @@ -132,7 +132,8 @@ fn test_checkout_parallel() { let new_tree_id = locked_ws .locked_wc() .snapshot(SnapshotOptions::empty_for_test()) - .unwrap(); + .unwrap() + .tree_id; assert!(tree_ids.contains(&new_tree_id)); }); } diff --git a/lib/testutils/src/lib.rs b/lib/testutils/src/lib.rs index e5283d1b91..c46c6fbb60 100644 --- a/lib/testutils/src/lib.rs +++ b/lib/testutils/src/lib.rs @@ -238,10 +238,13 @@ impl TestWorkspace { /// new operation). pub fn snapshot(&mut self) -> Result { let mut locked_ws = self.workspace.start_working_copy_mutation().unwrap(); - let tree_id = locked_ws.locked_wc().snapshot(SnapshotOptions { - max_new_file_size: self.settings.max_new_file_size().unwrap(), - ..SnapshotOptions::empty_for_test() - })?; + let tree_id = locked_ws + .locked_wc() + .snapshot(SnapshotOptions { + max_new_file_size: self.settings.max_new_file_size().unwrap(), + ..SnapshotOptions::empty_for_test() + })? + .tree_id; // arbitrary operation id locked_ws.finish(self.repo.op_id().clone()).unwrap(); Ok(self.repo.store().get_root_tree(&tree_id).unwrap())