diff --git a/lib/src/local_working_copy.rs b/lib/src/local_working_copy.rs index 64d168e079..41fe495ed6 100644 --- a/lib/src/local_working_copy.rs +++ b/lib/src/local_working_copy.rs @@ -879,14 +879,14 @@ impl TreeState { let matcher = IntersectionMatcher::new(sparse_matcher.as_ref(), fsmonitor_matcher); if matcher.visit(RepoPath::root()).is_nothing() { - // No need to iterate file states to build empty deleted_files. + // No need to load the current tree, set up channels, etc. self.watchman_clock = watchman_clock; return Ok(is_dirty); } let (tree_entries_tx, tree_entries_rx) = channel(); let (file_states_tx, file_states_rx) = channel(); - let (present_files_tx, present_files_rx) = channel(); + let (deleted_files_tx, deleted_files_rx) = channel(); trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> { let snapshotter = FileSnapshotter { @@ -897,7 +897,7 @@ impl TreeState { // Move tx sides so they'll be dropped at the end of the scope. tree_entries_tx, file_states_tx, - present_files_tx, + deleted_files_tx, error: OnceLock::new(), progress, max_new_file_size, @@ -914,34 +914,18 @@ impl TreeState { })?; let mut tree_builder = MergedTreeBuilder::new(self.tree_id.clone()); - let mut deleted_files: HashSet<_> = - trace_span!("collecting existing files").in_scope(|| { - // Since file_states shouldn't contain files excluded by the sparse patterns, - // fsmonitor_matcher here is identical to the intersected matcher. - let file_states = self.file_states.all(); - file_states - .iter() - .filter(|(path, state)| { - fsmonitor_matcher.matches(path) && state.file_type != FileType::GitSubmodule - }) - .map(|(path, _state)| path.to_owned()) - .collect() - }); trace_span!("process tree entries").in_scope(|| { for (path, tree_values) in &tree_entries_rx { tree_builder.set_or_remove(path, tree_values); } }); - trace_span!("process present files").in_scope(|| { - for path in &present_files_rx { - deleted_files.remove(&path); - } - }); - trace_span!("process deleted tree entries").in_scope(|| { + let deleted_files = trace_span!("process deleted tree entries").in_scope(|| { + let deleted_files = HashSet::from_iter(deleted_files_rx); is_dirty |= !deleted_files.is_empty(); for file in &deleted_files { tree_builder.set_or_remove(file.clone(), Merge::absent()); } + deleted_files }); trace_span!("process file states").in_scope(|| { let changed_file_states = file_states_rx @@ -1024,6 +1008,12 @@ struct DirectoryToVisit<'a> { file_states: FileStates<'a>, } +#[derive(Clone, Debug, Default)] +struct PresentDirEntries { + dirs: HashSet, + files: HashSet, +} + /// Helper to scan local-disk directories and files in parallel. struct FileSnapshotter<'a> { tree_state: &'a TreeState, @@ -1032,7 +1022,7 @@ struct FileSnapshotter<'a> { start_tracking_matcher: &'a dyn Matcher, tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>, file_states_tx: Sender<(RepoPathBuf, FileState)>, - present_files_tx: Sender, + deleted_files_tx: Sender, error: OnceLock, progress: Option<&'a SnapshotProgress<'a>>, max_new_file_size: u64, @@ -1076,6 +1066,7 @@ impl FileSnapshotter<'_> { let git_ignore = git_ignore .chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?; + let mut present_entries = PresentDirEntries::default(); let to_read_dir_err = |err: io::Error| SnapshotError::Other { message: format!("Failed to read directory {}", disk_dir.display()), err: err.into(), @@ -1085,15 +1076,13 @@ impl FileSnapshotter<'_> { let file_type = entry.file_type().unwrap(); let file_name = entry.file_name(); let name = file_name - .to_str() - .ok_or_else(|| SnapshotError::InvalidUtf8Path { - path: file_name.clone(), - })?; + .into_string() + .map_err(|path| SnapshotError::InvalidUtf8Path { path })?; - if RESERVED_DIR_NAMES.contains(&name) { + if RESERVED_DIR_NAMES.contains(&name.as_str()) { continue; } - let path = dir.join(RepoPathComponent::new(name)); + let path = dir.join(RepoPathComponent::new(&name)); let maybe_current_file_state = file_states.get(&path); if let Some(file_state) = &maybe_current_file_state { if file_state.file_type == FileType::GitSubmodule { @@ -1103,6 +1092,10 @@ impl FileSnapshotter<'_> { if file_type.is_dir() { let file_states = file_states.prefixed(&path); + // Mark the directory as present. Whether or not the directory + // path matches, any child file entries shouldn't be touched + // within the current recursion step. + present_entries.dirs.insert(name); if git_ignore.matches(&path.to_internal_dir_string()) || self.start_tracking_matcher.visit(&path).is_nothing() { @@ -1152,6 +1145,7 @@ impl FileSnapshotter<'_> { }); } if let Some(new_file_state) = file_state(&metadata) { + present_entries.files.insert(name); self.process_present_file( path, &entry.path(), @@ -1162,6 +1156,7 @@ impl FileSnapshotter<'_> { } } } + self.emit_deleted_files(&dir, file_states, &present_entries); Ok(()) } @@ -1172,10 +1167,8 @@ impl FileSnapshotter<'_> { } let disk_path = tracked_path.to_fs_path(&self.tree_state.working_copy_path)?; let metadata = match disk_path.symlink_metadata() { - Ok(metadata) => metadata, - Err(err) if err.kind() == io::ErrorKind::NotFound => { - continue; - } + Ok(metadata) => Some(metadata), + Err(err) if err.kind() == io::ErrorKind::NotFound => None, Err(err) => { return Err(SnapshotError::Other { message: format!("Failed to stat file {}", disk_path.display()), @@ -1183,13 +1176,15 @@ impl FileSnapshotter<'_> { }); } }; - if let Some(new_file_state) = file_state(&metadata) { + if let Some(new_file_state) = metadata.as_ref().and_then(file_state) { self.process_present_file( tracked_path.to_owned(), &disk_path, Some(¤t_file_state), new_file_state, )?; + } else { + self.deleted_files_tx.send(tracked_path.to_owned()).ok(); } } Ok(()) @@ -1202,7 +1197,6 @@ impl FileSnapshotter<'_> { maybe_current_file_state: Option<&FileState>, new_file_state: FileState, ) -> Result<(), SnapshotError> { - self.present_files_tx.send(path.clone()).ok(); let update = self.get_updated_tree_value( &path, disk_path, @@ -1218,6 +1212,44 @@ impl FileSnapshotter<'_> { Ok(()) } + /// Emits file paths that don't exist in the `present_entries`. + fn emit_deleted_files( + &self, + dir: &RepoPath, + file_states: FileStates<'_>, + present_entries: &PresentDirEntries, + ) { + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + enum Kind { + Dir, + File, + } + + let file_state_chunks = file_states.iter().chunk_by(|(path, _state)| { + // Extract from , /, or //**. + // (file_states may contain file on file->dir transition.) + debug_assert!(path.starts_with(dir)); + let len = dir.as_internal_file_string().len() + (!dir.is_root() as usize); + let tail = path.as_internal_file_string().get(len..).unwrap_or(""); + match tail.split_once('/') { + Some((name, _)) => (Kind::Dir, name), + None => (Kind::File, tail), + } + }); + file_state_chunks + .into_iter() + .filter(|&((kind, name), _)| match kind { + Kind::Dir => !present_entries.dirs.contains(name), + Kind::File => !present_entries.files.contains(name), + }) + .flat_map(|(_, chunk)| chunk) + // Whether or not the entry exists, submodule should be ignored + .filter(|(_, state)| state.file_type != FileType::GitSubmodule) + .filter(|(path, _)| self.matcher.matches(path)) + .try_for_each(|(path, _)| self.deleted_files_tx.send(path.to_owned())) + .ok(); + } + fn get_updated_tree_value( &self, repo_path: &RepoPath, diff --git a/lib/tests/test_local_working_copy.rs b/lib/tests/test_local_working_copy.rs index 3f739923b4..610de0ed96 100644 --- a/lib/tests/test_local_working_copy.rs +++ b/lib/tests/test_local_working_copy.rs @@ -750,6 +750,55 @@ fn test_checkout_discard() { assert!(!reloaded_wc.file_states().unwrap().contains_path(file2_path)); } +#[test] +fn test_snapshot_file_directory_transition() { + let settings = testutils::user_settings(); + let mut test_workspace = TestWorkspace::init(&settings); + let repo = test_workspace.repo.clone(); + let workspace_root = test_workspace.workspace.workspace_root().to_owned(); + let checkout_options = CheckoutOptions::empty_for_test(); + let to_ws_path = |path: &RepoPath| path.to_fs_path(&workspace_root).unwrap(); + + // file <-> directory transition at root and sub directories + let file1_path = RepoPath::from_internal_string("foo/bar"); + let file2_path = RepoPath::from_internal_string("sub/bar/baz"); + let file1p_path = file1_path.parent().unwrap(); + let file2p_path = file2_path.parent().unwrap(); + + let tree1 = create_tree(&repo, &[(file1p_path, "1p"), (file2p_path, "2p")]); + let tree2 = create_tree(&repo, &[(file1_path, "1"), (file2_path, "2")]); + let commit1 = commit_with_tree(repo.store(), tree1.id()); + let commit2 = commit_with_tree(repo.store(), tree2.id()); + + let ws = &mut test_workspace.workspace; + ws.check_out(repo.op_id().clone(), None, &commit1, &checkout_options) + .unwrap(); + + // file -> directory + std::fs::remove_file(to_ws_path(file1p_path)).unwrap(); + std::fs::remove_file(to_ws_path(file2p_path)).unwrap(); + std::fs::create_dir(to_ws_path(file1p_path)).unwrap(); + std::fs::create_dir(to_ws_path(file2p_path)).unwrap(); + std::fs::write(to_ws_path(file1_path), "1").unwrap(); + std::fs::write(to_ws_path(file2_path), "2").unwrap(); + let new_tree = test_workspace.snapshot().unwrap(); + assert_eq!(new_tree.id(), tree2.id()); + + let ws = &mut test_workspace.workspace; + ws.check_out(repo.op_id().clone(), None, &commit2, &checkout_options) + .unwrap(); + + // directory -> file + std::fs::remove_file(to_ws_path(file1_path)).unwrap(); + std::fs::remove_file(to_ws_path(file2_path)).unwrap(); + std::fs::remove_dir(to_ws_path(file1p_path)).unwrap(); + std::fs::remove_dir(to_ws_path(file2p_path)).unwrap(); + std::fs::write(to_ws_path(file1p_path), "1p").unwrap(); + std::fs::write(to_ws_path(file2p_path), "2p").unwrap(); + let new_tree = test_workspace.snapshot().unwrap(); + assert_eq!(new_tree.id(), tree1.id()); +} + #[test] fn test_materialize_snapshot_conflicted_files() { let settings = testutils::user_settings();