From 8dd1e63a4c8a128dae247730c83af00cfe69b5a1 Mon Sep 17 00:00:00 2001 From: Ilya Grigoriev Date: Sat, 25 Nov 2023 19:04:39 -0800 Subject: [PATCH] rewrite.rs: refactor `new_parents` to depend only on `parent_mapping` Previously, the function relied on both the `self.parent_mapping` and `self.rebased`. If `(A,B)` was in `parent_mapping` and `(B,C)` was in `rebased`, `new_parents` would map `A` to `C`. Now, `self.rebased` is ignored by `new_parents`. In the same situation, DescendantRebaser is changed so that both `(A,B)` and `(B,C)` are in `parent_mapping` before. `new_parents` now applies `parent_mapping` repeatedly, and will map `A` to `C` in this situation. ## Cons - The semantics are changed; `new_parents` now panics if `self.parent_mapping` contain cycles. AFAICT, such cycles never happen in `jj` anyway, except for one test that I had to fix. I think it's a sensible restriction to live with; if you do want to swap children of two commits, you can call `rebase_descendants` twice. ## Pros - I find the new logic much easier to reason about. I plan to extract it into a function, to be used in refactors for `jj rebase -r` and `jj new --after`. It will make it much easier to have a correct implementation of `jj rebase -r --after`, even when rebasing onto a descendant. - The de-duplication is no longer O(n^2). I tried to keep the common case fast. ## Alternatives - We could make `jj rebase` and `jj new` use a separate function with the algorithm shown here, without changing DescendantRebaser. I believe that the new algorithm makes DescendatRebaser easier to understand, though, and it feels more elegant to reduce code duplication. - The de-duplication optimization here is independent of other changes, and could be used on its own. --- lib/src/rewrite.rs | 91 ++++++++++++++++++++++++++++----------- lib/tests/test_rewrite.rs | 20 +++------ 2 files changed, 70 insertions(+), 41 deletions(-) diff --git a/lib/src/rewrite.rs b/lib/src/rewrite.rs index af9e05c0190..b1994e45d5d 100644 --- a/lib/src/rewrite.rs +++ b/lib/src/rewrite.rs @@ -271,6 +271,12 @@ pub struct DescendantRebaser<'settings, 'repo> { } impl<'settings, 'repo> DescendantRebaser<'settings, 'repo> { + /// Panics if any commit is rewritten to its own descendant. + /// + /// There should not be any cycles in the `rewritten` map (e.g. A is + /// rewritten to B, which is rewritten to A). The same commit should not + /// be rewritten and abandoned at the same time. In either case, panics are + /// likely when using the DescendantRebaser. pub fn new( settings: &'settings UserSettings, mut_repo: &'repo mut MutableRepo, @@ -379,34 +385,58 @@ impl<'settings, 'repo> DescendantRebaser<'settings, 'repo> { &self.rebased } + /// Panics if `parent_mapping` contains cycles fn new_parents(&self, old_ids: &[CommitId]) -> Vec { - // This should be a set, but performance of a vec is much better since we expect - // 99% of commits to have <= 2 parents. - let mut new_ids = vec![]; - let mut add_parent = |id: &CommitId| { - // This can trigger if we abandon an empty commit, as both the empty commit and - // its parent are succeeded by the same commit. - if !new_ids.contains(id) { - new_ids.push(id.clone()); - } - }; - for old_id in old_ids { - if let Some(new_parent_ids) = self.parent_mapping.get(old_id) { - for new_parent_id in new_parent_ids { - // The new parent may itself have been rebased earlier in the process - if let Some(newer_parent_id) = self.rebased.get(new_parent_id) { - add_parent(newer_parent_id); - } else { - add_parent(new_parent_id); + fn single_substitution_round( + parent_mapping: &HashMap>, + ids: Vec, + ) -> (Vec, bool) { + let mut made_replacements = false; + let mut new_ids = vec![]; + // TODO(ilyagr): (Maybe?) optimize common case of replacements all + // being singletons. If CommitId-s were Copy. no allocations would be needed in + // that case, but it probably doesn't matter much while they are Vec-s. + for id in ids.into_iter() { + match parent_mapping.get(&id) { + None => new_ids.push(id), + Some(replacements) => { + assert!( + // Each commit must have a parent, so a parent can + // not just be mapped to nothing. This assertion + // could be removed if this function is used for + // mapping something other than a commit's parents. + !replacements.is_empty(), + "Found empty value for key {id:?} in the parent mapping", + ); + made_replacements = true; + new_ids.extend(replacements.iter().cloned()) } - } - } else if let Some(new_parent_id) = self.rebased.get(old_id) { - add_parent(new_parent_id); - } else { - add_parent(old_id); - }; + }; + } + (new_ids, made_replacements) + } + + let mut new_ids: Vec = old_ids.to_vec(); + let mut iterations = 0; + loop { + let made_replacements; + (new_ids, made_replacements) = single_substitution_round(&self.parent_mapping, new_ids); + if !made_replacements { + break; + } + iterations += 1; + assert!( + iterations <= self.parent_mapping.len(), + "cycle detected in the parent mapping" + ); + } + match new_ids.as_slice() { + // The first two cases are an optimization for the common case of commits with <=2 + // parents + [_singleton] => new_ids, + [a, b] if a != b => new_ids, + _ => new_ids.into_iter().unique().collect(), } - new_ids } fn ref_target_update(old_id: CommitId, new_ids: Vec) -> (RefTarget, RefTarget) { @@ -540,8 +570,17 @@ impl<'settings, 'repo> DescendantRebaser<'settings, 'repo> { &new_parents, &self.options, )?; - self.rebased + let previous_rebased_value = self + .rebased .insert(old_commit_id.clone(), new_commit.id().clone()); + let previous_mapping_value = self + .parent_mapping + .insert(old_commit_id.clone(), vec![new_commit.id().clone()]); + assert_eq!( + (previous_rebased_value, previous_mapping_value), + (None, None), + "Trying to rebase the same commit {old_commit_id:?} in two different ways", + ); self.update_references(old_commit_id, vec![new_commit.id().clone()], true)?; return Ok(Some(RebasedDescendant { old_commit, diff --git a/lib/tests/test_rewrite.rs b/lib/tests/test_rewrite.rs index bbd67187145..d4c362babf3 100644 --- a/lib/tests/test_rewrite.rs +++ b/lib/tests/test_rewrite.rs @@ -647,13 +647,14 @@ fn test_rebase_descendants_multiple_sideways() { } #[test] +#[should_panic(expected = "cycle detected")] fn test_rebase_descendants_multiple_swap() { let settings = testutils::user_settings(); let test_repo = TestRepo::init(); let repo = &test_repo.repo; // Commit B was replaced by commit D. Commit D was replaced by commit B. - // Commit C and commit E should swap places. + // This results in an infinite loop and a panic // // C E // B D @@ -663,9 +664,9 @@ fn test_rebase_descendants_multiple_swap() { let mut graph_builder = CommitGraphBuilder::new(&settings, tx.mut_repo()); let commit_a = graph_builder.initial_commit(); let commit_b = graph_builder.commit_with_parents(&[&commit_a]); - let commit_c = graph_builder.commit_with_parents(&[&commit_b]); + let _commit_c = graph_builder.commit_with_parents(&[&commit_b]); let commit_d = graph_builder.commit_with_parents(&[&commit_a]); - let commit_e = graph_builder.commit_with_parents(&[&commit_d]); + let _commit_e = graph_builder.commit_with_parents(&[&commit_d]); let mut rebaser = DescendantRebaser::new( &settings, @@ -676,18 +677,7 @@ fn test_rebase_descendants_multiple_swap() { }, hashset! {}, ); - let new_commit_c = assert_rebased(rebaser.rebase_next().unwrap(), &commit_c, &[&commit_d]); - let new_commit_e = assert_rebased(rebaser.rebase_next().unwrap(), &commit_e, &[&commit_b]); - assert!(rebaser.rebase_next().unwrap().is_none()); - assert_eq!(rebaser.rebased().len(), 2); - - assert_eq!( - *tx.mut_repo().view().heads(), - hashset! { - new_commit_c.id().clone(), - new_commit_e.id().clone() - } - ); + let _ = rebaser.rebase_next(); // Panics because of the cycle } #[test]