From eb985846aeffe75533867350376d16e8dcf26a20 Mon Sep 17 00:00:00 2001 From: Ronald Haentjens Dekker Date: Sun, 11 Oct 2015 00:48:26 +0200 Subject: [PATCH] IslandCollection: Re-enabled the partial overlap code because the Darwin integration test showed it was needed even when aligning prioritized by island size rather than depth. Optimized the creation of the smaller island by combining the two loops into one. Extended the VariantGraphMatcher to be able to better specify the number of witnesses that should be aligned with a certain node. Tried to add a unit test for this case, but I cannot get it to fail when aligning based on island size. --- .../dekker/island/IslandCollection.java | 47 +++++++------------ .../collatex/dekker/DekkerAlgorithmTest.java | 12 +++++ .../token_index/VariantGraphMatcher.java | 19 +++++++- 3 files changed, 47 insertions(+), 31 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java index d63045d00..617e33c7c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java @@ -129,6 +129,8 @@ public List getPossibleIslands() { // complete overlap can be determined with one check // no overlap required two checks // partial overlap requires more, but since that doesn't happen often, it is not that bad + //TODO: hard to make a unit test for the partial overlap case when islands are prioritized by size instead of depth! + //TODO: There is an integration test for this in Darwin paragraph 1 (Also) private void checkPossibleIslandsForRightOverlap(List possibleIslands) { Iterator candidates = possibleIslands.iterator(); while (candidates.hasNext()) { @@ -146,36 +148,23 @@ private void checkPossibleIslandsForRightOverlap(List possibleIslands) { } // remove this candidate from the possible islands candidates.remove(); -// NOTE: only when we select islands based on depth islands with partial overlap should be split and added -// // partial overlap; find the starting point of the conflict -// Coordinate conflict = findConflictingCoordinate(island); -// Island smaller = createSmallerIslandSplitAtConflictingCoordinate(island, conflict); -// // add the smaller island to the priority queue -// // LOG.fine("Conflict detected! We add a smaller island! "+smaller); -// islandPriorityQueue.add(smaller); + // partial overlap; find the starting point of the conflict + Island smaller = findConflictingCoordinateAndCreateSmallerIslandSplitAtConflictingCoordinate(island); + // add the smaller island to the priority queue + // LOG.fine("Conflict detected! We add a smaller island! "+smaller); + islandPriorityQueue.add(smaller); } } -// NOTE: THE FOLLOWING CODE IS ONLY NEEDED WHEN WE ALIGN ON DEPTH! DEFAULT IS ON SIZE! -// //TODO: the following two methods can be put into one! -// private Coordinate findConflictingCoordinate(Island island){ -// for(Coordinate coordinate:island){ -// if (doesCoordinateOverlapWithCommittedCoordinate(coordinate)) { -// return coordinate; -// } -// } -// throw new RuntimeException("There should be a conflict! Weird!"); -// } -// -// private Island createSmallerIslandSplitAtConflictingCoordinate(Island island, Coordinate conflict) { -// // create a new island which contains the coordinates up to the overlapping coordinate. -// Island smaller = new Island(island.getDepth(), island.getBlockInstance()); -// for (Coordinate c : island) { -// if (c == conflict) { -// return smaller; -// } -// smaller.add(c); -// } -// throw new RuntimeException("Weird! Should never happen!"); -// } + private Island findConflictingCoordinateAndCreateSmallerIslandSplitAtConflictingCoordinate(Island island){ + // create a new island which contains the coordinates up to the overlapping coordinate. + Island smaller = new Island(island.getBlockInstance()); + for (Coordinate coordinate : island) { + if (doesCoordinateOverlapWithCommittedCoordinate(coordinate)) { + return smaller; + } + smaller.add(coordinate); + } + throw new RuntimeException("Expected a conflict! This should never happen!"); + } } \ No newline at end of file diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java index da970612a..0da95314c 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java @@ -190,4 +190,16 @@ public void testDifficultCasePartialRightOverlapAndTranspositions() { assertEquals("|και| | |αποκριθεισ|ο|ι̅σ̅|ειπεν|αυτω| |βλεπεισ|ταυτασ|τασ|μεγαλασ|οικοδομασ| |λεγω|υμιν|ου|μη|αφεθη| |λιθοσ|επι|λιθου|οσ|ου|μη|καταλυθη|", toString(t, w[1])); assertEquals("|και|ο|ι̅σ̅|αποκριθεισ| | |ειπεν|αυτω| |βλεπεισ|ταυτασ|τασ|μεγαλασ|οικοδομασ| | | |ου|μη|αφεθη| |λιθοσ|επι|λιθον|οσ|ου|μη|καταλυθη|", toString(t, w[2])); } + + @Test + public void testDifficultCasePartialDarwin() { + SimpleWitness[] w = createWitnesses("those to which the parent-species have been exposed under nature. There is, also, I think, some probability", "those to which the parent-species have been exposed under nature. There is also, I think, some probability", "those to which the parent-species had been exposed under nature. There is also, I think, some probability", "those to which the parent-species had been exposed under nature. There is, also, some probability"); + DekkerAlgorithm aligner = new DekkerAlgorithm(); + VariantGraph graph = new VariantGraph(); + aligner.collate(graph, w); + assertThat(graph, graph(w[0]).aligned("those to which the parent-species have been exposed under nature . there is , also , i think , some probability")); + assertThat(graph, graph(w[1]).aligned("those to which the parent-species have been exposed under nature . there is also , i think , some probability")); + assertThat(graph, graph(w[2]).aligned("those to which the parent-species had been exposed under nature . there is also , i think , some probability")); + assertThat(graph, graph(w[3]).aligned("those to which the parent-species had been exposed under nature . there is , ").aligned(4, "also").aligned(", some probability")); + } } \ No newline at end of file diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java index 4e1aecc4e..dfb8d7b62 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java @@ -15,12 +15,18 @@ public class VariantGraphMatcher extends BaseMatcher { class ExpectationTuple { + private final int numberOfWitnesses; // check the number of nodes aligned to this vertices private String[] expected_content; // expected token content in the normalized form private boolean aligned; // should the tokens be aligned in the graph or not public ExpectationTuple(String[] tokens, boolean aligned) { + this(tokens, aligned, -1); + } + + public ExpectationTuple(String[] tokens, boolean aligned, int numberOfWitnesses) { this.expected_content = tokens; this.aligned = aligned; + this.numberOfWitnesses = numberOfWitnesses; } } @@ -45,9 +51,13 @@ public VariantGraphMatcher non_aligned(String... tokens) { } public VariantGraphMatcher aligned(String... tokens) { + return aligned(-1, tokens); + } + + public VariantGraphMatcher aligned(int numberOfWitnesses, String... tokens) { for (String token : tokens) { String[] split = token.split(" "); - expected.add(new ExpectationTuple(split, true)); + expected.add(new ExpectationTuple(split, true, numberOfWitnesses)); } return this; } @@ -78,6 +88,11 @@ public boolean matches(Object item) { failed = expectation; return false; } + // Check whether token has correct number of witnesses + if (expectation.numberOfWitnesses != -1 && expectation.numberOfWitnesses != v.tokens().size()) { + failed = expectation; + return false; + } } } // TODO: check more tokens than expected @@ -121,7 +136,7 @@ public void describeMismatch(Object item, Description description) { if (previousAligned!=null){ description.appendText(", "); } - description.appendText("aligned: "); + description.appendText("aligned ("+v.tokens().size()+"): "); previousAligned=true; } } else {