diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java index d63045d00..617e33c7c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/island/IslandCollection.java @@ -129,6 +129,8 @@ public List getPossibleIslands() { // complete overlap can be determined with one check // no overlap required two checks // partial overlap requires more, but since that doesn't happen often, it is not that bad + //TODO: hard to make a unit test for the partial overlap case when islands are prioritized by size instead of depth! + //TODO: There is an integration test for this in Darwin paragraph 1 (Also) private void checkPossibleIslandsForRightOverlap(List possibleIslands) { Iterator candidates = possibleIslands.iterator(); while (candidates.hasNext()) { @@ -146,36 +148,23 @@ private void checkPossibleIslandsForRightOverlap(List possibleIslands) { } // remove this candidate from the possible islands candidates.remove(); -// NOTE: only when we select islands based on depth islands with partial overlap should be split and added -// // partial overlap; find the starting point of the conflict -// Coordinate conflict = findConflictingCoordinate(island); -// Island smaller = createSmallerIslandSplitAtConflictingCoordinate(island, conflict); -// // add the smaller island to the priority queue -// // LOG.fine("Conflict detected! We add a smaller island! "+smaller); -// islandPriorityQueue.add(smaller); + // partial overlap; find the starting point of the conflict + Island smaller = findConflictingCoordinateAndCreateSmallerIslandSplitAtConflictingCoordinate(island); + // add the smaller island to the priority queue + // LOG.fine("Conflict detected! We add a smaller island! "+smaller); + islandPriorityQueue.add(smaller); } } -// NOTE: THE FOLLOWING CODE IS ONLY NEEDED WHEN WE ALIGN ON DEPTH! DEFAULT IS ON SIZE! -// //TODO: the following two methods can be put into one! -// private Coordinate findConflictingCoordinate(Island island){ -// for(Coordinate coordinate:island){ -// if (doesCoordinateOverlapWithCommittedCoordinate(coordinate)) { -// return coordinate; -// } -// } -// throw new RuntimeException("There should be a conflict! Weird!"); -// } -// -// private Island createSmallerIslandSplitAtConflictingCoordinate(Island island, Coordinate conflict) { -// // create a new island which contains the coordinates up to the overlapping coordinate. -// Island smaller = new Island(island.getDepth(), island.getBlockInstance()); -// for (Coordinate c : island) { -// if (c == conflict) { -// return smaller; -// } -// smaller.add(c); -// } -// throw new RuntimeException("Weird! Should never happen!"); -// } + private Island findConflictingCoordinateAndCreateSmallerIslandSplitAtConflictingCoordinate(Island island){ + // create a new island which contains the coordinates up to the overlapping coordinate. + Island smaller = new Island(island.getBlockInstance()); + for (Coordinate coordinate : island) { + if (doesCoordinateOverlapWithCommittedCoordinate(coordinate)) { + return smaller; + } + smaller.add(coordinate); + } + throw new RuntimeException("Expected a conflict! This should never happen!"); + } } \ No newline at end of file diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java index da970612a..0da95314c 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DekkerAlgorithmTest.java @@ -190,4 +190,16 @@ public void testDifficultCasePartialRightOverlapAndTranspositions() { assertEquals("|και| | |αποκριθεισ|ο|ι̅σ̅|ειπεν|αυτω| |βλεπεισ|ταυτασ|τασ|μεγαλασ|οικοδομασ| |λεγω|υμιν|ου|μη|αφεθη| |λιθοσ|επι|λιθου|οσ|ου|μη|καταλυθη|", toString(t, w[1])); assertEquals("|και|ο|ι̅σ̅|αποκριθεισ| | |ειπεν|αυτω| |βλεπεισ|ταυτασ|τασ|μεγαλασ|οικοδομασ| | | |ου|μη|αφεθη| |λιθοσ|επι|λιθον|οσ|ου|μη|καταλυθη|", toString(t, w[2])); } + + @Test + public void testDifficultCasePartialDarwin() { + SimpleWitness[] w = createWitnesses("those to which the parent-species have been exposed under nature. There is, also, I think, some probability", "those to which the parent-species have been exposed under nature. There is also, I think, some probability", "those to which the parent-species had been exposed under nature. There is also, I think, some probability", "those to which the parent-species had been exposed under nature. There is, also, some probability"); + DekkerAlgorithm aligner = new DekkerAlgorithm(); + VariantGraph graph = new VariantGraph(); + aligner.collate(graph, w); + assertThat(graph, graph(w[0]).aligned("those to which the parent-species have been exposed under nature . there is , also , i think , some probability")); + assertThat(graph, graph(w[1]).aligned("those to which the parent-species have been exposed under nature . there is also , i think , some probability")); + assertThat(graph, graph(w[2]).aligned("those to which the parent-species had been exposed under nature . there is also , i think , some probability")); + assertThat(graph, graph(w[3]).aligned("those to which the parent-species had been exposed under nature . there is , ").aligned(4, "also").aligned(", some probability")); + } } \ No newline at end of file diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java index 4e1aecc4e..dfb8d7b62 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/token_index/VariantGraphMatcher.java @@ -15,12 +15,18 @@ public class VariantGraphMatcher extends BaseMatcher { class ExpectationTuple { + private final int numberOfWitnesses; // check the number of nodes aligned to this vertices private String[] expected_content; // expected token content in the normalized form private boolean aligned; // should the tokens be aligned in the graph or not public ExpectationTuple(String[] tokens, boolean aligned) { + this(tokens, aligned, -1); + } + + public ExpectationTuple(String[] tokens, boolean aligned, int numberOfWitnesses) { this.expected_content = tokens; this.aligned = aligned; + this.numberOfWitnesses = numberOfWitnesses; } } @@ -45,9 +51,13 @@ public VariantGraphMatcher non_aligned(String... tokens) { } public VariantGraphMatcher aligned(String... tokens) { + return aligned(-1, tokens); + } + + public VariantGraphMatcher aligned(int numberOfWitnesses, String... tokens) { for (String token : tokens) { String[] split = token.split(" "); - expected.add(new ExpectationTuple(split, true)); + expected.add(new ExpectationTuple(split, true, numberOfWitnesses)); } return this; } @@ -78,6 +88,11 @@ public boolean matches(Object item) { failed = expectation; return false; } + // Check whether token has correct number of witnesses + if (expectation.numberOfWitnesses != -1 && expectation.numberOfWitnesses != v.tokens().size()) { + failed = expectation; + return false; + } } } // TODO: check more tokens than expected @@ -121,7 +136,7 @@ public void describeMismatch(Object item, Description description) { if (previousAligned!=null){ description.appendText(", "); } - description.appendText("aligned: "); + description.appendText("aligned ("+v.tokens().size()+"): "); previousAligned=true; } } else {