transposed = new HashSet<>(t);
+ transposed.remove(joinCandidateVertex);
+ transposed.add(vertex);
+ for (Vertex tv : t) {
+ graph.transpositionIndex.getOrDefault(tv, Collections.emptySet()).remove(t);
+ }
+ graph.transpose(transposed);
+ }
+
+ vertex.outgoing.clear();
+ vertex.outgoing.putAll(joinCandidateVertex.outgoing);
+
+ vertex.outgoing.keySet().forEach(v -> v.incoming.put(vertex, v.incoming.remove(joinCandidateVertex)));
+
+ queue.push(vertex);
+ continue;
+ }
}
- joinCandidateEdge.delete();
- joinCandidateVertex.delete();
- queue.push(vertex);
- continue;
- }
- }
- processed.add(vertex);
- for (Edge e : outgoingEdges) {
- final Vertex next = e.to();
- // FIXME: Why do we run out of memory in some cases here, if this is not checked?
- if (!processed.contains(next)) {
- queue.push(next);
- }
+ // FIXME: Why do we run out of memory in some cases here, if this is not checked?
+ processed.add(vertex);
+ vertex.outgoing.keySet().stream().filter(v -> !processed.contains(v)).forEach(queue::push);
}
- }
- return graph;
- }
- };
+ return graph;
+ };
}
diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java
index 75f2adb1f..b0fa4c834 100644
--- a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java
+++ b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 The Interedition Development Group.
+ * Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
@@ -19,33 +19,16 @@
package eu.interedition.collatex;
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.collect.Ordering;
-
import java.util.Comparator;
/**
* IWitness
- *
+ *
* Representation of a single textual witness
- *
*/
public interface Witness {
- String getSigil();
-
- final Comparator SIGIL_COMPARATOR = new Comparator() {
- @Override
- public int compare(Witness o1, Witness o2) {
- return o1.getSigil().compareTo(o2.getSigil());
- }
- };
+ String getSigil();
- final Function TO_SIGILS = new Function() {
- @Override
- public String apply(VariantGraph.Edge input) {
- return Joiner.on(", ").join(Ordering.from(SIGIL_COMPARATOR).sortedCopy(input.witnesses()));
- }
- };
+ final Comparator SIGIL_COMPARATOR = Comparator.comparing(Witness::getSigil);
}
diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java
index e2d87edfe..e2141be8e 100644
--- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java
+++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 The Interedition Development Group.
+ * Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
@@ -18,17 +18,6 @@
*/
package eu.interedition.collatex.dekker;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
import eu.interedition.collatex.CollationAlgorithm;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
@@ -36,150 +25,162 @@
import eu.interedition.collatex.dekker.matrix.MatchTableLinker;
import eu.interedition.collatex.util.VariantGraphRanking;
-public class DekkerAlgorithm extends CollationAlgorithm.Base {
-
- private final Comparator comparator;
- private final TokenLinker tokenLinker;
- private final PhraseMatchDetector phraseMatchDetector;
- private final TranspositionDetector transpositionDetector;
- private Map tokenLinks;
- private List> phraseMatches;
- private List> transpositions;
- private Map alignments;
- private boolean mergeTranspositions = false;
-
- public DekkerAlgorithm(Comparator comparator) {
- this(comparator, new MatchTableLinker(3));
- }
-
- public DekkerAlgorithm(Comparator comparator, TokenLinker tokenLinker) {
- this.comparator = comparator;
- this.tokenLinker = tokenLinker;
- this.phraseMatchDetector = new PhraseMatchDetector();
- this.transpositionDetector = new TranspositionDetector();
- }
-
- @Override
- public void collate(VariantGraph graph, Iterable tokens) {
- Preconditions.checkArgument(!Iterables.isEmpty(tokens), "Empty witness");
- final Witness witness = Iterables.getFirst(tokens, null).getWitness();
-
- if (LOG.isLoggable(Level.FINER)) {
- LOG.log(Level.FINER, "{0} + {1}: {2} vs. {3}", new Object[] { graph, witness, graph.vertices(), tokens });
- }
-
- if (LOG.isLoggable(Level.FINE)) {
- LOG.log(Level.FINE, "{0} + {1}: Match and link tokens", new Object[] { graph, witness });
- }
- tokenLinks = tokenLinker.link(graph, tokens, comparator);
-
- if (LOG.isLoggable(Level.FINER)) {
- for (Map.Entry tokenLink : tokenLinks.entrySet()) {
- LOG.log(Level.FINER, "{0} + {1}: Token match: {2} = {3}", new Object[] { graph, witness, tokenLink.getValue(), tokenLink.getKey() });
- }
- }
-
- if (LOG.isLoggable(Level.FINE)) {
- LOG.log(Level.FINE, "{0} + {1}: Detect phrase matches", new Object[] { graph, witness });
- }
- phraseMatches = phraseMatchDetector.detect(tokenLinks, graph, tokens);
- if (LOG.isLoggable(Level.FINER)) {
- for (List phraseMatch : phraseMatches) {
- LOG.log(Level.FINER, "{0} + {1}: Phrase match: {2}", new Object[] { graph, witness, Iterables.toString(phraseMatch) });
- }
- }
-
- if (LOG.isLoggable(Level.FINE)) {
- LOG.log(Level.FINE, "{0} + {1}: Detect transpositions", new Object[] { graph, witness });
- }
- transpositions = transpositionDetector.detect(phraseMatches, graph);
- if (LOG.isLoggable(Level.FINE)) {
- LOG.log(Level.FINE, "transpositions:{0}", transpositions);
- }
-
- if (LOG.isLoggable(Level.FINER)) {
- for (List transposition : transpositions) {
- LOG.log(Level.FINER, "{0} + {1}: Transposition: {2}", new Object[] { graph, witness, Iterables.toString(transposition) });
- }
- }
-
- if (LOG.isLoggable(Level.FINE)) {
- LOG.log(Level.FINE, "{0} + {1}: Determine aligned tokens by filtering transpositions", new Object[] { graph, witness });
- }
- alignments = Maps.newHashMap();
- for (List phrase : phraseMatches) {
- for (Match match : phrase) {
- alignments.put(match.token, match.vertex);
- }
- }
-
- for (List transposedPhrase : transpositions) {
- for (Match match : transposedPhrase) {
- alignments.remove(match.token);
- }
- }
- if (LOG.isLoggable(Level.FINER)) {
- for (Map.Entry alignment : alignments.entrySet()) {
- LOG.log(Level.FINER, "{0} + {1}: Alignment: {2} = {3}", new Object[] { graph, witness, alignment.getValue(), alignment.getKey() });
- }
- }
-
- merge(graph, tokens, alignments);
-
- // we filter out small transposed phrases over large distances
- List> falseTranspositions = Lists.newArrayList();
-
- VariantGraphRanking ranking = VariantGraphRanking.of(graph);
-
- for (List transposedPhrase : transpositions) {
- Match match = transposedPhrase.get(0);
- VariantGraph.Vertex v1 = witnessTokenVertices.get(match.token);
- VariantGraph.Vertex v2 = match.vertex;
- int distance = Math.abs(ranking.apply(v1)-ranking.apply(v2))-1;
- if (distance > transposedPhrase.size()*3) {
- falseTranspositions.add(transposedPhrase);
- }
- }
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
- for (List transposition : falseTranspositions) {
- transpositions.remove(transposition);
- }
+public class DekkerAlgorithm extends CollationAlgorithm.Base {
- if (mergeTranspositions) {
- mergeTranspositions(graph, transpositions);
- }
-
- if (LOG.isLoggable(Level.FINER)) {
- LOG.log(Level.FINER, "!{0}: {1}", new Object[] {graph, Iterables.toString(graph.vertices())});
+ private final Comparator comparator;
+ private final TokenLinker tokenLinker;
+ private final PhraseMatchDetector phraseMatchDetector;
+ private final TranspositionDetector transpositionDetector;
+ private Map tokenLinks;
+ private List> phraseMatches;
+ private List> transpositions;
+ private Map alignments;
+ private boolean mergeTranspositions = false;
+
+ public DekkerAlgorithm(Comparator comparator) {
+ this(comparator, new MatchTableLinker());
+ }
+
+ public DekkerAlgorithm(Comparator comparator, TokenLinker tokenLinker) {
+ this.comparator = comparator;
+ this.tokenLinker = tokenLinker;
+ this.phraseMatchDetector = new PhraseMatchDetector();
+ this.transpositionDetector = new TranspositionDetector();
+ }
+
+ @Override
+ public void collate(VariantGraph graph, Iterable tokens) {
+ final Witness witness = StreamSupport.stream(tokens.spliterator(), false)
+ .findFirst()
+ .map(Token::getWitness)
+ .orElseThrow(() -> new IllegalArgumentException("Empty witness"));
+
+ if (LOG.isLoggable(Level.FINER)) {
+ LOG.log(Level.FINER, "{0} + {1}: {2} vs. {3}", new Object[]{graph, witness, graph.vertices(), tokens});
+ }
+
+ if (LOG.isLoggable(Level.FINE)) {
+ LOG.log(Level.FINE, "{0} + {1}: Match and link tokens", new Object[]{graph, witness});
+ }
+ tokenLinks = tokenLinker.link(graph, tokens, comparator);
+
+ if (LOG.isLoggable(Level.FINER)) {
+ for (Map.Entry tokenLink : tokenLinks.entrySet()) {
+ LOG.log(Level.FINER, "{0} + {1}: Token match: {2} = {3}", new Object[]{graph, witness, tokenLink.getValue(), tokenLink.getKey()});
+ }
+ }
+
+ if (LOG.isLoggable(Level.FINE)) {
+ LOG.log(Level.FINE, "{0} + {1}: Detect phrase matches", new Object[]{graph, witness});
+ }
+ phraseMatches = phraseMatchDetector.detect(tokenLinks, graph, tokens);
+ if (LOG.isLoggable(Level.FINER)) {
+ for (List phraseMatch : phraseMatches) {
+ LOG.log(Level.FINER, "{0} + {1}: Phrase match: {2}", new Object[]{graph, witness, phraseMatch});
+ }
+ }
+
+ if (LOG.isLoggable(Level.FINE)) {
+ LOG.log(Level.FINE, "{0} + {1}: Detect transpositions", new Object[]{graph, witness});
+ }
+ transpositions = transpositionDetector.detect(phraseMatches, graph);
+ if (LOG.isLoggable(Level.FINE)) {
+ LOG.log(Level.FINE, "transpositions:{0}", transpositions);
+ }
+
+ if (LOG.isLoggable(Level.FINER)) {
+ for (List transposition : transpositions) {
+ LOG.log(Level.FINER, "{0} + {1}: Transposition: {2}", new Object[]{graph, witness, transposition});
+ }
+ }
+
+ if (LOG.isLoggable(Level.FINE)) {
+ LOG.log(Level.FINE, "{0} + {1}: Determine aligned tokens by filtering transpositions", new Object[]{graph, witness});
+ }
+ alignments = new HashMap<>();
+ for (List phrase : phraseMatches) {
+ for (Match match : phrase) {
+ alignments.put(match.token, match.vertex);
+ }
+ }
+
+ for (List transposedPhrase : transpositions) {
+ for (Match match : transposedPhrase) {
+ alignments.remove(match.token);
+ }
+ }
+ if (LOG.isLoggable(Level.FINER)) {
+ for (Map.Entry alignment : alignments.entrySet()) {
+ LOG.log(Level.FINER, "{0} + {1}: Alignment: {2} = {3}", new Object[]{graph, witness, alignment.getValue(), alignment.getKey()});
+ }
+ }
+
+ merge(graph, tokens, alignments);
+
+ // we filter out small transposed phrases over large distances
+ List> falseTranspositions = new ArrayList<>();
+
+ VariantGraphRanking ranking = VariantGraphRanking.of(graph);
+
+ for (List transposedPhrase : transpositions) {
+ Match match = transposedPhrase.get(0);
+ VariantGraph.Vertex v1 = witnessTokenVertices.get(match.token);
+ VariantGraph.Vertex v2 = match.vertex;
+ int distance = Math.abs(ranking.apply(v1) - ranking.apply(v2)) - 1;
+ if (distance > transposedPhrase.size() * 3) {
+ falseTranspositions.add(transposedPhrase);
+ }
+ }
+
+ for (List transposition : falseTranspositions) {
+ transpositions.remove(transposition);
+ }
+
+ if (mergeTranspositions) {
+ mergeTranspositions(graph, transpositions);
+ }
+
+ if (LOG.isLoggable(Level.FINER)) {
+ LOG.log(Level.FINER, "!{0}: {1}", new Object[]{graph, StreamSupport.stream(graph.vertices().spliterator(), false).map(Object::toString).collect(Collectors.joining(", "))});
+ }
+ }
+
+ public Map getTokenLinks() {
+ return tokenLinks;
+ }
+
+ public List> getPhraseMatches() {
+ return Collections.unmodifiableList(phraseMatches);
+ }
+
+ public List> getTranspositions() {
+ return Collections.unmodifiableList(transpositions);
+ }
+
+ public Map getAlignments() {
+ return Collections.unmodifiableMap(alignments);
+ }
+
+ /*
+ * This check disables transposition rendering in the variant
+ * graph when the variant graph contains more than two witnesses.
+ * Transposition detection is done in a progressive manner
+ * (witness by witness). When viewing the resulting graph
+ * containing the variation for all witnesses
+ * the detected transpositions can look strange, since segments
+ * may have split into smaller or larger parts.
+ */
+ public void setMergeTranspositions(boolean b) {
+ this.mergeTranspositions = b;
}
- }
-
- public Map getTokenLinks() {
- return tokenLinks;
- }
-
- public List> getPhraseMatches() {
- return Collections.unmodifiableList(phraseMatches);
- }
-
- public List> getTranspositions() {
- return Collections.unmodifiableList(transpositions);
- }
-
- public Map getAlignments() {
- return Collections.unmodifiableMap(alignments);
- }
-
- /*
- * This check disables transposition rendering in the variant
- * graph when the variant graph contains more then two witnesses.
- * Transposition detection is done in a progressive manner
- * (witness by witness). When viewing the resulting graph
- * containing the variation for all witnesses
- * the detected transpositions can look strange, since segments
- * may have split into smaller or larger parts.
- */
- public void setMergeTranspositions(boolean b) {
- this.mergeTranspositions = b;
- }
}
diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java
index e44bc9b6c..60db37de2 100644
--- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java
+++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 The Interedition Development Group.
+ * Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
@@ -19,78 +19,62 @@
package eu.interedition.collatex.dekker;
-import com.google.common.base.Function;
-import com.google.common.base.Objects;
-import com.google.common.base.Predicate;
-import com.google.common.collect.Lists;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
+import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
/**
- * @author Gregor Middell
+ * @author Gregor Middell
*/
public class Match {
- public final VariantGraph.Vertex vertex;
- public final Token token;
+ public final VariantGraph.Vertex vertex;
+ public final Token token;
- public Match(VariantGraph.Vertex vertex, Token token) {
- this.vertex = vertex;
- this.token = token;
- }
-
- @Override
- public int hashCode() {
- return Objects.hashCode(vertex, token);
- }
+ public Match(VariantGraph.Vertex vertex, Token token) {
+ this.vertex = vertex;
+ this.token = token;
+ }
- @Override
- public boolean equals(Object obj) {
- if (obj != null && obj instanceof Match) {
- Match other = (Match) obj;
- return vertex.equals(other.vertex) && token.equals(other.token);
+ @Override
+ public int hashCode() {
+ return Objects.hash(vertex, token);
}
- return super.equals(obj);
- }
- @Override
- public String toString() {
- return new StringBuilder("{").append(vertex).append("; ").append(token).append("}").toString();
- }
+ @Override
+ public boolean equals(Object obj) {
+ if (obj != null && obj instanceof Match) {
+ Match other = (Match) obj;
+ return vertex.equals(other.vertex) && token.equals(other.token);
+ }
+ return super.equals(obj);
+ }
- public static List createPhraseMatch(List vertices, List tokens) {
- final List phraseMatch = Lists.newArrayListWithExpectedSize(vertices.size());
- final Iterator vertexIt = vertices.iterator();
- final Iterator tokenIt = tokens.iterator();
- while (vertexIt.hasNext() && tokenIt.hasNext()) {
- phraseMatch.add(new Match(vertexIt.next(), tokenIt.next()));
+ @Override
+ public String toString() {
+ return "{" + vertex + "; " + token + "}";
}
- return phraseMatch;
- }
+ public static List createPhraseMatch(List vertices, List tokens) {
+ final List phraseMatch = new ArrayList<>(vertices.size());
+ final Iterator vertexIt = vertices.iterator();
+ final Iterator tokenIt = tokens.iterator();
+ while (vertexIt.hasNext() && tokenIt.hasNext()) {
+ phraseMatch.add(new Match(vertexIt.next(), tokenIt.next()));
+ }
+ return phraseMatch;
+ }
- public static Predicate createNoBoundaryMatchPredicate(final VariantGraph graph) {
- return new Predicate() {
- @Override
- public boolean apply(Match input) {
- return !input.vertex.equals(graph.getStart()) && !input.vertex.equals(graph.getEnd());
- }
- };
- }
- public static final Function MATCH_TO_TOKENS = new Function() {
- @Override
- public Token apply(Match input) {
- return input.token;
+ public static Predicate createNoBoundaryMatchPredicate(final VariantGraph graph) {
+ return input -> !input.vertex.equals(graph.getStart()) && !input.vertex.equals(graph.getEnd());
}
- };
- public static final Function, List> PHRASE_MATCH_TO_TOKENS = new Function, List>() {
- @Override
- public List apply(List input) {
- return Lists.transform(input, MATCH_TO_TOKENS);
- }
- };
+ public static final Function, List> PHRASE_MATCH_TO_TOKENS = input -> input.stream().map(m -> m.token).collect(Collectors.toList());
}
diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java
index 3a1abbcb0..887b4cfd5 100644
--- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java
+++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java
@@ -1,85 +1,76 @@
-/*
- * Copyright (c) 2013 The Interedition Development Group.
- *
- * This file is part of CollateX.
- *
- * CollateX is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * CollateX is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with CollateX. If not, see <http://www.gnu.org/licenses/>.
- */
-package eu.interedition.collatex.dekker;
-
-import java.util.List;
-import java.util.Map;
-
-import com.google.common.collect.Sets;
-import eu.interedition.collatex.VariantGraph;
-import eu.interedition.collatex.neo4j.Neo4jGraphRelationships;
-import eu.interedition.collatex.neo4j.Neo4jVariantGraphVertex;
-import org.neo4j.graphdb.Direction;
-import org.neo4j.graphdb.Node;
-import org.neo4j.graphdb.Relationship;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-
-import eu.interedition.collatex.Token;
-
-/**
- *
- * @author Ronald Haentjens Dekker
- * @author Bram Buitendijk
- */
-public class PhraseMatchDetector {
-
- public List> detect(Map linkedTokens, VariantGraph base, Iterable tokens) {
- List> phraseMatches = Lists.newArrayList();
- List basePhrase = Lists.newArrayList();
- List witnessPhrase = Lists.newArrayList();
- VariantGraph.Vertex previous = base.getStart();
-
- for (Token token : tokens) {
- if (!linkedTokens.containsKey(token)) {
- addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase);
- continue;
- }
- VariantGraph.Vertex baseVertex = linkedTokens.get(token);
- // requirements:
- // - previous and base vertex should have the same witnesses
- // - previous and base vertex should either be in the same transposition(s) or both aren't in any transpositions
- // - there should be a directed edge between previous and base vertex
- // - there may not be a longer path between previous and base vertex
- boolean sameTranspositions = Sets.newHashSet(previous.transpositions()).equals(Sets.newHashSet(baseVertex.transpositions()));
- boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses());
- boolean directedEdge = (base.edgeBetween(previous, baseVertex) != null);
- boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (Iterables.size(previous.outgoing()) == 1 || Iterables.size(baseVertex.incoming()) == 1);
- if (!isNear) {
- addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase);
- }
- basePhrase.add(baseVertex);
- witnessPhrase.add(token);
- previous = baseVertex;
- }
- if (!basePhrase.isEmpty()) {
- phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase));
- }
- return phraseMatches;
- }
-
- private void addNewPhraseMatchAndClearBuffer(List> phraseMatches, List basePhrase, List witnessPhrase) {
- if (!basePhrase.isEmpty()) {
- phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase));
- basePhrase.clear();
- witnessPhrase.clear();
- }
- }
-}
+/*
+ * Copyright (c) 2015 The Interedition Development Group.
+ *
+ * This file is part of CollateX.
+ *
+ * CollateX is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CollateX is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CollateX. If not, see <http://www.gnu.org/licenses/>.
+ */
+package eu.interedition.collatex.dekker;
+
+import eu.interedition.collatex.Token;
+import eu.interedition.collatex.VariantGraph;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author Ronald Haentjens Dekker
+ * @author Bram Buitendijk
+ */
+public class PhraseMatchDetector {
+
+ public List> detect(Map linkedTokens, VariantGraph base, Iterable tokens) {
+ List> phraseMatches = new ArrayList<>();
+ List basePhrase = new ArrayList<>();
+ List witnessPhrase = new ArrayList<>();
+ VariantGraph.Vertex previous = base.getStart();
+
+ for (Token token : tokens) {
+ if (!linkedTokens.containsKey(token)) {
+ addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase);
+ continue;
+ }
+ VariantGraph.Vertex baseVertex = linkedTokens.get(token);
+ // requirements:
+ // - previous and base vertex should have the same witnesses
+ // - previous and base vertex should either be in the same transposition(s) or both aren't in any transpositions
+ // - there should be a directed edge between previous and base vertex
+ // - there may not be a longer path between previous and base vertex
+ boolean sameTranspositions = new HashSet<>(previous.transpositions()).equals(new HashSet<>(baseVertex.transpositions()));
+ boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses());
+ boolean directedEdge = previous.outgoing().containsKey(baseVertex);
+ boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (previous.outgoing().size() == 1 || baseVertex.incoming().size() == 1);
+ if (!isNear) {
+ addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase);
+ }
+ basePhrase.add(baseVertex);
+ witnessPhrase.add(token);
+ previous = baseVertex;
+ }
+ if (!basePhrase.isEmpty()) {
+ phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase));
+ }
+ return phraseMatches;
+ }
+
+ private void addNewPhraseMatchAndClearBuffer(List> phraseMatches, List basePhrase, List witnessPhrase) {
+ if (!basePhrase.isEmpty()) {
+ phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase));
+ basePhrase.clear();
+ witnessPhrase.clear();
+ }
+ }
+}
diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java
index 9de49acea..137b76d2c 100644
--- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java
+++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java
@@ -1,33 +1,33 @@
-/*
- * Copyright (c) 2013 The Interedition Development Group.
- *
- * This file is part of CollateX.
- *
- * CollateX is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * CollateX is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with CollateX. If not, see <http://www.gnu.org/licenses/>.
- */
-
-package eu.interedition.collatex.dekker;
-
-import java.util.Comparator;
-import java.util.Map;
-
-import eu.interedition.collatex.Token;
-import eu.interedition.collatex.VariantGraph;
-
-
-public interface TokenLinker {
-
- Map link(VariantGraph base, Iterable witness, Comparator comparator);
-
+/*
+ * Copyright (c) 2015 The Interedition Development Group.
+ *
+ * This file is part of CollateX.
+ *
+ * CollateX is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CollateX is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CollateX. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package eu.interedition.collatex.dekker;
+
+import eu.interedition.collatex.Token;
+import eu.interedition.collatex.VariantGraph;
+
+import java.util.Comparator;
+import java.util.Map;
+
+
+public interface TokenLinker {
+
+ Map link(VariantGraph base, Iterable witness, Comparator comparator);
+
}
\ No newline at end of file
diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java
index 27f73126b..75c461c35 100644
--- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java
+++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java
@@ -1,197 +1,190 @@
-/*
- * Copyright (c) 2013 The Interedition Development Group.
- *
- * This file is part of CollateX.
- *
- * CollateX is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * CollateX is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with CollateX. If not, see <http://www.gnu.org/licenses/>.
- */
-package eu.interedition.collatex.dekker;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-import eu.interedition.collatex.VariantGraph;
-import eu.interedition.collatex.simple.SimpleToken;
-import eu.interedition.collatex.util.VariantGraphRanking;
-
-/**
- *
- * @author Ronald Haentjens Dekker
- */
-public class TranspositionDetector {
- private Map, Integer> phraseMatchToIndex;
-
- public List> detect(final List> phraseMatches, VariantGraph base) {
- // if there are no phrase matches it is not possible
- // to detect transpositions, return an empty list
- if (phraseMatches.isEmpty()) {
- return Lists.newArrayList();
- }
-
- /*
- * We order the phrase matches in the topological order
- * of the graph (called rank). When the rank is equal
- * for two phrase matches, the witness order is used
- * to differentiate.
- */
- final VariantGraphRanking ranking = rankTheGraph(phraseMatches, base);
-
- Comparator> comp = new Comparator>() {
- @Override
- public int compare(List pm1, List pm2) {
- int rank1 = ranking.apply(pm1.get(0).vertex);
- int rank2 = ranking.apply(pm2.get(0).vertex);
- int difference = rank1 - rank2;
- if (difference != 0) {
- return difference;
- }
- int index1 = phraseMatches.indexOf(pm1);
- int index2 = phraseMatches.indexOf(pm2);
- return index1 - index2;
- }
- };
-
- List> phraseMatchesGraphOrder = Lists.newArrayList(phraseMatches);
- Collections.sort(phraseMatchesGraphOrder, comp);
-
- // Map 1
- phraseMatchToIndex = Maps.newHashMap();
- for (int i = 0; i < phraseMatchesGraphOrder.size(); i++) {
- phraseMatchToIndex.put(phraseMatchesGraphOrder.get(i), i);
- }
-
- /*
- * We calculate the index for all the phrase matches
- * First in witness order, then in graph order
- */
- List phraseMatchesGraphIndex = Lists.newArrayList();
- List phraseMatchesWitnessIndex = Lists.newArrayList();
-
- for (int i=0; i < phraseMatches.size(); i++) {
- phraseMatchesGraphIndex.add(i);
- }
-
- for (List phraseMatch : phraseMatches) {
- phraseMatchesWitnessIndex.add(phraseMatchToIndex.get(phraseMatch));
- }
-
- /*
- * Initialize result variables
- */
- List> nonTransposedPhraseMatches = Lists.newArrayList(phraseMatches);
- List> transpositions = Lists.newArrayList();
-
- /*
- * loop here until the maximum distance == 0
- */
- while (true) {
- // Map 2
- final Map, Integer> phraseMatchToDistanceMap = Maps.newLinkedHashMap();
- for (int i=0; i < nonTransposedPhraseMatches.size(); i++) {
- Integer graphIndex = phraseMatchesGraphIndex.get(i);
- Integer witnessIndex = phraseMatchesWitnessIndex.get(i);
- Integer distance = Math.abs(graphIndex - witnessIndex);
- List phraseMatch = nonTransposedPhraseMatches.get(i);
- phraseMatchToDistanceMap.put(phraseMatch, distance);
- }
-
- List