From 1d09e77bf4645d0aa7ce084bf65c4287af44f7f5 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Wed, 5 Nov 2014 21:38:24 +0100 Subject: [PATCH 01/52] Java 8: reduce dependencies of collatex-tools (plain Grizzly server; javax.json replaces Jackson) --- collatex-tools/pom.xml | 17 +- .../collatex/cli/DocumentWitness.java | 47 ---- .../eu/interedition/collatex/cli/Engine.java | 45 +-- .../collatex/http/CollateResource.java | 202 -------------- .../interedition/collatex/http/Collator.java | 217 +++++++++++++++ .../collatex/http/JsonProcessor.java | 262 ++++++++++++++++++ .../ObjectMapperMessageBodyReaderWriter.java | 71 ----- .../eu/interedition/collatex/http/Server.java | 146 +++------- .../VariantGraphDotMessageBodyWriter.java | 65 ----- .../http/VariantGraphMLMessageBodyWriter.java | 77 ----- .../VariantGraphSVGMessageBodyWriter.java | 166 ----------- .../VariantGraphTEIMessageBodyWriter.java | 81 ------ .../collatex/io/CollateXModule.java | 37 --- .../collatex/io/IOExceptionMapper.java | 40 --- .../interedition/collatex/io/JsonToken.java | 38 --- .../io/SimpleCollationDeserializer.java | 177 ------------ .../collatex/io/VariantGraphSerializer.java | 101 ------- pom.xml | 12 +- 18 files changed, 564 insertions(+), 1237 deletions(-) delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/cli/DocumentWitness.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/CollateResource.java create mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java create mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/ObjectMapperMessageBodyReaderWriter.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphDotMessageBodyWriter.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphMLMessageBodyWriter.java delete 
mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphSVGMessageBodyWriter.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphTEIMessageBodyWriter.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/io/CollateXModule.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/io/IOExceptionMapper.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/io/JsonToken.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/io/SimpleCollationDeserializer.java delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/io/VariantGraphSerializer.java diff --git a/collatex-tools/pom.xml b/collatex-tools/pom.xml index 349f9553f..eee38d53a 100644 --- a/collatex-tools/pom.xml +++ b/collatex-tools/pom.xml @@ -11,6 +11,11 @@ CollateX Tools CollateX Tool Suite including a command line interface and a HTTP service + + org.glassfish + javax.json + 1.0.4 + eu.interedition collatex-core @@ -25,16 +30,8 @@ jung-graph-impl - com.sun.jersey - jersey-server - - - com.sun.jersey - jersey-grizzly2 - - - org.codehaus.jackson - jackson-mapper-asl + org.glassfish.grizzly + grizzly-http-server diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/DocumentWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/cli/DocumentWitness.java deleted file mode 100644 index 883cf2d60..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/DocumentWitness.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.cli; - -import eu.interedition.collatex.Witness; -import org.w3c.dom.Document; - -/** - * @author Gregor Middell - */ -public class DocumentWitness implements Witness { - - final String sigil; - final Document document; - - public DocumentWitness(String sigil, Document document) { - this.sigil = sigil; - this.document = document; - } - - @Override - public String getSigil() { - return sigil; - } - - @Override - public String toString() { - return sigil; - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java b/collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java index 33159b09d..252098356 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java @@ -29,10 +29,10 @@ import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.io.CollateXModule; -import eu.interedition.collatex.simple.SimpleCollation; +import eu.interedition.collatex.http.JsonProcessor; import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; +import eu.interedition.collatex.simple.SimpleCollation; import eu.interedition.collatex.simple.SimplePatternTokenizer; import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleTokenNormalizers; @@ -43,7 +43,6 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import 
org.apache.commons.cli.ParseException; -import org.codehaus.jackson.map.ObjectMapper; import org.xml.sax.SAXException; import javax.script.ScriptException; @@ -57,6 +56,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.StringReader; @@ -91,8 +91,6 @@ public class Engine implements Closeable { PrintWriter log = new PrintWriter(System.err); boolean errorOccurred = false; - ObjectMapper objectMapper = new ObjectMapper().withModule(new CollateXModule()); - Engine configure(CommandLine commandLine) throws XPathExpressionException, ParseException, ScriptException, IOException { this.inputCharset = Charset.forName(commandLine.getOptionValue("ie", "UTF-8")); this.xmlMode = commandLine.hasOption("xml"); @@ -112,14 +110,19 @@ Engine configure(CommandLine commandLine) throws XPathExpressionException, Parse } final String algorithm = commandLine.getOptionValue("a", "dekker").toLowerCase(); - if ("needleman-wunsch".equals(algorithm)) { - this.collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(this.comparator); - } else if ("medite".equals(algorithm)) { - this.collationAlgorithm = CollationAlgorithmFactory.medite(this.comparator, SimpleToken.TOKEN_MATCH_EVALUATOR); - } else if ("gst".equals(algorithm)) { - this.collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(comparator, 2); - } else { - this.collationAlgorithm = CollationAlgorithmFactory.dekker(this.comparator); + switch (algorithm) { + case "needleman-wunsch": + this.collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(this.comparator); + break; + case "medite": + this.collationAlgorithm = CollationAlgorithmFactory.medite(this.comparator, SimpleToken.TOKEN_MATCH_EVALUATOR); + break; + case "gst": + this.collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(comparator, 2); + break; + default: + this.collationAlgorithm = 
CollationAlgorithmFactory.dekker(this.comparator); + break; } this.variantGraph = new JungVariantGraph(); @@ -157,9 +160,12 @@ Engine configure(CommandLine commandLine) throws XPathExpressionException, Parse Engine read() throws IOException, XPathExpressionException, SAXException { if (inputResources.size() < 2) { - this.witnesses = objectMapper.readValue(inputResources.get(0), SimpleCollation.class).getWitnesses(); + try (InputStream inputStream = inputResources.get(0).openStream()) { + this.witnesses = JsonProcessor.read(inputStream).getWitnesses(); + } } else { this.witnesses = Lists.newArrayListWithExpectedSize(inputResources.size()); + //noinspection Convert2streamapi for (URL witnessURL : inputResources) { this.witnesses.add(new URLWitness("w" + (witnesses.size() + 1), witnessURL) .read(tokenizer, normalizer, inputCharset, (xmlMode ? tokenXPath : null))); @@ -202,7 +208,7 @@ void write() throws IOException { } } } else { - objectMapper.writer().writeValue(out, variantGraph); + JsonProcessor.write(variantGraph, out); } } @@ -253,9 +259,7 @@ public static void main(String... args) { engine.error("XML error", e); } catch (XPathExpressionException e) { engine.error("XPath error", e); - } catch (ScriptException e) { - engine.error("Script error", e); - } catch (PluginScript.PluginScriptExecutionException e) { + } catch (ScriptException | PluginScript.PluginScriptExecutionException e) { engine.error("Script error", e); } finally { try { @@ -282,8 +286,8 @@ public static void main(String... 
args) { @Override public void close() throws IOException { - final Closer closer = Closer.create(); - try { + final Closer closer = Closer.create(); + try { if (out != null) { closer.register(out).flush(); } @@ -294,6 +298,7 @@ public void close() throws IOException { closer.close(); } if (errorOccurred && (outFile != null) && outFile.isFile()) { + //noinspection ResultOfMethodCallIgnored outFile.delete(); } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/CollateResource.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/CollateResource.java deleted file mode 100644 index 6fc580240..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/CollateResource.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.http; - -import com.google.common.base.Objects; -import com.google.common.base.Strings; -import com.google.common.collect.Iterables; -import com.google.common.io.Closeables; -import com.google.common.io.Files; -import com.sun.jersey.api.NotFoundException; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleCollation; -import eu.interedition.collatex.jung.JungVariantGraph; -import eu.interedition.collatex.simple.SimpleToken; - -import javax.ws.rs.GET; -import javax.ws.rs.OPTIONS; -import javax.ws.rs.POST; -import javax.ws.rs.Path; -import javax.ws.rs.PathParam; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.core.Request; -import javax.ws.rs.core.Response; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Date; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; - -/** - * @author Gregor Middell - */ - -@Path("/") -public class CollateResource { - - private final File staticPath; - private final Date initial = new Date(); - - private final int maxCollationSize; - private final ExecutorService executor; - - public CollateResource(String staticPath, int maxParallelCollations, int maxCollationSize) { - this.staticPath = (Strings.isNullOrEmpty(staticPath) ? 
null : new File(staticPath)); - this.maxCollationSize = maxCollationSize; - this.executor = Executors.newFixedThreadPool(maxParallelCollations, new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - final Thread t = new Thread(r, CollateResource.class.getName()); - t.setDaemon(true); - t.setPriority(Thread.MIN_PRIORITY); - return t; - } - }); - } - - @GET - public Response index(@Context Request request) throws IOException { - return stream(request, "index.html"); - } - - @GET - @Path("darwin") - public Response darwin(@Context Request request) throws IOException { - return stream(request, "darwin.html"); - } - - - @Path("collate") - @GET - public Response noContent(@Context HttpHeaders hh) throws NoSuchMethodException { - return corsSupport(hh, Response.noContent()).build(); - } - - @Path("collate") - @OPTIONS - public Response collateOptions(@Context HttpHeaders hh) { - return corsSupport(hh, Response.ok()).build(); - } - - @Path("collate") - @POST - public Response collate(final SimpleCollation collation, @Context HttpHeaders hh) throws ExecutionException, InterruptedException { - if (maxCollationSize > 0) { - int witnessLength = 0; - for (Iterable witness : collation.getWitnesses()) { - for (SimpleToken token : Iterables.filter(witness, SimpleToken.class)) { - witnessLength += token.getContent().length(); - } - if (witnessLength > maxCollationSize) { - return Response.status(new Response.StatusType() { - @Override - public int getStatusCode() { - return 413; - } - - @Override - public Response.Status.Family getFamily() { - return Response.Status.Family.CLIENT_ERROR; - } - - @Override - public String getReasonPhrase() { - return "Request Entity Too Large"; - } - }).build(); - } - } - } - - return corsSupport(hh, Response.ok(executor.submit(new Callable() { - @Override - public VariantGraph call() throws Exception { - final JungVariantGraph graph = new JungVariantGraph(); - return (collation == null ? 
graph : collation.collate(graph)); - } - }).get())).build(); - } - - Response.ResponseBuilder corsSupport(@Context HttpHeaders hh, Response.ResponseBuilder response) { - final MultivaluedMap requestHeaders = hh.getRequestHeaders(); - return response.header("Access-Control-Allow-Origin", Objects.firstNonNull(requestHeaders.getFirst("Origin"), "*")) - .header("Access-Control-Allow-Methods", Objects.firstNonNull(requestHeaders.getFirst("Access-Control-Request-Method"), "GET, POST, HEAD, OPTIONS")) - .header("Access-Control-Allow-Headers", Objects.firstNonNull(requestHeaders.getFirst("Access-Control-Request-Headers"), "Content-Type, Accept, X-Requested-With")) - .header("Access-Control-Max-Age", "86400") - .header("Access-Control-Allow-Credentials", "true"); - } - - @Path("{path: .+?\\.((html)|(css)|(js)|(png)|(ico))}") - @GET - public Response stream(@Context Request request, @PathParam("path") String path) throws IOException { - InputStream stream = null; - Date lastModified = initial; - if (staticPath == null) { - stream = getClass().getResourceAsStream("/static/" + path); - } else { - final File file = new File(staticPath, path); - if (file.isFile() && file.getCanonicalPath().startsWith(staticPath.getCanonicalPath())) { - stream = new FileInputStream(file); - lastModified = new Date(file.lastModified()); - } - } - - if (stream == null) { - throw new NotFoundException(); - } - - if (request.getMethod().equals("GET")) { - final Response.ResponseBuilder preconditions = request.evaluatePreconditions(lastModified); - if (preconditions != null) { - Closeables.close(stream, false); - throw new WebApplicationException(preconditions.build()); - } - } - - final String extension = Files.getFileExtension(path); - String contentType = "application/octet-stream"; - if ("html".equals(extension)) { - contentType = "text/html"; - } else if ("js".equals(extension)) { - contentType = "text/javascript"; - } else if ("css".equals(extension)) { - contentType = "text/css"; - } else if 
("png".equals(extension)) { - contentType = "image/png"; - } else if ("ico".equals(extension)) { - contentType = "image/x-icon"; - } - - return Response.ok() - .entity(stream) - .lastModified(lastModified) - .type(contentType) - .build(); - } -} \ No newline at end of file diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java new file mode 100644 index 000000000..215ff832c --- /dev/null +++ b/collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java @@ -0,0 +1,217 @@ +package eu.interedition.collatex.http; + +import eu.interedition.collatex.jung.JungVariantGraph; +import eu.interedition.collatex.simple.SimpleCollation; +import eu.interedition.collatex.simple.SimpleToken; +import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; +import eu.interedition.collatex.simple.SimpleWitness; +import org.glassfish.grizzly.EmptyCompletionHandler; +import org.glassfish.grizzly.http.server.Request; +import org.glassfish.grizzly.http.server.Response; +import org.glassfish.grizzly.http.util.Header; + +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Reader; +import java.io.StringWriter; +import java.io.Writer; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * @author Gregor 
Middell + */ +public class Collator { + + private final int maxCollationSize; + private final String dotPath; + private final ExecutorService collationThreads; + private final ExecutorService processThreads = Executors.newCachedThreadPool(); + + public Collator(int maxParallelCollations, int maxCollationSize, String dotPath) { + this.collationThreads = Executors.newFixedThreadPool(maxParallelCollations, new ThreadFactory() { + private final AtomicLong counter = new AtomicLong(); + + @Override + public Thread newThread(Runnable r) { + final Thread t = new Thread(r, "collator-" + counter.incrementAndGet()); + t.setDaemon(true); + t.setPriority(Thread.MIN_PRIORITY); + return t; + } + }); + + this.maxCollationSize = maxCollationSize; + this.dotPath = dotPath; + } + + public void service(Request request, Response response) throws Exception { + final Deque path = path(request); + if (path.isEmpty() || !"collate".equals(path.pop())) { + response.sendError(404); + return; + } + + final SimpleCollation collation = JsonProcessor.read(request.getInputStream()); + if (maxCollationSize > 0) { + for (SimpleWitness witness : collation.getWitnesses()) { + final int witnessLength = witness.getTokens().stream() + .filter(t -> t instanceof SimpleToken).map(t -> (SimpleToken) t) + .collect(Collectors.summingInt(t -> t.getContent().length())); + if (witnessLength > maxCollationSize) { + response.sendError(413, "Request Entity Too Large"); + return; + } + } + } + + response.suspend(60, TimeUnit.SECONDS, new EmptyCompletionHandler<>()); + collationThreads.submit(() -> { + try { + final JungVariantGraph graph = new JungVariantGraph(); + collation.collate(graph); + + // CORS support + response.setHeader("Access-Control-Allow-Origin", Optional.ofNullable(request.getHeader("Origin")).orElse("*")); + response.setHeader("Access-Control-Allow-Methods", Optional.ofNullable(request.getHeader("Access-Control-Request-Method")).orElse("GET, POST, HEAD, OPTIONS")); + 
response.setHeader("Access-Control-Allow-Headers", Optional.ofNullable(request.getHeader("Access-Control-Request-Headers")).orElse("Content-Type, Accept, X-Requested-With")); + response.setHeader("Access-Control-Max-Age", "86400"); + response.setHeader("Access-Control-Allow-Credentials", "true"); + + final String clientAccepts = Optional.ofNullable(request.getHeader(Header.Accept)).orElse(""); + + if (clientAccepts.contains("text/plain")) { + response.setContentType("text/plain"); + response.setCharacterEncoding("utf-8"); + try (final Writer out = response.getWriter()) { + new SimpleVariantGraphSerializer(graph).toDot(out); + } + response.resume(); + + } else if (clientAccepts.contains("application/tei+xml")) { + XMLStreamWriter xml = null; + try { + response.setContentType("application/tei+xml"); + try (OutputStream responseStream = response.getOutputStream()) { + xml = XMLOutputFactory.newInstance().createXMLStreamWriter(responseStream); + xml.writeStartDocument(); + new SimpleVariantGraphSerializer(graph).toTEI(xml); + xml.writeEndDocument(); + } finally { + if (xml != null) { + xml.close(); + } + } + response.resume(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } + } else if (clientAccepts.contains("application/graphml+xml")) { + XMLStreamWriter xml = null; + try { + response.setContentType("application/graphml+xml"); + try (OutputStream responseStream = response.getOutputStream()) { + xml = XMLOutputFactory.newInstance().createXMLStreamWriter(responseStream); + xml.writeStartDocument(); + new SimpleVariantGraphSerializer(graph).toGraphML(xml); + xml.writeEndDocument(); + } finally { + if (xml != null) { + xml.close(); + } + } + response.resume(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } + } else if (clientAccepts.contains("image/svg+xml")) { + if (dotPath == null) { + response.sendError(204); + response.resume(); + } else { + final StringWriter dot = new StringWriter(); + new SimpleVariantGraphSerializer(graph).toDot(dot); 
+ + final Process dotProc = new ProcessBuilder(dotPath, "-Grankdir=LR", "-Gid=VariantGraph", "-Tsvg").start(); + final StringWriter errors = new StringWriter(); + CompletableFuture.allOf( + CompletableFuture.runAsync(() -> { + final char[] buf = new char[8192]; + try (final Reader errorStream = new InputStreamReader(dotProc.getErrorStream())) { + int len; + while ((len = errorStream.read(buf)) >= 0) { + errors.write(buf, 0, len); + } + } catch (IOException e) { + throw new CompletionException(e); + } + }, processThreads), + CompletableFuture.runAsync(() -> { + try (final Writer dotProcStream = new OutputStreamWriter(dotProc.getOutputStream(), "UTF-8")) { + dotProcStream.write(dot.toString()); + } catch (IOException e) { + throw new CompletionException(e); + } + }, processThreads), + CompletableFuture.runAsync(() -> { + response.setContentType("image/svg+xml"); + final byte[] buf = new byte[8192]; + try (final InputStream in = dotProc.getInputStream(); final OutputStream out = response.getOutputStream()) { + int len; + while ((len = in.read(buf)) >= 0) { + out.write(buf, 0, len); + } + } catch (IOException e) { + throw new CompletionException(e); + } + }, processThreads), + CompletableFuture.runAsync(() -> { + try { + if (dotProc.waitFor() != 0) { + throw new CompletionException(new IllegalStateException(errors.toString())); + } + } catch (InterruptedException e) { + throw new CompletionException(e); + } + }, processThreads) + ).exceptionally(t -> { + t.printStackTrace(); + return null; + }).thenRunAsync(response::resume, processThreads); + } + } else { + response.setContentType("application/json"); + try (final OutputStream responseStream = response.getOutputStream()) { + JsonProcessor.write(graph, responseStream); + } + response.resume(); + } + } catch (IOException e) { + // FIXME: ignored + } + }); + } + + private static Deque path(Request request) { + return Pattern.compile("/+").splitAsStream(Optional.ofNullable(request.getPathInfo()).orElse("")) + .filter(s -> 
!s.isEmpty()) + .collect(Collectors.toCollection(ArrayDeque::new)); + } + +} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java new file mode 100644 index 000000000..1b6e73323 --- /dev/null +++ b/collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java @@ -0,0 +1,262 @@ +package eu.interedition.collatex.http; + +import eu.interedition.collatex.CollationAlgorithm; +import eu.interedition.collatex.CollationAlgorithmFactory; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.Witness; +import eu.interedition.collatex.dekker.DekkerAlgorithm; +import eu.interedition.collatex.matching.EditDistanceTokenComparator; +import eu.interedition.collatex.matching.EqualityTokenComparator; +import eu.interedition.collatex.simple.SimpleCollation; +import eu.interedition.collatex.simple.SimplePatternTokenizer; +import eu.interedition.collatex.simple.SimpleToken; +import eu.interedition.collatex.simple.SimpleTokenNormalizers; +import eu.interedition.collatex.simple.SimpleWitness; +import eu.interedition.collatex.util.ParallelSegmentationApparatus; +import eu.interedition.collatex.util.VariantGraphRanking; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonStructure; +import javax.json.JsonValue; +import javax.json.stream.JsonGenerator; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.SortedMap; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.StreamSupport; + +/** + * @author Gregor Middell + */ +public class JsonProcessor { + + public static SimpleCollation read(InputStream inputStream) throws 
IOException { + try (JsonReader reader = Json.createReader(inputStream)) { + final JsonStructure collationNode = reader.read(); + if (collationNode.getValueType() != JsonValue.ValueType.OBJECT) { + throw new IOException("Expecting JSON object"); + } + final JsonObject collationObject = (JsonObject) collationNode; + + JsonArray witnessesNode; + try { + witnessesNode = collationObject.getJsonArray("witnesses"); + } catch (ClassCastException e) { + throw new IOException("Expecting 'witnesses' array"); + } + + final List witnesses = new ArrayList<>(witnessesNode.size()); + + for (JsonValue witnessNode : witnessesNode) { + if (witnessNode.getValueType() != JsonValue.ValueType.OBJECT) { + throw new IOException("Expecting witness object"); + } + final JsonObject witnessObject = (JsonObject) witnessNode; + + final String witnessId; + try { + witnessId = witnessObject.getString("id").trim(); + } catch (ClassCastException e) { + throw new IOException("Expected textual witness 'id'"); + } + if (witnessId.length() == 0) { + throw new IOException("Empty witness 'id' encountered"); + } + + final SimpleWitness witness = new SimpleWitness(witnessId); + if (witnesses.contains(witness)) { + throw new IOException(String.format("Duplicate sigil for witness '%s", witness)); + } + + final JsonValue contentNode = witnessObject.get("content"); + final JsonValue tokensNode = witnessObject.get("tokens"); + if (contentNode == null && tokensNode == null) { + throw new IOException(String.format("Expected either 'tokens' or 'content' field in witness \"%s\"", witness)); + } + + if (tokensNode != null) { + if (tokensNode.getValueType() != JsonValue.ValueType.ARRAY) { + throw new IOException(String.format("Expected 'tokens' array in witness \"%s\"", witness)); + } + final JsonArray tokensArray = (JsonArray) tokensNode; + final List tokens = new ArrayList<>(tokensArray.size()); + for (JsonValue tokenNode : tokensArray) { + if (tokenNode.getValueType() != JsonValue.ValueType.OBJECT) { + throw new 
IOException(String.format("Expected token object in 'tokens' field in witness \"%s\"", witness)); + } + final JsonObject tokenObject = (JsonObject) tokenNode; + String tokenContent; + try { + tokenContent = tokenObject.getString("t"); + } catch (ClassCastException | NullPointerException e) { + throw new IOException(String.format("Expected textual token content field 't' in witness \"%s\"", witness)); + } + + String normalizedTokenContent; + if (tokenObject.containsKey("n")) { + try { + normalizedTokenContent = tokenObject.getString("n"); + } catch (ClassCastException e) { + throw new IOException(String.format("Expected textual normalized token content in witness \"%s\"", witness)); + } + } else { + normalizedTokenContent = SimpleWitness.TOKEN_NORMALIZER.apply(tokenContent); + } + + if (normalizedTokenContent == null || normalizedTokenContent.length() == 0) { + throw new IOException(String.format("Empty token encountered in witness \"%s\"", witness)); + } + + tokens.add(new Token(witness, tokenContent, normalizedTokenContent, tokenObject)); + } + witness.setTokens(tokens); + } else { + if (contentNode.getValueType() != JsonValue.ValueType.STRING) { + throw new IOException(String.format("Expected 'content' text field in witness \"%s\"", witness)); + } + witness.setTokenContents( + SimplePatternTokenizer.BY_WS_OR_PUNCT.apply(((JsonString) contentNode).getString()), + SimpleTokenNormalizers.LC_TRIM_WS + ); + } + witnesses.add(witness); + } + + if (witnesses.isEmpty()) { + throw new IOException("No witnesses in collation"); + } + + Comparator tokenComparator = null; + final JsonValue tokenComparatorNode = collationObject.get("tokenComparator"); + if (tokenComparatorNode.getValueType() == JsonValue.ValueType.OBJECT) { + final JsonObject tokenComparatorObject = (JsonObject) tokenComparatorNode; + try { + if ("levenshtein".equals(tokenComparatorObject.getString("type"))) { + final int configuredDistance = tokenComparatorObject.getInt("distance", 0); + tokenComparator = new 
EditDistanceTokenComparator(configuredDistance == 0 ? 1 : configuredDistance); + } + } catch (ClassCastException e) { + // ignored + } + } + if (tokenComparator == null) { + tokenComparator = new EqualityTokenComparator(); + } + + CollationAlgorithm collationAlgorithm = null; + final JsonValue collationAlgorithmNode = collationObject.get("algorithm"); + if (collationAlgorithmNode.getValueType() == JsonValue.ValueType.STRING) { + final String collationAlgorithmValue = ((JsonString) collationAlgorithmNode).getString(); + if ("needleman-wunsch".equalsIgnoreCase(collationAlgorithmValue)) { + collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); + } else if ("gst".equalsIgnoreCase(collationAlgorithmValue)) { + collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); + } else if ("medite".equalsIgnoreCase(collationAlgorithmValue)) { + collationAlgorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); + } + } + if (collationAlgorithm == null) { + collationAlgorithm = CollationAlgorithmFactory.dekker(tokenComparator); + } + + boolean joined = true; + try { + joined = collationObject.getBoolean("joined", true); + } catch (ClassCastException e) { + // ignored + } + + if (collationAlgorithm instanceof DekkerAlgorithm) { + boolean mergeTranspositions = true; + try { + mergeTranspositions = collationObject.getBoolean("transpositions", true); + } catch (ClassCastException e) { + // ignored + } + ((DekkerAlgorithm) collationAlgorithm).setMergeTranspositions(mergeTranspositions); + } + return new SimpleCollation(witnesses, collationAlgorithm, joined); + } + } + + public static void write(VariantGraph graph, OutputStream outputStream) throws IOException { + try (final JsonGenerator jgen = Json.createGenerator(outputStream)) { + write(jgen, graph); + } + } + + public static void write(VariantGraph graph, PrintWriter writer) throws IOException { + try (final JsonGenerator jgen = 
Json.createGenerator(writer)) { + write(jgen, graph); + } + } + + protected static void write(JsonGenerator jgen, VariantGraph graph) { + ParallelSegmentationApparatus.generate(VariantGraphRanking.of(graph), new ParallelSegmentationApparatus.GeneratorCallback() { + @Override + public void start() { + jgen.writeStartObject(); + + jgen.writeStartArray("witnesses"); + graph.witnesses().stream().sorted(Witness.SIGIL_COMPARATOR).map(Witness::getSigil).forEach(jgen::write); + jgen.writeEnd(); + + + jgen.writeStartArray("table"); + } + + @Override + public void segment(SortedMap> contents) { + jgen.writeStartArray(); + contents.values().stream().forEach(tokens -> { + jgen.writeStartArray(); + StreamSupport.stream(Spliterators.spliteratorUnknownSize(tokens.iterator(), Spliterator.NONNULL | Spliterator.IMMUTABLE), false) + .filter(t -> t instanceof SimpleToken) + .map(t -> (SimpleToken) t) + .sorted() + .forEach(t -> { + if (t instanceof Token) { + jgen.write(((Token) t).getJsonNode()); + } else { + jgen.write(t.getContent()); + } + }); + jgen.writeEnd(); + }); + jgen.writeEnd(); + } + + @Override + public void end() { + jgen.writeEnd(); + jgen.writeEnd(); + } + }); + } + public static class Token extends SimpleToken { + + private final JsonObject jsonNode; + + public Token(SimpleWitness witness, String content, String normalized, JsonObject jsonNode) { + super(witness, content, normalized); + this.jsonNode = jsonNode; + } + + public JsonObject getJsonNode() { + return jsonNode; + } + } + + private JsonProcessor() { + } +} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/ObjectMapperMessageBodyReaderWriter.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/ObjectMapperMessageBodyReaderWriter.java deleted file mode 100644 index aea706a02..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/ObjectMapperMessageBodyReaderWriter.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development 
Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.http; - -import com.sun.jersey.core.provider.AbstractMessageReaderWriterProvider; -import org.codehaus.jackson.map.ObjectMapper; -import org.codehaus.jackson.map.type.SimpleType; - -import javax.ws.rs.Consumes; -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.Provider; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; - -/** - * @author Gregor Middell - */ -@Provider -@Consumes(MediaType.APPLICATION_JSON) -@Produces(MediaType.APPLICATION_JSON) -public class ObjectMapperMessageBodyReaderWriter extends AbstractMessageReaderWriterProvider { - - private final ObjectMapper objectMapper; - - public ObjectMapperMessageBodyReaderWriter(ObjectMapper objectMapper) { - this.objectMapper = objectMapper; - } - - @Override - public boolean isReadable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return objectMapper.canDeserialize(SimpleType.construct(type)); - } - - @Override - public Object readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream 
entityStream) throws IOException, WebApplicationException { - return objectMapper.readValue(entityStream, type); - } - - @Override - public boolean isWriteable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return objectMapper.canSerialize(type); - } - - @Override - public void writeTo(Object o, Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - objectMapper.writeValue(entityStream, o); - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java index c6685ee8d..fb2c63452 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java @@ -19,140 +19,82 @@ package eu.interedition.collatex.http; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import com.sun.jersey.api.container.ContainerFactory; -import com.sun.jersey.api.container.filter.GZIPContentEncodingFilter; -import com.sun.jersey.api.core.DefaultResourceConfig; -import eu.interedition.collatex.io.CollateXModule; -import eu.interedition.collatex.io.IOExceptionMapper; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.codehaus.jackson.map.ObjectMapper; +import org.glassfish.grizzly.http.CompressionConfig; +import org.glassfish.grizzly.http.server.CLStaticHttpHandler; import org.glassfish.grizzly.http.server.HttpHandler; import org.glassfish.grizzly.http.server.HttpServer; import org.glassfish.grizzly.http.server.NetworkListener; +import org.glassfish.grizzly.http.server.Request; +import org.glassfish.grizzly.http.server.Response; +import 
org.glassfish.grizzly.http.server.StaticHttpHandler; -import javax.ws.rs.core.UriBuilder; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; /** * @author Gregor Middell */ -public class Server extends DefaultResourceConfig implements Runnable { +public class Server { - String staticPath; - String dotPath; - - String contextPath; - int httpPort; - - ObjectMapper objectMapper; - - int maxParallelCollations; - int maxCollationSize; - - Server() { - super(); - final HashMap config = Maps.newHashMap(); - config.put(PROPERTY_CONTAINER_REQUEST_FILTERS, Arrays.>asList(GZIPContentEncodingFilter.class)); - config.put(PROPERTY_CONTAINER_RESPONSE_FILTERS, Arrays.>asList(GZIPContentEncodingFilter.class)); - setPropertiesAndFeatures(config); - } - - @Override - public void run() { + public static void main(String... args) { try { - objectMapper = new ObjectMapper(); - objectMapper.registerModule(new CollateXModule()); - - if (LOG.isLoggable(Level.INFO)) { - LOG.info("Starting HTTP server at " + UriBuilder.fromUri("http://localhost/").port(httpPort).path(contextPath).build()); + final CommandLine commandLine = new GnuParser().parse(OPTIONS, args); + if (commandLine.hasOption("h")) { + new HelpFormatter().printHelp("collatex-server [ ...]\n", OPTIONS); + return; } - final HttpServer httpServer = new HttpServer(); - httpServer.addListener(new NetworkListener("grizzly", NetworkListener.DEFAULT_NETWORK_HOST, httpPort)); - httpServer.getServerConfiguration().addHttpHandler(ContainerFactory.createContainer(HttpHandler.class, this), contextPath); - Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { + final Collator collator = new Collator( + Integer.parseInt(commandLine.getOptionValue("mpc", "2")), + Integer.parseInt(commandLine.getOptionValue("mcs", "0")), + commandLine.getOptionValue("dot", null) + ); + final String staticPath = 
System.getProperty("collatex.static.path", ""); + final HttpHandler httpHandler = staticPath.isEmpty() ? new CLStaticHttpHandler(Server.class.getClassLoader(), "/static/") { @Override - public void run() { - if (LOG.isLoggable(Level.INFO)) { - LOG.info("Stopping HTTP server"); - } - httpServer.stop(); + protected void onMissingResource(Request request, Response response) throws Exception { + collator.service(request, response); } - })); - - httpServer.start(); - - synchronized (httpServer) { - try { - httpServer.wait(); - } catch (InterruptedException e) { + } : new StaticHttpHandler(staticPath.replaceAll("/+$", "") + "/") { + @Override + protected void onMissingResource(Request request, Response response) throws Exception { + collator.service(request, response); } - } - } catch (IOException e) { - LOG.log(Level.SEVERE, "I/O error", e); - } - } + }; - Server configure(CommandLine commandLine) { - httpPort = Integer.parseInt(commandLine.getOptionValue("p", "7369")); - contextPath = commandLine.getOptionValue("cp", "").replaceAll("/*$", "/"); - dotPath = commandLine.getOptionValue("dot", null); + final NetworkListener httpListener = new NetworkListener("http", "0.0.0.0", Integer.parseInt(commandLine.getOptionValue("p", "7369"))); - maxParallelCollations = Integer.parseInt(commandLine.getOptionValue("mpc", "2")); - maxCollationSize = Integer.parseInt(commandLine.getOptionValue("mcs", "0")); + final CompressionConfig compressionConfig = httpListener.getCompressionConfig(); + compressionConfig.setCompressionMode(CompressionConfig.CompressionMode.ON); + compressionConfig.setCompressionMinSize(860); // http://webmasters.stackexchange.com/questions/31750/what-is-recommended-minimum-object-size-for-gzip-performance-benefits + compressionConfig.setCompressableMimeTypes("application/javascript", "application/json", "application/xml", "text/css", "text/html", "text/javascript", "text/plain", "text/xml"); - staticPath = System.getProperty("collatex.static.path", null); + final 
HttpServer httpServer = new HttpServer(); + httpServer.addListener(httpListener); + httpServer.getServerConfiguration().addHttpHandler(httpHandler, commandLine.getOptionValue("cp", "").replaceAll("/+$", "") + "/*"); - return this; - } + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + if (LOG.isLoggable(Level.INFO)) { + LOG.info("Stopping HTTP server"); + } + httpServer.shutdown(); + })); - public static void main(String... args) { - try { - final CommandLine commandLine = new GnuParser().parse(OPTIONS, args); - if (commandLine.hasOption("h")) { - new HelpFormatter().printHelp("collatex-server [ ...]\n", OPTIONS); - return; - } + httpServer.start(); - new Server().configure(commandLine).run(); - } catch (ParseException e) { - LOG.log(Level.SEVERE, "Error while parsing command line", e); + Thread.sleep(Long.MAX_VALUE); + } catch (Throwable t) { + LOG.log(Level.SEVERE, "Error while parsing command line", t); + System.exit(1); } } - - @Override - public Set> getProviderClasses() { - return Sets.>newHashSet( - IOExceptionMapper.class, - VariantGraphDotMessageBodyWriter.class, - VariantGraphMLMessageBodyWriter.class, - VariantGraphTEIMessageBodyWriter.class - ); - } - - @Override - public Set getSingletons() { - return Sets.newHashSet( - new CollateResource(staticPath, maxParallelCollations, maxCollationSize), - new ObjectMapperMessageBodyReaderWriter(objectMapper), - new VariantGraphSVGMessageBodyWriter(dotPath) - ); - } - static final Logger LOG = Logger.getLogger(Server.class.getName()); static final Options OPTIONS = new Options(); diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphDotMessageBodyWriter.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphDotMessageBodyWriter.java deleted file mode 100644 index f5176c54b..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphDotMessageBodyWriter.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2013 The 
Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.http; - -import com.google.common.io.Closeables; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; - -/** - * @author Gregor Middell - */ -@Provider -@Produces(MediaType.TEXT_PLAIN) -public class VariantGraphDotMessageBodyWriter implements MessageBodyWriter { - - @Override - public boolean isWriteable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return VariantGraph.class.isAssignableFrom(type); - } - - @Override - public long getSize(VariantGraph variantGraph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - @Override - public void writeTo(VariantGraph graph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, OutputStream 
entityStream) throws IOException, WebApplicationException { - final PrintWriter out = new PrintWriter(new OutputStreamWriter(entityStream, "UTF-8")); - try { - new SimpleVariantGraphSerializer(graph).toDot(out); - } finally { - Closeables.close(out, false); - } - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphMLMessageBodyWriter.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphMLMessageBodyWriter.java deleted file mode 100644 index 0226d83ad..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphMLMessageBodyWriter.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.http; - -import com.google.common.io.Closeables; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; - -/** - * @author Gregor Middell - */ -@Provider -@Produces("application/graphml+xml") -public class VariantGraphMLMessageBodyWriter implements MessageBodyWriter { - - private static final XMLOutputFactory XML_OUTPUT_FACTORY = XMLOutputFactory.newInstance(); - - @Override - public boolean isWriteable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return VariantGraph.class.isAssignableFrom(type); - } - - @Override - public long getSize(VariantGraph variantGraph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - @Override - public void writeTo(VariantGraph variantGraph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - XMLStreamWriter xml = null; - try { - xml = XML_OUTPUT_FACTORY.createXMLStreamWriter(entityStream); - xml.writeStartDocument(); - new SimpleVariantGraphSerializer(variantGraph).toGraphML(xml); - xml.writeEndDocument(); - } catch (XMLStreamException e) { - throw new IOException(e); - } finally { - try { - xml.close(); - } catch (XMLStreamException e) { - } - Closeables.close(entityStream, false); - } - } -} diff --git 
a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphSVGMessageBodyWriter.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphSVGMessageBodyWriter.java deleted file mode 100644 index 24ecac240..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphSVGMessageBodyWriter.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.http; - -import com.google.common.base.Strings; -import com.google.common.base.Throwables; -import com.google.common.io.ByteStreams; -import com.google.common.io.CharStreams; -import com.google.common.io.Closeables; -import com.google.common.io.Closer; -import com.google.common.io.FileBackedOutputStream; -import eu.interedition.collatex.VariantGraph; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.core.Response; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; -import java.nio.charset.Charset; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -/** - * @author Gregor Middell - */ -@Provider -@Produces("image/svg+xml") -public class VariantGraphSVGMessageBodyWriter implements MessageBodyWriter { - - final ExecutorService threadPool = Executors.newCachedThreadPool(); - final String dotPath; - - public VariantGraphSVGMessageBodyWriter(String dotPath) { - this.dotPath = detectDot(dotPath); - } - - - @Override - public boolean isWriteable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return VariantGraph.class.isAssignableFrom(type); - } - - @Override - public long getSize(VariantGraph variantGraph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - @Override - public void writeTo(final VariantGraph variantGraph, final Class type, final Type genericType, final Annotation[] annotations, 
final MediaType mediaType, final MultivaluedMap httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - if (dotPath == null) { - throw new WebApplicationException(Response.Status.NO_CONTENT); - } - - final Process dotProc = Runtime.getRuntime().exec(dotPath + " -Grankdir=LR -Gid=VariantGraph -Tsvg"); - - final Future inputTask = threadPool.submit(new Callable() { - @Override - public Void call() throws Exception { - new VariantGraphDotMessageBodyWriter().writeTo(variantGraph, type, genericType, annotations, mediaType, httpHeaders, dotProc.getOutputStream()); - return null; - } - }); - - InputStream svgResult = null; - final FileBackedOutputStream svgBuf = new FileBackedOutputStream(102400); - try { - ByteStreams.copy(svgResult = new BufferedInputStream(dotProc.getInputStream()), svgBuf); - } finally { - Closeables.close(svgBuf, false); - Closeables.close(svgResult, false); - } - - try { - inputTask.get(); - } catch (InterruptedException e) { - } catch (ExecutionException e) { - Throwables.propagateIfInstanceOf(e.getCause(), IOException.class); - throw Throwables.propagate(e); - } - - final Closer closer = Closer.create(); - try { - if (dotProc.waitFor() == 0) { - ByteStreams.copy(closer.register(svgBuf.asByteSource().openBufferedStream()), entityStream); - } - } catch (InterruptedException e) { - } finally { - closer.close(); - svgBuf.reset(); - Closeables.close(entityStream, false); - } - } - - protected String detectDot(String dotPath) { - if (dotPath != null && new File(dotPath).canExecute()) { - return dotPath; - } - - dotPath = null; - Closer closer = Closer.create(); - try { - final Process which = new ProcessBuilder("which", "dot").start(); - dotPath = CharStreams.toString(new InputStreamReader(closer.register(which.getInputStream()), Charset.defaultCharset())).trim(); - which.waitFor(); - } catch (IOException e) { - } catch (InterruptedException e) { - } finally { - try { - closer.close(); - } catch (IOException e) { - } - } 
- - if (Strings.isNullOrEmpty(dotPath)) { - closer = Closer.create(); - try { - final Process where = new ProcessBuilder("where.exe", "dot.exe").start(); - dotPath = CharStreams.toString(new InputStreamReader(closer.register(where.getInputStream()), Charset.defaultCharset())).trim(); - where.waitFor(); - } catch (IOException e) { - } catch (InterruptedException e) { - } finally { - try { - closer.close(); - } catch (IOException e) { - } - } - } - - if (!Strings.isNullOrEmpty(dotPath)) { - dotPath = dotPath.split("[\r\n]+")[0].trim(); - } - - return (Strings.isNullOrEmpty(dotPath) ? null : dotPath); - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphTEIMessageBodyWriter.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphTEIMessageBodyWriter.java deleted file mode 100644 index 44e256b43..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/VariantGraphTEIMessageBodyWriter.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.http; - -import com.google.common.io.Closeables; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; - -/** - * @author Gregor Middell - */ -@Provider -@Produces("application/tei+xml") -public class VariantGraphTEIMessageBodyWriter implements MessageBodyWriter { - - private static final XMLOutputFactory XML_OUTPUT_FACTORY = XMLOutputFactory.newInstance(); - - static { - XML_OUTPUT_FACTORY.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE); - } - - @Override - public boolean isWriteable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return VariantGraph.class.isAssignableFrom(type); - } - - @Override - public long getSize(VariantGraph variantGraph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - @Override - public void writeTo(VariantGraph variantGraph, Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - XMLStreamWriter xml = null; - try { - xml = XML_OUTPUT_FACTORY.createXMLStreamWriter(entityStream); - xml.writeStartDocument(); - new SimpleVariantGraphSerializer(variantGraph).toTEI(xml); - xml.writeEndDocument(); - } catch (XMLStreamException e) { - throw new IOException(e); - } finally { - try { - xml.close(); - } catch (XMLStreamException e) { - } - 
Closeables.close(entityStream, false); - } - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/io/CollateXModule.java b/collatex-tools/src/main/java/eu/interedition/collatex/io/CollateXModule.java deleted file mode 100644 index 0bbbc59ca..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/io/CollateXModule.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.io; - -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleCollation; -import org.codehaus.jackson.Version; -import org.codehaus.jackson.map.module.SimpleModule; - -/** -* @author Gregor Middell -*/ -public class CollateXModule extends SimpleModule { - - public CollateXModule() { - super(CollateXModule.class.getPackage().getName(), Version.unknownVersion()); - addDeserializer(SimpleCollation.class, new SimpleCollationDeserializer()); - addSerializer(VariantGraph.class, new VariantGraphSerializer()); - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/io/IOExceptionMapper.java b/collatex-tools/src/main/java/eu/interedition/collatex/io/IOExceptionMapper.java deleted file mode 100644 index d50741056..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/io/IOExceptionMapper.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.io; - -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import javax.ws.rs.ext.ExceptionMapper; -import javax.ws.rs.ext.Provider; -import java.io.IOException; - -/** - * @author Gregor Middell - */ -@Provider -public class IOExceptionMapper implements ExceptionMapper { - @Override - public Response toResponse(IOException exception) { - return Response.status(Response.Status.BAD_REQUEST) - .entity(exception.getMessage()) - .type(MediaType.TEXT_PLAIN_TYPE) - .build(); - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/io/JsonToken.java b/collatex-tools/src/main/java/eu/interedition/collatex/io/JsonToken.java deleted file mode 100644 index 7eaefeaba..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/io/JsonToken.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.io; - -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.simple.SimpleWitness; -import org.codehaus.jackson.JsonNode; - -public class JsonToken extends SimpleToken { - - private final JsonNode jsonNode; - - public JsonToken(SimpleWitness witness, String content, String normalized, JsonNode jsonNode) { - super(witness, content, normalized); - this.jsonNode = jsonNode; - } - - public JsonNode getJsonNode() { - return jsonNode; - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/io/SimpleCollationDeserializer.java b/collatex-tools/src/main/java/eu/interedition/collatex/io/SimpleCollationDeserializer.java deleted file mode 100644 index 166af0705..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/io/SimpleCollationDeserializer.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.io; - -import com.google.common.collect.Lists; - -import eu.interedition.collatex.CollationAlgorithm; -import eu.interedition.collatex.CollationAlgorithmFactory; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.dekker.DekkerAlgorithm; -import eu.interedition.collatex.matching.EditDistanceTokenComparator; -import eu.interedition.collatex.matching.EqualityTokenComparator; -import eu.interedition.collatex.simple.SimpleCollation; -import eu.interedition.collatex.simple.SimplePatternTokenizer; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.simple.SimpleTokenNormalizers; -import eu.interedition.collatex.simple.SimpleWitness; - -import org.codehaus.jackson.JsonNode; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.map.DeserializationContext; -import org.codehaus.jackson.map.JsonDeserializer; -import org.codehaus.jackson.map.JsonMappingException; - -import java.io.IOException; -import java.util.Comparator; -import java.util.List; - -/** - * @author Gregor Middell - */ -public class SimpleCollationDeserializer extends JsonDeserializer { - - @Override - public SimpleCollation deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { - final JsonNode collationNode = jp.readValueAsTree(); - - final JsonNode witnessesNode = collationNode.path("witnesses"); - if (witnessesNode.isMissingNode() || !witnessesNode.isArray()) { - throw JsonMappingException.from(jp, "Expecting 'witnesses' array"); - } - - final List witnesses = Lists.newArrayList(); - for (JsonNode witnessNode : witnessesNode) { - if (!witnessNode.isObject()) { - throw JsonMappingException.from(jp, "Expecting witness object"); - } - final JsonNode witnessIdNode = witnessNode.path("id"); - if (witnessIdNode.isMissingNode() || !witnessIdNode.isTextual()) { - throw JsonMappingException.from(jp, "Expected textual witness 'id'"); - } - final String witnessIdStr = 
witnessIdNode.getTextValue().trim(); - if (witnessIdStr.length() == 0) { - throw JsonMappingException.from(jp, "Empty witness 'id' encountered"); - } - - final SimpleWitness witness = new SimpleWitness(witnessIdStr); - if (witnesses.contains(witness)) { - throw JsonMappingException.from(jp, String.format("Duplicate sigil for witness '%s", witness)); - } - - final JsonNode contentNode = witnessNode.path("content"); - final JsonNode tokensNode = witnessNode.path("tokens"); - if (contentNode.isMissingNode() && tokensNode.isMissingNode()) { - throw JsonMappingException.from(jp, String.format("Expected either 'tokens' or 'content' field in witness \"%s\"", witness)); - } - - if (!tokensNode.isMissingNode()) { - if (!tokensNode.isArray()) { - throw JsonMappingException.from(jp, String.format("Expected 'tokens' array in witness \"%s\"", witness)); - } - List tokens = Lists.newArrayList(); - for (JsonNode tokenNode : tokensNode) { - if (!tokenNode.isObject()) { - throw JsonMappingException.from(jp, String.format("Expected token object in 'tokens' field in witness \"%s\"", witness)); - } - final JsonNode tokenContentNode = tokenNode.path("t"); - if (tokenContentNode.isMissingNode() || !tokenContentNode.isTextual()) { - throw JsonMappingException.from(jp, String.format("Expected textual token content field 't' in witness \"%s\"", witness)); - } - final String tokenContent = tokenContentNode.getTextValue(); - String normalizedTokenContent; - final JsonNode normalizedTokenContentNode = tokenNode.path("n"); - if (normalizedTokenContentNode.isMissingNode()) { - normalizedTokenContent = SimpleWitness.TOKEN_NORMALIZER.apply(tokenContent); - } else { - if (!normalizedTokenContentNode.isTextual()) { - throw JsonMappingException.from(jp, String.format("Expected textual normalized token content in witness \"%s\"", witness)); - } - normalizedTokenContent = normalizedTokenContentNode.getTextValue(); - } - - if (normalizedTokenContent.length() == 0) { - throw 
JsonMappingException.from(jp, String.format("Empty token encountered in witness \"%s\"", witness)); - } - - tokens.add(new JsonToken(witness, tokenContent, normalizedTokenContent, tokenNode)); - } - witness.setTokens(tokens); - } else { - if (!contentNode.isTextual()) { - throw JsonMappingException.from(jp, String.format("Expected 'content' text field in witness \"%s\"", witness)); - } - witness.setTokenContents( - SimplePatternTokenizer.BY_WS_OR_PUNCT.apply(contentNode.getTextValue()), - SimpleTokenNormalizers.LC_TRIM_WS - ); - } - witnesses.add(witness); - } - - if (witnesses.isEmpty()) { - throw JsonMappingException.from(jp, "No witnesses in collation"); - } - - Comparator tokenComparator = null; - final JsonNode tokenComparatorNode = collationNode.path("tokenComparator"); - if (tokenComparatorNode.isObject()) { - if ("levenshtein".equals(tokenComparatorNode.path("type").getTextValue())) { - final int configuredDistance = tokenComparatorNode.path("distance").getIntValue(); - tokenComparator = new EditDistanceTokenComparator(configuredDistance == 0 ? 
1 : configuredDistance); - } - } - if (tokenComparator == null) { - tokenComparator = new EqualityTokenComparator(); - } - - CollationAlgorithm collationAlgorithm = null; - final JsonNode collationAlgorithmNode = collationNode.path("algorithm"); - if (collationAlgorithmNode.isTextual()) { - final String collationAlgorithmValue = collationAlgorithmNode.getTextValue(); - if ("needleman-wunsch".equalsIgnoreCase(collationAlgorithmValue)) { - collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); - } else if ("gst".equalsIgnoreCase(collationAlgorithmValue)) { - collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); - } else if ("medite".equalsIgnoreCase(collationAlgorithmValue)) { - collationAlgorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); - } - } - if (collationAlgorithm == null) { - collationAlgorithm = CollationAlgorithmFactory.dekker(tokenComparator); - } - - boolean joined = true; - final JsonNode joinedNode = collationNode.path("joined"); - if (joinedNode.isBoolean()) { - joined = joinedNode.getBooleanValue(); - } - - boolean mergeTranspositions = true; - final JsonNode transpositionsNode = collationNode.path("transpositions"); - if (transpositionsNode.isBoolean()) { - mergeTranspositions = transpositionsNode.getBooleanValue(); - } - if (collationAlgorithm instanceof DekkerAlgorithm) { - ((DekkerAlgorithm) collationAlgorithm).setMergeTranspositions(mergeTranspositions); - } - return new SimpleCollation(witnesses, collationAlgorithm, joined); - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/io/VariantGraphSerializer.java b/collatex-tools/src/main/java/eu/interedition/collatex/io/VariantGraphSerializer.java deleted file mode 100644 index 60a4e4665..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/io/VariantGraphSerializer.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development 
Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.io; - -import com.google.common.base.Throwables; -import com.google.common.collect.Iterables; -import com.google.common.collect.Ordering; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.util.ParallelSegmentationApparatus; -import eu.interedition.collatex.util.VariantGraphRanking; -import org.codehaus.jackson.JsonGenerator; -import org.codehaus.jackson.map.JsonSerializer; -import org.codehaus.jackson.map.SerializerProvider; - -import java.io.IOException; -import java.util.SortedMap; - -/** - * @author Gregor Middell - */ -public class VariantGraphSerializer extends JsonSerializer { - - @Override - public void serialize(final VariantGraph graph, final JsonGenerator jgen, SerializerProvider provider) throws IOException { - try { - ParallelSegmentationApparatus.generate(VariantGraphRanking.of(graph), new ParallelSegmentationApparatus.GeneratorCallback() { - @Override - public void start() { - try { - jgen.writeStartObject(); - - jgen.writeArrayFieldStart("witnesses"); - for (Witness witness : Ordering.from(Witness.SIGIL_COMPARATOR).sortedCopy(graph.witnesses())) { - jgen.writeString(witness.getSigil()); - } - 
jgen.writeEndArray(); - - - jgen.writeArrayFieldStart("table"); - } catch (IOException e) { - throw Throwables.propagate(e); - } - } - - @Override - public void segment(SortedMap> contents) { - try { - jgen.writeStartArray(); - for (Iterable tokens : contents.values()) { - jgen.writeStartArray(); - for (SimpleToken token : Ordering.natural().immutableSortedCopy(Iterables.filter(tokens, SimpleToken.class))) { - if (token instanceof JsonToken) { - jgen.writeTree(((JsonToken) token).getJsonNode()); - } else { - jgen.writeString(token.getContent()); - } - } - jgen.writeEndArray(); - } - jgen.writeEndArray(); - } catch (IOException e) { - throw Throwables.propagate(e); - } - } - - @Override - public void end() { - try { - jgen.writeEndArray(); - jgen.writeEndObject(); - } catch (IOException e) { - throw Throwables.propagate(e); - } - } - }); - } catch (Throwable t) { - Throwables.propagateIfInstanceOf(Throwables.getRootCause(t), IOException.class); - throw Throwables.propagate(t); - } - } -} diff --git a/pom.xml b/pom.xml index fd6219cf0..86c8d4094 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ collatex-core collatex-cocoon collatex-tools - collatex-nodeps + collatex-nodeps @@ -112,6 +112,12 @@ ${jersey.version} + + org.glassfish.grizzly + grizzly-http-server + 2.3.8 + + commons-lang commons-lang @@ -181,8 +187,8 @@ maven-compiler-plugin 2.3.2 - 1.6 - 1.6 + 1.8 + 1.8 From dc5343c71f8365cafda50c12a810333c9609286d Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 3 Jan 2015 18:11:46 +0100 Subject: [PATCH 02/52] Java 8: reduce dependencies of collatex-core (Google Guava) --- .../simple/SimpleVariantGraphSerializer.java | 63 +++++++++---------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index bc1ee5065..f1d69a587 100644 --- 
a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -23,10 +23,7 @@ import com.google.common.base.Objects; import com.google.common.base.Throwables; import com.google.common.collect.Iterables; -import com.google.common.collect.LinkedHashMultimap; -import com.google.common.collect.Maps; import com.google.common.collect.Ordering; -import com.google.common.collect.SetMultimap; import com.google.common.collect.Sets; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; @@ -44,8 +41,10 @@ import java.io.PrintWriter; import java.io.Writer; import java.util.Collections; -import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -68,8 +67,8 @@ public class SimpleVariantGraphSerializer { private final VariantGraph graph; private final Function, String> tokensToString; - private final Map vertexIds = Maps.newHashMap(); - private final Map transpositionIds = Maps.newHashMap(); + private final Map vertexIds = new HashMap<>(); + private final Map transpositionIds = new HashMap<>(); private VariantGraphRanking ranking; public SimpleVariantGraphSerializer(VariantGraph graph) { @@ -91,22 +90,19 @@ public void start() { xml.writeNamespace("cx", COLLATEX_NS); xml.writeNamespace("", TEI_NS); } catch (XMLStreamException e) { - throw Throwables.propagate(e); + throw new RuntimeException(e); } } @Override public void segment(SortedMap> contents) { - final SetMultimap segments = LinkedHashMultimap.create(); - for (Map.Entry> cell : contents.entrySet()) { - //NOTE: we don't want trailing whitespace before an end tag - segments.put(tokensToString.apply(cell.getValue()).trim(), cell.getKey()); - } + final Map> segments = new LinkedHashMap<>(); + 
contents.forEach((witness, tokens) -> segments.computeIfAbsent(tokensToString.apply(tokens).trim(), k -> new HashSet<>()).add(witness)); final Set segmentContents = segments.keySet(); try { if (segmentContents.size() == 1) { - xml.writeCharacters(Iterables.getOnlyElement(segmentContents)); + xml.writeCharacters(segmentContents.stream().findFirst().get()); } else { xml.writeStartElement("", "app", TEI_NS); for (String segment : segmentContents) { @@ -130,7 +126,7 @@ public void segment(SortedMap> contents) { xml.writeEndElement(); } } catch (XMLStreamException e) { - throw Throwables.propagate(e); + throw new RuntimeException(e); } } @@ -139,13 +135,19 @@ public void end() { try { xml.writeEndElement(); } catch (XMLStreamException e) { - throw Throwables.propagate(e); + throw new RuntimeException(e); } } }); - } catch (Throwable t) { - Throwables.propagateIfInstanceOf(Throwables.getRootCause(t), XMLStreamException.class); - throw Throwables.propagate(t); + } catch (RuntimeException re) { + Throwable rootCause = re; + for (Throwable cause = re; cause != null; cause = cause.getCause()) { + rootCause = cause; + } + if (rootCause instanceof XMLStreamException) { + throw (XMLStreamException) rootCause; + } + throw re; } } @@ -278,12 +280,7 @@ VariantGraphRanking ranking() { Set> transposedTuples() { final Set> tuples = Sets.newHashSet(); - final Ordering vertexOrdering = Ordering.from(ranking()).compound(new Comparator() { - @Override - public int compare(VariantGraph.Vertex o1, VariantGraph.Vertex o2) { - return Ordering.arbitrary().compare(o1, o2); - } - }); + final Ordering vertexOrdering = Ordering.from(ranking()).compound((o1, o2) -> Ordering.arbitrary().compare(o1, o2)); for (VariantGraph.Transposition transposition : graph.transpositions()) { final SortedSetMultimap verticesByWitness = TreeMultimap.create(Witness.SIGIL_COMPARATOR, vertexOrdering); @@ -440,16 +437,14 @@ public String apply(@Nullable VariantGraph.Vertex input) { } }; - static final Function, String> 
SIMPLE_TOKEN_TO_STRING = new Function, String>() { - public String apply(@Nullable Iterable input) { - final List tokens = Ordering.natural().immutableSortedCopy( - Iterables.filter(input, SimpleToken.class) - ); - final StringBuilder sb = new StringBuilder(); - for (SimpleToken token : tokens) { - sb.append(token.getContent()); - } - return sb.toString(); + static final Function, String> SIMPLE_TOKEN_TO_STRING = input -> { + final List tokens = Ordering.natural().immutableSortedCopy( + Iterables.filter(input, SimpleToken.class) + ); + final StringBuilder sb = new StringBuilder(); + for (SimpleToken token : tokens) { + sb.append(token.getContent()); } + return sb.toString(); }; } From b9f1d99602e3ef3f882b3e9acf229973b3746e17 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sun, 4 Jan 2015 21:22:15 +0100 Subject: [PATCH 03/52] Site: static generation of site --- .gitignore | 1 + site/.htaccess | 5 - site/Gruntfile.js | 5 + site/{style.css => collatex.less} | 146 +-- site/composer.json | 10 - site/composer.lock | 159 ---- site/composer.phar | Bin 673891 -> 0 bytes site/google-code-prettify/lang-apollo.js | 2 - site/google-code-prettify/lang-clj.js | 18 - site/google-code-prettify/lang-css.js | 2 - site/google-code-prettify/lang-go.js | 1 - site/google-code-prettify/lang-hs.js | 2 - site/google-code-prettify/lang-lisp.js | 3 - site/google-code-prettify/lang-lua.js | 2 - site/google-code-prettify/lang-ml.js | 2 - site/google-code-prettify/lang-n.js | 4 - site/google-code-prettify/lang-proto.js | 1 - site/google-code-prettify/lang-scala.js | 2 - site/google-code-prettify/lang-sql.js | 2 - site/google-code-prettify/lang-tex.js | 1 - site/google-code-prettify/lang-vb.js | 2 - site/google-code-prettify/lang-vhdl.js | 3 - site/google-code-prettify/lang-wiki.js | 2 - site/google-code-prettify/lang-xq.js | 3 - site/google-code-prettify/lang-yaml.js | 2 - ...theme.css => prettify-sunburst-theme.less} | 0 .../{prettify.css => prettify.less} | 0 site/grunt/aliases.yaml | 15 + 
site/grunt/browserify.js | 11 + site/grunt/clean.js | 3 + site/grunt/connect.js | 8 + site/grunt/cssmin.js | 6 + site/grunt/jade.js | 14 + site/grunt/less.js | 9 + site/grunt/uglify.js | 6 + site/grunt/watch.js | 14 + site/htdocs/about/index.html | 35 + site/htdocs/collatex.css | 1 + site/htdocs/collatex.js | 1 + site/htdocs/doc/index.html | 444 ++++++++++ .../download/index.html} | 142 +-- site/{ => htdocs}/favicon.ico | Bin site/{ => htdocs}/images/aligner.png | Bin site/{ => htdocs}/images/analyzer.png | Bin site/{ => htdocs}/images/tokenizer.png | Bin .../images/variant-graph-collatex.png | Bin .../images/variant-graph-schmidt.png | Bin .../images/variant-graph-snippet.png | Bin site/htdocs/index.html | 12 + site/index.php | 47 - site/package.json | 28 + site/templates/about/index.jade | 103 +++ site/templates/doc/index.jade | 828 ++++++++++++++++++ site/templates/download/index.jade | 114 +++ site/templates/index.jade | 46 + site/templates/page.jade | 30 + site/twig/doc.twig | 767 ---------------- site/twig/index.twig | 46 - site/twig/page.twig | 38 - site/twig/project.twig | 102 --- 60 files changed, 1855 insertions(+), 1395 deletions(-) delete mode 100644 site/.htaccess create mode 100644 site/Gruntfile.js rename site/{style.css => collatex.less} (54%) delete mode 100644 site/composer.json delete mode 100644 site/composer.lock delete mode 100644 site/composer.phar delete mode 100644 site/google-code-prettify/lang-apollo.js delete mode 100644 site/google-code-prettify/lang-clj.js delete mode 100644 site/google-code-prettify/lang-css.js delete mode 100644 site/google-code-prettify/lang-go.js delete mode 100644 site/google-code-prettify/lang-hs.js delete mode 100644 site/google-code-prettify/lang-lisp.js delete mode 100644 site/google-code-prettify/lang-lua.js delete mode 100644 site/google-code-prettify/lang-ml.js delete mode 100644 site/google-code-prettify/lang-n.js delete mode 100644 site/google-code-prettify/lang-proto.js delete mode 100644 
site/google-code-prettify/lang-scala.js delete mode 100644 site/google-code-prettify/lang-sql.js delete mode 100644 site/google-code-prettify/lang-tex.js delete mode 100644 site/google-code-prettify/lang-vb.js delete mode 100644 site/google-code-prettify/lang-vhdl.js delete mode 100644 site/google-code-prettify/lang-wiki.js delete mode 100644 site/google-code-prettify/lang-xq.js delete mode 100644 site/google-code-prettify/lang-yaml.js rename site/google-code-prettify/{prettify-sunburst-theme.css => prettify-sunburst-theme.less} (100%) rename site/google-code-prettify/{prettify.css => prettify.less} (100%) create mode 100644 site/grunt/aliases.yaml create mode 100644 site/grunt/browserify.js create mode 100644 site/grunt/clean.js create mode 100644 site/grunt/connect.js create mode 100644 site/grunt/cssmin.js create mode 100644 site/grunt/jade.js create mode 100644 site/grunt/less.js create mode 100644 site/grunt/uglify.js create mode 100644 site/grunt/watch.js create mode 100644 site/htdocs/about/index.html create mode 100644 site/htdocs/collatex.css create mode 100644 site/htdocs/collatex.js create mode 100644 site/htdocs/doc/index.html rename site/{twig/download.twig => htdocs/download/index.html} (81%) rename site/{ => htdocs}/favicon.ico (100%) rename site/{ => htdocs}/images/aligner.png (100%) rename site/{ => htdocs}/images/analyzer.png (100%) rename site/{ => htdocs}/images/tokenizer.png (100%) rename site/{ => htdocs}/images/variant-graph-collatex.png (100%) rename site/{ => htdocs}/images/variant-graph-schmidt.png (100%) rename site/{ => htdocs}/images/variant-graph-snippet.png (100%) create mode 100644 site/htdocs/index.html delete mode 100644 site/index.php create mode 100644 site/package.json create mode 100644 site/templates/about/index.jade create mode 100644 site/templates/doc/index.jade create mode 100644 site/templates/download/index.jade create mode 100644 site/templates/index.jade create mode 100644 site/templates/page.jade delete mode 100644 
site/twig/doc.twig delete mode 100644 site/twig/index.twig delete mode 100644 site/twig/page.twig delete mode 100644 site/twig/project.twig diff --git a/.gitignore b/.gitignore index bccaf3bc4..60467c5b1 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ target .settings/ bin/ site/vendor +node_modules diff --git a/site/.htaccess b/site/.htaccess deleted file mode 100644 index 92c8323ef..000000000 --- a/site/.htaccess +++ /dev/null @@ -1,5 +0,0 @@ -RewriteEngine On -RewriteCond %{REQUEST_FILENAME} !-f -RewriteCond %{REQUEST_URI} !^/apidocs -RewriteCond %{REQUEST_URI} !^/maven -RewriteRule ^ index.php [QSA,L] \ No newline at end of file diff --git a/site/Gruntfile.js b/site/Gruntfile.js new file mode 100644 index 000000000..bde87230d --- /dev/null +++ b/site/Gruntfile.js @@ -0,0 +1,5 @@ +module.exports = function(grunt) { + require('time-grunt')(grunt); + require('jit-grunt')(grunt); + require('load-grunt-config')(grunt, { loadGruntTasks: false, data: { site: 'htdocs', version: '2.0a' } }); +}; \ No newline at end of file diff --git a/site/style.css b/site/collatex.less similarity index 54% rename from site/style.css rename to site/collatex.less index 859375ad1..5d5a79ca8 100644 --- a/site/style.css +++ b/site/collatex.less @@ -1,5 +1,8 @@ @charset "UTF-8"; +@import "google-code-prettify/prettify"; +@import "google-code-prettify/prettify-sunburst-theme"; + body { width: 960px; margin: 1em auto; @@ -55,42 +58,45 @@ a:hover { color: white; background: #060; border-radius: 0.5em; -} -#header h1, #header p { - margin: 0; -} + h1, p { + margin: 0; + } -#header p { - padding-top: .5em; -} + p { + padding-top: .5em; + } -#header a, #header a:visited, #header a:link, #header a:active, #header a:hover { - color: inherit; - text-decoration: none; + a, a:visited, a:link, a:active, a:hover { + color: inherit; + text-decoration: none; + } } -#menu ol { - list-style-type: none; - margin: 0; - padding: .5em 0; - width: 100%; - text-align: center; -} -#menu ol li { - 
display: inline-block; - margin: 0 2em 0 0; - font-size: 116%; - font-weight: bold; -} +#menu { + ol { + list-style-type: none; + margin: 0; + padding: .5em 0; + width: 100%; + text-align: center; + } -#menu ol li:before { - content: "» " -} + ol li { + display: inline-block; + margin: 0 2em 0 0; + font-size: 116%; + font-weight: bold; + } + + ol li:before { + content: "» " + } -#menu a, #menu a:visited, #menu a:link, #menu a:active, #menu a:hover { - color: #333; + a, #menu a:visited, #menu a:link, #menu a:active, #menu a:hover { + color: #333; + } } #content { @@ -112,16 +118,19 @@ a:hover { margin: 1em auto; } -#project-team li { - margin: 0 -} -#project-team a { - color: inherit; - font-weight: normal; -} +#project-team { + li { + margin: 0 + } + + a { + color: inherit; + font-weight: normal; + } -#project-team .institution { - font-size: 80%; + .institution { + font-size: 80%; + } } #license pre { @@ -137,16 +146,18 @@ a:hover { margin: .5em; } -.figure img { - display: block; - padding: .25em; - margin: 0 auto; -} +.figure { + img { + display: block; + padding: .25em; + margin: 0 auto; + } -.figure .caption { - font-size: 80%; - text-align: center; - font-style: italic; + .caption { + font-size: 80%; + text-align: center; + font-style: italic; + } } .float-left { @@ -185,24 +196,27 @@ a:hover { width: 33%; } -.toc li { - margin: .5em 1em; -} - -.toc a { - color: inherit; - font-weight: normal; -} - -.bibliography dt { - float: left; - margin-right: .5em; -} - -.bibliography dt:before { - content: "["; -} - -.bibliography dt:after { - content: "]"; +.toc { + li { + margin: .5em 1em; + } + a { + color: inherit; + font-weight: normal; + } +} + +.bibliography { + dt { + float: left; + margin-right: .5em; + } + + dt:before { + content: "["; + } + + dt:after { + content: "]"; + } } \ No newline at end of file diff --git a/site/composer.json b/site/composer.json deleted file mode 100644 index 2348f8364..000000000 --- a/site/composer.json +++ /dev/null @@ -1,10 +0,0 
@@ -{ - "name": "interedition/collatex-site", - "description": "collatex.net site", - "license": "AGPL-3.0", - "require": { - "slim/slim": "2.*", - "twig/twig": "1.9.1", - "dflydev/markdown": "1.0.2" - } -} \ No newline at end of file diff --git a/site/composer.lock b/site/composer.lock deleted file mode 100644 index 8f27f81e3..000000000 --- a/site/composer.lock +++ /dev/null @@ -1,159 +0,0 @@ -{ - "hash": "f58f60102ad1dd291d4785abe3e0f42e", - "packages": [ - { - "name": "dflydev/markdown", - "version": "v1.0.2", - "source": { - "type": "git", - "url": "https://github.com/dflydev/dflydev-markdown.git", - "reference": "v1.0.2" - }, - "dist": { - "type": "zip", - "url": "https://github.com/dflydev/dflydev-markdown/zipball/v1.0.2", - "reference": "v1.0.2", - "shasum": "" - }, - "require": { - "php": ">=5.3" - }, - "time": "2012-01-15 19:36:37", - "type": "library", - "autoload": { - "psr-0": { - "dflydev\\markdown": "src" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "New BSD License" - ], - "authors": [ - { - "name": "Dragonfly Development Inc.", - "email": "info@dflydev.com", - "homepage": "http://dflydev.com" - }, - { - "name": "Beau Simensen", - "email": "beau@dflydev.com", - "homepage": "http://beausimensen.com" - }, - { - "name": "Michel Fortin", - "homepage": "http://michelf.com" - }, - { - "name": "John Gruber", - "homepage": "http://daringfireball.net" - } - ], - "description": "PHP Markdown & Extra", - "homepage": "http://github.com/dflydev/dflydev-markdown", - "keywords": [ - "markdown" - ] - }, - { - "name": "slim/slim", - "version": "2.2.0", - "source": { - "type": "git", - "url": "https://github.com/codeguy/Slim", - "reference": "2.2.0" - }, - "dist": { - "type": "zip", - "url": "https://github.com/codeguy/Slim/archive/2.2.0.zip", - "reference": "2.2.0", - "shasum": "" - }, - "require": { - "php": ">=5.3.0" - }, - "time": "2012-12-13 02:15:50", - "type": "library", - "autoload": { - "psr-0": { - "Slim": "." 
- } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Josh Lockhart", - "email": "info@joshlockhart.com", - "homepage": "http://www.joshlockhart.com/" - } - ], - "description": "Slim Framework, a PHP micro framework", - "homepage": "http://github.com/codeguy/Slim", - "keywords": [ - "microframework", - "rest", - "router" - ] - }, - { - "name": "twig/twig", - "version": "v1.9.1", - "source": { - "type": "git", - "url": "git://github.com/fabpot/Twig.git", - "reference": "v1.9.1" - }, - "dist": { - "type": "zip", - "url": "https://github.com/fabpot/Twig/zipball/v1.9.1", - "reference": "v1.9.1", - "shasum": "" - }, - "require": { - "php": ">=5.2.4" - }, - "time": "2012-07-22 03:25:41", - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.9-dev" - } - }, - "autoload": { - "psr-0": { - "Twig_": "lib/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3" - ], - "authors": [ - { - "name": "Fabien Potencier", - "email": "fabien@symfony.com" - }, - { - "name": "Armin Ronacher", - "email": "armin.ronacher@active-4.com" - } - ], - "description": "Twig, the flexible, fast, and secure template language for PHP", - "homepage": "http://twig.sensiolabs.org", - "keywords": [ - "templating" - ] - } - ], - "packages-dev": null, - "aliases": [ - - ], - "minimum-stability": "stable", - "stability-flags": [ - - ] -} diff --git a/site/composer.phar b/site/composer.phar deleted file mode 100644 index 352835df19ce6360e3a35977dee7b5dfcabc44e9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 673891 zcmeFa34El0$73$7ZcS)*hw=dPz)m5r)*;OT3s;agrmF1K&C8aBs zDQBjtTHS7r1rI)e-(qvv%q*9=j~N60;l=D?7z`c;SYWvsV8N_0_kGyI{9yn8cf=Rp zk*Sobn`OWS?ofW;7cXACc=6)B7cX9X?xE?OUUz!6*`98+ZzkKD+hZqA^S{$~k0o~} zmp7ZeWWCvHBq+CQ-G0(pPtJC>wmZE>cM4zdqw3m3a;e$sC1>i5?pCecPENFI`se-r z$5g#>ir@0zi#y$VGkKu1vC*uxJIRSxO|9DB_xBpD`qZj?%CFCNx=DYtk*x2uTFF{x 
zd$-%%*z6~@c0Fk|*Bb2}@M^Dj&}hHeX&*_pTa6l?-E1~)u{x{zcm2&;pPguR)@uDm zJ;5)&UhVYzoh|mN?_FiLrpLw>Hf!CPnXTG3|EN~hywltGS(!LKR&T5~+l^}F?EJ-r z`K8&#m2b#(yA=KM9l-b?085*lTM+^sLxAPf8^7)zuYcE|`d7B==t%_OSDvOAu)KQz{1@#B z0qnBg+=w7NdRQT_e8zVsKkg7JgK(H@_xiO~D+2MFF$KZ$Z9lv5-43EU2*lM|tC{P_ zgHKf$EPwsuk9Qr$Ks3%Zwj1qwFGAz}AEzK#&OGg{|Br(hNX?~2t-H1vL0GWq!*b!# zhd@3cC#dIaYmY^t!FJo?bz>i^}-?s7PT zN!zdWcjCc$`Nt|zSl++>&XW#d;8dAwH{+@Ht5aH^<=f7leUaCn$W!E8=T@6kKrgPY z_0nJL#=DKIEU!KA1>fL63geqrn?8^AH2p~^&v~|@!t(iVdFERk$}m)xZnm>P{>mmU z%lCiL8-Ch>939f11@P-aZS6*FqcQsk){3=$r%Qys^fMG8mY@IV^#>f*JwsqQHWzDc zfD4dWTvLYHA3m%iUVLx83RXXCGN>Y*R&G z-~J9YxD>Md=@*)zLv}e2YL^ce`L=FJ=v{`aRVU08n1yT0Uy9ni2I zeO_}r1xswc^Wzi<%Z2ZJ-=$H2JlO1KfqXHV91f)+gW8OAGv;3Vm|M6QKVu7F2`XExbwwwND?@nMjB4RARWwQTC4yG{0(rPl> zd%L}UV=H|~SoeLj!eaSFKl!0&JFFKB>Ey%RX1})DYNU`dbL5)M5tjXnANk7;vcTDC z{b@>o^d}Mf*iTZ#SgyW%_D3B`fuYiB)9Ihl?B6y4&GIk5^#i*OsIcUuwJBhy!4D7Z zte)$FLq&@G(`PF}EPwjUQ$Ew-+&hF)2Iqlhe|2Z=Mx&oXI|-$kW61L7-uo5ra%kYS zfy`ylQXr3+z-IZG*S5As1wx@6`t^=aR!msd-}&pG=s@=B7Z)J`k{SH7ra7|w(E0cL z4~JHeB{^zCo&=0O&br02ee{dpr;Y!3oIv{5ozA2gxF z@@Kzg>kl2+eM5kmI@zd~%;5iqdK`V0pZ^u(zvl2>Gz8vKAKL!bka_ndHt$%z@E)5er+s93eeMSWUk&SHpc$2u zGI8}bBOJ@CV_&`EeJm{HDK2JIHv5shB7Pq*xxn)0zU%Ni9MlVkj6uLJ1)t&mFE#X7 ze(Kv^wCym5Q4d(b!@n1@Vgusr!Qe|_Fz0^5P&OJJVeglDGFa@^iyxUrnM zeD_Z|oM8lu;Vd?&5EG*2e>#IQ zlQP!Xf8n*i^%4g)Op`FCNFj10A2ilr`KM2rx#55cYn#;;$mcODtdb zVC%ZWD5!uAg9b26K3PQcZ-S{Ad00O7)PMMO2UAen9Lxo^98Ev9ry1pR*7%g=BdgE; z5eHdN+j1aZ-0QRzIuZMjaTUu$pZ~hY979CZ{Q392HOSzJyrlA~R%VF&S`(Wr+n@T?4}1Fs z_k{!D?6~Zmc!en``MKLY0*obCTuPEPY*=;HO7~nr$S*PM>6a z$@2aG_@&?MRDc?l)tk=N);47$;$2VEF=YApXa1L?UgNp>8ma!V*ucgKtk&s;4)&d( z2S)oEUug)ieBe{RA?Wu7p;`iNai`T-YKT(v?`{2I`QqKD2kZR#BVe^8$UU&Lky(D; zVu-Q)g2S&4E}O#iFG1{FW4#8B)9!KvA2ylK^6UTbde}WEO#f0~YfVb19s#}dnM!?@ zr{D5D<4*liup+r0uJzY8FLmlNlz(AUfaUe^zYZ41@sWv!tseMYwt5-L9fl8_O9;!a zTDUK)1BE5Gq_g6igum6ClFu@U!}87l_}gFV-MxPV4CkSy$Oh>2m3C9NQo{5*m#*ne%`QW`N);et2t#( 
z?W+-koq-&COy05lw_&ese{|7Fl>z+6HltX+=i7hls~qK*>)l2}HkygJ57-Vghzy7|v{$Gb((6~!jd!f^5k?;Q3I~4}Yiyyfi_MAt7 z!6{4_PqT%TWpnb~!ApAI2z@lE`Cy~X`Jo#d^Cq8IuAKPBU=xlqWzRKMcQ$mmiQ4N; zfn)i$|NDD`;Wf(oV5rSueQ?a4W8hdm{8L5ql~E57uE*HaVKcf-E=x{OgW5 zw1Q1hGJa_n|HJci1X-@#aVFS;BduB3f^76;&3e7bNtVYx{rl$}nNh%);3dJ=KVGq6 zx%~L6KEc84i$V_~AYzlye%I*D^7Y-nx#`dfD-~1}>~AwF0>x5ir@Pk3OqDM;QnNht zRUa8#PX#tNKs4K8U_|C)o~m7A`Nu!~8-MM{d}@C1&&w9wF6(?C(K;;O_alGyPOpQX zS&imOzuAhmAy_wdJ%7(@-+avL7J@9Kt~%sd@ApjmVEM~0`5!;w^)SOr+QitcHF{Vx z8*A7|7Srj~#tJN7|AzN|j{~@?6u@F*tJ80!E=i(L2TdrMSpMWo|2ilJ1l|aA`+oyWLLrAw-+B z)Pj7DiEftfxc{}k@3jjvLOZctf8>J3JI&P_?LHJ|1@nBf0a?E1`9IuoFrQY61U4T? zz?VN=yUy|#|LbSJZ(t?&kFwG?e6&_#`B!g!R#GF+8leWdqFe)Q^6}*E`bV`L?X07E)c* z-!HQIEPw3VfBH}Ns&5e*q5TfpA%C-c#yg)A_TC1yZ?<2k|Mx~KmbFj*zK`?v;d#jp zSKRS)&E7WEZe`9tc$%WX@;_|-W-t+}c@VDmb*C?NI@{qF$@n~DJC@(~!dD07udrN) zTGJOB{o1y={lY_n`Zkk1EWi70>!YAUr)vhr$39ED#PV|<{=yxnN@3cDTGJLI79J3l z-?d(`JT-IdJtKg*&|K}-y09oEQc4N@c9VoGf9DVW>{lGj)L;TzeX$mzo!aAOeXxAy ze_Z@Uhg6tip?&PP8zz~x@F$FISw6J%=5N{`pr~*EWD5<;=l|p{EIFV8|6;E85Ue z4yQ2U9x65Me`M6oPl1lY>B#cIH(eb8%-L40*SlET&Vc!KllLq?VW>}|k(V5iwKw14Yk6+Fw6Kl;-l^zEew0Us9X>F`%Zu&e_p`@-`2-<`Vb zeVM<##uj&=O>H6eEs76&i#5gan?K{%X1uA3x7XB#&PH=B)6iF0LoAaY_`SdMhOXRR zL≧YtWzSnFhbfq%+HJ`p!$!-r&Qx*Pv}}o#}4uY+)K@n*0vyBg>!v(R)JJz(WUV z67#Zb{``CE7Rz^h>$zF)R!JLcwptYDzOMtONKG?R)&8asc)HmOEGPfyOOJaS=ZgIp zy^RR|Gc3l5hHq!-iJQP zjy=^RHOt+n&V^8ir6;~kHh0A=HI|Ql$8QEBVEKt}E+cHe!jylOfA`1l2~M(0PiVXB z;a{4K!16Ob@0~AjPAQQ{d$Z)gLCkb6-tKsA>+{22!2Jj5T*UtD&W~MC#%K9`zw@8M z=I4QC@f>v<=-K$Mi&nPr^~QrNUvm9Vf--g>{G&Dq`-edK)bW?D-Jh{W-=`EWe=t7a@%3+`;<*^4|un55Cvu#q8~L0^!V|7r*dS?FGxhmo^(M*;n|F=9FRCe)s!YPMil00#)U` ziG6bDBeqJi{K=2}v~O|f2jWnJ&VzCKCtG+M@T4xw@=fpk=CAM;7HDD=ZlgyfUHN@e^jJ=P#+ySx(SfF( zbtOYMj?QsNCXNovh4Y_y3T?4n#6*CMkZ3i zJ~s6n@6GH%Mkdo6i%0lHlT<9<@`@Mry@3OA7B{K~Lr$Y`OQeBBG^gRRag@qC6c49iFVw-1gvo|pFz(swr*8QtGp zthG0|l>@86U$DIm;~$nEn7{mNZx0&Oe!Jjcr`Y?Kn9s6AC7|Y*Sxf+)65?f*92A7(THU`k~h{;BlFa7xsgcB*BcN=ts 
zCEwbqH_qYIIu8c!7CZT^)+)>2di0f3K6)i{XXH*6TlpcIp)7y)qn{Q+9k1R--y~2D zQH%>~-4d?+b{jmF?|Ypw?6G5Z|!#2@*GZ(4`9nFYmVhBzp@#o-R*Yn^6qv?=a#KCmfK%h36ZR~ z!zhc`yhh+GPD}bU@cB=%@&ap-ME*P)}5MS_cAj1atOQ%WD^e5Z7%NUa7A8yva z&xftVB;SikQoz%fcD7a<-Sh!l+O?Kg9)9I>{+qW{vY_vyrED9ovo=_s`nu~c^EMdM zwa;~HX}g6fg@_D2a`Pb@50*c;I{q$)d|w0^IYy?r-Oy^n=g;$`ICEc@J!`~|_7Oy< zsXZh!1TmsE$nu|F{SCp%jewYd=^pU7Eu98if7cNO$nsgg+<2ZNcyx4t7qH(Un^W(8 zrov+R-A{Yh%N-W{{i7mB#9*Iie72u$;+Ey>zyA}0Upi8=Uj^)An(M zp6v0GZ0j&;#!lq)vV?sEj#Lts<>oto{nd_pWTqde5ef#f@h{nW&+@f@cu$DUjLf(L z!PkCC3r412Lis~8>leKTx7&hFO1AK@F*D1lV}JF5C#r?{Ri-Ur3twf%F3Yt)_`HAc z7NXVQKvYVHJ!ABY8_8IH)W)y;i8mLi3%A)EJM~Fp%33T>eb(=N>J!&Ws|C85Z{sRF zCuCrjzj@-@Lp=BGGVpABXN$e~cSb9g z6drjrGOudkcQGa#{xMrC}h{sprF)7LN$3Oaa=l%0a`uUpe7EgP<6>2w# zHo<-^LA=|D#qxE3|3`0j5O)p(vBoWSBzPhKs>#HGTR3?0aAt+(K5WxFgNnxFq0Z};h_%A{Og@As?;mhXM{pS;t*!@RxY z!ra-}OG~rQH75+qr+v}APxDVSpVwIWZ+)jNT#VdVqTkQ0&pv{kqF!};9V_X?*kfbO z^`y$bR~qq)iR3XPvFh)1+g>w~8pp>T$Nx#C>Y?$LoOer3`Zodk%F4O9#g&z*%JkIK zw2{S!lS5FMm`LurD;XICpy2;=5Qt-AZL^b9Uf$VBwy;bky+%LT*_Lcy+nTT`sY|9G z9Mx?$dPkD(PFvotcUrB^Ev$w}H72Q{dNY-&g@+baX6G+Vja8rqwvx$Svf1x%BYoFK zqwj+=)#+|bH$5OGc@5L3#WnGhAT?TSm5Y z0LKFJhg@vSbMr_Vwu9tn<~CS9%LWZbXU@6FQ_?};<~_B?TARNwU74mBqJc)M{6MnD5}?00uW@?~g0 zd1_!4aFBf(aS!0Us?%&&1KiWe;pO(>WG1N~8DLd-Zz2f@=wvS$vJQ@r4Q$zv1@=97 zgYc`3%>@qYSPFUnh@duw{-I_M;<75BA&v#8H3K!i)o52yG`Cm!wGAv6%0LkTCbUb5 zz0p3P#!43gx7M#7UVh~03lAIW6GxKuTC3MU9pbLQ3f&H$aH(mbW7_S>qy!%yS)#$- zXq8jRQR_+EM1)l(!RW}hF-lpCqR&Goh)`6+mOvkTwwdWvg;^sX_h%8+*eOSS5bxG{ zH>%@upaSYAu!V`rgLl#JXJ>T~3zMfXhpLsjq+hE{Or8=*N(q-wC4hAxOL!C2A->m} zmUD9eQa+vbfm9aiZ6SE3T{nFc(>)^EO-&Nm>4Jca@Z8$Qx^q&w@OEjMX>&04WhkUg z52HF7cnC3;XWG46jc(d4o%!T;^IB9ZR9POLyL4{;;U$X1rMdY_D-T?myKs&g7MK$5 z1N1tmW6R6ASW2cJRdJxHN-H9FdpoO`1l5(5^T!Ra^mOYSpkeNQwK)OsA2Q_!tZWlsntJ(w8x1h2% z)&w}EKn8+LKE>#}Y;)WJ$C?=9x4`OhW zSQDQLp>b&}kfFDY;EVn`It7G4Y}CO&@_^6^DB_RfW2h#Zu?13{I)8$N7OD$1{a(e8bMPjqo%fhOnubv_A5)5WhIth$o&*nxM0uTV!7vlw~ zb1et$`skf{vJpv6L!z@^1suUwGLSYbx zHhU#H{~k3WQzA89hSn)4k*exi 
z5j79ZqL3fWSzE|C>!4u8ER8)wt%D@r275W8Co|x&Jwv`_nZK8DgdZm1o!c^(e@#g>X zU++Cb2iGmMRG5^5>)z(VPH8pr;J3<(a~GnIbo>4l)?zx<%bUR8B}Vu_}+Y#c5y_y*zjGY!h! ztHmC|b>k;CV{Oa5s_^k8QTM9a3>Z=Wm^=jrh&`kt{mG~1b|p*&fh4dxD%JH4ZVITv zI_5^e7LpWncWJ71Z&lzAicyDA#w88vX*{a2&+^jz;>9x;prxkOjwFZZnt+KMI!3VR z4@RKtlF8Bvas%PtNcN-h@5Vo+Jx3Y$dAXahQrh53X>gf|FU-6pd_G{=h zoTyISb9$o6Kku1%9Ohi`%)_HAht7Lq*a8?v4Cn+erMUgQj|6bGHup%17jxGpEM;ee-xn84_2zw2Q@AT-` zE)SwADiPqIxQWe#ghs#YkD=R?6aKi_(FX;|0eo@z<%)2|<8Ni_uD#a4l|d2B3DUhy-VDJ;HDv-7aYh@DCk^<)r6unP z;q^DWom(P&mh6IStAU8?IjS)o6la z8?h6SO>xosqiHG_g>v}=um46)wpd1;+|!VKV9@hr#K)5h_x{knh!a-ifesUImeyoB z%AK04?q4AEFUn833|ob%2%)1>p1~fGQt9$YyqQ*o+ruP_^;0fSKROIZfOz69Fjhvz)1Y=>+EBI(ZN-N92-aYj59Sf*u_MT+h0UBy7nIYh6?AX4*@DeP zW2EQs^(}0UGZ(JRu0$ld2ug45Y)yu033TtyEiC+1Y=$990)u9_QZ49d%HBpDn|g#V zM{G**fL@Jj`6hdkD-YjN53{k#B6Df}(kwv1rb8T`kB`WDWg2H0h14U(+X>XfUK6K# zj-UeL*BG#M_3RRUsf7g6f}NrfcJdfnb;PTEM|tGuapXL3!>yG;75>yjb&;)JMeke6q;5(VR^VCoEPP^Hj5n*mc` z2r-b7q!7HryFf7yIOh-=R8I^l5Rj?fMK1Z_R7Te!`Q-TD2t7X65q(>Y}A%t94vQ!6@bg5HOmO&xaFc!sIz!2GtWMPR!0<9=i;lI>DKBDl7X> zAl?xVgV~^ODKSbUzX*G{EF$@@-vx;~l`vd$t=6XDku^vtyZt6p0_U>D)u)$W)3Q&VGO+S6jVX14EWM-oc; z`qr;?VQ@@_&Lr0ik?TgKu`%x*;+;AsNOy&uT)v8_6-{7Ww|00o1GWV4#x^EIa07lH z$*4Fsb~&A%J6zZ}SB%66ZEL`E!yHzboo$Mi;H_F4@c;m;$p-ch!03#C$3$l#>Z4wDWl z^2ef~TthPNWOX-zNe%jwF#Jx3tTKr?7cV-_pX;sK##C|!WXE#B3xLOJ^_wPHA_1op zo^UlN7-H5u3K7px+7%Emo&f^rbS2YlW{gt(N7W%1*&Luqoa2UK6*2DrC&fXoF%AIt zle7IzYjQa`X;H~ryXkX=CFe2XoSqmHV-hsC5Zj>ssSs@JGr*PBX4dGpS+}4wwd%I6 zndL`<>lF`*S{)>jyJj=*!~CMGD*eKEEcf1*|Dfg!c%NmUY#RS`ifWnL0BcqU&rgzgnDlZUbfso=G+W$cbdeZd4OMGh#<3csrL`As?M7} z52Jxcu1gZ0%wo;5j5p$p508;wlwno=(2zUVxVfN9OB~FXQu_+jq=1fT6?ztT&I|&q z1u4{aXGyG?_!%jQJMJ=Hfw^Uc_J`tgQTqvx%rP=EqeAYi*j2LqH6HgI&IR7ED-nI+ zxqBVM1C6HYt|9UZexvAdQdvAJ&S|>2EOs|?bHsOWi%uvQ?zYl;E>klR&gLim5#f>^oH*zHvZqKN=C5l>mq zU1m6y3}Rg@LLCOVsdT9KFyr~C=7!;%M$NL4TpJ3iU_S^;13Zc}e5L8&j6M7+81sjP z_(*HJNrflgR|H=jNfyprerWE}`T3REmtDpo92kifUzf)&HU2~EyH%p?9)kjCzRWuX 
zh^-PRV0ly!1S6qO1sD}B3l{dND{6sYjp+JMv$-o+P$VKM3iv{Axd{~$D{^Lz!b*po zmAX#;T`^%qJDf@$f__6?kB)4~1}auH^$RS2>;^xcI;_kAvBC@SY~)S2s4s+*Y#d8w zUG4+toDHENzE7YBPT)sYD3hn8qzns?n%$le!zCDBOEjQ(6l9AjOkL9!1n#w?*RNIh zO@;eSGBF*53qhQ6vbefJ#hd0E&>5Pq2$TockXbXn4KJjTg$_w!?tcT$6dUS&>VmSjB;}ZsELSTwF6fG11lVPnH$UFP8S~-b{kB>QClc(C;s%_!m zLLMp(;gHwiP2aN0NOK|Gy@8ckcg0=XJ)BGdx5Bs` z*8LdckUagTc+A8;2?HCAF=h^EWGHiO7O|18*dkLgN)i#@ctoZ4;c3RrDzWb+m&6M^ zl(V&4I0Xc3aje=ywK^3(=Opl;RQbE7H ze=$={;Z2n<4YdWC4>L1hT5LKyzFyN$#^C$PgxM54J)&GYK!=eLEa0Y^j@!XBdpB!0 zVMjH@{VFb&`c%|}aY%MmF$zhC7NF|)cO7F;nDS&2stKi9dqstGxYS@@dh350miY>8*KS%t?_hY%C3mgpegTaTp!#)1mmFvxl)^FJSrgJ@8zLcVk>h2^ zAyc*Ebll0TOl22lRmAaYn|KpZoltS&w9&qa-HiE*3-e2}iz|!s^OyZF%}MCJF=jbV zWc{(k7LNKB2C>QisD_C@@&kfL+G(n0d~cK!Fk!13+>%pfMHeJ<7!|1v8Ic-YH#`EX zYjlND%qu(rOb_GhBK3R6A~MEoS|D`64615!2#blES42e?;i2LzMRmYux?ye$aUjLy zDIDsq#z8#C!s2%bE4Z3-kXVj);K4B-K_5FLLOvJSI`*2^Pyv=BbWm{8GjO&Hz?JL1 zHEOb5#Dzi9(n%%9=zX7}ItAHxB$?{rI+m%41i=^F0XyxkqiZLyq*0SR_3|U_6HEXg zKdwV@J7Uo-u-0h^QgaA8@Ok+mGC-eCp28{j{$}+k&ipy+)84^P98vYfo4+=olr^w} zQq|Bme{!zFtY)a;5phAmG7-)mP=>EqdG_j({H)iy;+n2Z;g8KpEYGzqx~eNvTOEi2 zxV7ZR>Z4sLZ6%oEQXnUr;FhK;7WEP0gSy$czmc)*cXx#fRPiZ;QVUC{fIjmL>_PFE zuJk*;HYt6;e>O+M@6bVtVED43JK_)Gp?cmr4r}YEp7V`MksHEik=j>?)`^03j4L2Q z2wK&Q$KhkewJAw;IppC<#XVn&IOOS&8kGJCEg^m{xFlC>5)|dGL-3Z6TxbzCm?l(4t(liK!a7r$#raRCfRKfX~fV$cL*$>tL-uaCA@<#N!e` zw*V2?k`1UyH>Ir4P*gH(H8T?dGyb-Ki2^X+J96c;POYcu5cRD`=U4~^FYW+2%sO)K z5ClrQwaeWlUB+OfIBdku3qKI__THfV^ARBT8m)Cgr2;1;C`DMtkD6{A9uW^{Db{pE z?i~{{+vWyx4>2%3;Jr(!N{@p%LPHlMQesdD0~s&GDi|O@goXswiBSSU#v12^?ON`V zkF_u!klt{bIY%b7o2W{}u=D+-Eokb)$jm#_qKxOlT%=bmk_q79H(f4akb>Hxi~EvCx1GHetujE{)ddx4AJ9yfUVg(v!DeKed)E!9 z0J^{>G=&EH4S>`}G!z7A!A)I&0V^~EMs!-=q81o<;`V2g`K6?NpH0H^8Lt%jk)9B6 zA_aR16g7d8>3M2f5*Z^!Zar`^7+;ACD{SomKpr(8WOH3gUx-`~7S>dT8_?NdFhkuv zf#z7p4+!CLpb1^BMYK87bA59b8b(@SJ5j8>TLlap%+O{FmnluMa1G)Z?%G>62S zpdcUJKw)-+6mqNw&7bUZG22t}(dxAr5yImn%ejV1_c>LVfPg%Rz!?g~ 
zU)W0y)IwdZTbUm_C3FWGt3|$vwFy-jcB=;M9$_Gl_PCDUP4KzKa%k(V%BukxBkE%0hf;-#tT!F^f#GI+N(FjB$XZ9i@)XK z5&WSTd#{S4XOU1@P@5z;(i*Z6htGxGP^Ocr1kVBQttpv$=EHEz(22-nF?j#q@S+c4 zCS=l+fpYy3qOU)VaaU6yIx5#urVm0{ER=W zHvV#23w=zT;+(;6p)03>&I^IZLm-R@7zhvwrAg&@yac7s57<~S`JWrTUHPH4|hjV0aIQiZoZ}Q z^h@qz1>;!U>ROLGRjGCAPNn_P7WRZ}@9v+r^sOUXWB&ld0yf~eQ>khGDA5+uZOY6X zSGJivg@}JR78i8V!bKW1C}+M239YQ&<-eJ3FkVXX9O+)h%tw=_a4Oy!2_i(fp^xn} z2oQ5UJ7i;l83qN#OMb-qBf|`fRsksIbZ8$-nK-C}LY%UJprLZd;vI||5sSq7*RMHs zWTMmQTb;N*K|i}q+`{(m(3)9sL4xb}_d7WA9^(?9Y=YW$c)5#n-l{$nwI>&PSfkQ^ zB|Z{pV8%98E=6jW1=mbYl|OM(<(%R)zjbe7Znutg}0dI|BIkeOBco0;g{ff(6hmIt7IaKG{%=j3W$X>DZj*`g_ z@m~ib{`I~lE(w%I$at3ldW>FKTUWia+$4$^T!h}w!yhCJFHNZ}lxS)2D9 zoKP#f?&K8Em~XNu3fG$(L0MdJOlP3SIF&j_QZ$!3;49V+(w(M}G+47a}A2t}F{$)lr7UB>&MPaFnA!7L$Ywhtz zWsou=cnBy&vcb5{b9h%--Wjn&bPbXmDw*fGDaY|W=b=xq5~A&#ZDllCXPSU5Jg(rM(72SX3BSab|VFt?S})rNh(BN^$!6Pl6MIp9J?9v>*}phhxN2376`C|cLh@`(f$ zQDk4?8|-W-0aJ@E#X`aYaEXZ^Qkc&<<_hQ;z~X?+>6IW0XnT;~AvII%{t!g?^fyx( zAm)3X!Z}3Zxt?p%cty4767^YD(IA@t=U7QiD->@l)c@3hXX=hASb3t^dTSpA5vMXf zBh_;%3cm_QL#VWzLqib*RH_pK^;+ake-Wg8G>btq6nqnLLJS& z5EjU*3MA6)MA6Fi0(ygl?pq*vufcG#$U$mQvf39(EmpF|@dJQNJV#r@_tCJ?_HNZ( z3Ydu`mqGf8z1IWIc~RSW8pM;Yi}A z9uN)FN0a;CuYj2(>HQ2s&)QV!^I#{RzoBvm+4G7?3DN zO&ja!IsOox#a(Sgb}$|R-Ztjqw`rEt)^K@EU7{p&dtI=gfUY*BC#|6p!1IEe7O@+M z1RDqxz(VY?E2GDugun?W@yyKJ(#n~|#WOF*O;Ivt>6<93#C$}=rG`taLT(vFv5w>H z>$2lwUUOvR%Y-L%iEUY=h@5KpaBh}3m<89Pbay8aF+M^xigLbk2-8SN7-=tA-RWzp z9wyJTWAxytU6_-I4Xkp(Xs$u7;)VhXXPKM?Q}X&FI3>#lid*KR2gld~CfE#(kST-? 
zm7X}?yJ6Tp8kR%0dopsgEZt~sE13(aSyTWpeh1S~!guof);d{B*grn>K0+rClAOhc z6@ypd2+9l}7&O&OB*;@Ek(dI>Ga7N6Xp}S&c(e_tu69!rAc+JZTbUL;AR|s^yWiZx zNy|ys`Wq5&-x-P%5oM($t(4Svm>f9!WD5qz9tmSqP`TF1zEHx#e~P@o6wN`tYN!EY z*d=@b@5V}E4It;RP1l2H=jn+^SmOdeIcXsYVQd~~uofZ3i^7l$FBxPdaYqYu=nJCy zE(x=pYfCLz#{l^NXnIYYQABhvRvY%tUR8S&WAUL7g9Wc~Z)a_DW^l?G`_NO(=swdi zIG{Vzyw*fh>0FzdnhHr~p0p@5A1LZ~>K=nSb=Fo#iFooaM3u@%@8gIG&d{pdT(G1G zI33q4#5Toqlja4&{xeN#0!??b;Z@M4MmX(c3wL)Hhcj%4U$E7gnXUjHL3q{frYphI zC%AtKvn4tqCvN8zNBlVJzZ>PLzyk96db1KyoLdnw~{ zoI+|OD6RIYFi3mP1h4A}9tGvI92?xsk0I$T^-uNC=DsRFHoy!QQDV%ZXp8gu}%28#*X}u7#p5v9Ash83&K9x_9=Q{C(dKjU<~X;D zQ)T*bnJy{^%v8Yzpb#GsG!oKHx%=qsCMBtbF~#Y27)^UV7UHa>}w zmNTM))3CCn50gA;iq*sg?_KfNsSybz9F~3wlDJrok7iwtvwTBo@7xlznf>O-BjFmH z)vL80F1__mF`pa9k9w(Rl%q~1;j!VXY_b;WJ{j&zUu%vyX>7<>(22Mi7Iqiz1U3{8 zQ|n+px%T_cWO7oQNc}USvf9CwNs4I6)K+WVDYPE9@^VX4 zm~X0dVy0bZ+Bl|gqaLl{w3at6?TI)WO^)O!MTTZI=$Ab5hg-~AHd{yO;p8Mq9(Uq$ zV0EabT>~X&m#M;H&}9lAo;^2*L>`A>u_Co;t=qKI4yX-&|)M5rCYaZyRd1yB`nMfig92l8av?*0`PD#s+ic2>d2c z(!nB6@`!eP^rL)rk@HHL@kV4(&<8A{*Y7ono_EB@#vS*Fkvgsfd1Vbl&~T4B}6Q<5aWHr@IiScgUU z>Z-H+3aekFaNLYPsbE{Jy+6HcUdHzve^a2%{o!S%Y zAs1Hc_i%x-nC^*`C~XJNR2lZH@-`HwIFuzdEc6Ek9->Y%fv7MfR!Rx|fD?+p%@Z}k z-Mi8V;8rlDkj=FoB6R^N>?7L%-fV7B$GFMEf=DUWrt7A%-CUEBvru-K3#pMvn`#AF z0gzGvJMigy*(V7-OJ8JefR&AUy%Ye~=qkG0m)4zexezFwFA+m>&w3OxFo3hd-LF&0 zE0IX(l^{4Ksaa^6eLyK(oYF2Jq#yA#ojj$-ZAxYp{Y^4k$V&MWA1+Wu57mKJuza9X zeMddK*?EE!XvmzygFGgy^F0_h&FLjfJr+wR*Wa>QVc@E?8vB?biXyL^VM)qT3KhYc zF37A*CBjh^dg<}d^}rWl%6}97SojS~C+Jaa@`%ltwSvg1r> zL5gNaUq(Y`CLAXlD-x+#txUf{w7b*Q(=(jf(C{Ys_wp3(lA4$}J&oxU4HSMzJ$&WU z+D`M>W8U;6tf&3PHZa_g@ZJ!oYdFwhak^**ssF&{r7z%curUOsJaAGNeh9Q{ju}wc zNAAwcazTOI=tDt}b^~G10o`49qx+)ny(>jOg9*nvJ!XKNQ&0fG+G;d_^&zboy8=!g zU!@=WB$PBu2g)l1qAxK4!q6%?ZSl(FgcBePxWyknL^0L)+=j^cM1aY2CpT#wR|Mgi0lR6*WTsMISYxHP*<1%-2`)09!adc3(V(6{ z6{J>F!Lf>T400@MgIfBf!$se3uNSf;HhZ3LM;IzQbLjG4L25Ti9i{KJqbg6+6doad zEGrE4u|ynHmf%OxrR>Ff+ZDl zRpv1aC`@I9Y&x394lMnbrRhW|zQLAQM4IMt&SwX|zWhk&zLH 
zWDaBEv>)GTH*rO+!nn$HKMlPft3W~L-rOXuNz7mwSJPo32HHTs+gzja+HCg$ntrm% zT5WB!G1>38_VM*P3?;NOVjbN(ICa_Y$5d(aJ4_GD{gg*Gczte3C0gfk+6Hget zw+jN=6LKF;cG`X!N`%kYKpeQ40vwJE{({3KtW}YSw+fZLDlW@tO?U^!=nTZ#y0z2l zHwO%!aWIXn&RVC{8;qO;&zi0h33nAuM!AZNd*D=wo=XiIVytmn_`{d*LuuMjJarKf zGK3=avt*C6RMxEJZQ=8#BzjcLZTq}2gX^WRN6ZSCG*XWu1VY3r3TbhYFGpJGU&H}D z)=~;aT_~9e)Ja0Huis!As1>coddP*JdgpICL?5mV>PubFs zRdL{GLFT3dlj)IhZtxA1%;kSQ>^JgY?_;Fzgfsd!$GNDkBCNyok|%V;6($5##KePh zGnf)W+b&i3VOi8WSeAdAwd=jMvdH>k<{SK@CgIz&{? z)2S=BJrDF_gR^fy(1e4ju){ZALdkO@PE+7u#G{2hvFza!t~rE)V;6G15$jyEXyEg( zXaEjPAgCI_z@*hsUDRh3JD+JVtBwgZ{W1r+yaxXMxmXq$`xfTNVag1GVc zM+QDCrYA3NXmh}L>Lk(-r?*>IE04WGFhCcJNa__2BavDp-jbo zNqr>)C#2hxj?%KoMhvvtv^b&?XGy?Ieq@s;V=K~ilAmzA9DpQ1^XR%jX7}|kw810c z!9E8C!bd#%9S=9PqjW?;AIB0KTq{to^_&D(E=#s6m5V|BX_j8`i(;h1K&G2mI3%Y> z2>4bTip~|F0~%3&;xx=Loq76S>IXfklLpqz3{5?e~FFm|9d%8ha6iXjpR^aR|A_%?~)dqMN=v=8&Q= z7w=7XF{W=1#+O`{1}z)fD{rTV?InApp$-`qGkE0a4}e9?*oVWg;{c5`2+yM=glo$q zR)p0)9ESbjFznud@Mn+G#A@(wIt<&S;|Ous;hc-aVM7Jw<*Udrp3V7Y)>;&YUAIe1 zsbq&6H}L}p0{{cbGgrHa9zg}Z&d6RUGdO{zq3v?ag$|ynWNuxAD}MxCc1(3y1S-jm z$l=o4j0Dye^yE(ui|CMm?<`rK#2lWEAXQ$q3O^h@jVL>)iMjxNy~Dl{aLZMuF?<3dujL55N}fr^=m;@xsva z<>t9m8I06XZxgaG)ih`)yTWg{~7xWQoKy=`9&0i)kBeP;o1Rsv6BOd(R*pl)DrPOh4dCf z+7r*hzFNR_)p9ly$u#uPmLnTAU4pZHb!x~R4fW32^wIk!d)UX?s!eUxnH>!J4ReY! 
zRlDdg`troIgDdao{(B{D_2}|wAkidlqR=oLpN-OIi?!=MM7;#J@$xlNAES2$N}M&=N5d{Bh#l{J$_MG5ndKq28H34|nJ(N=G@3}2 zFcoE+F4|-Y4bivX>+e$TA(7)&kFz$46qG08G{vg{Y$SA07?5sC7JN-N--+q`OY=&( zu=AM@-)FWbaMB%vQ%(Z*4sv+vtg8{{+!=#gGJ2gB%(i7-T)Nj1|IegJM9n+_J}}LY zpgrz^0t1dcrHf`rn-O!Ir4CMct{Solb9wj7!m`V5Kjdi>w|Kjp1&dc*0VdH`URvHksus6sydP_J*nD=@YJ%TlIGA%pdEFSV|xJ#i`t3A zgeh6Dzs@KaHJ4%)4bC4V{>9;lAs1QT#5kl`kk{M^5`&BPYSxF?$2OEfg$z2Q3H&F~ zVwRni0p_v<9H-G!7xfkLAcXZ*r+fMC$B+Ew*w z9m@xu-_;%5=PT_H3&m+7lZ>g-q$x&nigu}UkI%E4L-AaTW*M+cV?+cJ9$*P?7LL=f z(j84+{O{8uuZD)0c%N-Yg!2@k4#*B9N{yi$Yfp^Ksk()va~=G z?SX%>diu_Ick+~Fa!Q-@nbfeS;qLUfJkS>2={~3^L^kTvRxz z0ILhSat$GDno7#RPhfvBNA1#X0Bs99vTeV|mEDA9-a8pV*@FyE0`%0P3hYJ*9(Y`$ z221xZM?v7`^n?y?qiYK=k0ruj$w&-O$|@F%puGQm%73LNHq zMd)1gNN0KW5h7SQ!Li1qAC9VLYAV1~LyXl->7AIz#^aGXI?%VK8eFx)2)0RXzf+OE zRIdHm9c@ZROdCYvVsVv9(9`Ij$vEb!6(xuRbB|Seg4@-|^o=W+Ktd~v`WSjf&Av<@ z#8k)G9A-%j4mS$<2`JIXUqy{El`N2!R>S6Y=6e?cL2RU-s}v=8m!|`+S5PBg8i53Xcx7l^m|h|^ZmXF zTc}-1OUtjNta<8W@?7B+WzfO|7J73mu~JH=i{h*eq)sK&u*n4KnEewh7HvC~D}(GN zm1korOvDdP^x#U5IEST1aY6BXQArsaodKmJHi#i zREH;A?BA4&Mw+m&hYiF%CzIz18&j|GR10BI!PZ+5vR!Wh)iN_NUQ&9yMasCDHlMrC zHZv`qODCKhhj%FucS`n4!+wIWpN_`+>qk~MxH@%`AlX!F-9HH7s9~yBD~gi@ z{tGFmwl{0I#l9?Ca)@V*ea^Ho)JMXW3>WvbtXWrK9j+lCI95_T(Plc`4Y5I>FgLnm z*yZ6>;@Bo-`XK#8qsP{D=M8CmG*z~ScxoEwA@1rTGEb(1dYuk6s{VOibJsn3?zgO3 z25L$_xPfIn>e_B6zBcN`|yNEA0a21d=Jke{rmm~WKy`upFGJb-U<*Tjn_Kc zyJf`9p--t8wZ{(jyStE(o*pN9iKYAnFL*(6SMvEgTeaQ|Nh?Bgl`}~BVR-?6g9)Na zgovGV9MFL#hPU{QkV-0;na@b$xO@2Qaim^?qa~>>hYU^0xOi#r-)Yl1nT3EFx^ggA zSWteKQ&0I-9&1QJNiawDTVfwRAmivUq;WYhQ{rIM&2fqZj{)WW5_I40&Nh!JSK*LH zW+U+NMRieEnd~|f>M{`OLJJp?;zIcb6LV`UMJjNDe}jSx!&TdWUp52RGgEM_%WF4> zReVhw8gj-+MlqZc+;oo{_cxGR*m=N$CN#=lZmXz(u%JC@?|Ns@p@nLsmjUW^bm zd51l{CNf@YIH4$Px}q-Xekp)umeaX*lgWtr>_DeSY#Y{>(^#XaoT=kzihU4#=CYT& z%fxxkCz|Ik6Z{Nr+DbtV^gE{3zS*q@y)D3q6Ohz!pqg_qQ1#6VWD%I!n8GUCnMQuY znPXE&r;hT7pglMl9vrqOi99(>rz4j`hdd$AXgJGr73lLM zDwyj~k)TnSaGjaK0_rl9>W}Q=%39A4vth0hkSULm-wuaDP0N(vR@`9w*mC=Exx=<> 
zU95OC?38?t?NA&lZPg_nmf0T>tBO@Uv&_0(ckrOt(AGWzVlhOm&ITxCu1U~`HcF$5 z6VVn6g2QC;5w|bL#wGq)RWz2s_ROSj^tlS+k<TeHkhiNc*$U?cgo1 z3b}z!z5{p^14@5_OyVw0J+bk35{uvtV1`SYMI$CqzK!5)lKWomOYwyS>SPIm;mN}Q z?Ui9P?`Eioo-?*4uXmsUWm{|Xf1MYD{1OxaS6|oawYr_#E&LcvjfzcH&Jq| znlT;E4VN9VgXNE9i!4Nbk;bj;M56c77YK+9Su`L?qeVlR8_#$jq?)@(R~FQBRN@Lsi5e;G)c}+N zAf2iFlv^e71Wnaa&QTQ>Q3J@cS+8FZOVcZRAOs_#xJT4h8thTmpijX#_9==l+uz;u_+y%APuZnsc0ya#d4@{21Z7YSY*zVsY`lQ*Fx&Y8^-U6xofTtxZ%PiH{fzE zR(?ZemlxNDV~iQQ@f^NT$EAOVyO)ADcAz!rYktRWTA?eDxqc&)t8j6HUf0x17M2!| zrU4$Xu%NIJL3m+AaTT&5e3RB|=SZ^A>F9YC?5Eau*0|?7P&Zd^NijksLo1I9dD3nn z8nBoechH6Dv=&L`-b>a0e9@zhrMpft$#t!a13M(W6oeTLdC;d9R&-e{=TtPu-$>mM zTH9g$maLdA;9RFU7`Q}_2|P{-OQ+gCMBlP1`{)rR$6$G* zpDmvgLqLCU1$qGLa{LT%OsK>kdS;UR*O1uJzkGNC*y^RojmY^_9Y?}M`3nRR*4j7_H*oQ|3=a?#T2L_fb#Pag9$QKKnQ_8M1Dmy*jWMx} zLf?%T*advD7MYO@D3f-L`t1f2BIWf~L0mf%hGhZLzghfjz*02k=k?43AYy-v8?Q2m zG$_ToP~>G%vn2#)Wn%4IOy;^J_RyuUzLhZ<8{PAruA$MW`$FU^6_*>aU?UjPwuWS7 zW}CFt#|~hlf37L3Zd02w=wf^n02||xfN2|o;RWuaomIZX7MTOqFi<`0i9-PXSj>L| zrm_yEd-mM3AUHt(xYW@}>JuR{;fICh>&_W>pvdT6nSsP~nWkq`lW3`&@0yAI!cqi%!)UH#0=VK@x^d|K`R5OyJDL((jy3UAW!;}+aB zl$6CSX8Bei0V}ThL;NF5U@+weIIu>Rd<<63Ak2j-L>A=-3*6J+owm zzYk#GsdkGsGgV@j%i^;$W}P<;uMQ(WmaSZOHt7oW zezVhELf}bzqdE@r2){f~7W-?_JsH*03nv(paO&D*vOINU`POpfeP8(2>-S6~C-@5a z4VtY}Q}>*nI5j@KLxo;};SY(N6A2>T8&`qc?aW*y@Qkb=y&|rwLGUeR7RMh>CK%D( zMs?w#g%yihfr9e9spn2Tmzv56{aV3as|W&!Bj!o66r$zM53i3T=jSe5o?TqIdgj91 zxign%S7tAsnY$p^nS?5Y5^7YA^unW+u^nj*>X~~6VT3lAPFtkNC>dJEDt3aAwke3u zkW`GtRFYg|o_?6KB zSHm{o)zL>mVOnf3*>vl7n2Z=RNmi5-gYP(xT6*$qy77H)79&T6URn(u*rIApfMo*C zF_uA2$lZiCufxtPKFG3k#Rmho;bP4mrw@!lu)bZZD_d3fZe>TI2b+8429r=Xb7#E$wJ*y$oF;BnDyZk6F4(*P? 
zOGfB}fBIP9T;6&Xl9&kp6#9bRNT7xFlQzqFc$TWcJ#3eFaz1chj@V-!@IFkt9?H1D znc8PUvnRW3*mG;%L_|!fjqciB1y?l?I21c5P=F-5s!0ZHGWKqWVojbG$E_NCblZwY zSiB0sty@7@mBa>7-|B85%3b!SP?6$&7un`foQ9m0^DgCxd87?YDcQQJt>Vys1N;B# z7ZCAa8mnDTuT-=nc7ZGRAPR$lkfaX2J4E8be9)MI@#Y5Znt&l9K}OL2TC|S!3wvL^wLFB&1G%V#FdC(A#lrXqv4VEHQzV;5y3?^>7tQzu`R+JRsWwK1wZjUITLF zVTnK@y<1IWC8Kvlmnps7#}}&9J&7G-_(@@oqfXbpO|kLsQ|~zjEXvq^8u2vdxU!)i z%p8tpJkZ)h;4rXww{Z6whg#}|`Ajx*57%pI+;AH|8a zx^Z$9VeH4(H%`imhiBZEr_H8QoA>tR9X1gXmCMn!7tsF@+qbdPtT$vz)27?m!0CCK zMBGvo9s+K9hDGF)B83z)XpqP7DY({d)#F)kc#4tYCS=4ez_8>O@TwkXQ;xTfV1U`t z06-))?YS81F!fuw699Ke;10{`wc6yP*C(pi&P;yZ^@-P5sfE6pkP9Z3myazkKX-C@ z`QGK_`#>v!7(tdZk~o3az^Mf`hF6@bwpfCruGtM|Q}E_=U3Kt272nI`7Y`u60k5Y? z9dRb24@D*%`M83!AD7NtoXrC*`Cu?qu=$FqoGZl876L=C__iDpe5BwS@0FAkVsSrG z=c0(R*nRTYD@gI9lP_ACn!J9`bo^tSgP??^)Ev!cz%3KAP9;SH(;5*c1dJg@G9)p? zZ0~J~bd;@r4B;BsM@~_RPnp5rl(GT?x*~T0kbAJ6^q{kB;(^=?7&mkT7tX=g0mh$6 z;$CFBWF82v>rOCVB_S%$wCgx#A~|BKE)1~v`WG0GbRBg=Q@~VDD7L4j)e*&%XiB*J zEnU2)g|0`g!VOZ3hVZUPGJ0t8aoe@(mP2=8cGBcP&lYG zFP4D`C?5vOxyb3lMWHCtq7R@5oSxBxY5*R+U`^)-o}$`|O(MSyc_M;@JD5uGjy}oP zFtMC+i`qoxy)k=9M08#%QrJw9vmm}y&5{#dA`#`Cxhz$_hgQONJ~rD=+}S%t{L(rK zFZ0O3sM>(D!Z-v)Y`My#^8s*7()~=rGino}O$CL8G$xl z<)r4R692T$H4Km`zGb9RknX4r$OP{!_+bz;YgN+QI!cB%!*eNkT1vS(wJ?RJD~!1= zs~K_ieulvPJpl_MnTNmkxD;yaJwo{^GFGy_sRA(2m2^!-huWc|t!nNGw>Bg17SoQI zkUTsUJmaD+WL;ikz9R_&;r~Ueqj#iN9f&B>BneMAQ|%@4(xEOJ1G3C8-SS)mv3;US z(#@D&GNjs~DT`(@doaD~ra+Q(k7x0IOwzoS)Isy*sDuQh6T#Mi{oE4QWV!8{!`JMK z{U3qwRmI>*=ETR#mL2~9BM`SuSr6asV&u+%ApB<`-_6Wu1`^UCGBO#9s6SDZ6u%4& zkzv%7k3i8J8ag>j3{h~82X(*!$c2eJhaQCye3lkqA~M7JBU_TGhr+{PvPE(@8jjx$Z9k3m8y(eS;k(MH&;HyDOU6b=ttrT0D>_2_`NTT8TH>1=)o#U(-T9g3+*7^ zCF(PK`xFIEvJx+`uJn8hshWMRYL1!AtxMmJ`|awTG0_k2s5hr1lJ7D`-cN}w0onUQ77e=dtky4 z#8Nu3>zwpD?mDSTK+zkZ@k*JOVN3;xsiOu<9>Mbj-)U8>2*&FAgpG$bOJ1D|N?b9r z5nCA;I4f&yl5^g3^z2?wtE3Qy=paFBKF7r=TXdAxo?!w4QX&aP8&WHS&uj5B-I!gVGku@*!U zEnt}Xl7CM`Yw?u@9Kk_kypbnjg-nyZY$dXUV79V9@X6 zzDZ_C!#X)^FwrHBTGXyDHaTxpD(d`$%xW}C$GcCD7H1#CmcZqe`Neayi<#lKZ$xjC 
zzh%sG$Xv4+!eiHEyyd%VCJKFob5$~!N)T9RA@FO*E+jl z>$`RVACi5fGp>OI$e|Hz9*G+$i$nmGc9Y=Hx_zJGB$E~6k*5KsjU6nKXrCp7iC|Zq zXq!l=^yCq|{3_t{0Q|U7HP4AbeZ~Bc11F{n;Lb$lU9nWO!A0Kx&P$kd2_40UUa|J5 z)5ic8fVZ1Iy3e=;_dUZG!c9-`^{Ph5C?yqRM)@YLq0#Imhm&LyQN{3pw;-oC%rZL+ z8#&XOYUOx(*kfs`!CvFXL45LH$IOg2YY&4Tj6p)Ezh-vc1mh(Pp>$xGhi9vEvw=fl zdh}41lFjtfddKHb0PK_x;L`qyeOErBnn$B#JQ<;iI3Uag9l$VC;d_z;WZqsFX?^^} z4Ri$WfaBc8n#MU2LDDfa?~wH|vZ8&o7|WQcrx=9ST*-wT9+*oM~`)>Nnh=ekf5%cNM< zaYH$}KyZYUXolYKSz)jOV#B{ybn!8mxqKE&0-D?$R^$iGS{~Qv6wD)^SUyYN*ybmLA9X08>bNrP#R_ zD?tG-Sz4JZ`etrY#>==0<-rSUW);p!oRWEINR&-WF|t4dF*EJEy_a3e@JLNr+`)|% zTWLPwYUON57Kn9SGUy|7K^-1KWZD;%g=c!PZO>R|&;f^0D8h6_E@){CLlSQQw9pN> z@iCv&Ve7vmg-yC3;?T`;XsY?h+A>0n>%vb^Dhvu<8@p68D`xhjgf3W7&IE}hVooM=IS#5fWpF3{6{OlI$!J{n9MOC|i!2%lL(JH@ zLyb$L=@V}6#uicr8?k2L$$V&MHHYVbdlj7iYUmPfN|RA6T|L7LgF|ktTqA+bVlmT@ zczB~~tsW?r^4$m)F;c&0YNTQ<^H|!-Jjn_dtgGsq@-7z5$bFg8GWf81*+sxs^O*}m zd{G5_Siy?U8P}FI7CE(PWJmP9=9>4 zLgtn9VREbXNU!;*Ue@~fSb5_<;wnn>HvisE17x-7KQS(Y@ttmq6gQD9{lQhw{VQGg z+kMr>H@RhK@MXE&SI^w7&D|>Z5wNj=-hq?_cpj^V!{xykpM3-=PJIEE3y2m z3HWG^ndjD@0Z~7s!J487=^YmO-&oDlDTq7_e!-B=Ah|8$awiI&I?I`bY@4CSoM2=J zj%GYH)5|sjDLBTN{)dWqqpXn?w}GomU($D=FH%+!P2X1=D3hv7=4Fkz9kgoH&-2g$ zM((MosKJ^qMdJd}EK;u*Q(xLK&DO2h%Mfb4ZD_q=7v1jhAimA^ifzW~O&MpIEyvIH zR_{A{-;w0Lqt6A!Asypk)^LNs1C*T+m9Xi=&0hpjG+|1^>sJk0?$b?2M;`C$T~MwwYxsF2ZA5aXYOilrpuNgA{=zlmr%P3 zl1%g6Mkj%rPXn9IEX*ay_KD<`O=!m?B=;ntl28W$r_(2_t@{wtb!|OYud?S3u!msK zz+WjSocff_aV!h-q*O$oG0-wxz(tnJMRPHY$)O0VJScGz>bX?rxb`6emTs3KC^;BY zcDagj))O?Dg8NDBHEjY5n@9pq;;^rnnbDDTH|W-wDinGyStCWh}Jg_Rt+gq7z2 z$uL_roY>@|!H~7e7^M~dH5~Y)FBIewY9c9vikUsVUIeE~*q+XD+Phtm;z5L? 
z!?s%I_ySsPUf~(Z{=UebzS~MRm!ZT0QD&hV#zlM5lp*he`P+TobFmU( z@9ak;bV?0ZLSg_;QWq>V9Sf_Vj+B@Md6KC!p~OQpvMrqYj?CNSemL(oq@ni8SqZpi z_929}CBtd(Mxm>6=7dRKE zaLROwe^3Rl#tLM#V;GeGRFhmdbNL}C2`uI=ou6NM;LOr257Vlg6!zqQO0YG}>CZc)T2PU&48D z@R@4?R*J-+Yc4dl%V<33c+Kvd+-%koX~dQWCn}mZ`J#$O2;q!p^T3XKm$#$rXmY%F z9mjjGKX%{I$Mr<7@oO;6f)k11rC8b>oA^61gOGv;1MXHtS^cRB{;%+XZm3Dm_5<5y z9j{%REP)!Ky;rlh8`5DU-^-gNo~EUni0bYoDEJer2me_z-kWXL`k^p-j^<2^1-KUf z^1^vP&QFSA@KIjLP_OP4wZah?BW5iZ%Mba|bTrb!}t3T*}~l*!@ja2KFpPMb__ zNEShaXvriyjGa{|Vu*zHkgvmc_NMRbF*I5}AdV0c>TU9+zudnOcI9lGqK)o-tzJg9 z!tPRzYK8X^Ct|$ImY_V*;7OPM4I@qusqfb|_axA8(m-&OxSw4%T6z@$fw?^;yWOb> zkQr;;G06*0iVY*T#(55s$&n`~HN0w)Jl$=>qN0APdQ`glyI{%6r{wNyuXtHv3`;ecxyDar+sZFS&Ua-fGQ>@sOSeYpg(XRw6duC=BKp9*GY1>H|6}W_h(*~4D z(`w?6SjnaO#XjCvS_QjUO!W(r;4$hS(X$Ik?A`1m$QMTymb<5y+kRxhU~`zF@ryQX z?dCChDovuwd-l2ZxZ4RkMQ zo}midaoJk%z#Wx+ye%ApF?GZd#p#*N4mdJ7eDg51mYYo&kkn}qEZvf{?iMv=YKxsD zk`y*TLh;YfR$dqjhcfNc+gmBd4#Q_HZo74W^%Bza$|@Ww$aT zdfZVpausyBFzO;VVz-}6E+(|!bK-{(hs~kGXM(4N5}munbub(RT{ z=Pl@)Q+R9@JDfL?d$9&6?rq>ctWCSmS~rz(@B%=r=1tU$$f&#)hW@y=6uv%&giJSGy3VrTQPK+6?S^nK}H;_BLIH#NMDNf-rK8^B~geB_F7 zqOdh=s!PTqNah{H>+K@g6yZ030;Qsd>%^Fzh%@vi8W0|I233Cb*QfCsE;$?w#0daa z;!Ke-;9&UXe91aNZ8{2d8DU$MY8f0Zvj2LsDOx=|=G3#{3$(;WQ+MBAU9R7QV*zC1 zadpweJGQ`1wruwUAll+3clhE0@#tz(Wv$L*XE0^BAS1pcne5)#o53Tj%569~8L(R` zJ1!ya6ik%3TU?^BtE?!pMM~x2bI)-KQ*Lj&>Ad=sAYyFE)^m)uSTi1k(@vr{N11o7 zSlE=;(Z9W2a8c#Y2Zt315&o$zzW@i7shhKpNXT@j3odaNvF{6aJMeiujGYlEGxj-6 zDMY}mHy^<$>g258ej+lB-lf66Zr!>i;=tjFv(F==GY^4U-du=xccr3RhKHc=aWSgu z)0|8k=r*k^%$`{cOB(_BiLrG#r->^yu$b~WIEJ219W*|$O;>e&pAQeBJy!t-hZjq! 
z9SB|M7qm;prps23IgW&W(4|S{EIAng2y->1sy|R#BU9 z={PARF)@HQhAm&UGHnXS)FWGPXzV@nJdsH}qEpFSU)>nkf{;_m5l|7i7Tlw09M9rz z`ZIExG>RTv-D$S!(VHTwIz~o$lE%BXy(@g7JQ;z&r}^%t{z4h>0x(j2pocjNjGP*D z?s#kKk+p_okQ#Mz1UVcFH5uz9CNgC-8ciQ3cP@8b&LkZx670azY?(EkjLAh4hf3P*l#S@h{ROL_+)c z8EXcp{P0?Ziwg1zaGW(FT7xK@kwZtN>1#6kk&qSBxN?0+E$)^n{C>xvL&X$6w8#8e z!CB!Pdx5)v5c@0}Zu|;AC~f8Dr5vxmGl6SFMW8Ca+@?O+{V9z+n}V_ z(LhX&-VJ9gfWzQ%DOEYmH)IJll`I1!hnLBo7P91--+I3?b+mEln(4JBv7#Dlcy*eFv0A1ifPHGp)Hu+G>cG8M+>1=5mM3? zqK79fmiYf;@7?>OI<9o#|M?URkx#d5T7cupIis<`7)ZExJ{yxvT)NR+^-4eo1X2N8w-h0<|)v9%`ReNu6FN8HLl)*CG7}XXO*b+AN1mnMR zn`RH4DY2Zw9{z@outkAo!Kf#VvqvIZNa5Q5ybGEPi$IAxmw~tKAw=r?Ag>(E2Hmi> z9|g>xUmnbQyl4)L5i#6RM}!xygnmiE!l~?IZ6~w8P}&ZKkhoKf$H$@8c3(NYq1sb~ z7zx7<==vkbFfxE2&(}fAtrV3fLng6pZ0erik!YeCu}I%Yb52YgUl!Pk`Y#Yoc*>Y4 zFvhuTXTKUKx$C4P3C^V5Y^);@!rP#M*_i*=3gdBb^tENwnZTA#OE_urLP&<42_HWb zASPd7l2!u`@OdnhOsonipejX%cTbirri2A%rASA*yJy>039(jCzJz-x69CIrF4;ff z4G`BsD&tqqmRoarjEvjP-tyTvfn?Drg=B;BBI)K9xayj)rqF6PjtRZiZvp%5L>5sR zG%?Ozi+M&5AT0cbweKU5XxWVkHii$|a;VSlQ^?<`G(-wKl8}{z4yvn03Ed?JYS(Av zT?T3kkX-Aj0WPt|H3jT$Dz6&CW=88VA5wZ#ll+PN0KY5rA3FmJ-pHPo-YMxMTh>$_ zxTK0{@|dh5*R~JQSx-H&47yo(>A0loet4~u=!*_0g|$WM zr)JGxhI1CkUdS{FUI3^nKw|8_AW;$eAxxsgebSw{?J%k~qTIVty`yXa;!n9UPDOpt?}6yc5t*Yq+Y5QF7ufZbP$8g0-@{R zUlSH8NFGKbY2r%45vLLB!_G3BzB)fciriJ`wPMP1nzFb&j9f}X{>P9`Lo6EcE<`

8H5~7>t8DdSAO+=TV!U@3 z8VvC{sV53nJN?(`)T?#3{He>Lw#`fj52B=!N*pptQ}WcvRYbMkO?u_15lOKjaMRX# zwv15JC-J&zZ|Q6Hmp%g|&iPq*iO#r4-W7j0Imt;et;>40z3L2SD?2;wE!eSeS z#3{ha&QFq~Fuv^oX%nb$8N@;9Fm=^hN1?=gj`_LMHFoU-Dt$5S$4p-wzd6Plgh*|k z=t9@LtW@TvIG)0Mc(>VK#LfC9&Sv?0Tq36ymx*VqZ;xshV3=;0r2dk+m6htoL1(z%PC14tIZNRI{=#Z!=7Wmiftiq0S~C*f7C zYa7EOv6iOnAQA~T-XG0-$4ChLY61(ofdm#kL=2ryX1GABQdC1h?wVS$i2Sy{aO%>Ki^07QCvTBM z3|@t-dsfmDn6ZC3hH~B6mG<7FWwR&}Mj~4^bOriXK{}|J*z6=)!2y!B2l~FK1?@hz zkz;Y|ZVR!<*=CbdGIq0@Hq6^`I7kGtP4;l~P~#sVM;O4O#L;?sKVS_>T!Ppw#H&Vc z9SUe6X&{pJK=4u|C0Jcd!`~9)Kj=TF zSMi{w4YI%tLtkGx@31Ik60IPT@fXRrrV}BHjuVsp?!&Hc_sTk#7;RYTpbg^*KR|tn z*dZ{%XE1?C+C0&z6}ClT*{z8k@LU+LyOo4sQt*=xzX8 zN5l0u+Oo0{1uwjkfTr3Go;NdPAm$M1rRTk!An#$ZVnq7N!DNJV`A}&afW%(5L$uS%Hid3CZPnw686L53Y$stYE&b_RO<Xo*TMkHT9jbNVa!P@>|$1;?emo({W59^l8?PAih=+P*D1=#U| z$ES!Mhdo@XN9 z=cGa=nJ3l`kT;8>v2rz$q;0o?#qefQ+L%to_! zIx#daXEgX)h^l6GIU4`lfN8OjaYZtjvK<5T!a=1yuv#3kU}w@lpb(s6;`_H)F@<(g zkFiK=0QyZ^h~g8zXb=L&M$mI`?#Zja zBG26})6)ftvkG*=|FMQ-hH}~!41OZBCkdb){pIc*P&f4ldt+d13r?+rJ+%4P?7ytf!dipbDz*Qa~y){_l0r4q=SXFe>tKZlYwkcj= zw^;tETen81(?g^`M4rrXo`5SeNsAN=>$V{d-@kF@eg-w~6o$o>Iib6>j zT8ow2P<8r|B>r0Jj=}$$o|>*isoE`vcpG)3pgLK>_=Va}5MB@sV7CM|jN56Ji-p$# z%H$y4GbJimtezDqCdSAC>C6fw_V)N8lxe%-&sRSkp4?DKkyx%K)KRFAgNDmYveh*= zS?0PZqMiV`AK|R>Ekp=7B{v{2(1!=^x;CKq7S0+ruzt-ap z{9y_+mjnpFYKE*e%%Tkw>6&JQUzsr35{7q!WsfI}c)>z~h$|jRMjlPVDX}obb{5D` zZjHIZ#kXujBXWvU_`06A$1TK6cK_$cPai(}^VaTnk2Wcj?tb&)@so$9Kt%gY|NNNU z_t#1jr05EWFB*<3A5DxW_fmdmvcDl%G+6;#Jab*sikq6e0%T$WtMLSt!C&S`IRPLT z9cB*nkfoICmqQQWwP1ms%&Y+|tTeN&_tvgmp`lnPVHbJUmDkO(f)N|J;1)%4GOVw? z2w`q?`bb<5&w8HW{56Uu6>+8z>0ix6tv;CWO>tgk=X+!wyZcX? 
z!}LTsFy5atV?}T9$$WLe5dZY(PtP|0v)R_4p!uEPPAFu(Vm|pLT6+5I(bMP6hMt}v zBR%dnz&?<52K@yf7mVuZGayN`Em;D~QZSMhjZQ2A*t3mCPn*p>7#;V|B>8bNI!))= zv8bDm?muj{#U&+b7|JgJudPRq{#b7ZQJ!z=?SzbZCbk6(%^@Qh98&ne97Wb83~3L# z02BH65&(s5S8$tsOowbH&q=njKiK70QQ*RxC{_i5tMt`|^9ZovY~jew!DReEb030o z+Cc`k*>vQ2!Y{bW)KNroF8Cg3rhu{a3`K#RYKDfB`3t1p)nKM@y8fC>1Ba&{;BDXVq49(0@CJd z4xQv3!?$V9&W|6c7{M$t0#_fhj7zL#Vhc)q={90cQ_ZCaMcFu?AH0yOfr5q2BpENH z;bqTH*dMZ}3WGyS4uROy1}1;@&T@b7u9yrXHZ>~);vX!koS0WZJ@K(lDu@TcbLmTu z9VNRwv;hhw%J8xda$)$x=;c+saVP`PV_wDtQ31*!jeJ-P!ANuLHc^#^$@q3SJR2c1 zrq^N`I!ch;cjlz`?TDa5KY)coFZ7QlV;;U;%1_!f$oOz~=AWsZ2PRY4vex;ZuhD_o zS#Q0c9?>4Ejz#Xr8l0qIdwQ!Fd=qeiyh<%bNG!;2fhQrCPDN6m7RNkGMD@D{^7E7^KLYyEzAf# zzbV7(A{fZ};4Cj$ijHmN5p%{3nuU zI_!pS+9JX$h}it#gXSbHTqYN}%Vjm8o+c8@l>KXGD`)2J{~B;FOc3=#J zRd5%hSeN|d%iRhfB51|Z)-JEX=y$xwD27)E!`VYP`w9X|C+dzsAk86d<4`b2JmJpR zdqWjOBScGjD#|d`UTq1r#O-KVIuOkO|MK_a$=l)2;(z~f_1+LqE34vzm#jEi5!m8w zz;v(v6PK{$Kyiy=A!D?@hqL-R*x01qB)X6B5SY%US`8p9D^#@C>oN^d6d01RJsK8w zT!r$h*Puy-F8GjFg>vG03Y>@X?QiF>DTwl}U4qrEWS7&(1(-DnHd~S5H(ZpqbwRW@ zz)|`R4BCwk{bY<(HEm!X6s!cFJRG%sO zbgs^qiE!e{jUqJ{E;FWQZj-y zL?m>}G@8nm9E2oe`CY1Y+s41j<)L+7{#69zZtn$*s@NJ7WU#6sNeB#0<3q`oplRg< zzz8Z<#Qavj3O#L-f%*IZ;eFfRfBo{aueZNnfBD(^_V>Iw^)p`R#2`S~FbTxkb6~)6 z-_!H%y%(EL5C_8Qx|Gvn@hIZnODK5wIPcAIIS}q6x9k{9D~Vy)E zo)W0Qaj^_B*#n{7g>P}TOY>>XsYf&gsBIqzXyS$sz)i)yXo-1%k1}hmC^q|d2+BC}gc3N7g2MwKX=Yd;Y()>Tu4J!; zqw|A=AnEy85$`MHh(N$4fO5|=R{N%2wk&28;wlkdm-D7xUxjNlG!+?x} zIFy>dJ%7dM0@7H-Tm3;^c)qo%+@&MpW{7Rfa0GyI*r9>GyV)5C173ALCzryWBPDz@ zdDp`Z>W$7}*@6&=Yi^M89- ztH9@QNh=hh^nqf-wN?S7xsL&5_qBrQu!-$H=>?C7A;T?>RISUV(JxN!^~=|&;ls~E zslk2#NJ7=|6CGU)>6a9dQlyV`OQ^yX(3XW{CA*HWLRc;%h9)!vSWrgMWP%Cs1MCJB zUY);&jVGL7;#(o9lw%|{dW21ZR*OzUTGKaGU3J_fK?EhnrVMVmg7*48w%M2ocnK@) zaA=p8kb^WsSrc)p$+x2w*66)kTr=H(6qz@^0!2kMW@NEkOvw9eVIGBnoEK=x9a0zL+|#>>cr1b}7v^bMvZ@p;P!Y#!t(r(B8Zs1P5$t<7b z{n>2vE{>@ucc?Mwd5*g(9WJ3CP=-ibd{fs_SfwXmL`w=JAkmvh?GnvN$r9q-ks{gX zMWR6<&eLX3g+XEpor7V;O4j;%dK`;W00NF1FJiB-``DAh1eAj!ZU!ewF=(amJ+FYP 
ziD;ocghYuu1@->X=by|j!?1R-KDdKyFVMpV(Fs?`ZLSWwx2gd1T5_0GVjW2;V7c}G+Q9Xzyh+wZG| zI_Bb&OR(!wAWwm4fEMXhrVCX;HC%aRQ;|Hc{#!nzZG5MwfrSuMwoh}>!F|l8`4-C* za-El`4dr1&@xKxRYaMWry!prEi~b-O^x}B>w{yvSAtz_z7HmVz87P7E$!Q33T2E}9 z7@8Dm^pPN;hMx$RwE0?;x#D9JBa>}!BM8JOOp3D&8iI_*c(+j$0lmJm5Mgdxp&k(~ zkP;%nQ)P`CI&Z01H+KUWEL2M)_12BI0{Q@A2Iy!jdbBMi?s=+^wDM# zkl4rWEqw?r+F*xGP1TKFOl)jrb5zbt7G%ZygzRGUu<1K$s56Y~&`=X&-(DxdjerTj z1w#%;mxSvCC8YvQ8+H>zFj-#<(j`2qs3dEV5H{uaDXez*Y)DJ#IIpD1BPztlf#1EW62;>+0#LySgle%hb9bCmy)Jw z(&$+bcNK<;7>n1{!>H5c#?Vr7&Mv4y!iQ(9A>#ctO{e8MEMXT~i-+NaO-qA`3&fcl z_h@2>Xg$XYDofh1RcXTRNM}Y=ICX}S)`fMKvHQeX(o`Fm9gyF6cW03b4ci# zWOTQto`fq8^z1mI7EfMhTvH$?YYnhz*GR69Aq21iDh^r`iCJ8^fzwOf1v;}(*ueQ= zG;>3DP(_(mLN*PFw>yYZplNl2g_cV4WQ){By%OZH0R{pXL=H?}|CMboD>!KVW^F}A zF2IL@_VKfwBrQh^4>8@Sv`%DvEkbo?0~gZmFc|rGa=0m3v0K`uGImPq>`<%UodKsQfdsU^}P{UkmP}q;hCo>Tp@TJ6zW)YGP%)uBd>wnd=EPjomaPG@peO7pSg0fhq zz;bjgWJxUJO}Sn!+fRARqBWCX4BE3>tJA)<0OH1=91~u+wx|6wE-Z4MM)F;c(efo zjpT>W*V>Gcpmd>}SX*wF!!pZU`OVk49F@{ui=5Dj8bWD`;KfdB za+Y*=uPFP#=mthHtxC#RV1L#K54*{)QG~%PND?$b{+p9yOa3+JGwIQMjv!r;ZMGR@7K=u|9(TM<=@T` z2u7Y8)s*@9KH}?y5yzJl8DLH--hy!);%c0%zJOe?vfP2Xb}(4CtzI@aAvFaGi6FEL z4wYR?#Wdwb7q$a$)LseE#$-$MnXtO}e$fBwD>~R4`J()qa!@i*B+G*q zYBR-aVbifc$PC^px{dcQ7sZ4Sn|b`YNkdNVgFq?gvz&IMV#9SvYrKDYh&|r|NO8NA z-)wzReHq+c4F_COXvjhV+Sm(?0l!}o(}CJ0-2N%oFLEQ;DV(_oA9^?eQ9&r)-aGOe zUd#m6F zHC)`Eo#192dN*n_#}UL-^r{vS6IkubKO9Os``liUS+Dg;M&rm6MAJ)~E8DbzJ0@2< zSvAb2fS*`e+Bi#tHvy+jlp~iqRY6Mf{0RCf?LVC25Z3G+b&agZL&Px6rn0`lkXSI& zw1~uVrCo%(Df=`nHk(&z2wPo$l-(l=7gu6KJ5$l5FV>l((J5rIyC%aR6@R1G$H#kz z=i>kkGK%$`aV%y2l7@`&pLRG68C^;>DCMv4m?4Bi4l4q%3!qb$kN_z*XOCX~?+UrBMLV zX427nsT^CQaPgDN792^!+T~Hoe^fIl7jn8!4MG2@nGXwN!DxRP9pjkEEVbw%ljI1t zPcM;}@Z<~b9j<hcg+m3=OV_KLhiCAu(O!h`Of@Zud&PE-fO9S$IV#kt&$xUvLY9rbkgEI0B-z*Vas zC$SPNH6b5O(30%dmGB;$vSpIZL4i(SK28q|Od(jQX(EP|W;3AIApT6e8nN#LBf?88 zl2WAb2637lAl0~#*bx*A4Xjs0a*~77WC;0j?IX5rJ78OCHB7c9PL91OSxOJ zNGF+Y1QEA?Z4ut(V3B>vvT-HKUPK=-vVtWP7pG%Q6KmVs!DuDRScLBdkf%O|iHt78za^FbeqE>`)N) 
zqWpFT`E0R2GXI$;rbTp0^HSczAJ-K%Q@awO)2+FTbFV#FFV5OMgwGA012N#?-eBhH zl{X>*_!%9&K!BV-yusaajOtF_L|T%)W+Kt-g=RUJaV`y(E^TFXE7~ z_k0|ZGU>;w1inu&SwOq+<-9WIkmx(qm3;OSEhq5}+(3?lR+$ zx|Pm>4IMzx1j>UIoTG>LfxE_q3|o*e*+p=uw8VZ_kZF2CPp%Nx-8r-u+((x57tj_< z@*KMy#-1L7PveNxX>H>PJCfsZ2L%z=)@A}!K?asBP5jLD_ZtQ6plK*b$AKY;Etjdq z6m&!-Cc0x=iFaNMFo2~Pku8yx6w(QHAsV&zbjFv3aHE|ogKv2Tw|;|L*rV;+5t|fA z{`KCrHH8c|Xd8G@bV=|6>T^)=^CMjs4ez6fSlQRK>@aOQt-H)b1Y_D&Kz~uQvdOq{ zvzU)G51HN&7x7w>he3OxZev)fq|sdHNl7BRZvbam+sPK8HO%8h*Vo1N%SW0>=&%RD z;mC5mA@IJeAqhe;2P`V%nCYHm?epW=$ZS;HAZCQS6Dj&@*g7z|GdV#rY?6NP5?QXb zzhc1TD*hru1(e}|URMg8&Icu1+u}Q$(o*8!{o?i)n%6;{t`9f{o#nyo1Zqj)a7DKv znqL`$4@eflAKC!!-$B-rk!IlcBHMl5S{mbin!x^$eTZI=(H<8swt!wCjUsI`ae`Wd zeLlOeWIiJ*UJkM_DV-4cwXgdVcEwfpoHJ+TE3|o6n%Wc1ed^ zJ$;nPa=lPze*Oxs=Yf6MhO6~;A6Ho1;lU`AQ@n?5Ig2YRtgp8Fdik=D_SQxA>cv7M zp)r!|#B?7<(B^}Ny`C~H=+$^Kf%DYTh8RL*X?UT8U!@9;UPZVtl+8tnuFGZ#UebC* zu@3K+k!bN2m;Tn9>FFuMIsYI@je27>yxo=_ z7y_(l;ynUUM?0P*tubJG5zaA+MK;<-LSW*4p>hGpR<)7}L#c+akGeu1a`u>#9Bf5N ze~Cn*i!7=?=CTQ!f^8E4JT)zqWo<-|jG^bQuV)e^mkem;hlN^&$t0T+Fp4H^U4mA7 zM_`wL8gsFPXP$L9o5VPJbRX}hE5%faPx#{YoplXsfH=GE(Mc=i#EC>2rEOxZU=%2d z<-;noK>5%$vB--Yce{X<9}-iP6}O0{lu`lPfNlg~gtU*y=E-bYli%BETGPK1$d)5a z8fBg;!ymU2K`+G3nls-EX^t(dG&~p@0m6h^dZ(8A4WqEtJDQx0AdQZC1I^+dY*Dh8 zzPv>N;qGb@H_QPwXU>bPeUdv(_j^|Q3>wXSS=A9Ysj?x64L7OWC~Fq zNs59B6~zI?>Te~&+#_4a;9zhv!k5X5-~VN}=?xAMx{os|*x&2DPj9E$Y^?zyxGVVp zoNT%3d$EP^G5MkP76eZkwSfCC<$}GtMw{<%JjM}4>R6K>4?vLto3&od4h1R{v5y3t z^g2hsw*votiU>xia^7_2!}^bhvO5(fY!AI9&&fHZH}r@#OXB z{P1i&dB;dIWtpjSA}~$a+oh=*+!00!WtpJ;*tQ?8Y|tL>ZKA>DwTv(f;`#Z6`L(&(bpae81Q#h zLUpr*lJeZrkNJ1hqsTEioKMK28=V}D=y|_zA4N6qBI0#pfPJk3Kym$*{*N8q0`i0!8aYEcz~8styvM>By$o+aAvR743X4~Dr@pG-*8z2!SVzY z2nG~RtaNIms3wC=UC3^3eC0scSv#5S-^%>NnqBvoaS?}ca2P^XYEuGZuW%!h!bswE zN9O<(7M)nfd^b(bwvaSavWqcUtXj*mq1?K&b{lpT+Zl-bNSw~?z6Q)qab3;fJ+GYI{imJvCyhN1#u z7)t0r?Eu!V>Rw&i*NFY48w4AnbvIuMT5wfzk4GoMhj|-GoUR8qP$^ovN1}~N_;(R& z14U?Yezo`oTfsxuKn|iy5j(;Mr<`$saUzbx1au%?;Zxfb!)B~FYx8LbC}s{r?^<8N 
z(tC8O9srwn;Fp=hnrP#|6>626Eb5gy$l7yw7eWwD$&ASJe`ywkRbjWkvin94Ab|nN z^mVB@Eb-&$VI1L3*mS$cFh*RF@1|S;m$80@Mewe<5c>Pq_Q=%M#7DVpV_be|yk@aC zK8DBTf-)=QkYC8B@{J48l9m0Ja;;oN>~NRzB%!+->VtUn<75xVzz}qsg&s?h=#VJB zc4}X$kT;dCT&*Il$h4n`Zp&#)B8E@%VBtXA!2~X%L6Ma!UazlD=TCSvi4w5!8AmsJ zw2sjQT%Nml;+eC7oWNsB9tfQ=qJveje?rKisW7y$O_^B5np6Ws77qJ%Z?4g>yzJr@ zFS`(XcI!=`inAtI_(VS%~1Bipxni3*IA--|_r>HVOCFJkqp& zh{B~`*XM5&W^9GaC%ZInGq>TOtt<($ngnR4X&65S{Rc2e^HfQ3B?2G@i$!uFik_&1 zaT{h)jow@9J%?TVMt`lP@3sDo9zN-nJbrk^2h6L*XfN42pV4W9+glh)%tg}4A-E#!3s^h@NSUAFsj!Q`F;qH?%{ zFWWOh_qkn{xUxg-c0IfFeTZAQ&T;*mx;7W|;1VX>bh#rZn%=D0ps5MshGdT^zIsAR ztS+~bTH<;~qj#|N_b^R38{s1xgqA@6>Nj-&Pg9E<>}*b0kOZFFnKOPHw{y7$*uU3$ zr{_qj@^;Yw!{g_>_qYDIaer&;tI6KM3HJa6=Of-Il7z28#kM^<93D+)Gq|Nb;eEh> zV0i;WKxNoD>faJlvUm}FGyxgZucwF+iDxUgj9e{&DIeu>7effbk2f9u<6P&`ynoCi zxoyZXjTUov#r&m`atjt0@D|)ChC9W2YkoB;>8CiHZ0Ep+^c);s7>%L@-h30{DGQl0 z_z6D93}}QRJH`PtiS{sHzYQY($0tKv>UWag^qte8AoQo<+M(IX z<*n*Hx(-{Nc`7~y37}Jlq{xLI!SLuL_t$c=S0Bh%xpk}x$!gBBc0TQWHrUzvOm8gI zO6Wn4|0K#`2)?gQBH^(nC-^Rz5U-8W9r@hsCy-$T};g6q@`V5z! 
zeyVq_X$x4DgpE18wgcl5+m>=M9$3vK5--lh_umyn&pt)x*p*nl(ux0O@I5Fd3Pl0|B?cNEeH&G6EJla0GCv4BeyMy6o|6;+GR4))^rly zV7#oZN)uQ`F73I^Eo7Us5$ea7@8o-o!M+BC?CgAol*CA{CFj1b!t^GGo(qfFM>WTZ zv2N=vZhfBj@Q05@_ocxuv-K@VGXT8u*a|ehoOMr*?d0Y?gcZKfEZ*U?zmiX&j$PWB z?91e3DQ+g80$0jtW>NsCaQudes!*UW53trJ-+i}x5r580ahzU)zwwp-Ja%~HTmUYw zidzhdNDbpbx)to!C>)ZEtd$>Hnu^^j)_FmpAY$PP(V@>~mBwJ5PDQ^C=OOh%A# zIPKvn=ADP5rQopcsgc*Jaa)sGwD#eks&wO@3}%&{yo6O=*cqW-=_py4N+(GrAyS-< zY;-yPiG*t>q*tIwGyF<`EHH+SYfT2A*~MoH?zn7z_C?=gk#6>GgGi%IqmBAOwgmbk zeH04eJzgj+g0K**ncAGYjKDTPvD(^93KcxQeGc$i=soeeK;pVmo=ZacCY1yGI_&^G zS%P}&#PMcQ8bGq6PCeSh2GvmS=5LR|IOD>*T_VYM454xU9#WnS;KNnC#Me&R zB_04(58~jI7z9f{@tX<|%G=Tk&Ups*SP!8hf92uoR0#^CG!yXAR8`|bFN)kIP>7DD zK&>|A#I{Lf zDKdm<>DUQ$2rb_SMTh*u0C)pBwXxho1!1&gCATcK69<}iG9%3X1FU@eqN&yK) zKdWl(0r#$0ThA47+*Zu2sv%9SLz^)%1&BzUUFrD zr|}0oqW`Gz%96!zB115iUpTi^?8K3Bfvy6mw|33mdj)cxujvU0J3t7QDcv8}Qeu?VtScGzxLqf(z>M51 zfgHYCie!Y^e;ZUt6AJ{rNTLf8%bI*B)$qm& zUh~1mI19$EsoGSsRWM7XRaomF+0k0ZzdN64T*%M-y|$E5?z$!(fVb{?mwnC{Z!kw8 zU3hLv1F>r{uw7e$U5?3n$|})>cI^hmQ?he06dw=M6Qe&7DDQ9J6x4Jd)`!XX+4&h_ z1$fv=q&#aLoLFpa!E^=!p&66NpQ3sVwR$RQbuj@ENgpwcK-*`||GuA(l;m9C^f(7O zURU*mhrbLep{E9vXnMiFXxJ$+BsZsUjUk=1MMjAP!f=Y|{Qj%?$sqzJh|Po2?ixbA zzy&c~=D$G8n9j})l})kJB#aQ>9FNQ42LHEnD~e+b#DD~9gXN$~Hwx`e3F+pe;m4L* z0o}x4GJH@<5|e{14^}xSK)LkCRwD`(14FGv=i4~&1tA#3%Bu;%@N1ZqcL8*~f0fPu zV?|(GOH(X|xE~pAeutT@&IE{nVue1(t*LDC!_V5424Z5UP>0tDZjAX`(=&41aD}BP zHFv6wv{GmZ7q{0f+pk!nI*NW1+;1I)-1Nl>H|B%>8b%&#t1`5TNvfmc7c_eL_h#2P zR{OxL4vgE(j#b54KH`$zT`Ua_(dffb=%czgYWS(=J-^*w&Lu%zuWI|xUgi>O{z;>D zvv)x?&XkY}CZ#*W6yus45a?EFnzNuebkXHKi&aUKd61kMpO_5b!=qfFCAwmbVcF6` zvCG)Pf;i->4?_-Jy9h$hrN{C9s)5I0WS0&#RxhV}Vl$wNQokTn_Fx|=w;&~(XcRGDiFF-&%rv5gTk(JqYNb~ZLZi$9Uyue7*QFfqY5H&-k7`;K5uqk* zMmT)9N0EeL`Jho{Nu9U`KUZ%krvcn~uuLc^5YdY_IF^yK>uNFcVnP#j2?Pg1eGtg1 zchW1O{pjpVgoqD_2nDT^fY5Y{(J}P%?DgLN`TTSKe1Ais0@NrEK_sK;-jG#aQ)WlJ zVt-Re_7f!9(hnSg41Ngv~d(r><)?yqT+A5p6Xr ziasu0ml#kgtE1KO1@iHBE|RZD75@T^ew``bTi=H3@a3nx5T=fl)oJ^}cF#MUn{hBX 
zcKGdUVe){gmr+KS6AJ%UpzDX3FAS{eoMOye(*IO~>TaJ|N__^iBYiS{xdS{TZp?YpnHM z{b+`)2XNC|8>Fp{nCdm5TZ2e>6^5_{@w8d&t%hD+BwF_c?D0DrxmT#DFGY|fHOncC zDa*eRGj$_NDD+sSOAG1GE|L=s;LOZmAzg6xmUalAA4sML`Rd)9;Zf~TSw*9-*}F0c zpHGK#!cCT=Ll`k$9j2+kcX6nLH&37MovE0w$rp7*H+zi);rfBnDT8QlmO<>nuSSf8 zz2s)EgfQFBVQxjm*`xwBAcvPI=%Cpa^`U;fR!6beK;B%?&X%}^H0R%J#{kdp&B~?> zZC9q*oVvA90*U#|l`~-B$|dVEdV|Vmsk;Z@IiN%~2`Enw7Mz>I_I6)YGg!{z>7u(X zx?pA3V5|XygSQTydKNF^K_Q{~E6^~}fTTFS zeTHylk~H+{WXw&7IRc8sDAv(JVYh-HqLrH#S|vO=&a&jmX4ZS}-t|ae8Ue!ZNM18jnbY@&l1@uhL)@>CWz{N)KvrM2b%oYq zKrgN=?tTeVTO`!ap|94iRPt&3DrUATnE}Zu*DEZ!vj;{?f*59s$5Q!5foE7K7X4R} zM+0kjO3Z&Hc|_FJ6bZeS&1n0tB#+o&N|H>7Yqdu@sqWh1SC@K|W<5xRG$#I9cXZVf z3UWCAO7d8DJOsKQMQ+ELI;(ek7%(bWTVuSrekJ2i4&c|;#GCo2PV$(`qf`)R=r#Q& zs$=5DTA*i%$zJbh`A(05=mkV_mzE{;3ioCOHGq!n@h^0H)+kM@-vJZ=F0Rm7 zx{Bp&G^jrMac^?UtD?L6(Xq76v@||sI1Cm6GovnkFvUh#2J9?0GB7jRXsB;wR?<_& ziKocknZ36IaoM}Yc39Nn3i-+;{W5V@))X3~PTBcen}!JwcBIO-)kBSATR!AA90wLx$eUA2&U-hiPZ=2-DL9MS{4UTUO) zk`grDUJz5%`WOSv{3}w&&x*vE!Z@XkB*#S*NLXBJqUGKr-*GaxD9~nO5}8&956f3LL<#>Kd2W$s z8W{k_xOVTm2U}@&-tsBMGUz+)M>Du4)G5F(Pn1;h2oo<4{g-&09Hk0)dTY?*ICrDZ5ev>&G0?4QpX3dCmQQnX=8CbIL$tzi2Q^*f2<7Bgp**&r_0{beb0EE0@ zxIxlJOR?QCiwkJc6m_<{a?qZ=%U6H93Qo5WWE>swC>#liFh#EDnI)4zq0A4+h#yWi zKXL*+kJp1T)x0LTM}!klMUSj4b%tc)|;{KOEjAp#l^sX;6 zhr%5>!tK;IJw-Yr{(8()|K%+&uX!%PqagX)snoc6a0GYBJ&-K#l??d!B;Z7kZqd#G z`^gXqQq>(~G{JLuWiN5L3ew^qPX9g`_a*9@Z)3El!4XW*kdl3|hEU6xvE#aRS6&kXfmu_>1wYlm#g$np{ZVd*<}Z z@*wS}#N;kSW#0UZ{wSx>W*1Ruqj2AdQW2pF^zG5 z?MM7K?_BodMd)dBDVkKZ{}LqKNvTPS4^4DY?v7zDLo=~7Ry}S8)jymBC#{3F$V9XU zQ&SRxST(6K_P(I)P%Qqe;+q6{q>5RKiPbrQ4-GAdc@Xu32_pFxTbI=k{{X=w#L)(m zvL2gb%0DW^eI@3LtCM4M8#QFp=8EEIP9g9`Hqo)oJUFq++^)N#Trde6m12ucUPn|~ zST%c!6=~O~YSRI&D>z%OJ2e~Scv3O^vaC+Yx}qe|VnfZ_ZHToQ++abm9Q2=_^h6EG z1*Etll$}P$xQj;!6s`vnnFkU8#U8ODLMzo0;uUKaHpJvs{(X zH>44X+FMC{p&5bw4579*ms@}H_WWJZV&#;k&`&opu|av!fS>D;-^dyT4i2znA;Gp@ zCN^(~Ln+aHirVO^b<1H98;(T|FCtBFK98S1-`#rrUynrC`#-|J(8KS(=osMecVEDTws@Go`(mkq_6E|P 
z$W$!@{@oW`T|iC*jq#%&5oJ%`CtZeMJQmKs8{$sP(cgkiiA{`VhE>@r8j7w6Fj0B*xjQqMkP) zCjx9waI-ifu*fg55Bj(1p0l1V*eR9=3t&KKAT~Y?!LteSZv1A!Dt=pST*OFBM=`~L zHatfGMH=R)yS6MU%zv1Q4I|*j9xST8K0M)v{dFYl!CzY*Oq5T%1}s^v#M_zLW2(8sZnd+X0|VrFaW2~M-F-WzPo4X2;sR4WgW+yk|wDi+wr zzz$hs!+<35CXbMWl-Bk#YdGA-eHv=|$e27hnZX9->1+=h-|Rixc=Qw_ z@OkY%fAp8<_>u$XbPs!V!PI0%yo*hTZ8ZMD<|A*q(dzm6bX;xq;MCEeFJ&4vf5ai9 z%5b<+TTh<1F4cJQdUSqxwhjT)Nlx!`A%(_8t=kK3)gW0d;FI2`5#Q;nw%rX}TyA%> z;1atV#b!*O1US}M0zTslrTI$;S`yBsa#4u2qnmF24)^`lM5OA+NXDah~kra8fm;o?LfN{|k^3lz= z#ZlKX?Zz+!Cw!?uti2t4AfV{vV5GlLO!RDesJTr9WELL<#Z%=(k< zZ{xn2MdPXKw-Z~1tEC;;7R>wc`6iUHq-ZdV9iIs`Tsb>IR64+mTnZQ79a{IZf*@Gq zDLAzkr>_kHo0IiTC&}*uTS_Soky*^%5hqhGATxxMzQoDIa-vCv=7k{yy39Iejmn3U zaG4??BQ%VbO59RZ6a_`Q~wIiojw%%b&z(CV7yFDhHz|Fc0Adi zF+0Sckee}`mN$hCw5Ij;4)MAu$(v~F|BfAYXd?$ACA)f zI%yHx@9y5IegX`_&Y=ZWBDiV<#q52KbCxFjh`pK+DRIIuMKq*Tbdyw4*o$~UAp(4eZTvR1oAo z5kL=3? zB#%%acL_?h#%|%e8>qZo8{ToW84!{ocb#b_$C4dlQ!UpuYtv6cV{vtcPRTG!dn0Wr zxlEfC*Fo3x(!w&wl0Bw6iy1{Eiii^@S61#si$6J30GlF7TDjUf(n*9csK;ZqL6RSngki1C$|HXQcJrP+g44;n@Dj<;xi~tRebC=82ObfC93&?89$NP}-C#2SJ8HOn>CKc(hHi1;nAKq>{L3-sDj(f>B$RVIxpM_U`;(^!dO2 zPSXwwH0^V>S*SQt9Q%uEnIQ?arN83d>D}>U51Ow}(K-NihCddA^2GxrgNZC@!TNnY zffrqN5$ho2Qc`j*IVf4AqAZfy5Q;R-2S~)7t7bV>v3N8pbW($2um-GV8$qga#AumUPFBEQmg``x zKQzi68Nh;`QaoC&ljI!L;LVgPLp6>^@?c1^O%&%+ z2xr#uhsq>NB9*MS4XKHVEt@4OFCe%nQ;nEyNrwbs%c-X9Hi(#@>?*pniXltRDBmVB z0F2TVo&`wFp;{F%#cMsgn}9egqkag*gQ81=U4|0yU??%&_LUZ(e5cCsVAXOJgz{`L z0w}y**o<#&j%1)?j`|iGv_;LA1ZNg9E??_U<{#U-_<+j>gykd<3!Mryg-uG82!;-Y z0a8;|zot^Qlu1P1D?l=iLW9v|@Jnu}ZBb z!xSuE%#ep`R$Ar5U<;a+)}UB-c0h3zGgk8gp$T4yfS$ohtthR;wy(K5oggwM%4P8xaOkjKS!#TOY?_a#Qcx?kk?2HL-)F2qtM2`o+T(_2D z9jst&_KW7-fB7+HOuNeE=1ND0n1E!KW?9vR3Ph0z3k5l!ZumN!9`7BVk0&r#str@2 zo7xz#n!&V`>$OfNh{xphmiYsd<+y^w@^1c?xuIxAeM^`+l)a z*{AzBvm;LdBncAmS{7CGY<;|tE9eRK7oRY%pdyBgvs%jd4LJ(?q7ZPSxuOE;L{cb9 zl&cz*&h~<$h8i|V0+XZ3*+`%T{=lvb1ck0|HT1fd6|Yk$_f|-}F8~Y)QN$x1b9}H__WbLlC&3Nwlr5Y<%tS960x1$rbu8qpc`ClAQ#EPPYjG-PT%YYy^1~v3L_2y!Brobx(ECdA%Y3|R`WtFSQf{e 
zcV13Ohp43mquk<=SM#@0+#VVT7(iMZcy5-{VRQm-yB|MGxAxfRwu<+6;LBAKRKxl^ zTub1975N2mUNAKP%5l$$i!W;6H9794!wmvv6(K6yAeUu@?kUwmXA+1+ifd;Dba{jo zPm!tv^4bv6k17ZSYp+s&df_;9CFM)W%OoIeSwT8Q(g!E!hhv=7K2sUfKnT&&O~5u8 z*aW%iNI00jfLm559pc2ql*wJM8rZ`e9 zDG%_~_G@J&5=sc~9VBTv=p;1zkCS)63A2H!?bV*0A70lxt5HxeIH|SbjX`_!pQvne zupkQwUkIOtcp!WOH&P;D%~!qO{>QZ+4lpDz5_>ruvKvDSLW;w|am4?NpK7)BuJj_y z<~2bI!X)*e1&$DbgLJ``hj+zv%-6aajOA)Bj)vl_7;r$4{U-%|$V3M?il;_vAhtCm zVw2Dbn-(W^K0lmHP6u~x-$s}DIlW+ENzd1N0vMj4q#X=e>6}@?fC3H!;LZ3%a8VQg zFtTWlyUIp5(uBio1O-eMy(5GM^$w?RCWr5O<8x?HCpd-9yM9l2l%=;18yvFZygJ9- zUV_{R+2=N#pw!Xm7=ptPde`u1I%dMV(b*Zu_Zkx2yz9-*kC|@^)zksl8|{zeSUfVU zNyaY;BbuG{rug4${ukkPxH9~d2A%oYe637tV8~oJ-&f0NSal_FYW)Uw6FX z0?2{{NCr%`&eV_KeT5ZtJt2*~lbzsrHoRMAcxdj?GM|r4v$onf5wHazwRy$70EbvX z61vIwW=JEDYPq~mTfeKqOMUt91m14YIs_acVZj(nilhZlqp{q;CJ>-TUNIY9I%RAP z75IG%`Gz!GE+64WH#80W#9#|Dwh7C^V0A50lVl06SrWz-M8AS>XB#CGLP#x@&Rr}` z_0Gm*gkVhlr%|vmT$gDK!HPnZusd>(*lz4kK+%f-AZ)wA9&8oONq_=XC7 z5$+n61iCdC;k;GeXz6sS9kkgeMLLW^+-}Iww~PjZe5h6o8T{^>$vYgg zCs&cRV_hqSjo6&D5cqg@RY1&Q&ozZ~FpYSBz8y>}>%W6}1V%7vTKkG6t|2@)JY7Tl ztnK+RkFdu+3_58WGAmXGC4G%sDbMD8)b~0BAgY@?1VC(z(o`iAJQbnE*JgDR&?H4M z!cQ?YI(lOB59AU|uyd}~Ku{x}uZ&ePZRsjE<{|1UTJ~x-22Sr&pH_j2+Z;f$bT7pc z7^aLdhGX!dpr`gm?gKdy&qNhC(2Va7kfZl_a`+SiFXkX|A-VuYi~IxTZqkIc@Mgc- zLId1;+JVdYKJ@UKN5)UVde&%}KAxP}`sdxpDN`~}&fx1{U}molhLJ0mY42o?=CtWK zE*+}3U2V7@;S^4PSHQfeu4(2tP=8%fCRrwttdvHGQlAs>h?0Y_k8Y-0A{NlsNz3{o zfjl$KXTpVco6y=tRJK8<$>f^^F7ok|>;Z6cfc}}Y;PhiVNmTN5H;{xs%udcvokk|6 zNnk&V>SuljK?$C8vb#u=e0xgMOq>@;SNer*WWx&0r_?$fo>T8~$Fhu!DBZ0WUGs#r zeSX^rOI-ax=2j7QOnQJneJ*&u%~c|?9YB4w;Kzn7vE9g8l&8p4KvMEWck7D2)spkF z^WZ&e@BWb|68R_tfKTr9g8<+G*>ef=Y@jm_WC9BzGG{=nG-C&mY!;>aYrT|W~qJ(qY>6wPAaZfB1Hoafj|jpbUpTWF zlP{|sZSRKp7i`)QkL~)biAA#1Q~^0nJ;4jW=5%tkcei(2;O{l(+mI(D7N;yL#JpV> zuzi~;Y6<;s7qge$nP3QY#1z^nlQ6sq%j}4x%Uw_wM*e0*nC%Gt#bP~FeDGEi-iOB9 zrG;2CA}j~dAgXA^&_^W7D%p*VVWdtMe;H05bSg#3rPi?n{}TfLkF$y^5x(2~AZ6BM z+N`9@l~=Q@2?@%E6EqehLck$XTOuEX57TC3Sq!UxZM!mX;apW-Ii5d9LN_YEka;jX 
zMcZOj*o$1W+mz>ws%lH31RH%66;O{*8H3R>+kK8+eUZfnAGm&${;jl1ju_822#?MV zg}^g?TC(fzm7P0jlT)`+(^LA_7_u)M(FSbE99eW{mcbPZ1W_Q7QWvJOv3 z^9c+r5ZGK zSLtJu#<)Wxf46Qdv&7|^6;?*p?j)KSs_6ad--M0GJEw%KhDsPB>RYoMuUi2nut+>0 zJihz(-e70^)18~YeZPv+bvxtFaCEo$D}hGdF@_*@Cv({j{+jI0-f!P|d2Mar+P}a2 z9B2pcUVM}Uakr7@rdjs4dD-8->?r$1UiOPn797>~s*h11=JJi*l4C}~1_Y;S*ECWl ze+WOTeO)O^Vnn+GxM(SfLTG$P0H1vqjJfu~7I_m`N^?A!S7M5{cu}8TaBb6k#ebz4 zIWx_(Ee@N9@ysF{Vtwa{jh^MW{@S-mJt54t2w7DnO4LV_YoYTvCYQs3$^I!Cuh`qQ zua0{6^$1~HvvgdYJ(`$Pv+}1SzO0cy%T3aeeuT?oDM-|Awuz%VlUc|<@(m0Eh6MTr zQw@{=G6M_4T#c13p_(po=@EvP<+ZFW7>oq=riJAj8l*&WcIUSCft8md`aVd0(E-6t zjkdsurTCrJ;&<}m?%mPysm1+TfAi7S^E+$573QBc3zLO@`@G{DLM6#t(&2=9x07!+ zGdXeXDAs@?ho>j00QePT@Z=Uyc0mI+AoQ0bWUD}Q!p+`|TPA+4q1+9C1Q|#(Yb(!# zz%{(uV?#K5F1y&vY@Ht+(en($a$an%9{^!%O_07;Zhs%+nT%~OUW2@Q%DZQfR#}-^ z5S%C*!EWR8FeKt4B1w}ZV%|hcRx8d}KPI0BoCt_Vqa!p$hJL-)9$9k|P12f;V-9+D zeD-?K-*|L?Gs(Tt`(z$3_wmaOCG9+1xA?|rkCVNV!#NYp>&ist+T*+CaMk5S9G2ae ztg*pnyEc;$LWZ+tfSu8ehZ7{iL#k~}cx+8tU`4$Ziwl9w2Apx$#eCC>J?b_~Pzh5= zQ2~$5uF+IL>Oe3;e1Byntwo@76+%S+ScKErhhJ$j`^yG`y-B|37z4zY<2fMARA1}s z55B$s^y#A~yHB?`uPCxLz@G|lBDnp?2=eDZLEzqvocfUU-f@-N#K3| z`)ZO3#jPA$V`#LB2Vt#EOt~zxArtZ3@ydBifgwSjL~byw;p!!C;bKFs=Jd_{08}K& zV4f64M6~BMPu{`}#=Gr2G=RhFVo$y(97sfFd$QpWDT`s?Rnk44yxT--NhVG@mOK(O zOplQ~q3n)DGN4!y8v{QF=-8)nP%Klv3tc6{Wx3=r6!AImyZ`pBT*457(}T!hD)Lh{ zT_p#VHZ~Mz2!Ki(89Z9sco^}Z(ndDX);6|^^{q72glubTPZW(Sjf5p^t-ERXTdAC` z$rbF^hap2=bZCIo22q|QE!kKw8bLUq13G;faUgk{ts8eB{*{ZLiGAz}n<#d{huAcp zk|vWa#(Kn79ja}M*a|u}JbL8={=dg?B@(fdJpi(xQt(4 zTxcg%8h8#^R(e*@(p>NX8YK~+P&FXMn^@kda0G0Gts~@fq+kTLUQQxIsb#bbCBkLP zSa9ka2GF=mV2*(>VEL#xBm66epfbC$;Y6P|)Z}L)o7Ox;VK42ZR@yBiiaKjjK!QY4 zM)I`l4A!OS4YwFtx_`nDU{@YgM_5A4y8qFNX;8H8F{o@EErwp?)-X9t*T|JELIf@mtlA+WtooK9^>f1QJXh@6Unon^Orv+3M!8*kqpN&24 zd{l&=I^X=EvE*i_I5}d#@a|g~q0{~Qj5Nh4na5yew@H;X#u!dAP$Wf8u`KqvDa!Wn zu6y2%wXnDZtJt%2nK#L_&Jhwmcyo9APy?0%JyjjSxC6YxOCMbe*hZph(h*OCMpldT zGlJbak@hkYYhR){r34s3Y=uJpj|>xZC5S8D%eMcrRylVv%uSuDz%Y~!jo!Q#<0KTi 
z0?B-UYHAP5R4V~XeIA7-{TXaySZ^6lmT+_%U?K$~7%kb2c(VP5U!~M9ace>E#FInC zJNy7CNhPENjHxsZC#Pm-N#yJRD8X}JzK9MTS8=R_R=_0l+KE&h3SnVm4PZbiFA2V( zOx}`n%2GChko+(Rak2MXcx}<{2LC;k?e_Xg=?KQ!|{) z5hGZYqGbxu^`y&bP^w#Uhj-R^$CgO`y@sIGR(X6jzD48I8ZMYHc>%j9wA0GN7q;iZ zhi#N8C}n7sqYFa8)2)SVVQkh+92wi#eDvhe{VfqBmubVdUKods!R3;LZP*}CACkOh z^@ier=gPUgRJ~~RYPf*PwEU-AcyZyk452ybVf-L0w5-0 znYR5iGaE~`2|C3OVcI`p@}aW>1aL{>Z9n>hnsG#uKF4~3-r;Sjom#SG7^}e{Lmkhu z_Hw;ekhXH8syLR`6MsENkE|HIF;cVh*c5Q{^yGMKm#hXE+xm0T(G9G>(CraM&uJ4- zh?ZC3cP=+B;kP6T-&K!B^f-t}+yZYXL|alDx1v2dL~`%rvl};ic1zmRXOA{FpKVec zAV2o(rP8hFGnZPa3%MhB_E20BC72Z&0%;sDd^J0HGdae^X`WOZ)8UHnoMP|)eKP-l zt8_&O;4mBDrwh{n+Lh7FhbKQA%h|WtPV=5-?zs4`X&NvG-~TYvz=kPwL6z0jF+e$m z3W^Pt&SsT1?GkHCjgLg})@jfkFQ7MC0~piNDr3PIP&EpzXl{5n>Ih8PpNXzUoE!R) z83*jMiy+Z1U=Rr~6ig3hc8*?6B^OjHBZsKLzpXbeMhOLf2{#+S*0#@RSLA}sPb{4Vk)J&v{5NQ6*O^B+A@nAnGGK@c>+^BT|k&`V37&q z(MnfflS=)8l67s-g-rofDn>jzdk+wV-8)0Rrv*p^3CpFUkBdu%*w(efaeG3(!)=6D zp`3b(0^8cIm0eInbx|qORa>;6w?LL|+3G-yd_Jp!8=lmn$pVp`;74ZbgtMN~gQ)o7 zjfrXq%3I~JtS@eV5k;|BPkA-t10kIwWx(Jg_Tqcba5R_ZSw53W6CqmTZi4g}jjCuzJ0^pP zBd^)4f^Z#J5j%)=E^3={;Z1S>GV&WJYo_iRZS}nIk9k)(uN@0gn?yRxMPxC@dMFiy zHe5=N41!D)Nby|%l|}((F;l6h<0`z+FEFjJZX_2!miIoAulc`@YS_@n>ijq4Bc35D zBCXD5noWGj%?m!ZvQ-#K3?2v%{jB2i!NBGYv#C!cEAV$l=#Eh-2-Q0A;HaL4Gckja}Tbz=gr z-Q)8kSc&%r5{jQz)ms{j)Q8rjJ696*t$^SB^i9Qm$uy;^0THEOA+17nz%L1uaV<77 zK+yjYW#x4I!b4AW&j?dM;2uN=dqcozpPqurivpRPP3$~!&I2YC84Cwu8e?3&P;}70 zpGLwV;7z>mH>~{|ILd&{jst{_m<6WE^;JdAq_rD6$2-TatXAsm z9N&;zpLX39=97{&a>Hbrz|{*VIb3cVZ75K4G%qr{1|+w_>79H<(LfMo1Du>ma3iIJ z^m5^7XL30eE2zkbxg}qL$3NhzbBM{w%hCB60$1{PS-?wD_Tc1bayr_Vkd81IwPc6j zOp=_Wibcz%BdSqC+!?VYkyvxTqsp31wr%Wa#eQ;SYOR3+dTXrj+)cMYQMXBz(_2kM zSWCg3j3<4~J(E6njBGPmyo%+y)=G7ZPb@;nsGO-N$@uJzY-bOOYlWA$)`XWe<1?y` z<&TD0zIcvl$!Po)hvUD!&60>?{X&b)0hP6@Bf76>?E^9_PsU?!IGnHrTA|~uH3w7F zt&Sy)E`yM!2uPV7xM@rzdCL(cj~y^d+ii}l(nfW3-`b`gtoZg$#^tX#DF59tqR>`z zIA!hXShh6e+gHtqM>3vBPCN{{V>I4cb1>En>t2Yi?>sy|LYAF25s6-#zyYd4lzk0! 
zy|_!(K11rod8$dcqH0lbX5wnLETmBdEs+g$?Bpbc06fN|Iz(q;DfV2X@YnjUrpGww zFZpurB4ik+32{b&yqtfL2Vpki56c7~*&nx%%^TOhF{(IwKb-3a6vtM=Z4;w0tHRDd zeQk*&Um$O!)JtDVHc9D2&LUD@WA+|tUET~&@d{0W9YLGIxkaFGV2XCIkwUv3Ge1?n zEjsfC_{Et-6p@!%Q4{RjN}H9SS)p;aTiR1nOO^NZYQpvA^6k;6h>W)Ot*TC^qoy4eUpby*Xb@qP@oilq1hn3 z0LTyhr;X6dgbSi;f-A|R$z=R#0tkO59G{QQarxV2CjL$(Ogb1Y+@}+js?9LDPdE(YLy5?=`LOpqGg$soMTW1sSQZ3HzpX!GYpvRgEQPFmdjR` zR$AbCF##(z6RGTCI=!txuq6a7I$9zq+IR@YWHvLc)~-^w+`FRa`sM4g@(ppQVnWi^ z!diP<;f1faZ5l1k74Tu$2d~Ia{u>LXiYXi62mXZ)T{+u&qqJ|((P!78toLZ)@S}pT zh9K%dmeWI(eGZ{xtfb<7Ar4aNzz{0Bsg4ZWy_6tP<&glqQprJ3ulDEBjQNIZ2bm*) zE-Ix5B+6uPCbZr0B)8ZI_@@}gcUmRIAc~?3&j@I>ge+*SL}SqxTvu-pfZ)FphWxDe z%doc4y1!yzDK+)M3(X65N4ZBProN7OH5jWA5@@n5r75AJH-l{#3?U7F&UpW225 zUM4L7W0JfbAu?vk)}k^3(&A&YcduC$g4dnE+q=S6u`n;wCO( z=87huxMbHS2px8}MyaT2#;axQ?MBXabY#HB3{~xGi*{&g!;bw`=anL~Vf6iYF??!W zY=R-N)V)=|S!k^UO-dHCp2-X8J_C-6)5EYAHw(&ZLDpD;Htf8oaBoUB0`3^CRV7kH z7&j9rx?F6Xe2^AVtW9{jEZHZ^L{;@)9KSh6kWw&GL%K(dT91llPYl+RF%rY%hG)ZW z#IIQh8~3^=lk>~T3NjX*?uNNkg0flr+N_ukmc6{A{0@f~(LHtC?A$N*K zwx{L3J5e8t^;gZ2Szyut8*j?L!U2KHg_&ue8{Pysn2qCjQZ-$Hmh~J|`!LF^f?&Lg zg;}d=SwVz*mRZP0=U;8ZNxYIw6uIqYqc}D%*VYToIf{0Mm@XE2N3p<%;^j^sk%qWl z0CHpxVe-6&19|M>{1Fn2NK{^zL8!;kYv(}e{pni-C@RH`a&|UGQCZnGbz)RZ_~y1) zt)RP7g#F6nUs1-xUn9y148JjpS2>V2Af~o2w{ck(wGynvfWG}Cs0|FMvmM3yE#p6%kA`|^6)QnIS^)5#uedLW3Pkw5L+;Ii)ASnsW^ zt=;VH+(3W!^#10~F%n_O`rf#CjiNycKKaMLu-J*iPGe-B`PJqifB$ zf?>AFs;ar`0t;*4s4eS5j4xkQpSc02SRj_q7SxMZmy@OAVzz)dOc4Z$HYKK#MY1*|X8ZS_{`dRU&)y?IyubD%!^F$hbTb5^ZGy|r1Xg7Rk-VY^C(4{o|JB5 zq;^G``j0A&KGsX&#N!kjE^39wJ;k6?3bj4Uy2P8(Xsn}Dy92w+ltI}x?A3pC&aMq=7!Cy*wQunB_VAxQM(82dHHZ!f*AMIkh* z{)v2Op$4bvGZe9L%Ys@WLVGh2p-b3jgu!>XA3<_F$_vRz10@0)Jd9BWab5B{VM)sV zgWn(kw(LA9(dhZx5KiDjO-i@+2`OAclgL-A%1u(B(d8dK`XoR(Dc#zo`%o&F?ZGh7 zBJOjEl~VQ}-hv^E<`Q8ZKAEvqBy+p@#v64PPTSn$_r}yg;TXLY`-^46#2d^*((0Lt zmt(7iUj#V1Yp6{5<+Y>_HKmp2qC(&>o0?A94V>^;y&dw>ka(ZC!^F7`^2sAD=@+Xt z!PgX7sB$9N3`NYqysT`gwzd{0R5eCS=F_AUEj5L1sFt9zm5>kRe)@tft8t}2#9Z^K 
zrUh^IhF=I%!2H9d9z2f-&kCbxC*$YBrM({)>kLc{Gm4fQAe%S#E@D!X$^uaKrVh2r zz^Z+=jgSakvz3GeoB8s+@_)Z17nh{pb z#oy`58E1ildK8Z{I1sD-aXCMVDMHi|E@<|thQ}ZQJH;*bAaUw$HP4lqrzd6~T{D}0F3|N9% zwS9m1U!&pQZx8?dW&F9jHhlT%t@vDiLPf3=1&?sicHYaDBzO(ochVxyh@EFhO1fVK3zA=CC?zb=I7tTR%k_N32hdz zk1qVK{zordMO%&|l?R&*(R31HNSfc1kpyuo143_Y*Wi%az48T2#3P`O3i-jQ*j2Ou zt*wMNmyP2yoMKA4Z`K-N&is(@;Ybho27;sUm7~zTHasZBiga%PB%x$FGKj=)r zthNemf8;YYXK19Xj17`Z__%j)c6NG;|C#q*Y(80b=INw3@yfIn6>QEr+13oG zD{!;-`^Qh7Kib^=?*5a<50P$e_tBs3KYk*@wt;E^1f-$P>04L%+N;R^Vo1y92U1LKY#6g#YrwZr=b}Cn;zjF3KONlkPsXB%d1^Kn+OudbzM7A^wyU3R) z0l|j3%zY%+#Sux<}0v3?+W(YW=OgIn1 zmWo<)KSvF*t9NpI_^wxs1sqv9VJZzK6G|{LA@fTN4cwompi=fr$201=RvtTSkq|MO)WShYJ0ndi$+~nIB*eb-AQV?4y4MAST1^p#BNqgXT0;m5>q7`)d5pck zdO=`_RKxw*DUIlvnNDfMj@1ZehV7Yehr)53ZimE{5cWeNR-~Wn*yU}vVSO8FY0I#M z?Du{+c-K2adOIH%(%6|-dNOW-s`pB@EJZPaY7L@WW+H2zHR%9uv#?Sf3ZoXyQxHGP z)QTgPaHq>XAHW-lbgZA%dl2wcxS99-&$qXra7{ip#* z(|pi66}lX%f_0@&7qQ`7>goU6{M?9@SZp%iJ1UyXr7AGCxN-#RZ8_X;l*7HNVg>Y4 zeC<`@m;&qrU9`+3a%Jq&Yus+|;upJd4{= z&7H_7$?zkU0uy%>kW1l$jLtCA_QX%tf+sDL#_d)T@uyBNvWU#K@{j38`-Pw?78v1l zOriv5%+V`E>K{(uOnRTM-M$iFllhbkNO>bEeGCz9ImqSaL9GH2N%x(j8(t{s6N(U` zE9gbhg6c-{qa*32WmRTVMolZLEuk)*Qa}^;H5%!sSk!Qc!IAyv({6K@v)SEDVm9`P;3ehtVNFy?T zp@O+6i=8bUuk}~npHqrWp$x`p<1;=4fpm@}FD4g&c5vF;dLAqD7 zdYpS*kU;dF^iK(Hf-FeDK^w25l&0;B#i~px4u|xl0VR7u7nx#B+ZuK)06>&2UX5cV z

  • 2B;{J*k}XCOZEq$Su<;me-?qS5NQujC5vP(t)PR=AJFO_xLxeMAEF`0D8-oNQ z_Q5vPw7(D^vUxiT27=EK@P&E2|ESBa&W-?=uC2S|>Z^gU6*?DO{Mu2AHo*Tfi+|lO zFigeX0Kh0Xf6%()jLF!wQl_*V>jB;#3htHbFB#h_wARO5bOg_lSB6l2io=ZLd2v12?auUg>O^$9`#OU z5Dw2W8l}xHjg-Of=2JJ8m&=MIQOa$E+AiROjhW>vCQ_bsZkeM@_@FcswXBls)pVwS zG|`_}%u7VFDB#RBNrp!Sg(L^)TLR3)Y32jI2`V?)2vrX9|WqBr<4P(ng zOLDA$o0P3YzMuWRZ~a2y9GoU4Ym(VKSGyUraezXhs!%8t z3I*sJ&O|%-Av)Yr%kd%NyUniiqz-U#>C)FVKybh9f?&kfKfo22bxY%*R+F8jvC{Lg zcFMB=EVk8`lQ}c!tYF*h%(uriC`gPA#iESJwX3YA_q^QQx+5k&u=GZFntlZS9K;YsP8W3hpcm@=hk*j*&J5Z$h25C)&XT>iAHDACa-U6eRDd<7;D^_l}`~iCtr#qvpypy!}$LOr}m5#xOoOSRifN!GTyB*)rB)cSGwm zj2N5I>DlY`@%^1|cXyuRfPZgi=h?yc5BHuv+Ia$P!Of2Q*S{EDQ9JLDN+YB8F^->? z->cKw!^@R&s=lp3r^hi~9FjMO7$Vgh2w7+C2Kw$b5nh3Da+?|Lov*?!>jApQ_xU&U zu31v3z_~m?1*7Y@=vsb?8+%o8c->4X^NF3=LP9U?Pw||zPo&I_x1p3n4CM~buk6K% zj-%9{yXVa^o?XH9>xW%@pS|2<16$KgHV2pzc%e2++e30@&w_lUH@qNEjuoED$2+ zla%*3TLX!Eo(Y8_2!cE1*Q8D^@&w8W)%?ASS0A5TJ0nnaoX_!nF0{Pk!l(@K+it!r zo4UGmZEc+~PtfV?Kc<1GPin|b_n3edfKEKz>zq@JY`NUpI>g<-i|K>u%L$%}jg^CG za_Le7E3*|BKTX{OXv{3kJc6d=@h=&WwzbL|_Ygl6J-ne+j%q}!VocPKRK2-(%j#BA zB_j!3@k%%l8K*CyVHI}-Rn!0x6*YjwZdTyJfORdwv&}aYzFn1fWRg3PbCBhO4YvniGe}@Rc`LbpimLPK4)}kG_|& z0k33SNV%$sA^vq{DcBfm1;hz*-%-)!Sgw}jYcX6`@S5ey@sGd!147b0ch~rBeV&$4 zy2YF^YrilKEODY12!;dz^%mc;FvO5iC4TUn(YU%wVYnFFVxD};uPt=}Q>gu19z)PT zqv+vH{xrfZ%V@*7096;Eu;okDBw-7yy=uV&o+6eQm(UpM!mgzMS!dE8II}RZHZ>Gu zymq&yBIK+;Cxt<1*`0%O+B@bT5Q{4}oYo8CI^*jbF_vWOsfm#2vzSRbli!nZb-!TE ze=BDyw?i;lSh;e>rX_J`wx*TxeH89rT}HnAyY=UjjhDQY^9d$tEnP83=~v>#N4-%0 z*Or<>GnQq9pLopthYC@HAi6n^X$U3 zj~gX67im+-mRR%XkuVPjn` zrh1+YhMA|*iGUJ2zq~OjW`_*tX9=*}fi&u|K$ zTXLL}2B=qqH%(MO;|8ct7dw56V?L7ziuh_{<169QdQ1d}TLZi@9P21dNO;q9FAUe5 z+*&pX9si^L)9bxVk>M3NX8#)))dRYV0Z|w+`>3rG8#+;hqmM!TA3nMB&BN^nBT*?z zT9Kdra%F7$vcb3ObIP!RO&~cuoM#orA9d+9G&6aJ*+SofEi&Ou$UBo1A`^rTJV0~K zF3h0~rdk;N<`Bm>;rj`A4aX^~$QJdfam3b%Z2y#y(1Eh``#`b? 
z#-GlQM;nXLO{6-(Q6hYt@aQhr_s2;x_jyGNr;OES#rQY)b`wj(1$^(g&p934{7*hf z`pZ7D_P(O%#`al;_OCbxf}ILcHGL5o2baDuPr0X zf<+@pX!R9!pS$G4ufXICrcxW3-N5zn&75`!^td6KRC|YXb&Y#!W63qeX=oFQ!f`r1 zJC?U6}>8Y9OR4b58S4&nAYF6aFJQdCRGwJjCWj= zpWo0ZXrl`qBd1d0wQdkTRlBNVHMlk7NT6l1S1LRyD_|tZTnvoQ;YJj)3(JB7ooer+ z34?GSYTZqCuL+H=Z|{)27wb)R%=1U&&PoZUt8bR*5vhZ7%Exd045z9aF0J=HS*oDz z+L_iiov!wyb3TV_i zfJVL(W2UrF+;5Kx2@NT_C_1CKji>UkL$>svQLix~Ohb+&9iVwnP>9hh=9Nz8UQevm zD~#%m4%whl98HJ~>nMG#Tcs|FJI@iOY#mDPTt&r)RWKvC`ovr*1>1zM$#J4UoyJa*F>5X^L|abcoyetF`+8B; zP0nULIMU0w`osDfL1-tEGGBOD&?zS*=(7=&XqzRByFq_9OCuyp||u?R-Q`pffA zVSLaxRtF|T=w*+`Z7H%*O{+77jz9(0!^AMxI-SEI=4G-AtNx1!mW0#~Gs;6a*P>)` z9@G>UlZbCi$TIFDmIE1E0X#ZWr@RMWOB0oN7gukrqN!RaNzj1465IXp{BUymVEQ^! zmy&g5o7^B?AMP(MUvE+cXCkAtTw z#tTEUGoMuJ*2Jha)-#X_BkBM(Oqp0;j>9u|iogO5Q@M*KqWf%|^J1O&?b~y?DOO z;5M2nric^8EI;KjN_swv$$FV7IpCQ4I5V^Q?(A&#Bdl~Rm$e#mu@e2%R9f9-8|z5# zuw4u@s+W}usjuNlH|y%zI)Ja&*G3H}8b5y$2B}&u7)ANjm^?P-2`Uu*a`d3eL-!U4l9%KlQ~>Whd&~0bP5;$<=N5Z=({ty!F4f*)|ed0XJ`NKGdD-!7vt8G}6LMe=U3F@XuStLsrOE&^Xoa6=L!bRwih zWHST+SgOq6{W{vTDRO|n5nSM1%O`Rvm6;d*o}7<(AOZwKE>}usC5ArqCzJHjLlsZs zbwpa~MWJA>ujUm?%4AMVC_`0RnKd9ug-dNOmFo6GstXud;K?nsKPci=oQKp>8p)ql z5B9@Q5Js++BQ)5;4VH{Aift6yjs1{Ot6o+7f-7=lP{eQ7L?e_tZ{yYTD+y2EVPn}}uOnbkpT zXl2tQ+lB0ntM*?)l0r|Nw?8dr%`PMK+8ZyX&+b#F`wrhQF5v;^f zSs+ms^x}F$b~>zzi(O|Vp-FeM`=Am|dIhdrD>C~J*9*uXPNT_Tao=J*%LkS@2`N&q% zT)S11P{7~=CmF=MLM%Iq%3C)exNU^>7?IFqie4_~3uN+C+T0$6*sZTkK&The^&uWw z!wb`Jtf6DLj}J)~_!-m`k8zolR`w@wt!%P9+gZr@&Cugmta+8N>&(2eVt>tPmf}@~7Davkj!t5{*N|9m`?zcuQz+ zF&z;#;pZQ6-YkcFXLoT>r~AV%esg$%OR`99F{veNJ z#i?L}+Po66tV-p2;A0^%%tX597`1D0y`r&>ILgnfwTm)xm-VSB%xqGxK>Q zk$HP^%>Vp?qsqkx8235`ehVX)D2AX@IR#ryfQZ$&-rz+``J4^P!fE4w54`Y!aR-c0-(rUbKc{Q-47FQ6F3i3)qveH$Z_B8F_AXu|{$7NWP9h0d)xuxCO2t83lIML1-J=DCtHq6G{~(d!vfQmw89<-pM7 zD{8@xyB==wY*ImuRBjM?+aE+KH5;|67mlK4w<6|LQF80$&rovt$Z-$~IA59;s&zX< zwRnMX6)f3ItOTQSc6Gh)&SrnPoYqRTfmQbrq(S5k4ld?THF?-zotOrt;$L+U8d|Y- z5tIZU#jC;@m-z|b6T#J4c%7L)AYG5hFIzMgGb0^*Cl|9rgroE>yTO)o#}8up<+dID 
z5L$-dNlj=!o1cjvVr+FJL$2P8>y*>cgRP`YTKO~t$t(3hl@Ejk6Ol7}B`B-YgB0Zi zw9v`F5|GvD0g7@3BxDMo*U$i+RHJq96i@$3T+%Dtj=e<-JrfV#T;m|4^$p4?h=730 zr_Z*(di40wvwu6_UymPxS)WP*^Xl8)eU$%wzx=C*&$jX9ZllTe7hu@>s zXK3ZBETqTWa(YL{#fE-4UdDjbFoJArtBW;=zjwd+eeXWLy_jB1nqP@OzE4iiPudk^ z6sQ6EH348GN=*pD#qIwXNY+Fl=65{QBaN}$(zY{td^Ya-*0H+K^yYv{H>c z(6%7F0s`A&G36lw`zrTd9reJC!a+}1*K4bYivINlT>Mw`^~v^C;0|1h4cA)n>ju2N zSnA&co{>=m++te=4@^v+(8j!Q2Lj~Y4R+EZu?;r-P#X-ICOjI{T(cGpHe*aCk&R|q zT!Cb_`MX&SKuTN=I+6mKEE#s$jko<+1Swq4Gy^VkeHknwQ^wqLK4A6Ej5i7ery;b=>+y}s9bEL;AeCHHc$K}8cB+&`sOV)IQ6B=z3tvbuiv=)@C)sUkovqe9xW)74%&-GQQHDptr6vpbL zncvA?^!z4RNh(v&L}Eow6|Z6c4V!-`3>0c~#i#p`OJM(7#7@u3@9Q-(Qs!o&9&f0X zYM=F%1$gBiTQ?_O&5!sn`C^aB6(Mfp=X|;7OWdVFfHQ51w=@mjzIeNL|E(ASZzpfz z-a9;bdw%-%{1$IW;pr`0j)H&d)|a>?B|l*d-1-v9J?x(ek@XoG8(!|^^r;^+#w{b6 zd#@uF39Dd!WBKpmnxS3=9`l63YSU<=&eZ!x61wiM09p-uQIWZS?D0QdTk2Kf6M@Br zF1C%%2I3-KJ7hT@Z>p)aR?(1oB6&qQ+Y*i*FjbeIY7>tzk+SE%tx^w_KE+LLM|WED zuGW;1n&HNo?;mAwFJO%2!aN*l z%01yEu4;DYeyC}%7$(sfUUH*BN$ZHn3J;4WH`gGk{82ecIM?Ii-f2XfE5MtLO$$#K z^jyIQmm9Y9tKI1cyl{R2g30BD+*EyY$?JAYi1oWq6XVMmVj=k6PZ>v>Bj|ekuTQ6y zz-1=>Uah%n3?zV881zsU4%S*HI2a-ong+~zi69Ca0rxFe1T0?rULLeIxei;0hThPQC(8^zKwJz#-HU%GD zVsv$RPCDo+*0NV}eh0qtYHe48!aD0!{GIY&1+*s3YpyB(dH42jf4Q~3zscnV1@(Vd zD_|$SlW)4z!?-R1anvX=-cOi=!p}6Ks{CZo8S*>bYSe}9X3Uz`{p}Ac>i({- z`@5)%%_gEIWfg`;Koj_q=qCfi~OnB;xh24A3@bE-pwGzj-PcW{`R>c#gzf~cdCP4oOMVjbOc@}(=wlxaRE_3PcwhCed=I|xp@B_u z@$p+^LDOW`W8mBqMb%m3O~LmsmvH8UV#Q`m@4{#OD8%Rj#x7`d6B~q7adUhVc@&X6 zf5PaP{BXeZXS!c)_#|VzhEmb&&qFvV#&QOm`cp2wJ7P;Y)gQ#~HuXJjZsR46i6=*( z2V?YRe(CQj0_~TxpIHk>_9ygDZm4vm4gPm$5Mgngea{zWO4Rk(lFBxHFZspw)uYlTL~kNm^#hIUMsj-alK z4@+_E%G=-lS(b!PxEw(FVMTcR{O6A`$G9$BQLy%Rk~K(ibhTB_PN)r#o?c=c`eJpx z`%k#bF!~%mUHtbGWY59sZd9d?F~#N@aBU{40vw1-tK*M3?4Prqlv-RxepW8qbgf#k zfiJ#nYkkta(bXNg&Dk_prh3K5*e+jXpM`6+KO&QZ3N7I3AK$C-hTrLw0l(<`TMFxO zIRw;DKQkCp+B2FN{G?qm!UfE6v>XFt_6z@`@o~JgQabwf+1hZhrr*^ zrl3RujCX;O5TN3PvupVrc!%>>M|6wvoD8xd?GowqE2z}%vPZ^FKd~J_f2#1C^RPea 
z0!Ntwy#arDAq**&>e%G0m7>|2SWZ?Y+)Affe2C1JO&m7l>|3cT3|Z!YPNwt$DCK1l zHWt@XObRTX+5|;HgziL#SxiOf*{8mI8-ML@{>({mtLp>EV_$_(-EVFMY=v5qIR=(F zLUoXiPQrC`1CT~0=@f{xC2P;0klpL9sESg!^Wxv3y$2>ysix@w%kR}t4qi)95_c1eroJotp&msHtFCm^@@ka;FVE+K6>4)9cHCom zMBa_aQzA;b`SeuJpAkM+PaO;)re*0;0J*O$c95NknKAHPesj)!@OdAs6jU^BfNSJ+y!MQtivqxl_fqdF->ZMswo@*T+~>)+ueE_gRd!{bY80V#`X9 z0$*Y5!>s`)HZ~N#*n^&4R<;w^^L@g(&oBPA5aN7)dUu%7&u@S5ZldUSF+~~!Ki{mvY)6wYB*tOS6{mhsud zyt|S7d)oshDA(SgqAjGUw>SL<6enN9KU}pN{rvz6U2D=p38-mJX77hFw?4l21&z@% z41`Dw)cl+k(`?dU3RdlAKIT=L(ou#P&G~0Z&LFlFzWkN8 z>=+KAYspz@iM}>mQyB2nNm-T)vENrAv734b(Ce{h?x;t1S0vV5v)}$8DoSCzgOn;% z!HgfG%KK5LfWCs@X%De_?45BYw5su9Bx$_WdU)L_Q*CT2IVhvLCc)CV%M~;HuQS0v zzxVc|FW%n!;>)+69b*eI4h3W|4-IqEG^oYdaoSzdh?l zmWXWACWoMPp{E^{k>s5hmE8mE7AqxS{CPeF6XSupfuE|hS$E`7($neM9gsY4JBxu~ z7e^`tTwz0%UPGutQ9({{sbnnr+lMOhg)=JN64?)r(rAr$pGN8dL@1Fv|L)H(PmknU zzZk&bZz{ZlnoL%=YD!ZtQ`N-|j-;dqWUce~uNd6d@p}0acN_v-9o?trdhXufn0Pbd z7gSrOe3Vm?%KaJl8k3EBqhx{$B}RQaIYrE$T&#JKzSs$?emKVm`|C8-kyCW6D*6d0 zk_E7wn3}^ZN@FvQ6wtyV@g20CJx}0_`(9_x?qr7Sd@}Xvu)q%?g!}~}$cF=Ymj~Vz z{r*ql5J514f0v-*;u`zZTw}J|vqr^D#sV~ztx!2Q;9=JnTS%sQ>pA~J4IoQ9a#4@g zKEq7VjkZ9;kpR|kl_R-EDjF3GL-3Vi2`AYID=!Q`lQVB8PJ0x$oAIy#HPHG_q*Va2 zlu8O^0sa&E+sY;$ar;in947Dxi*ywX4nSr`Q4-dt3eCG^coPXL_jEjq(yksS= zuk7%f`K#$U4o6C*zTXuCz@>u|X2+yj86<_qgR{;%MKC$Y!l3jpPoFtM+*`q61Js~n3i7-yMbkb z+{dtn@UG(Dj99b<-vb+W<-sqXgF07CLB~8v-u`+H@OEG&?0V*btB6%;zJ9H-IMru8 zW8&?+MltT=HA;hM)wd(NY^uc}(E40rl#Es#FTQTOVvRac6f#1>cO*dtWGVj=Z%jlu$FV=^zr07gjti&hRFgFz=oTaFLOX5A@K~Yh zpnwRjqK@^hv%UY&iuH#6tq)L`khQ}Eh`ub}VUikMg=R5}y(K~ms8X-(>^Ggz+|2+f|m;}T=GFP<*KBAAYQ!#W*sV8iT zn)he_R^Z~$`luL~p|*|*vc@ois@p$FAs>d?gAUSnW74c_hkkcOgK2BiUh*U1pgK)T ziAnmo0-p@GPmgQB0B((p23^h6w`XK#uhvryxP27+kcUPKOx2WX>ds%yv=B|6x%GMtjs=W9A`ayW3`pk=uhl}5n!%JM03@lJ|IoRH8&kV zH&efw+aC~lQQ_LQSty97&%1E#H zu>z|CwU1V>eG{);&(mUP`Hn4NQlgfAI{b!YW$1kb8!(zUM(P`*n(vGgc3wo( z{O=Wb1;jp5-PTQvBE^njNmaY$aBBHnaD?b1)F6B=p^sv?H;;rn1h8nZ97YR^GJOQhx1oRgmZ*I^aP+xbMfdjyeiov z);;gDedW%}_wbS#)#UaokFWLBI)=izd^BPE 
zfZjBOy~UjgQ9$ce{-&ht`DPwogr|Rt&L6#lkr3Xv_n%{!OXJPU!V& z>hEINK;)A|rRa{*7|TSW99Pf1N}U$DN~8*DBZ#TG>(M#p2wYp9!%EUhxe4%VD87af zkph()xy1%z`0I%8JxSQW-wNeLmi-!nZ~+{1OvPo$@EpBM9|80b-)+^7ja^O5%R!H@ z&CW*K&74z8JW=r8hzi>ka!*}$p4PHR<1vM18BWe@n(U>#b6 zU5Z}qHveQ9&?@Y>fnt4%VJ8zUAbyUZp)CzaLyrH9 ziq&c10?sy^uevN2*I)o!RlNlsq-ua)O}JKr0oW_Ovw<{_$Q4(@YWwc`prqbPNUJ<< zeR!#M3o7WYLn!r%b9MzeSGQ~R1Y*$#cMSnA)mFf{nw|u+ikkdhM!jOJhIy*H019i1eGIF zaOF6o)&P`MehD>jM2(he18}bJ9%RzjRR`d~@1^MpC17&p5bLUg@WO+8g`ibl+yQ7S zPFM3tOSJ(wC6aUH2urmAI90IcN>EkJ0ca&8bS0=#Z2-;+FI=WlsWt!y!l$vPE76Hs z15loNbm~edQELFo)n2DOv|jBN)OISv5`j}vPUH6Gx_}?eUoy&D_nW!HFzYwRx5V7~ zYdE~PTPz#va6JvUy}G`YI*^aBz>w^@kps}mnWM2JRS^t+^S@QaDfvTHuA7>Iv?D?1 zP*+1|$52T#p*Y+g4Gcb~EudS0PnuP^C{PH03375HIPPN37?mzg){LuyJ9LB;D92DE zrNdGRv9?$xMXXav3rnR!E>x^;xEPc~IMzr%qoW4Cy)jU}djt*3uc3K$G(*jviQ(O7 zrsErGR`}d#T1YX}T=ya!AgvguE=A}axz5!ee~xNQVH*8rDGyP-m`=||uVzc&_i}c8 z$(Kd>5Y-8u2|Szg4oaOU<9POZdba7YPMQg!6ZG6|(;RPG`&0s$GoL2G$i>PJ>Zedk zh0d;vT7sdpeyTYQx~jsHyI6U)msD=-Lvr{TBxIQ>3i2wP8z?s+zPm!*ueF719d-A2 z(m-VIe%&lIY^^8@fos>dvRw(`J+9JmcAAvHB^8uPYt5l~U%GJZ;@|qf0OAmK!7hrE zQ5Uk@7yZ8FHz$_dK1^jB_q;Pd*ruz0#{>^kHN5E6tXetUr9PxrNsb*#}SF_hlb-Fe` zpDrdBbGaU1Md6dRqol6TR|bQ(#}KWZGaWZq<@R56L6*l4$kX_XFVsaw5gC7e@2ct_ zS#^^p@0pV6f5+l&xbRyHr?FJqH=Zsc0^A<+!%!Svj0ML%NKVM>;hqZ<@BT@;Ovz!< z|NO!}XE}GMfFpxv)3u2b=|z?wL+b!XeFJz&?UpqsO2S#b{=MEMEv&CK5{{oAGNJm; zIS94PIna2%BN>`?k~rqHu*`Y9cyeW1ueotAwa4|$W$BbMpaiHpLO-+RllfVy3HA@x z+*M!qB2^E-eJljHHyP-p)e7fumXf!^4cUN!Y#&}?{YPogU^;(#eM9*Mp(qWyLRnC; zFA&hGCRhdHi2J3WSkn`Ugo?-|u8UTev**Ikgm)|sHXRH#|;{dVn%f zNVQ$z zdDY0Le46T9fu?*jXF0TslZ7DePu2@?&w6$9<65?k$cwWkznIXm0?CVb*)&v3vUSKv zN_J#FiAou&@H_F<_+Kl-XsI*BI2Ii5GWdUnmBeXwDp`8xK)oGMRjTO(a!#(o!Fstd_dn_%ue_D+V%3y@?!c*zdn8Q z>gD|G%|49`ypDkgs;?R0v9SO6mPO4tc?Ub&XR|qmVaYPq7>$&EPS2QbnJ*lv5J$bA zPVm$=a~!0~FB4kVFznJGvAWG^!A=l#jVo37GvK4^@nqvaKHd07Zq3WcM{A`!)QR-$ zs+MPSlmTd1YaADf7Me+-bTq!RzmHfCq(Zs_OVK8E?z~_UpR0~D8n}AK7>t6XT<)i$ z`e?R1IFh?!P&OO)h>a{!^6d|=e(ZdMW%v8v|DJg-a1u?WV~7=Y#Hu0rKDvSp`$rkq 
z@YWa%G4HzVAmBW*ZnRRlXxO)SR2Ofdk|eUN;~$_i6n(yi_8fVayR^j?MY04UB{%pj z05q#dXRJ)v0iga0w0#wRBJrNIOKMrFVi+)Np-iwa`OdIvAcMbh{k7-h36jET0Nr4( z(G(`J=C2sMukbR4^UKv^$QWI3IEVA|H(xL2uasM8Z+=hcf@FJ{czlTQ7=<_-)m9t# zkck6+I81TmJ)JM!Jp36NWVsIM=S|oqtFV$fT{wW8Yd5A6rmqW7@lC`G>8X9fc{UF+ zm}eJc5Fo#TfqgQLt|`42^UK4N=J>nASN)cy8Jx5sTu%-b(^vD?AU65(H!)`~&radB z9T@#}7FhsrSje;kIRiVXljD90RWfd!@)K-8W;lJbuFr#AoWhn~ zPBG7qmfY%49Jq=Cf=M#;iY~0{w>Iwa#q9O*5mf@K)pK!_*)#Md(+7&Z=y@6D@d?*F~69ca%cscJg=7ck_m_oRT!fg zVm+NWTu-oii&Lh6s|L56FD|}%vweu4jE7hN}~H?PzS=JG@-*g1mK8#t&{4_#45B{@__cus|%6(~440NScpsI9G~n z4V&n=3HCQMRhB^KfFsS!en<5O7@z(9Kl}|1>SVG!(Ls$BHyQ+eJtr*lwIFHt2+q&3 zq&`of)oq>ElC7;nSS*;i5a-!hrXmVNs9ZFzVY%-=n4Tj}9yxs8>`j;RQ|!N?HY#J@ z&Qtcg2QjFJe`Uk7JIBjXZ^G)QecsuubetkxfN=BO9`y_1BrL8**yW0tCMrRz5oVw;o})Jtn|AJxt4o${XXmj1uuF* zO`Xd2{Kk7T1ccVD&)((f^yw7uZWOo0zALot<73#YAjItv_EAvl-1vYEH%3?x@Sg9p zf7^X{u)BT#U$+1GAv9M!+AHOM-1!P+cmp=oef)5H@5#dlDE}Rp4M&ANaSKw)ThEMS zFUJNqHjpO$-e~;YlYe=#^QR}{+c)$Dxw>0aFcj(v(my`pzx9Ve0w3*fQ~;wALTd|< zr9SXt$yaY4;c?W0HzK%yz90ozoXe&8S8u+-Ear!fgk@L0SQ|RA3os>SS|7Cn)*8m@ zAq7}{^}+N4QGPDroP3EvZ%?R#aLvIQidCmV7ehRNy3TRo<|_V|s)yGszd)2$@#{HL+kp9!CB&F@pfk{ECTK+i0->w~*l|7c zg$`xT2XrfvE{$hR3P}4ahy5qKwlu%A8&vNZv;0t_0&zVcLi!WSR3<^V@wLc=K49- zN$ja6rw=aAVG?0MrihsM2w7Y~blc}WRtpL@T>Wy&FooOH+f-kFZdGJW$jWLwStri_y*fvzr;~M5GXX$@MDZmf2=)k(HK*sps&$WnS7;xIOycIIv?9 z+|AL=tx?s7fD-8Zs{>Pru&HR_LS5+$MS^SDG1*7Xy6c@aHErcE&7O+&{MF@gtep{E zu)oJm_N(%|88ls20JD`5X`orJk#+5cSKvUOVIf3MhtqRryc_)lA$c{2xpzAIaeDd& z52eGJKN`Jw!-v`~kNG5>IN6ue#U=={dE@4;L_y2L>5B__n+`|nuNY^>$K3F`o<3No z@6$~e3%nvGAEt*>#G4(##@4C}*qQL&6Y#$;feHTq$;HL_a_i0=JnMRL`C=2_?|7Z^ z2jT6!*1|M4cqJuE!^BVZruCnWP)9v-Z*DgZn@qrqq7U#qJa(} z`!jAnLkUNTJWJ#aMN4_V79-_NRRIN^yjGJKus(Jdv-tw=L|5C~-EP+E=okZbGf)SX zL3k}0SKOI+<378tj)J28Aba5s6Wp>|%rDO$+kW}0Hzc!eyY1T96sDO0$c+>?$? 
zb}~(zkYv>VDs`|10t|f>zAAHy!&}Jf-U{dTC>${s?sRKJMZx~r5Q5%7ggh>!W zH8|<3H_o+dj!0Q$_&l2%dw~pZkPm%)fsbAHi|fg8mYMBSStg^bbW{kZe>xnn|*ZlSBTg!{#6$+wIt zB#FSx^v~YI=ZGl^O>s*Bsz_lIax!}fHPY@HvgTNW3$e{_SJoxM>XT{7Z1on(%Z{af zrNX(Ajk~?2WY8@;JH~@GQ>+_Ia0T+o2Ew;ZW((w1MMD$Sy2$0^yaf%a0YQGiZN^Yi zGe)#TL}a2GfFT_S3>DrW1wC#*)n?n;i&XE$a6Q>N1xy_Jd=xE{M&H1bxi|27jELA; zuHLF`l}(Be$(RY1Mr+$8&k#lj_}U9`%h|+Dz4`_+D$_AO$(0>PbX!pqKAnn|{GEmd zM{toYEHN^tq|N6}eQojO2rk|&I*Dyyt?7~4>*TPJ0J)XTR4Uj})l6F%jN}9Ir6rbu z$t^rj>Xt;Zl6;vUS*roiKG%Q6Lmc{W;Z*L=28}sr9U_!t_@d~vw%b>l2u}UMJTWF0 zME>|gDm}B?O)C2eF)2A9JW8x9yLi|1X%FAm@{M&ps{_|hnpF1%7VyVqfmbCSRrGz- zHC8ebT|-^ltEz=Mrr^{~hKH#oosZZQPMJ@Y5HtR!k7jme_cdJ0UZYzsghFMVZk=N& z2gIpYMg#A|J$pnal}cx-AOGowR3D@eSKtzl`EEU9FC`PS>S5KAka;E3tRd2B0z8@i zR0bKzobv(~XJ*bESEO>KBiU_cMqTl?32*3)CTGw^A;BP@Nv9}bG zyUnKH^{Vk-ht*BEU6g|Fskc>ZPt}C$&<+j8+pJ$F$^SpatYK6{Hkq9>_K{eG4 zoMs3g*4-&e8PJHL0cIO4&TF56GP`8+Z6DLd$E({^ZmvA%Ix~gsWg&?jL5Gr+HlZ+; zZ1p|}MXro!#qwf(Qxqy?Q}4FrNZN;ci}wwJVC?G!4)%W(2t!Dhg*s|D=ZAS;@0%LT zuK0@6uE%q1oy2+qOKe5?>+sQOF=U=e5Ki#5m5xYw)WuoX#UQ7#CO{;g6)^|nHX}_asH7ZfS8VE`M+LHFS#u# z>Z$9{s+{iUn4O~8bg|}7G`4PS>ZdLQASyr;PCX>Euo7250w9 z$X;b=lTN=fBw@1-%3>^nc0t!-NqHPb)N%Tv3Q&<)iU$5m#CR+bS?B{0lx-$H;@+#; z2ZK{?qla4v_V}lqzox5|s)>NH`+jt!uUvP=3by7d0`BeG0{-(43m)JYR?;8^WLJ{R zy$3DagYZEH&wxZzR3l(}YXoY1unvrx(E*ruYk(dq+VI92GCPZ1^BLs80e`NxKlCg) zxh-G1ZIB=FC^DC>>Tvq%9A5E;5WQo6)|PCxg~okW7b+QZO-rcHp`|mqqU_;Z_g9V~nJYC`x+u`?sQXPP9LFMCilpP< zev)x{o#$9YUv6!MCN*taIV_MSMVG~HWmUTpI|Cs%I;f4@5c&y1v0h40!(X_H=@05) zw^$mZdcKzGwu5veiunr23CQRk7HF}-a^dGV=*59{cdGp>5p7Rl+`dPqxCu^$8__{d zFlqmM1!%sZ{=*8g_SRr_`&eBVAG}}ZeRDSA6kX;^O?t^rz;oy9J-X@?Jv^96R z5?)Ub83iYEt07W?)U&yBe=hQsa9l-s*a}*I^vUS1V0irj^^BMgH5@o{E*^u$78!KH zdufhV^r4ijB;l4(Nld6=Ml?Cw3$9y1wI+bY^e03t3(5$*Zi_&(VO%dLO2CUcYu<{9 z_~R6Zy9EG$UcWH|2y<&(k$e3P&Fuz7>0Ne~a!mg21Ht`3S?<$dR&YX`h_>eR-J&Z^ zaG+Zjv4+`;4gB==Y<{_XV0s^QuC;i=8{Hv&*WkrnOmU&;vefMc&I6c$z|O3&ryi4K 
zP`Q8ExMv@q&GmDvs@z)GN(Y47P0XsxZA!FoKip~k#?5DoH?y;2q%@C}WGsOMOFON^+C{{w4oT`XmQc%czwH)2tT*rljTlAXLMBlu*dDE;_p_X;a zJ;Q)L_0ePVK>`R`n0Ik5*Vn*xi;ZaV0XEb*y#G&=i^G#AcwilpX9m*RPb|ZBxcdEB zy**|y97^P0*RZPgxe_%;vDP8-Cn;dq69>jb~ z>GlG6fN^Yu>1RDk!_V3c?^X@(T0^X6{;xYU1-5f$?#*?dhA{9bIO3q7k_c=#)-v+` z=#+L1*mWfR7_4D8^G} zYP=Y|nBoBsI5H5>j2uRtYDncMIY78{`kHLDYi#$=c0IOS<9e4{`)Ak{ZS!8voT0s# zi~-^dK@{@&St>-VF!7nZp3F{pKa7ngK!|^DOwVK?wgb}9FDza`DH}Wa`SjTKR%sGs#f!;Bqt1oiFCiXrgr6g1JRs<8CgkLr^W;Eej9*4m|9XVp5< zmD8nLe0k>J1!mMwHi>oF4*?X=~}U^8~#V<*hjuI7&^sd&xlO* z%+0~dRgfz|XpASs@W$BI3}pU;t!O`P-xzCuT53T{H;`_c9MeMmTQAch^i}UrU2D`} zy@#~X?s{%xE7S&d7Mm0sDhHfSb*lO*xr3t_K%5pKW??y<{p$(4FU>J=J3H)MJp>6k z(1cC0M%X#>y1tU#jx1-O*GO=e19mgIAOD^?>a6Ua%-~mO5sd zn-GQ1V|LM@e%rUO)Y*HmEsW=bgO?+;8*8tz5516>^rD~5GwjD?f}?V<2%6MZ52b#` z#Ivt*?&Lp022A8!V65K|1D>>UmP^ zA}CYUdXTFA`m9}+_uxnv1uzI<$@zD#H9Ez0_SM&PdSR3pSCrk4%UOa@(iy!@xOrdT zA|+Dn42{!2tFZ?aS)8pGL!V8v&lwk^J*PRSJPNvxRB52!1SZ&b)`YBo z#QKQ>a@VLFWq4uU^;M>9Qe)438P!A0_LlYfDgr1N0mSK67>L7`LNQEz=FE~RULDiD zH=Q(?)@o2o@a)F-12SIw#;Ft^(nvcxL^|!UzA~hGIQ;ETX`2ByIGfC6#uh2^6+vna ztxuH+&lSgcSXp?!0*RGhX!bW~vABfDFxtAm5`fxgIx<72J_j+9ND7`1r$liPO`Xyr zwLt$C4Bf%2v3B|) zAc%Yv&U#(dIkKyJTs)v+8!7-R)hnacN_uO`k?ujNmU5)&1M(#adda1?31>)}t#Vvs z0wGoVhClTh^5H!zUtUcXKZ-T*NLMhq%c+bk&;=jkYO?r*(!8`B`|5pz5(N!ZhX$u) zL+&tl5|{qPtJpP38@+!goJ^fJU=4QfOG^V~^e-~;{?z@>q5n$y2AniV!>W*LXo%P1 z^>u0eZBKB16-qI%CV}1=sRZsSwB9%7w({jQU4y0&Bl8-xnEX?(U!WKW#tDO-Ktwr4 zpPhm_I~~}{vxF`Nv6%>Afzst!HWf(oG{E!=dULI=-P1-f!4q+lfwjD{TD|3z^>IKO z4z>Q{`~_3YeQZ#$^g`90aH1Jdy_6AY`T3Mb!(r-@eW_iOTax zSUGSI5Ld2kebWJ;=DWP!c7;>;eFUl2de@U8oEI*8_Tw2~Y#zJYayyc(cidK76DRx?+Y^bowutXJQC-1Yec) ziq-CV^X=TVolU2{&MLDhqY+y6{pX8y{okjt`*z2>3NOiD$*zs5FH~2`l(iIT#SX&O z63Ldt8`f+d&!e?FSs-o8K?u402zuv7uDJb()oYMEm8>p*~6teX)wCyQORx@_kDqd7ZkpluwUJLeABoG;-;);IUlu2K!lF|t z3Odzx_jbO2^nf)H$DV2)>^@;yikdsqPZj4AL)W1kGtj*%PQ?#@&F>o8O=jFl`q%!r^%bzjhX-A&(g*l!> ziU!V;AP#0&DMz8H!l|^AjFa``l#PGLhH;N2HoJI0S#H93T{|`7PcaeGS(#SRs0n;6 
zl#~w!RZq-Vf_aSxjCfxgjF*9Bocp?nG1I+jtM#J_0z1o~`}$Wdx3b#CJtPO&wHb&& z%ruBftK?A#PV&&eUf~MzaF-t-lvR@3mu{mUNu+1W57k9rf;?=C zZ{a9=6~0#VAoJv0^_dKZ5#gk`l|3J`V2r?~l-{>|v^g!`L$uM{R#M+YoO7@C)Tc&H z<|}j2_*`D@#jC!G(spRk^)(^_;f`*qK&a>A+0j_$oX)e^5#MF7NoQkODqrju7q5{3 zEo%OT{HC^GERIAy5~O>VMuqq9w@8i5Wy^{$h?=md{2yiE&+8%1?XAWSq<6d_Y@^_C<@^+@ z)W+yDy@DQ*r7WAvi4-F~KRkDlR6dkB#m1sF$|UPGaqX3Q2s%&bemg+k zN=y|zIR?SHMIB&b&_ZNI(|18`6&NlfO?n8agd2i)n!GSFHDFghAR=k<6NZ9mVZUSc zR>UH_=VPf-(V(=GZ#vt87^#(L1M5P;%wKBl7#}BVf!Cb627nzLlnymUt~sxz5@vPG z$NxG3=9)bklq8$4O7-_;PmoS2nuW6vve2ohXe;YfI(Q$J=)^TlOq>3WTXuUWgqIzTeIP>i2^LL^$$Hri(?cTUF$ zVzlq3sAI_&MN^4$O!B094HN0rR(ns&S_J+24Kdz;h&c9bYGCA<<$qebi1f+z^|-vz zO7&e-g$c9=TS$ypWI5Ql2cvpdg~WQZUtm1qQ-D8w+Y#C8WhvC`5RwAgL2)=G13~XqmGztV22bT( z6O{>E_tWH*nsOgx_ORA0u%m`UZ?3g`=~n>FYCb z$`5MUi1h(s6Oc#9M>siRFj7Y&;F&E+cLTZceut_gEc!tzb$mRzetT5A)e`TD1S4?W+*_^I;sN!eJ^!&V;R=_Hl}V@Y;n^Q4^dD5R6XaR zbV#Uz%(^!RYhU9zkMaP-HQU2ChR5ISF_5z9Ap62*aRwUwWcr9PPf4U~mzpf;zCptA zVBYAw=3YDclW88mZ1IlYYH63+qA8dk6{!t0mrqo^-fCszFe9I&4ge%0|6ev{E6G;B znn{iB@}hB&?Lb~uQdkoh2Yk<1c45+8 zqM`{8{*$P~c8Bo!+ly-E+Jz5y%hS;|KEw07RU@jlyJcMMwP;p$a_G)oQN9L!hx#}$ zk23nIiTk18n`0XAjgRjM_(-3F+R97WcgU@Dj<R{%cRep3_^1->7Zz0ugd0%Y=Dt`YH;q=i(oHYx zi;2D}sP1O|s{U3SAOw;>@T9>+EedPQV^RhdhL!1epw%-x22|k@ooAq%nq~f)sg!c^ zCu@ymzQ3YJKv!F__IF+uSvAJ0rP^zQ^~fspTEJDO)y2ib+>ETEMYLbW$oqc^>vo$P zy_p#kA>`0c%uUyn7g$zjCJS2?8kwMbU@sYq9dQCpSK5^etCfUW1OX62g{ z$liee(_^n_-`3)M4eOlz_f5P@KrsX;2E}xGf~A3I#`om)k22fjcj|=;F%cSW^=Q=t z9eYr5v_!oYbg3G?1)&v1w+oy}qtxJyDU^&Cdq@zEfu-}t`w=j(ZjTan5y3?GGMhVC zFf_4iIoA$^X3GHr+nPz6)qKa%4enB^iH|vVBeMJ%HTBp8Z>;gyguI3;BwCW9G|$`` zH3A`(kax!?%i?p@(N;O_<`!F?nrR|uA3q>+-dzW|)KNk%%~_=qNm#~Y3yeLy$cg6A z`_=aRl^&h2J|!eqg16#BH9SA`a~|uH#hfT*%jFanv-%W_adld5kUy&)=b>E6@nNy1 zi$In}Ngi+I1u2W1nXZ&NECmH8AH;tLPG;^JPPSfUq@Q4ukmiX{-HDjj?B;}Zo72yG zgzfK0s=U}arLOL{Q}fy#CwjPn)S||(h{wV*p@EQlNV^CXFLE{7`|k0>gGW#BUc!^@ z#|PVwAMgC>;Je)i+s_yx6G1gn>yJBMq2|+PsDy73R}}|CZy?j2DGM3O7({ba#rl}BUq|`ysqw1T42?3Vf1kgo3z*OGo5}Kor7L24 
z7>EU<1OFS5JdOaC1`QaFKOU&Dg}yL6cj);>jDTpr=qPWD>NilKQr#Qv@LAjchF(_N zS$pIs{j2kV7?h9vRp2@`@!kC#22>GVQyN(rDTJST`;5T^HhyFKAaZ{_&XsG%h=Pw%dg+74q|GJ3TQ}9$LU=RM+HxAaHk4o>I^Vh~bFXQUh z+oaG$tI{k7UHO)geNi~^r3na29E?3AW+XS(m_e)!_MbpQxGC4p_y>;8fRvbtHCLan z&3?#Z@V;5z>K-Jsp~ZfzOFHmWu7s{p>_|2vnMt7(4>3y`_PNWZ%_cSvgu&KLni_*B zPS2=|23nZm2pbiU199H7ZWD?jZ~F&T)(P|Df`K{D=TNylGyV35I8}} zc3URvBI@6Kq>dv=sEJ+6h~vb}_Tk>TIRHpakphQrl)aw56Bu8<-E5)bSG^1njv<1& z8=~eRcLP|stT|It5Ab84`CT>-8afuXl}S!ST*ku?^);d82;TaWAf-6v7O-AgbqP5)<8(8c_~o-6ixq<~i3)nOJ;U9ogBRB%ZG7y*(si+j2_(I_*= zyR!XEy!08(vP*raQv-tTpvNWEZb4M#o@Pd$nDyY$TvifCm~+L^uvy<2fY+7**pF^Y z9fN)Bqak=J+SCQ3je8?~YimYzK6>#6B2i435p5WQMYJG6l!&s4*7!nq#i&?!sNmaH z@JF}E`lu@GC7iwEut;{b>UtQX3;51uNUHkGu&QeP4r5rGE*4nCztDSGOxk-uXCIHU zi6%TOV+7PnLgJqmbEHBr{|7H)pP!uDFRjcg-J#ONEsZqAudrk1Jw_&18ABr9iqlcjOa`N zVt}xiEXZC014?cB1R+Ks?+QBH#(B_>IJUb)l4RV6Kbb&%k`fp$T*u%Cvm4_!1Zg+} z1uCH-Z(aHZ(@#qqPO`94(QJh_ial7U^;ie6zbj{qwhqZYwY4F8=?*$#`6_Nw1Kebj}=P$wKFW(h`WIqUoiS5Wh6 zo-NA}u+zC`W}o--c?DhEK!8Iz-Ke;wMlXg|qNdiu*u6A=^2U-_| z_Ck(Ibl&H`_?j*ueZw0916Hf%3%sZ>&milO4oY}6fE zzvrNVQ{=>?ZG_yUBRZsdBp9TCZfUNPFQ`D1CZobi{wP@*0u^jhEU$+;csPhXRs(tT zbO3)CXP47OEANs^UB;*wq+qR26ccWRWTLI7GmD07hpac{*6E_aLf~(Y7_6yxLZc8a zz(VwbS%|ALH*%9i?ij!zg$l6 zPN%g`=RzW`HYcqOdXAdFT|81!t-(CQa{2Y#A7vd@ZZhY7nlFC*S9>U#r>Of+&rjty z6l{FkxHmnM1k@3-Lk;E`taH71mrZ7-Ol`lUqUlUm>z9!s<#1OJs7LP@{b z*`+$tu5N>#)qj@NZBh`n2zXe%ANygA`KzU!E=Y?w@qe%?Z*4vMx7~++I6?_kv9j9z zCCirqRP7?G!nw6oos_t%0QF^*WMDC*x*&S{+r+*Q1@5_+A)B&@bvHpv6m~r_NaCnK zDJe^OH~N)G%M|wAtPMoU00X;OUcKwBt;WdGi$ui~80KEkGi^Z2GSOA3DhnL&qgO7p&~;+-hH~PZ@RdYZtXdx9#F;~(HTM@} zq0N+4N0eK+qVIoVzZHnEP=8LSF+Ow z`YSmW{LqbfD~~jvG|$)xANgWH-yZnjdQ)wNA}P&TD|=kZlTNG>boK#7G7~`AgV7fe zPHgK)tP5f|K@iE51lm9M{R7t)*hOO%8q$6(!Odh7*@g6?UU$pjqIbh2sc$wLs+K-cdF zuaC$_}5~!$4)0n0j~JOejp15IdzCTjw0!SOXHa131AQ=kae1tnAv*~L zD{pA{vfu?cyYNTw;L?;?KRrB|^WD)@zBSrEyV;Ks@b~r5GSigTWRSE-o&@GRS4&ux zC%<9!muIW94J>u^5n+4OTF@Bd-OUm1yu@V}JvFuYt3mztFb8wwRa#7&Vn;{1^ED>x!`geSewjWUGZ2+Y&ie@z|q$1Rz{ 
z1!uf;^QV(Hj6~YJX^)@Z+%(1KwfLSBvX{pFo?ECZ@^oE_$iv47gQvc58d@1%h;0a_ zJa`P%!R65gWkM-mD3neH90ln_w0eAijp1(5%%!S+W^G-~YmrZ$VJlqB{csyG|I-_L ze@q-;r?`8~SLBYSXLvo=T0nIPr1}*!#m?$(YE^(1k?xM*`RwQm3_^;<=%ELGb9`G0 z?($P}d{$p;Mg7mNRsS>4I^cXk3?kM4g--W)9EuBfWE9;$~5RL-%Ed@fR zGRTd4ouV zZny1S+3ZmfB_8TR{_7VaTdf~41O{huIXm&b7gkG>7%%l%8y&$YNgJxQ!^_126x(_E z5I(9%d1eSj}I$`+POX7H(h;LhElF%`xxm??gpKVgP?( zS_6M=WcLj#v*R53L+urtO`x8L}D1)jgtON**I9WBkL~bDj&aQ zO%Ml5iPHEJvqIcI!Ffh(Qg|%)fN$Erc`VUnk6|m->%&}hzDb<`>`_W@p&N z#|r0PN$4u52B#yIDyp6Dnd4H9=ynmr#&&FZG5Tt6`w6liKYoN8S=*1FD#7bd55IrN z3s}6VQXEMUw^9{#B5Yz8Uaiq0ki8PPN+Oo0`l$Y{ZN`M;{;_?Rqg7z0rV|*LaM4!W zskXn1RrSw{$@xju%=a+0X%gcAyc!Gics@Bsik(NW^M9^DAw2TT@}sm6L`ww7zM{XQ z_4yR(rOu{D5;YgAg;Y|o0NSkC2)#86140SAIFsys3Y6%vV#4&#I1qoT+s|d zZQSFZjE1iE^|}KZs2f}A+oQWswL}AhtME*QVCqW4$FY?*#H5$1orc=%0b=p5#MGiWgfV%7_;XZD$F`YTvQ0*?uQ-v1*S*!GsXmDn<2-;Y9jqt^OM7 zMCGNHj#AZU5#^+m_9s~RW-aOl4yH`HYG%pxKPlVNB^`GTl>rNN4{cUh0Pn!SV=l7IrOoe zlBlFaJ>i7;^RXCYKQ+aoyIKKkBMgPSGB>kWh27rd^n%Vt{6VNI5609I!MRw0f@b+- zsf#3Kx3czQbZvCA9uB*wJuJU|`HSyZ^(Mnun{8mxt`O}ImrRf-3O~dRf?l)RD%)A- z$nYalGm}O5^?o|XZHuf$E%@1^MKopvD-(HGX32p32fYT4z7T`-dWx40{=#cJSb+48 zWKE$LIqdRnKI zh-7Swvd|U!6uTL)oy@YsGh;f5dFfeQ^F%34!U77~8T+Nhg(SWqQUdS^uf40`fd08T zv$3qQ{xSj1o=z^-De)vrK3P@7@&L0~+tI=6Ij+F_N}?@-2OIY;VehL+=;lyS821Gn9mSMiFl`#60m!VL!6OgL1q}rL22a zN{SHar3HrzBfJSS3S*q4akwuFV3D(UR7y@n>3r%h17_{{d5onWFfClii_-{wTZ;+fW1r<8sJ`J2Gs?K`H)7E<{_A4Dq@Xls#0Sqwrd$f+b_&xv{aS!E~ zO=)T(Q%K@t21!pROTVNqr|@ZprCMng8!&;Kyt!WaG0D{IsjAW69aNtsx1P!rXqLL^ zP@mQniSihuAo@*40Gok*t!PV+XSah%?ZKTA;xjg=UiQ3#=B2Vp{^7YM2Xb!_T|z(5 zUtdoB#fhQEnw_pp6lAuQ|LN`NvHI=0pgpGd!z2O|*LZ`!Z2u+;mj<@j){<^(A*_gJ zSDJkT8|)!a5&YPh`a!O&7hsMoJcZ#g!(2`=Jt$Kj=>k=8Q)ErR^Q=>n68rQ}(oD`K zr*Hma3MV1V21$D%P&(r?MTMKTJV+LYkWFt?go=A?1Hhl82MT{Wd4o_>U-GS&LdFHx zU&A{9P$WMU&w_)j`PyBBzPdav%g>FQ&lYcZB^OtARbu(q>`atm#0RV+>IA8@9X=ul zKsN`PcBP~sqRQ6Eg(0U>V0}X2 z8P^K3MGMwyDwU&0>hS7X3mF1)%kDv#bAq%t$u#UFmcsZaDMLzmk+gUZ#u$gMHz7)qwe-D6uDc>s8J zNk0FyF6Cs 
zqIG2b069bsl9+;rbQiyfw_F64EfUaxMnS>}ADLU^R&J<`+=j^ulG}@44e50;6m0^&X;u?34}4TU>*?5I#gRuB7~4kWLN$CoDh$u; zF1|RM@eWEH0Tx7Ab;5o{Dwur1q@h28x(V|riSR;@sSPgKVFxPawkq2eZNd&%OhxMMQ5l*s^ ztfH))uCvjKhk<35R87SNTwuwJ)Mf5Unj|0Mb`M(Ltv$tm3D@Oza*+y@N?f5Gc8I@lE>DuZ_u{LgU#kDDJ|4h-S48d8yjn!dAU8EKD`7caO0p+X$Wug-XLEGgSau zRgNdsZ7vQ#TLI)C(w75lQ(BtcW^~M4lAc>29FK60IMs1{6Rt%$yb?s^YYH>S8m@l^ zLB#9B5uu>;MB|#@g~igDbf8742O1EqVN?_#{+*d)N4PQu`vxAQa3&0v8#!&}TU8Xs zHqO6k?$!s&oX5Ht?QvwHceVZMLmS|4|9TpD1#k?iD_cTPq1dCrkO6ncsEh$mXB@e! z*yhzUcp%Oh5X2-^M%Zhj>nNmvdS&e;f{KoeFx<)7wxnQP!D=k5A`Yoi9mpVvcl*$d zU$(RbNL>w9wFwt|>YfL8K)&LB)kmH2WFS7sD%?hSM=2N-(n~p|t5#9aN%reSW?OOz zrQP{|Wv8R}p^~JAntpnDehwQ5TdGE#q_==W5_56Hpokt0&D-cc58DZUBA>!{v@6fi zWAb$OmDTQ=ZWAGSUUkt+cQTq=3{)f|=_1ke(774m2%A-YdsahjUv$kDwysvUVo-5r z%7ei+Qbf8X3JChLBJtWspnjcMh#RpOQJDz=g|mPFVn{jxyK?GlexHwL$dHJm-`Voz zOj3}TC!}L@X_ZE{pSDzxZLmijri%Z0%_K04HTlLr?>u_jG zwSl)A7pcF_;5mp2R$$ulm_ci2#A~vq3mhbA{gGmw8mlhT$@UIZUz^t|1DeFOip_WR z+%~-7H9DQSi=Q%Cple7Fb(JV+%5j;{eDv3qo2%UW*lGjgMswr2wmf;wMrhrWN8+ZTWAgrN|1C`x1QISiNld0g?jyU5eN`# zC9`5~j{CAIc9*DXF?{b!aMj%`J8s_RRk!<*+OP97F5>`U{IlD08b*qJ5H>*mY7=|| z_3b1cU!zs`%36IhsF{N}ry4ds5jwaQdmpTdUBG55Mr57l+OJ0!!x5LFWitd^Io~b& zIygV+*zgcag%okY;3zboH3laexTwk30q_A)QTV*Pe-$QnL$Dso2gF3-H!%s>h(s?< zx~?H(({02>W+}XqPZoel&VYU3#l`$|{?il*e$p54#($=R1}+-eM6P z8tIT6@Kj|Y?0IkmEpBMej5ZPgtNpZCr!N#`)e?!1CTO_Ibb$M&sMw#gWOB~F?a@` zqTrs*`Phz8rxfGFr2mu*gk9W2*CA|qQU(Tceo&^5)a?-o>k4jQaHogjUPT)`-Ka6pGxXto8jv8@#GXK z6Bg9G0?JbhjsLR?i=f5>;I5>&&8bnF)O_CS@>BylUQCgi5WDo-cz$QZ$fYItgco~e z7t7HZtphynV}ZZ7M>mm%Dvx4we3Rc97y{h|`{9TIP2M@1FZFpI(G2djXc-U#G+RAs zyrb)nlITs&-uN?5w;6nemx*3Y-i$zq$>`$EIpdrqCd;977&eMugO2IyGEG;`-adok zouf5SbvZefSP!#6AP;48ZQ8wi(#kb3>umlWLup^7%Vpc_U8<8tR~@h{4S_~b94Phu z{2(fF2WWETJ@njw8jEzbfL2SVSX%T9^%Ty{c$Fx0&`nH5$6HM_TkxB9^`7orahV9s zl!RQKxuSXeKwfNVR>7@tROJuSX7LHs)+35~nfClFW@P=KG}X3h7WTGgOl1-wFU2G> z2oj*FW6Q=`>>4_ZLi?2$Tg_InjIYh>n>G)|2+a3cB1(j}1S}3#8IB_@yBWy@sC=-g zKhfW1T@%C0dw--x6;6b3_3eXpgXOdJ;GOZ2HlA(+nBS&0PM8x{A8*qfu(r{R_ov3+ 
zogX2s$cILk%Au2V)~U3|EVPpk%!f_BHCm?O_>Zuwu?=O~L1C~Y5k~gHkv`iOYPHr-28u}Kw9>nbzc+YV!##OKUkkPTy z`Ynb@Y?qgI3r|EIVhz+~(HhHj80IP=V82JB83a0KEw!STb~+;XU)3{RsrzOHD18rX zl`#nb!OpZtvlTBM+eH%@J{$NJGgDCi?$Qx#(D0teilt_4z-f^E#@NNfi~okdmK4_0 zC?6Xz-;nR6A?%9&0@xgYShzMl^cVu@uN_Kuuy7VPTcQuG$9=5lQ}jy!0>CcgS_BH| zLp#Pk`MwqNY;8_!PN>cb%5ErxohUA+0zR@*s|Z*Ld6CNng=XoJN9js!dm1lBZE=-F z=MENl_;+=WPfiiYbcV(2b#AB{=;e$b^^&}|x;;udDRp;iM6Yy=3_LElRSZx1>r~}L z!%Y$_jJKj9BjUxDGbH%^qf8H|AW5%Np-Pp9|5I;3W0*Q?#S_u`3P!4>#q8yvP>`egpCvzB-=IGgHScmhf|6N)&C!GN7 zPL`PFMX3hE6D4@X(zt<6FXrWM89HY56*D%P4C}H|Wra4=abArXC#LeVn*M1VKXB#~ z5GO{fZo#LmC$Hm1y$?y99YZShAFi%W7cVD=(~*97I=@^TBCm~=QaQ@MuBxSrGBT~< zy)*0$!3Iu)*)e6l*+@s%Z5P#h>moWatnex~ZW2X5a@H5^y0~dKjh4}(zXYQzo*clF zh?H8KKRTegqaK3VQ?DWVxt_J5ID%Z&f!ZH&bhyv|`Fh4Xi0yCRPR?fMm#0ie?Bupn zHMX?D3o~=J;#{7Y>`|#}(72LuSTBRDz3TVUrFK4tbEHQChfAx8F~M>zQt-N>cLA*w zsRAlcnzFa`an+1|qDmI?YVp5JK2Znmrn&t`Nh163LlIxNcU`h)Q|x1&OKrU;i4R&| zReO6N17bcrgl>Z}iqL%P9ps6xou#kDnxZJ;$O+k?xQ+(bZW{Az7~$^3EC{d3j2WOh zGKqrds8LZ{@h6?}UC4|YtI!^v-@_O;+7zUlR0c8)BG<@XMctle(BUIv&oHK@iHKc8 z7Y$Q3(^Oi5pqvU$QF(=NP!yCtJ$zT-sV$yfzIXydw1@5WNm}7r2-;fxT^Z@jhg0j~ zWKK8LWO2-p8^ooazJNnQkY*V)4)HjE&52W_V+q&4VLM&hWw02XMq4yd)AM|OzAk|A zRV_GOys5$Pz~;~JcmSRg9gn!fPmJ#HRl0+mNSPl^X*1cCU!0&FdKg?mG-$SU z^k%!rP`q=zBx}Uy>#RaT5<&)letE%^t*Zvl*3QcLLUy(Hy+uVCgpes2Fvzr!$U&c^ zn^#uFmXVkMxVFK-n+J)(YDYb6Ev3m%3v3EBoLo#9=y)-ljfNcP;mNBx-qgqf5uCj} z`t%<@)g(;Zx?aO11M#aR9<+cFB?f^0sW#b=Awb4@zxMCX`BvI= zQHqA^mO2nKc->)jR9Js^F+1JgUCa;Rlz;g1^zc#)lYW&wShyEcBCYYT3g+pXS1;#h zZxo|5OeXxr>@m~*ujdOKGr=6)-?_YiaK-PZ7jHOl|ECc}Un2q7eDMa_75{9xgTMV^ zc35_enkc{l^`& z6Ft&FVtaS@!S=K5@hv`AbH{l_k6&mGYq7Xk%wDZKUWzjg&F)y!%|qj}cng0q6n#YR zil$5kZ=A^Sc<281{h=5$7>AJ&6 zHL3|?sk*?9X7Y};uul}3sd(JnSqLJVCl`}LO1E-LD$bS%_zX7_8gs;dc=elu(iz_ar#NUuQWK zHIm|U#vHfCfhYyWLV*n+p`}3(37`;LJ*_UBvg<)*0lxX~San?JrG9^9ger5ytOiba zpuA;VHYg^rxyZUs5+!m;s|uyP;D#~t2>I~M?&RX6_*oGIMV@ZZeve`4>g9sLu$b|k zlZ%VE`nFnUG+7Czl@2E>12ra)uueFJtguA1>SAH_@a4=;SyG3lNIK5E%KE 
z+kjdj#nsRmDGy6kqJ8G4Nc{tPP!daRFO_pp`ytik^j_vE#Skm3N{p<>>jY}r%!w4_ zpfk(>MRS?qT7&9F=BZce!d5}NwdvoXUJcy0%U6Esz?`gDuV8D2x=#XlFN z;rgj3r_9))8OniR{NXH9Er|}z_&Ge8ogU$_9w2vo%8<^CF-|usl1F87M}F}8AJ823 zI7|z|f&v2OP*~&57ZapFDPMGIKw(^-or+7s2d0_;_J|+9D8pDhwANuw@P$L>FHK`> z?aPI)byzIE#94{26*{+9!Pa$j`{-!KgU89KEXY-`n9}aX_ynoCPVs*_H>kf_Yxz3% z-A6LZenL_WxO-#*4A*{f3R|C(_|23t7i=j?Wr!Av?}O>f$>k|R+w4IJZ1LBVv4jw{ zIe1Lh{cY2-J~5&=+P*PU2>t{~Z;nQ?_n`hMS_w$`k}FzM=8UoqAk{kA+N!M*(p}^g zkXc{CqG_ZHoLmXQMo3oJdd7 z!wEJjU3y?Hi%G@NgK>&3W~_|ZKf;#`t$s1>Q3mgm(9fcyRqhncq@{YQmD!;SG7_vz zf7U2Qd<-I#DM>2J#7hME4vyt>1Z_@_zUOs-M390Sj|>GkPPf;`BIgIwvqS7+8T1F3 zo0tmA6NwCBFbpDP7K;fy7voLL8ScrW-sl*o7P0~wyX?Lm!8{+cDvrTBJo{iMzPdyz z=99^5i`SWs%+NHxQKNuqpcj0+`cU7`k`}YV@p5qr6mgQ;iNg9bm&oOpTX*jKGUFWM z246>IEw&iUDQ|+<8C3$iG|z(!U7Qdjt`DE-sz5Ma0|-Kr`r@M$b`o0ODnG9L1dQk> z-vao=58s2&Dc$9Cr#YaW&ap+jFSC;JZSz;R!u%xxo4FQwnSY~|PvD-a72G&+!2%pv zi~l-xsKjw~OYKZzqzag1v;&BkZSX~f=PF3@jZfVz6(^mTNvbsnq_)dgp228bs-t37 z9Y<?!n^OP5=>X@^ze5WktOAJ+a4M*RfDa8^u|^+Fmxqhl3ys=i{}g_R zK3EzJ=8NMyqnrP%j?&RnKD;tLx;e7YBS?1ALW64A3A)Lx;KQBAR74OB8;Q!dS;_E$ z&KM!{6b9|9$vTZ;vVUvq@%H~Yc!~|@x7(=V;sL2(v z?jJHtbYY$$9I^d46>3*@;#h6IjrrvA;zV~wMZsGWT0uwiS9~Cg59vy2=yTCKV&knd zkW`~cJ%UuLUR#0w8_qkRl4-S%1R=y>0cj7pQXHW5G2RPc69JtgoS`qU%uN@|Es5=v zW(evuRXUr?4rLod#40;Glt?S=`eaN{BT^ko@@FN(1`N8{p|*u-wuU9*Xuf#RH?qh2 zMfMV8=8M@`s}_pA3YgLHv21N&@APc;YC6BXShrNO&QHki`WSP8yVi{htpgqjPi+BL zAJ#QjZ}e_XFFL8IJNKx|XLPwhV>2mOjMaKMollNnaHtO3)o8K($5ObzH=joUnUoj| zMV|z*_#sNR0z%?z)U1@(|Ht09H?(ym`TozRP#C-twgpb&%*-W@Jp^#fj%`@LnVCh7 z1|wl>3=%7WALDpG`~Cgu(XVqPgyUrQ?p_jubWT62tE;N3tE#K5qaHOdgHDIDIeI%} zX;WuPRfK~XR4S#hbY9VVa8Y1`*)XGuageSt$xb#gA%bI)*M@}{P=$eJ1tL-%0^pYD z$Vyd)*Q=JP8x00+14`%K)MBduMgY1r$(urssl}qAl%w^|FUK$YHWuC!%F^Dnu6h#L z;4o8o>5_*f?lr#wkBc-QEXwk8j-BgZcrqf|v`dz`-Pm0}c(ncW;ZAq!uLoOCncaPP z;Xz|^`2upYEPLm6qkR|Z8+&LjKtoBJq7i3B-T;&QTSJz77;F#hUnotWowd|rnhC7#a}_-QilX- za`xlX&JTl&_$`AWo;JiAde;=GrX$C8Z%*3>z zVY^Xn?%-(riOrou->tT{i>II1AW5s*Ao+HV27|Qe1!+?wY?M3dG@8Vy$!T-3F?J3X 
zbv+@$z<<*g)v7WyvrXzfG#2Wj9kq_znURh96J_5(Z!!!miXaQ$kTwCSE5`4`gFyMN zhl!PfjC4gqyI7;9x6(Yv>o!g`vNPEeT-9D^D|{$G6WYB3s3-l4BU`1jW?0vzrDp1L zRWlLM@ng2ygr@5#K(tg$MBJuwMZi``R)o+Z?ZRq^9MdKUS((0n}|94zc#*d}=1%d>(!^9WF@k?s-b1r$$Uws`2CK3aN}OKMDAs{Pp-A%s|G*cFeW zTiC3CTj!j>!?cS!$$6VA%%Q^w0c;=`60nE(fbj&jQ*3u(r@{7d8=G&9Wz63DWBksmXW66K~<3#^7sf`+wfv z?QZTI>~j$tBXwQgu(P`Vg}8wsH#F#v{qNe^#v{ye_u=;5{sCG{rSw32-a_}_D3YKV zNSU^7tkHWy2L7D0fGHPeY&O>^51HUki;>HXsjR(%P)m%%l6pCg(r-({2@^7z&0Q6q zkKK`v5>xaZF8zk#FFL}YfIADr5~?x-J}w+sxAW_b9mEG z9@G6-Be)JHve1A(#v>ZeFSr|U!Hca2W?rbzx_6zG zvcN?pL}IFQGK^YUZM1G6kc>nK;Pi_G955onK)|7OM4Jg3!Z2(jr%im$7W~T!LGDuZ_19R+YN*yAoriKcp1aahN?OY<#dY5Ns@5LT3u?JEdr8U}0 zkw@(+JTi3ITHajAqO+zWY0xo1STB8qNjavSh7#9+dWI2eyc*fU zTA|ifb%Z`&TYa!3;TKs;Yy(TT#Jt5eWZ>~DMTcNJ3@yGR!UOVyc)BAk&KSYZKMU-{ zR{{@-kKh!Kqg*BZ)Syi%B56peKtgRXSH@Vx@n-*A5UoBR94>%L({cxp(h{IP?c+k# zWQ;~tXemD2&?D0*-NgnR0ZRz~!4wBJ4!uQb29>d>zY?5Jq{+ITHINPs%dqE_1q3co z&i{0I0JPOff{F~Vayj>Nc2psO^i#qEYl88OZ8JfN91&f{=6AbIrh@1~-Fm!*FEu17 zglf@D==lG0vWU0@v2TvX*t(x%{leiF5PyhmPniso=L+8D+&O-Q(8;#fhdFjh_7j;i1WUYNq>A^zGSN~wAass(YTTTtRuW+GZwq=#u} zOl@e9`fBX5Y~b29>kSWbn0dNRgDr})anT3XIY2)82_Q~4E+XoTt%26A2`#m+$ybXt)# z^NuNMiYHoB2;Vj|+!Dw@Z3`3}8BomVfqC~}dh?&GHfHwIw12;g*~*Myp033GHoAFTZ@{*F#3IHDQWjf(LuN0OXaMHnXh zLZ>htB@lSrtWlDX2M>jwo#S{5wgVE%MH9J57Yj0`?DuUmymfZ2*IyE>%3BC3 zl_E!oWW!Itr!?P`6BwAkqw@jJ6_S}bht4w+)j+?Mh=`c|o7Lpxl%KxRGzof|5T}|C znmEUJGUAb`F2W$C8F{Jg;ItObXZH0-=%PW^KEP3ywMIfUtBl<2jFFIlry>=8X@$@I zIE_Ovf$b8RR2coOk0is29Q6Xx+nH2DGn(VGE{~b8LO=sncMKn!vi^95!T1=)GLL*& zZb1+}&knY#3oP8UBwrLkMuF6@Ew^G7E2*y{kLUqU%lQ>$$ zL-Kj5Psc~TOsgM8a+4$>6&O&y()@<>v@c#3x+)O^Xpbz1OnxE&{F!!19<{|GEEY5M zbI17)qWz?(Z+z3BXNy7nIM^{By~Q-XfMNytW(xeZRlI5z0#j)`u>mEF>tltslPP8v zm9bij1lS$h=mQM-$w4{9!h*;so=RG(;m`diVmwgy%{uoDZi zZ$<2zW^O^_*m3b2?^F*u2llEyG^b?&3_POx%p-Jo3YkR;H?R;%b4iP!wM@bj<-iC! 
z#XhaFbedQC6%9gr0A#Ucz_0V8+mm`MWyQDRoU)GdKwjqE0NMY8B1M(V^N z?>JJJIH$%;EgDon`O5)OvcZN$<$*IlcJX3jmUeK~rZawkj55h$7ZtUV)G*#`0I}aH zebmC9%~Hs*_Tuv71flO}ZQ9Im0(6AZZ2$PxsIjEg(tgSysFc1UtWJH-m9tuav~~*h z;)Jtof>dEV&epCiaKfI_KNOq6nOn^(c9l_5m0K4KBt%Q;4$`u{iLYh@97B0z&tANT zv+BvwyW~;O!;)v*wPSNrJfR|%Jkl7I$ zbECRswJ}|`5+>qM&U>dD%vjj#38h3yTHAK!p_ki_hB^%M0Z%nEp1c%P(DBWx-105%n5p8@^Du@lk!Z%7Bm#g zP_Uy!6v-gLARZRb*fb@i;=F)2Sj|oLS>Tmc4^IOIQ8ubs;kC^rUk1*x_W0d4F!D}$ z$X+H>5U>^+O<4ymb2#9agvhKB;L0%Za5M(f=7_7H3$wCxe1W6*IQ5F4xHBP>cNdmw zNmg@vgoYHuu2EB!ADq}mGtvJA5Uk$5I{(nWK#(4%KUoQUtg;F#CDT@+%J?ELk8ufz zg>S8K54!^h67LR)x770@2MR_8hgpuWz z>$FyGKIkm3HNq7sga&Ql--T}2194U=!_}xFJ8U31$*k==sZKCaCOfDlT!g|?HF|?Z z&1Q?bl-+8?ohV!!Fn1!qc~`fUhj%R9E;eAm?<3i3LZhiGOJMMykVH0JqOT1HL*JUTeoUG41OYP1gTe)GK3>-3g27N8DQKr4xnTVRdRi4(6j zI31DEwiZ1661SdkO%TJP@P^tUtu7$KbsLCO1Si2#0HXkMnEew+2B1~qox5!U8Nso0 z*Bm=ygAl$&ifDDZOZ%x`pl%ovuc?IM43r}Uyv-%5E%`cx?Qg!sQJcn1i<7XrdDptr zWL4XxONbh0EpJ>fZa5aYR8*EjAWbS!>Klg?_k#+mAaG#z>9=^s-_pE6)P1vZcjXSW z?dJXOEFL*b5pjwmVnh^A1~4#K_^=P_DHLFA`a{tw8_PjVyq~PF8JLTZG~j40wbYDo zu>GQq{Q+e6x6C#4`@6R_>Co@*x~0auHJ0-Ms??n#h!Ji%zET)}dFRfJL zmQo>y%C%4CGGl+0*8d%)bDG_4mUNC2+IoRoB$qXBXj)svvnh^&Ch%dp2vQD~I2@}0 z;)%wUs;q8_xtn)c84vRsD}JO&AYO~bIR?|w)@(%y)=E<{bEFev#ADMdGbzxdTByus z`hY5sf-Ay^T$v0cEvbY+#RlevPaaLrvI6vb75E`RWjK_Q7z;tUN^C1kb1r2z8M-dy z%6Qc$oTx@fd!DFKgbt8ScpvfLf(ez)r4c__5m8qKQP^g~W{Co>!OCX#?LaK1mDV{Z zxkP9GU~g;vN%!f_!S=(ybsw&8KYq5iwKgYB>D0}ntT-23bk(Nr{u!>i`G_zjc6)Ih zbnDL!9(DJ&{_PpUnm5;E@?gr<=pbuT<`rX;i8WoA8s%V;c%IB1ZB;;Ri}VSBF%TG$ z4G9kxn~i6CkL3;{_}Y67n&#;_Qlt7xXNZI(LDRULV7-zH)o}gXFMQ0t;oGwE3 zoyn?kl4+n8R!K$>MRAjQaqc*Y@U`wS!@~XMint|cVo#IUVkZC}E*33}35ic}nqsAS z4`--RV|h(XYwmO*&nhm6K5_rB+eYKrr#XSN@^xH6+I!e!flwSad`ule2B&NftCg|| zMa}r#B(FXQ3n1}+H4W}Rdwk#&_;oD>`fSccUYRgfzaNE{P^6LRb*SfSp`IC|kN~;; z?SJ3uZtv?b3@K}@7$W7X$B)W6w)`l8H6nc)t^w7DDfk3t*HAd=8rg(cmSCu?8& znB0^!iG8S6C2vK>p0VLgYp2Rqy5WsfRZ~!=XKFhD7D29ZHQu6nxZT~a#;t0DxDZwf>fwhN`sVDw1ZetscF%ePrj``^ 
znRmyB!e`g=F^J6?hfZ+qp$3uFS@6KP{$p148h^?LH{Q5oS#XPRSQR2WRfB3?U>AAS z&Z0b67GtVm-iZPsGI_{?7y@&%3C^hPK@1!=xkv9}+4$?{Do*42{5Mn76{fmAnmoVl`9du|61pXxPcxU0f5bA}fV~1$&9i6l@GI z<;H+y2TVeH|4@vW2F|rV-hqBww8F@w>CODHu9>@czh*nMSVgGioQum9B~K+Nwhwa) z$wB%GVIIwkV-;?bqaY_Z8_le%91iKSC$e7BJ=>6$lBivpl1ecqrC&bcUEovlwWNRCL(jjc{_oAc;LtMY>SYi}UA-T@X%#S5`WZ^n53L+ge)2TxBn!AdH9$7=d8B+8U z;9IKvZfW^{Z6v4I*Mbcgtz_TLfyHn_Jfz8w z7$R~MmTCDIT`8s#A=eC@K-{5)CAJiA4#IWs^4$k`Rhr!huX+>etFuoZhn5T8G(s;! z7JWI8h`H9H&ZvQjib_jBWSe~IjudVId7?~axK{--7zM&HY_)4n%GVIT0ltd2FRPK(6=5{BD~C@|6L+iD4G*b@ycjyM`@5TeZ5V(J1Tw?i zM=)>EFCfIus1rG4B*%BW(9C%bHdbpc@6t|w$+2E#``XshfnS<zD$ zI-R>>1P+8^-GR;W&aehuj1bWbdPv`ZHWqS3#VHmrw)I3|vXVef{F1bzX5%ldX4>YA#v- zVCy#KF$F8mt2r}Wi>rBoDQb#VmR5=^&t8SX z!=nu!=f*6#0WkXLpP(i zai_)E(R=X7;>rTzV~!fQ?Flhl;7^7L?xL_mQYskc+_R)oMSAX1>%t9=JlL8{| zAedKl-W(JYL`5?{>K} zm7pSEj!j>SCb)tGjhKo>abOT&nz^TL#Z=M>0iJOULl~x0Qi!WdRmoI$#BB?44F)en z^i-OVbyHShTcOyFkWw(!`3n&g5|0jE)VOqC*3kp&;Ft~0dr5?VzE?iSOcC{77}$j! z5Q_qYF%ZmVlesq$Qgdt0I=2&z(FxC#qblypvD;+ioCh|mzQ#hIR6r0VPfIl1mxD94 z8RCr4xWQG+2SraNOfexs8>CAUkiRD8OnQb*tG}BU@VKhk2*!-n<4WWMxJ^|?ZmVIw z7TAmR9vklnhiGI5-QZ|i^hB1C$EtH_i#gzA2&be#GrWo;BM!{B=*!gJF|nC4Z^;>G z?OH`L4*ee}TBQQjj>KvCldPG&d?@F)Nm2c48_-& z-3k;L3t04YL`l|XiAmx|j{U@a66`l4G0rqQEhqxbb9g*>8mdFrNOPnoyqZv|M&q9$ zN95`0;GGu2Pf-^Cs5Y23DLM#OmEyY8S-dnr*a`iFL}mlWUeHngc?$&){47B zN|OjhzqMWgwWx%B+$y`fxAjAJ=i$Tst%L5y`tAWDA@EuOE(J{0+P5oLnCeX=+7;OG z(MgpPeEWtd^x6**IYj z*(Rt>>LMt!qRSA7hyeB=rGWQDlvYQ&eu2drG5oTm)6K~&QIO3;uzkPBld4$S5qEOC z@#0c+-X3=SRES%>(M4NcwGphzeajj2NyxAVypRjhiF78LCLtIE4auEmm`$EjVo#%U ze6gr!OXC8w1VpW19Am-A(H>$Hfnqu{WZoto1``NIVqeIA=KYpV>1yyk6Jj)L1(pT_ z4ZxUh-I_VhnxSj>OZqLmE%9L(__Nyms%RG?z`%1v2!w>9wi!bZdk(lVgxJbzkHvZI zm4PFr7co4TT>d}46z`|!tOxcVrNnJlEyik6uws!Qmnl6?Kj{1l0L zi~1B20JX9xoDg43LWAxo;qVPR^cUFT z;DJ!2K#v;`urI*2nTAvY9J-!8OV4HshNMLhk{_;g>50=&u|B79$qqoUv&*5Jt-#t& z6%v5hF+N@J{R}Fe%SAE%$nXh=Wq}KJXo&)}r3j;+trSg} z=pEA_d(It6r0E-`B=nw(JVl)N8?{MnV>Uu0F1>FST&Px~{up>cU>^IOwIus#Q32a_ 
z`hloxqaSCr#ygO)mSe$2+@I#dh=4I13&*jC@+Ss4p796(k5%ZfxFIsr74n0+@xeec zX`*YhJ?o9=BirPm6R|xnz@&-8(?cimS?q&}CEwtWQ<30d_7LBWN7y?hLJy*P^alDr zw6)q{kh4*wjG@W^7cC#jI)L&(!xCJ&fB6Ei@$3MM$eh$h>w}FPZ2ECo0ytriMDGbm zYs^yRyCK6G_|x1IO8ZIiEXpU4r^jzDJ`0hXY}R_us>e{ER5nQX=X#dYV{td+^hiOh zk@MD#0U=(lF*HCRqP2)LsfLxsOFkzd#bF;zSEN$oa>znw<-@}15Gy9w*uzUZPY z1frFhdeL``FjEmt$->U*N0O)gT~X%ZU=gpo*qY%E=9-T&m2N=vdl@ zTBUW_Z^@+RlJ)WE<={Bl3>V>Z!BQeo-?&0ldEZh`_%xVByoMVcHFUHf_-AGb3*-rE zMEzt&Z2cA^tAp4lUn4tY66)A35S776RO{YVv0(4hg*SU4>PKk+PMIrj+YW0`(M- za1~IS540kFVYQUlGD@sAl?%AG5RTRm?w)I70WpgikCtc;xk&Eyl!FzxM+OlMwMF$7 z#Ny>191wb`XmX+8lcOO{>SE)Vwc}Un)ZX<}l=aiX)qBZwPhvK6H>?ALFt@;d5yFof zO%FcS3T|11j6Qx1;EJb6u7v9(i-&Zs=-9%5bWyVO2Yw``{;eFnMRF7_k@Y12GD1ac zK;i@E(e`sCjI1)hg z0pZY3zPQzZp>2pCCWm5nO#q?f*wQ~Rl*kMwK5OvF}o&!h~#fm+^RgG zT~gQ~fvrMW1k;MmfV0we4ak+Ir?m&@oy#2d;(GuofxMvk`QR?vt%15|b_UEQMN+nE z)jX?%y@n&_gTJQznlaQg&vDEpp2aDsl0XeR^b}%R2IrTMAXylhTjX&g$g@41bol=9 z&c>hjF%Wtf4uAUa@%hc4I}G8t^0OA`d|bYn{==FQ+(hJGNi&k_hbR5kNnVrX*5@PZ%02{hBGFJKn(+9E)`@j1M<5{`6-1)flJ0{VKg^TUYf9@lls9CGDFkDC2Xj-_quWaHr!eAVTG{8f*)ff)^w!_!BkeDtT z@W=036_^m<^wY=Zx0YMr0MM2xh{-sTCMwY5f%rgDRW;E}rE^Zb6gvD0A=D1Ng9PY3 zkhg^l+pr;Z@biG51cv{(gqb2R3{KNKX6nwGtw|H4ur31b_9V!@$3LeWvmlC z-GgYan(8gq$G_#p!=DzP-&&NWD3a(EUR#rl4hdW<&7}`teq=jwMgRooTf2uG4CfM3 z(0_OMQ)lv=y)7>Zc}VdLuM9xtECP`k+zZX$p+80Km(KwK>Mt~J*D{n*n93`n1g0+L8Xp7zEJ#n8Sp!EOO!db7I_x{I zD&r|TW46{e{jxtwr@R|2H?t?T&XgShz6}9=-?C>qR0x`ZFR0Q{IvyQ!G7_ybV<0tM zRl6w0lJ_!o6h3y4?O;fJM+AqVOq1wdS4BLSwXPh$8l3iU$5pVfdP1*1oFvzE4$b5>6FQwK6Zy%Lot0| ziC~a%fI+?8f*e}Eu6|(jFKKBqEwu)1z%E@5#)g?{@Oj5?V&s=-7jurw2P2bENUw4t zq6me0OFc1@NuR1(O2JdbBxm*#OSu^Q5`sg^o3!??x<{q1 z1_b3T0#gPv9S=yFJtX^}=o2=2-s+3f&ZZ*5zCvJ{=-Dm1L>@H~w z=Gt1ySlDqsxFLsD+A8s)ej;dcaEe*3sBYTgyvCiX$pqlz;Gft_!YHB@D5^`GMyGlk z>?BjyW>Y8OHc+i#FgVSG$q{=kkjOjbl)2{4tfIho&+cNq<1-f1?XZMtBO(OkJH!j; z6{UukIEyzpZUJ$vB`wSKAucw2Cd&*;xK1uB2wxm7;i+svbebJ3sPqA45BE6wLv@u7Xnm>!0?|^h~wcTzR8xHcbg^UvbYfd(P$` znn1qN2uF^P&(-=h%0tHjoouwU-AI#jKr1^m&~A#)D)53%DN+-UQ1L9(@c4(HV5hyc 
zyjZg`VFVTCHrjVZLK7IfNi;(|r8H3eD)7f8F1C+I{wPYai!cwl=%_ zIKlX6i{8QC-}TB;1V>~J4*rJhOxsTn*z#9@%wES9o;}^(*xB4dnXkV2J-SH(DK1_m zPQdYrn2uyDrC`eZfEwXh;tXdjPH5v7;|E4>6k=i2aBr|PkxvqVq?vP3qdfQiljIP3 zfJ{NE+e@$=MN<+)I^9jk$_+#%@f^OMSUIX*geZPqgtzi|3{HkKf~eoioQFCTjz|~| zI4LnA0T{SyuxirPkwW6tVzU1Zhh%Vs6Bx1u9O4=WD&dS#$syzQUaS4${BKWcYswB$ z&9owb6$$ee5%d><&zBO%c2-7k&lSaA3U3A9hwenZ3#} zCOaq|5?)Adt0g&<FHIxlz$oo1_L*-MfF}{PUV)OcZ`o6)$kgiDh~{yu}JW7|UYM zhcX#PMd3w)jb}J)g`5<78mlKyZBvDU@Kipax1(b#grpjYdzSgg2ofA-pY z$Bg{p4}UNfxnRaCI}8~gq-W_EyTq9C8litUu-x;mKQ71+^ZeX#{&)p7Vn>dn6Envz z6vdDZse#1#V%D!pOgj9BixTT;r*Wq+ER-JRGU{8-nkC61cZ7M`s~8F-~XU z*10}c8nwVBkhJeZGqU&X`(!E}P?lvN$#pcfLJx7?I1$2dzq)$VVGX?@Ne)Jsh+RjlYDDE0Wfs4liBqGJ+unSfd zQR($;D@Ys6xGsj`mGEHctag;R_jD`W=EJ_*# z4rCG+`MiS+5H(YdtGbwKkU1NQB5oUW^eSD;J+ASn{IM0;qN#!n!9yF!E_G6u)Mp1k zkVS3E}MSRADIAeK*}pTp2y$-bo5^-TxP7r(_$fQYf^E$lUSPX3LtH;JIo zqjTQZaHL4ZcNHXLGAtd8bHDQ~4%9X7L5Wg0`1RH;jzIiXM_jhu=tICjX9E~{h#|Ex zf&(N>42CKc20CEw((XjK<=}n64W@%DXi+pLPoMY+BF@f zdG>iZ6a~*V_S;{5za?9QVwg_B@fal?_A)yGc+z%) zd&Lb?bjl`(rBl+E zIXg7P$lQl2m>$@1@`nl<=E7K&12uk=bqOr<5A;Qr`3G~=0}g&vc1bG*&Lw&1;FO_) z3>d3rUcWB~MLn+wS(-qis0P;=-ywV~AXN~BG3F02zVJZUDrqf@OQ@Il2}Px`N?i>0 z)-54-6&|002{MZdMukNh^rddfGFRQ1Q1QYdT!vSnQ6L`flC`zEwv@QbUSVk2lDw>`fyayw z+P(0tGBT>45-HEX8Kqc}G$wAz9d>|CwFb%^^1IX{*!Ho5~Vuc_GtqXGSss1^Z z5T^vCh)n(`%e9{d_CcnkGZj%qAyEXiyYvmZQ>dX+BWFag(WYfW+yJ%WJ#tI6Ry}B; zW}K+dM3!7FboXABaZZ&6ON)A%zqas&>R>EF1Q-T5z7NE@Awe;Ecbf$7@K#LYdGyB} z8!t5#JqIBTToJPp1ZjGo;($)tW9qY-?m@|-ok{fKl*!P1+Oe%FrG$5oRW@1@OnCfW z>84)9%stnXA`}43+g_`BsmzN~WvVi4}eOX(hZ0`@>0MX5johQ3HNP&oag)1}?T#S$y z7Wz0p!iym?rP=1X+=U#$vSH%V_I*mS26qeCM6)nk4&NYW15VRbr2+ToumDgcHYDg! zsIr6%Vi0PG(ypmpRLYeZOKB!%2QdlYbQexK-SjZ|WqxLo?bl~7k!1V9+Tg*9PpbRPR64%CgvUUo8bg6|MZ zg_x)A_S5}?^~aC98&5WYUAQy*NB|~Af;O^n!P4$ORs~nYTlvxkzmL{UNe?Ln)Ld_M=6C; zQ;(e~aH$jp@L!adjkCR@saYyi^&ndR_X+8kLyXUEM!! 
zy~E|P0+cd?{Z>j6651S05VMGY7riRY({hHHq17-vg9k=uQ~^yDR7vYYIfLw z6P0|5VgZJk1;~9g-5ClX1|IuU8wJe_VVi@@8eWICx?}84c*7L+a&8G6k(1q5=c-df zbN4bVv?LtF2}YkDOX4LY02VuKDBZCdj2&xCKn%%t*2itoy{VO_)NI^pFfD>^S)}{0 zF$%z>_S={^w|UR53%b7nFEFx;BF2Fq2mu1dJdD&N8u0-Cr6n%rm{>swS~e-H)xyCL z#C$Q_?%t|5PD2i~wqTp|y_KUS;^HbWp9q2kPD6SG(_8y2Ao_J+N}rY*S?DXBc*hmE zDo!1~2u?8Q5I$WAbM~u}KqQMwM-60ml{DIL@=6>XzDqj|I0Mgc`!q8b^Tsp;+Cop^ zMd%35TMX-AI;-N6ypLoKSn`J#C&|T3fmj{NWacn+_g1YcfLA-#l5BBR#Na*;I+u|=xF_+f;|^H!p7wVY3Uue6B7ojl-T)9F#(=!%nz7+g0WSo>78}{I zRN^XTxb}xVQdQVolMsYpoE)I=MDBuvEK*FAL)LYTn9Jl=rCr+`L@{^l;k`T~VCw{z zD}=0)H$@;?C7}SF0NA!oj+lGON*`9ZT#bjRLBEi5isbZMtAZ=TG0)o4mQ^Zvk?b-{d>=#2+|`8N1&1Tk^~v?gV86a3{_*0_pw4v zYn%Ny@qML1+GHK_Cr7+mC62Xy|HUN|#@OuOt*= znH`^4c5K$sU98HQ=@b=UY9Rk(xgeO9sPnQs!>aw>HV}}Jh0|IM^+Yv2(HetgY*4$p zf`6>x=(ws`5gQuLqh%NdS5t=T?fiPa&46T~&GhLw?;S7Gi_e0#TIldL-+l>mQp`1R zS%Vu-N27CZb}Xk9am)P>tQ#JNs%lRu6*sxQZ;V4pKIaFBC7@npXbeF{=(nspn0PNe zPvY6H8=HFJpo{C?Ge8vRGwDwpcJ2GH=vwIjm`nyoUjajj|CfUxD#r`}L`se661$IM z^$y?h=?Kxsjy@xqIylun@`+&brndo);8u}%aWsZ}A`Yhwz$(jPn|Pk4pQN)+T)7)4 zhWkCqnB%RfR91XwLKjB~0<{HNAY&Rx2J}pnS{Y@+T8z5U=M#~7t^_#?bzX#k`ehid z_HYpbQ^UqT=+#_tI1`E9E(d2MSKu%$Q8#$Zxv}3aj#svR{iCr-?X7pn#(Z2g|3*devnr1&2-$xaTkvW;9=sSI zjo*hCsy5bfXlneg*j;`2sc}{kLu1>}JV(#%Q|+k9)wkBjFD8B8=5TLL&2*aF{Po3! 
zZGIxF39|yOsO-4m&lFNGkTP%0u6>}zH{jx)y^1@Q(h9!H05S2kC|`({`F=3;m3?V2 z`3ug{jNWo<91c0`KHBa6W&7#o&R_PscKJs4`)AvaH@S*0oxeKoBC?DdJ>B@>3sndz z0kJtM0X|D@R9Sg(gy>{w0@+hGXv$!yD#~B%q`7)I88Zrel_6!4DaKyPqyx|5>7}+4 zK`OMM1RaRI5Sy+tV++K z-BUpE3IpOn{z@&NvFS1IJRdW?5Z@9mJGp9bsqUJnXg})BW;9R`*^Y|!G4e~{n)fh^ z(@?fTWsVsr2E&t4*mg+=u%2H$8zWK(`;Ya5N83*y?sUIj-`{$={$xui1*i!!3_VdZ zt`l#eRRgM0mnK3Hg$IB@iEbLWAw=1yL{qSb%I!v{T4ZhQ&s%@%uJ3mrZa)?ZY5ZJY3^!oxZ9P5kqNFMf6eEc0;J7a#--u!| z*NPmRzr@jg#G-J|A|k{GAwk0AH*QM24ayEN*ZSVZON&scnH&@6iTF$wsOn3cgCs)h zGFG4`0138A#UrA-0%42KmnXa5z2ALxzB?YhdoQa1&CJKIaF?U=z_gZ_m;+_62Z0Fa z3KX_Fd;%QSp20GW;&rUvOBgE239yiTs0i?DPU{?-k|46{L0aRs!ZA*E!(NR|uhc_z z)H3RB>hiVP(+D9lM{KRcoIB}r&lj7A0{QOAzBK4YMSqcFkCFlt_ z7p5{TNyNkW!;=Bw0U0cf8~*Uq@4i??gmAjDEk9tw(VteI-&$S1$(X;Tk7uL`_JKbV zSX%(AXJh4!h4%%h_x&-Yr_Z2SA|r`?0~y&tv?tno#(IO*qgVwLmJdC#lg2yoqKbQW*KmTq(w z8#iuBF%jP!#SHvdZY##($IKpC>T_`Zy0AXQvaSIpHpkp%O8SeE@kKW_=yYr(VZ()_`v2sEM zo7;QeMbh&(KW;%q-RsIr&M-cJ0&!dwEb3!2^6)Asvj3eDG#@?O*=wwBHg4Sbv9bNM z(Yg^^L_;^18=E_gRse=ee5kOi6>j`kUyfx-!qVRt{QbqZH*Wmr$!^x#a`+ZMfhkwu zDr~foifMR!dfCGw0-5%mHj>ir^vSMSI)0<6hFD8w+T@iAlKs66nWn#=zwP~9AK8R` z_ji*5jio#GCQib4oh2)_S`i-UBuicZ3M@f)XA%F2{DRifP`c!$$epFw^6JfRk$*-@ zajqa64~cTrw8N*66VJb;GhH%+EiAGWf7qt!bK0nOP&TNA^MSaoYfl6OUvG_BrQ-KesM;Z!ImU7D_Enoq6rCKr6CM-l6v zrwNU;j9L!!l}y6Rj7CQ+%+Xpu9I1$L8L>Z&!^JxqrQ&oZjRnl3ke$JFPz8()ITUM= zSFJQZiJZigJ-&javo&8cC@ZI4$D%WX{4z$Js}wIRq+5XYD{ zB1sa=ZCe+#BnC=_N#i@dcf)T$iY!bv<%S2b0wx9Ww{=a3x^Wd88mlwg7hg|Szy6Xw zZ2<0FBO>QnUz@fx%oDpblU7^+mDA<`1-YfT(J8Z+G?qwEOw_JyX5-w&0y-^4_)qL+ z8fL5kp|EK)8#?ib<&yEDJ9FFicb^==Hx15ni?Sl1zg7qp@>|3ng73TuHs2QtbAuV(6C{m#n^ zk-%lT>Of6oQ5Di4oT{%(PSALm_A`A9Q9Rioy;>N9Eg9G`3MmAUh!0+N=1>yGtipR9 zBR8|wrUUFXg4lK-0>Ii~;Ej3V#Owy4J`+&iq8TJY_Mzs~87E!Raf)UIkIG2!0mtTe%ngNQt-;^Z_IW z7j}!9(H3q@Q-7`CE%&V}MuKQfedw82nbZXyR%8c%`ZRp%bTa+203~cda-E}Z_qfgK zJ$jjs-|{*F2n~GTTBldJaK@ohoPxIB^uJGIu84to{NE-^k6&?p$3JzR`sgR`&-jta z+d?0X9E>&Iw8GPFA>ArYrE#`C-z0oQSOei(!r+C7ucUHpN1|;ctjL94_Fpd(iQHc( 
z3_ONNri}DgD>qh@K1EQZHT*d%iXK;kwJ64dW++JxjF(bM>klWF<31#s+Pk;(;vy>; zbT3R{`3OkBM)~wyqB9CrO=i4qIwN2peRTRsP^kotAnErSctXK?+A1CC8CyHg{K}y! zl5m{fVZ^chFOFov1ewyb_@}LnM>~z3hY!V*4iE7D)tisg!*Wo+Hkrn zde1ZBAv8W@-p+gyvBY(P^{<`HC+l-&ZL{cvj5fvKX4ZWs`K~^#syoy^srHkj@oSk8 zgTCMUWzB#U-LN&SA@LIJUtrxjgP-665y%haFa}b_X8t0BC_QD3bs$6xct1(hIaZ_a zTGEqZISRqJ;f-WGAJBeoBpHsci!r1$*&!J}u6|Ae3TD?=gKpKk2s5ywcpU8RI1pmQ z9bCQrF}u=yipcf*YH*TVy($|r#GPYZ48xC0aAnBTU{QVU8sLa-B&mtHdywRhj@s@7 z!RW_`4GjPRINM+vB@kX7Y*zQX4sCLdy-sjogt{D=b#!-2T21kGC{?o55H+v43M6)jdi3~g zI$|PT+V`W+jSz~u?DiLJOeWEQ0qMlLx&Y1RYj`$%J%ruaEfa{hWp8pyTDp~CI8IB9 zB7=cjQk9K9!zp^=-0dGJainV;(dD>MD|VgZN-nN4yjKAg^EX)ITt^_bEa_wC#LN?+ z3vDAOpS0QmForq|>37OS+&ck_?SuCMZI)=XbStg6{3bF?n+bwHc{56r%vqil(m{AG zWBS+83xzK*zZDG-hVrvuG+vtpqm4a)nm{6=-cVKr(TIpkMI>Stn-lSAHB5psRT!Ri z8-?E)*5`+#@zc@8;AC(tYhj?cStu}b@Qhh1RE`n>wn?laU|S8$DIV0ET`j6!hNL7> zCgy3*GRxan^+^b2a^}T3;YIXYRzhT**VS@ZmXdnpxvOU|8(UaGE^LLH zv37bwp=gGxBaXCtbm@vypapv$X(4Coh$S*}K}QjU>6{#UoJA!L)_*mpL};%etT;Wn zb~{TZ%+Q5P?l3;Bl6*?+Tphj;XzMeq2$hn==1Xvbg{>6EVSI_&J=X}6^1o)GS=s!QES7u8bEcazxjTwgbi0E? 
z!Qe^qo!AJ z(Lpr_#du?f&Dh9uImD@L<1R4xp{i4uQO^tl$)_5O|9NMd>-<_Hoo!wPSeab8AJDG^ z_92r3(h2?j=bq5O$r6fRPO6D4vm}Ue?be83%dFERc74MVj07$Luoq1C9>V3RjtJRf zr*&2HsEVmW7k%}0pBL{XkLYu*cLsv)+kRoFvkma7vn+eT7d}o)WKL&~{-Bo*B*WsUfe-S3WmmWLE3~vM_5-h8nK# zBr#vo8kc9j>q@P*(JBkW%f;XrOCGcI;@TzhDP#(6A@`q>PCid)z2|C4Thko&9=48k z-#_|Y5Kub6{wBKz%8BW)C=5d#<}5XgrDepT9M|Toc$_?oiL!Y4JRz#wZYJ6v*BGjP zu{l0b1y2_WsHml$$W@|lWa8*_SuSl*++}Nq=MN2pkQ*M%@m(WR3|GI9vuW;>>?Fye zBY;Zg6C##lv!u9E5VstWu5b|{(Ue3lPrnmb(hWD>zGASFfz5!MBC09XfL5R)hH11a zAYxlHCXLA}xlQcV5puvBHI$VsP6s{k;47jrbzCrXH!env7yZVi7_C=GZ_t~ZS{;wo z4R25=G{ft*5+|*}28T9OGdI7oyw4_0*MQ$}&XNxQi>7RD!%+=$g(Z_DQ05z`mNzLL zq<7HF#D3{2K6klfpwa+U?tZW0H5J)??iAxys0G(CTsVJ(NMYSu6wg7P3o+?3+T=+i zU-f?Q#M2PISLROE=B*PWeyHE+@RxC3NHQd;kZd)3muF}1xzqb{20QW23uT8p~6fbfvM?o!RfIPXrfHH~#isDONgdk1%W*bA1VOU>!k_d&darqW9 z>z0{X9-<5)y`**E*Q=g`a-xag!%tG6Gr863?B81ccsAhR>R1; z`==*JiY{+$v6h`pk7{urd01P`2gqnIJxP!3yTPEos~h@up{Dg)w?zKA4k5Y$XHG$I zzm_DbQ+jrI*}Fb5nVGJ^VqHr_q!odq^*VbPZ0m9R4{t@IF17#u>|`{2k1N7~upu_} zyeypWJL}OL!Rh`UiB*zEss&E3y*)$(BsMwt9T@u>b#~+ef~zWMdi{?6nY$aSW#iOu zSM~IemZ1wMdDdC@t`!A)xa*E+Ji~=g^-nvG`ls+7bvBWZdGu1hR7=)ne++R2nfg0Z z^~OsZ;r33^iNr04!te;NBCS!>T9tv4c@FsXAunf4~jPdgr zC*!rd^0Uxrtjfn#eAS2iYMU#gJt%Hw+kJDx2nJS!2YcyPyHj_VHnjPK1YV7%9b@%;T4MM=A_qst6W1?^p9(6mZDO`B==|K-fVn1gpQwzDmwJ=oeMgyOvNQia*;<6G4P_u1AFh27Z2R( zYq4)nF5V*&8zqg2*Vc6PUGMeaK^zT_PWPpRIAXEVs+?iVSTMBTAZ=O-&62i>@g#7d zbSBbxeLV%aHX%s(erJFC;BVchJ9|&oA48Xrp)G|z6+hPGO&wZm5q2o82$x;^WqcO6gBKMfhJ(PoQR=!^O zidw=wrAix(ogFzHwvro*OnqpKhNthD@N94lnT?NbTVq;Y#hztslF8S>MCuhd*==wx4Y2?G+J)wpz(q z7oFo}xqhcfmB#L&lmu{Wo?-yo^81ac%?r6JiA1Ev+j#uOroSAGVKc)`(+9W>7dd6$ z^iN0U7}!NaTp2u}gOSQINCW`04Ecd4dmaK6S&=7@!s4>T<{~ z8jHB+C>hHl3qrQY6-YSGfOLRJ|1@Ssu@t4CvMz2U2jv_#IW_xRj~{X`u3DDhTCru` zP`F%h@Az1CmR7Uf#zfOsVq}IjZ+vk8?18$^2FJMghnSQ&GSw0>sK1QHuK_S(*ii@& z+93~{D!oTLmNB*fKy+J9E>9__oMDv-t1RnBD6-zz1Si*Sj}D-!(q7JG^Bz;Y-ha|R z8;##{um4hFmc~37)jE@>qTI_H(8{XZ6*S0mpm2B&beZ^0+D?1{fxgr{bC`E 
z+aYpTI8rq2f($!cL{tT!MDq9|;)em3knR;jc+#JcP(`0WVxx-|BQnkA=QNSS?g%N> z1T;45?&F%`T%*$aKy9li+V4u>ShTAJL&w$2!BA$ie{uxM#guT1py0H0f{qJ(&xm6BMOagF#~MKwqqrg7DBpuCg0Y zbuaq6nt^Ms8qXwjeXgSW5-L1b(H{{|KqYss!bnp4cCLbZ%8hfC-Ml&S z>d+e1E((x*ujPhH9WX_#BE}V4!pR*{y1Jyh)||YCYQ6sW*%s95r&|!}$a#)178t|_ zxLEO}sE4{V>VgT|xhmWM8w$d8V5qJhxFVU`6a~}a*xUNIXWM&Qn}B8eq!Hvl^h(%! zV770(#97f{vP8hTQ&~5#S^%~%IT25DO;4hm(~$m3>>g0nYv&~?Oob*CMld#%d(K5_ zFcCQ(;0lEa)_m?y+gzZl+4irn>8hH1xQA3r$)k(2(;e9ghT_3uQalGD+!ziiCf>uN zH%9}x0Tz|}FGU^NKs0=E5M%+0FZ0j8Kw#8s1QC7xrF_LY%r9TbmzU#yf5@+2%h&fv zo;-TX?{}^0C2Rr)j02yK}>;jKR&fs$dHW(hKOoK#BGV-4$DaW6S^^=^1)b zK7PstJvB5)d(<^;bFb|$Q!R8wJS|R@3EPUWZ>MLZ(6&@6gzHpA!PIBGoHgqAWx2?u zsfE1D{N67xRjtJjojYHD-8pO>-f91zZ=c^l$I zR8NC_Ku=x=!Uzg;EYABId8zkAfQMEAu$%XA)AtbDSRm@VMVP@*ynl533LejQ=cl~8 zr1>rOyk3)O%GJAoS5)ib?!=U*sZ4Vy_Ivf2a&IpN{A2RdT5theRe1%#K$J+9^yxj9 zEKu7sk*|Exo(9B}cIE@ZXglp&w=RuViK(I}uzCz@KBNlyU`>ojf-HCfF4}MVgO{%^ z*7%ONc!Ak^4I0xL39}^ic=WB^gM9*5Jz!Z|lTtX3-cK4RG;W!nMstyF7bBX(ub%Ug zV3NyyPQsk>+*KPB=+JP)bU^Djl-0uY9lR$Vf0g=Iin8XHoORCAiHVvXeA5| zKqgIU!kX|B1!?~A>>E?S4aK?IU?+R3$raqsowiW9zXDr4u@C{rabO{@e)5d@$q4JL z)+C2nL`H<`%GM7?2cCXw?v4`RZl@_9ydhe6c*K%OL5wWjP9UaFYd)gMnyj{%#hrJV zgTCA4d>fePnu}8oh``1WV~DEb{)ChY9N)WlZ)@kFN8qilHja#0yRqmBbE}&l9*a6G zl`gIp7Cf&!y3DDIWB(Sfc~;(D=I{Vjwuv$=X90i^e*lmCi|d%#q8d3W*Fu{YxnSdr z{iCE1?s9e(ReZ^ZN$$2jpaH>IY{QRN1nafXNy|iJPzPc|ht2cJxP7PjToi$TJu0Xe z(#0zYIzS+XW@skEL=6v3#ulZpG{R3}B+6=ufx}=1t*|9whJ;-(`59{#JR$I)Poz}} zEswD+rDVfRQE-qE$ktLAM4;~YNC^nh26BZ!rJehYMKuhH8v)n3r{ktQ{!1BH=piSHo1B%Ih57dA_n}l{oThd1TF&HtkY+1ixI#B(lJv z;BU(!$etABU5J#SO%pPlgz7|0+D-^p*udX$eG+SASs(~Un8N?%Um9Onh+o=nAaymK zoYEOWV%E9FOsR&vig}Z}e01fifRgl-u2{NmiID+&15l-gJ!Rw|MA8M~gX@_mfu%I* z$^=VF%la}9@<-b~bP)vzUY{~L;K&8o!|~`W06f275Azf)<^hIeWe&4w@>o3brX$(D zJM3IsbhSDbZX|_21SfuHsi(r&L~j*x-o*!9)qVaQ!<^t**)Kc+2E161P)T_hVf-8M z?nU{R#C1m;bC`Z8w+=3DQS+H9tFRih8asi*Z~pT!YZ?te=z-vLs4J4nTsCDSIi-z> zBsd7P*uf9pIm#eVlxlG~6RrW2yQI7?{w45Vgl;eZL?9rH3!dpW{cd`J4MrtWx^5^& 
zg`zx0G_v`8IOe1vUN9`IE-d0d)(BOB*oM}}c3hTB{DnjiQG~?`Xwf-z1~2tlf*R)X zn(J4rM^H7Q4%Egbo$7SKwuPy6jYWy=fZxQJbBfe^7>K)J7n&b#Ojd79R&KQ(-1~z6 zR+kUp2kmg`aHAM(AYw-Nk4q#eYFSEjv}dB^_JWaM6#976g_D?OVVNHUPbMvxzzR!5 zDxQ`l|0<15LL~0@BJ@NY8o;8jQ)$AIzmwm{eOXKCrI6Qq@mme$QcYuLsUOtW4%1^QD= zVAI7XFKb-M3!T_+aKuH7TAoi^h6aDAR;@sb z$7bJB0Dv3OkQj4Hd6mzwG-2d}w52o%Yp|*)p7J(nASz$7>$4ShaS;+BnG$*TImy5p zoJs@UAql`&;D^{ciNuqa0;ywwSa5NU(Ppxc1+OCyv+hlx^!ZaeOCThr<<5j?U2>!B827FEckg(bm8jK>OvTplMqM?;o8wz6uptp&?#!S#kGPT1!yp&GW*ss4^FGi!&Wg5%m zsy?3|^d6{2HC8;~kl}U}%*L_SqB?{Acy#_AL91fB0A@BQW3aaw zFuw)RtOLQknQfd*rwm6t#8Uv3E3;In(xt!Ctqybt^-9Jy#_Fm1WM6!R_eN*sGfH)g z)LxRFCiix3hfFM~twgqhh8@msV6J5D^cDQVRQL7yQy;IXwWNCS|{MKP2tUJHz>8Ltu4GBtHQtmbL2)N zO7b=?HFu#v3Ya`}F!4e=kf-tyje+F!r)25u(2o(8FIVcZJH@?);jBs`Mrh%X#nQqA zL#^C=5!td@V2UYER$c8@0x&X58D`tAEJ)Z$3(GL$Ap>9F8B14L?P_@FVOa7$=kf<< zOe*Iyqhz8NM=lVgz~O#IG;pI9xC{hnc3H|4?5^+~{Ae74FJqZd_rW#@LTx`wCoknZS4b>NxFI1MO#g$4d18$ z@Aa%4Aow(X6=2Ae{6Nwcx1W3iDW!hB=vryQnx{k{^7Q=#T*nlH>n~ujxgEDI1m9*FwAw|4&qT{nIPB5_t>Ao`*8?Yo4L>*kst<0$9Et5eC@~o8*$BpF zY)PY6R?kPKwUWq&0_v!5CLjmI)dHMUxeD5_!Z~cy3>Y)y`Ytj5D-*)6HS?|mMwDS1 zUD94b2663E`i)M_NWv=4*pML5y9*EM+3VUnCGe6p4deRyvxA+-JL{V@9#3vNrinb; z(|1S6KAU>4i|rZ7JZ`4LzANfV?l|>9Ff-2J!uE8Mze=}2qPsY(Gsw%^3A?x@ga_{K z??L_IC((v}vR#{1(v?w}i4jib$eJ2E!vsIxq1y3vGC#E3ni< zAxzVwt@X{Vy_!kl#e}k%oiUra?3lO=C8K&5oPuuQ6B zxh7)|JZc6&GCc-ps-v!o`j>bJW!hjroJgYapzb1NAE+w#htZPi1B0(1fBBEmVAvvP zIGwh;zPEm`vnP9o*dnVhETp3l=|_HY(N-BVPchJ7osL z@T43DP)G?uqPHjJ&6WY;F3Ve-5RAX$SES-@Axe#<|)jcp|;k@Ni+YuJEte#}Ps!r^GVORCs|9#VIj_Ttv7eAS*T9jOUHY z;^sCzA&aMo6h*sYz(eSq%m$y4L-Ss{XZMNU&3jsbX_!O`QG2?9xKbcKBJ4 z9ojn#?x%@g)i_ylsBl4p{#K@*z6qb>6W;RV4`#e3fNZ>XuwCP zh3)q6;sohR9?;HWsJb!XF{(5!Nc#|l%bK*2;?I=9s*4Ddqb5#_IK)zc9Ju&%^dwQJ z4OqHxz_l=j;yIyAp87(ga#gmK`{&{)-^NK;FNh&0hpsdM0+)a+Tr45ldFFj_$Rjo3lU*SfVwh(83uRH$?#Sdnu|=n ziDPZO5ndxpH4}W}l(^-^Jo0OV$H=95f}>T)$*TTgvL0qC<+fBHhZ4Q!F$(i8u;=PL%)~U1$ImStlYa5)MP3MZb`9UdU^~Gp6)*kpXxRM?ln0qe-*c 
zton6jJyyd71MY;M`-nUCA!B+I{P}zlh4R@sGKs7URfX*m;e%uNlZebFKTB2E4D60y9Bb5c` z=Hkzt=HX8rkahanzc1hHG)1?r?U8Kc+ao6dLG@RbrRm-SNzOC_CqJ#+dT_7vK!=6m zDUM~fW(}+J;Qq?3Q2W9CN_~2?MqJv!#uAqvAORAQP!p5JF=$kfOJL4*Q6*~un92izp~k#OrANyi=7J`dtdB~<@XS0pk#yV!(oeT#agA$ z0PbQ+SRBvkQ@gc=MU-0G7RPSkMGs?%&7cc3A~c#TGS(0&w??szZMr2MD1_mF2!vf&LUi0aG$3~ z;9t55VlB68dH2GqSXRVUjOwxKY}!XlWM35A_Vna)oF3npef<3CXmlQ$E4$h0Z105l z$>Qa%+WO`>mHrr8;3>8F3J^kY!-#Ov`B9VL#V6CMjgDVe@a_=V->U%M8;v}!a9YuL zTHPE_P#EL00=D?VXQkg;=V68TVtS2TWC4UkI-93#EHek(v2Z;p?H`W^=NFwVLk z*X7t;e+PsxQg z+8!rEf65rsH0Mns!?1eeR#p{>qH=0diW*4}jMldsg)EM_@QvQmtQPi2uY`jY68R}Z zDO^JWGK3l-f3K6wqTo(!rgE~pT}}0mGZ98Jfi!hHh+L=Gu`|Q1@TP^?DNpjDbEmy9 zuH>7*p!TX@Ocp5~!O*M6Au2q|!j*@ixMGN9oOP|88kS|Ln#ZB70#z(tJPJiC7-Qke zL(%O)btTDxEhPP$SRHiOJ>fmk1@%GXIOQ8hPs&lo!qZ1wq~E78DqF>&*;(Z?P#UQ2 z=rpVXy!^&HE#;=}S`zTFfeF$p8ALJ3k{)e0DH>DCR@$PVo2w-jjO}pSYjfQ1CF?fk z=lB#E!97?sQfOUi0W?_bZEHYeO)Z>S_+xpPy_3}%FmMK{3%IydgvL=sYMas3XQx8{ zVjZi_7w+2A&KKCxw&{MQZlT^9X5^I=IT@bd9m|2X+xn8iAO;(WD)^bEqc`YsTbs=2 zO!+L696^z5l~5tjA+Cs2j+otw@Y)hGS0d#IR~xH@5EW8^Tto|kjs!0yeJXZUp zRP&^PWCKhU!}fi-@G-f%7R000%q8eQaE0m2%v*JVR(wrAXEm!cM_aKyMKUuLo8l%U zJ;oP@P4%%g;jt3-%;u+k9|mcdIs-EVbMcHDf+a&I2t#^f$82}tMtXaWnKyYGJjr0i z)MOf{;G>$(3`MwM?d=%yrIih0B1&=-sn=jv6Wvg}8_cuI^rbM<)(sh@Tg5A&mDiY} zq2*xYApBoA!G%TH<0?cYGS+4&iWOAdgYFH;GH)Uu9$5( zP0Yzzg2gaRl-L=w^Bjpjn9*LrAVrf9Ow`5EvPr>&DkjwV$@@)U&=*8f4a6vwE zP|{-I{ln9vmza1@pOE55Y1TAOpLI*#{uht{CzhBKr_{YwN$MWv3=+5`7^`X;tz@+| zC0AOQBu!9rG`$D2#D%kVUAq#aVa1Yb1Cy1e)UF}DnefhFRxKJV62S=4+prkE*T@W* z=#DMom3B^rvbq`kj4K%KhMu{E@%^v1hD9@^s>S#oH-bT6_2NnqunOd4;tt=Pwy_(@ zx5er(FQ_}Ra7x@&9>Agvy-GPkD89a8$raqE+Abzhen^qBDUN#}kXLw`sJqPc^H?Q~ zNz+u6napAkn34$af-PnuvK$90fDKhiqqYr^dO3haixO*2DHs?6Dx4vhT!9?Znbe5r zahTA;wa}W>t6~)tVgLp&?lVc79?Zq$*M-P_kYNUGtuzbUf_h;ma#ijfFiGdV55i(* zvWWrt6im*(2#YX`YPBeuPyJsY{M3gm+e za$@BSv6gHd(;1w?JSd;45>7B8&uiN$2`KU*n!;o$)t2;EyVz1rpW@PrhjIl`Br@qd z!)DkIgr{z7k%cdeQ$%+X>XmaH)vVaS^dZJF#o#bJ%-L9d1~xC|LBQ4nSO*H=s0mhX zD^js_x0>HYVV_DPD+&Dr(-_@lrn*%vwdItS%(8v 
z(8zE)*$EmqN3|#gKjT-1byg7*IUeJR%DA>qn&2m+Iq?Q1_~I)eu>;YN1ySMIMwn=s zFzz?#teeVD3R*>wZ0qD)1AmPZEEcu;l(C=oCPA)RSo4)B*o}XB)jz8aI3a5)2Fco8?hHuq#y~Q57%F)i6-1RkmKs5h==$Jk z7}P!!;wPo3N)UL%ttrbWmNaaN+7;Cfn^LYjBq^Q37t-TK7aQ&^1n!)#^_3+SJ0*i(iHV!oyS&HZ(#>Z z>fVpE3_7v&lT;e#rkb>eK=!|ohExtSPA;C4$)FcJ;6O62o^vZ^e80RveF?S4qc;fp z)Niqr71?r8;^RWT5i*lq2byxGArrY*1m7CB{)+SmOPjNNb6Ma#ZHc}OL!Mn)LlHze`do1!Us2vu%bvYixl)7tn;fO!4H*=FykR@bL48o~4Jd2`b zF@nm7VBVhG$wY<5{0pg?s=S@)BC*P0^<)9hO85pk@>XDmVgM4(Yttf=mbP7;uC#IR zZ)`tSlNQ5QIZgnZ$k}_go|G?;#qlBaqs>Z~(fCN@v4qb%8rmRe!$sX_sw_AcJ zW1B1&4lP_m2E--=hD;1pNxUdMoyr+u#VvzE4bDXJ_6by0C(Kr|ZRKWggo|HoPlyj; zA0pZSi)Jf3N*L|=t_{W{VE?$^7J8z6jh$0j3OSZEK8d>(Mqo*A(rHG;sA#^15pItb z>$qs*+7fmnY*FvRgJ&P5gP~oUhh&m#M??6N1qIivjQ#O=G`=tTCGm_1dfv-9VSt`R z!b4B8r4`Y}|>*cwZ6%Ga+1AtYxx zP$Kj{l2gDGQB4(HmhE+PE)?~zqZg7-OtjA>d$`TJ#26wki$rT#NcFOhr`N!5zeBf}fnH z;_E2he>)D-IcD%SQ0_cJda?ts=~ECa zNukO+R6X28>4JN@q?&eqd!cC#p9BoJx0NPv8v>oDAcdH^Lw&QIs*84aedEvTKWuez zNfVBPj4p9*24_NYbOeS)2Sl*kM+euC_M>d9Vm)^!8&alvd~MX~@fCIID7b!ku2S1H z`IAzY(u9*&A^BxG&Q*>82WoS!6GrI-#36DrW9D=$WfN-F@cd#e}2BC-|7Eh#^ijtKRJm8OLTH|g$quh z6`J1RT0GofgEurJ1t-{Vu$DorPhN)vxnbJKRg@rp@=}TkhnsroiyyaZELd>Aq&r9s zfK9tsxz&7vJu4T%q-)q&FR22mcUZXqP^R6Rc1xl4f7yE*=(w-zPHd=;ScWZIXV*;Z zwX+!_U}h*0AU;Tm;wL}=H0h8841knql7a`o5EuzxnHi8oY@a$+oJM8TIPI=?+mm{u zqivJ+INNQLZIs#;JIZ<0Tp^x3O-V>kLyU`l`{qDW*qQ zzEYNS%}Mq~F|xFqb7~2bi1C#uG$}dc4vKNrq&vLV7b;E9C{9#FV+}V@qm4Eiz=(E+ z$96G23&(?rj1z{pnGZ$ch844R`ZfEjF5C}F=*?LP_L^yX!3vIz@)kLesRMSN| z1mL#D&UCwzFdyjH8em6JS1~CYYB>#eGKxailZK^;Bllanf(9Ax^e$Fs(H){^#v)LR zLuNY!Us+LASBHT8aRCEO(R6fzbaNPn281rMZ1PpQ0>hB}iZ_lk z+2WU1QukzJaLex`sl;v8nNV$%T|$_tFsdYuSg~%fgo*dPlk+hTkD$L zvV=Cq0$NNWF#;`gX-pvo2Mim~GE@?H#o)yiX^3`&a-tg&^i2zYwe97U;*opvQ8t32 z!y#LWlGl=AZAz$?=O}>Y#P70z1@Y)akzyk%1gEN~$HaBc*DX-(DIVIHF`Q7qtbjkZ z>-*TPsgpz7Yjh-av~oVPGGCkZyC~&STt~67AbTV$%ZuVnf{pM+Y!zQ$K!Ae^Lj!5Z z8#?Q(Vgto5siV%0bxn_gaHJGJ16w|=x=kn&a1de4u$9`JaKr3kwX$5DlR~L%=>UTd z!bV=|;lDs*ep;{j(~vxglAM=C;7c}9Z_1q?cmly+T2|K6+4Z3s;wiWRB=fClZ$vX? 
zV+jJ;pI3lZ7CxCS^+Y_M^*E(sudWBE-W_|iPETEbG8%wKDaF0Jwe zMhpu$rn1d}%mfSKkRePpB9)TXXs$W(LJYRZ={w3aGSxR^a5HLF61I-7(7EaZf{k09 zc@k!Lw;E7%-7r!9RgUqj=XuxCQ=>W>IOc2_^_OvJp%c?2=6Sks8LKVa)>#T`{5h8$>&6A?w3%d59U7h&OWrcnqsuET{sTrIV* z*Ml@k+_pm<8aCXgZH&iqPIQJPkb{8|oQv$P(n-g0Y7TQk3I#g7DW2f-+6ww98!3R$GT{S?25%^RfDDqw1`(^n25Pf5ixVse zSV4~^txLFrrjgpb9Lr0j8`orCqykuEGi1hoHVcLdP8Ma;PiOc4BvXotXhVh)w%$s| zQNXwyY8U#W9I%ZP>GtQyvBt7VtZhE&uy%EaJ`!*;cPpFcv|>ayXffIAMy;wn!gZ=) z`*gXR6Ph!Ee{0&ds=+eKM`^&KD0)J3WF-??pL!cr9KX4Z>S;i7mGgJ2s_k8-Vy17a z+ObSz9J*?>ik1^mJvUQ?Pxk$`@X|E_ZnluML6!_YJ*raEwp9%zGb8?Z2iI&xbN=vS zRFfrctEvefoCLmo{&%**#Ji}@kiM;IP0Hz)3riFAN7<*t#8=pfC_-$`M{|);Jwrh1Ux&^OxLCufYD_ss&?5DA;mI+>;hp zt|NAD(1#ncQf@&4$f&e{nyWSFDmlE?Xs#@o6o-LLLfDwBw9VP1Z%4~=IaKk11ou7I zQGBpCg_Gd^qzrDk5FgE9o7#GigPF>+dy403i;dzCViO|@#&Qv{E%oQ1`_VI3J;uEJ zyl168S1X=anV+v!mRE{Lp6r>i--FmRnd_O6tjv89o^(8mbCtdcysNlgn_obDWE>~L zCQB9Wh0rIVh{DJKGb0QWux23bS`E`N%bhtrjQGQ&1jQh~NUJo9*P*6@BshOJ1CJLN zT55=P%1E}v>WvDSV=to|udl3OqFL(yl`9^(2|5hZ)1R6Mfmw9 z7l!k@QCS=g zqtbyH1gk8dWhW6804R7){953O3GkE`I&=Jq?IeMHHo#aLs5)sh8{uik}wV<`Q z9K<|USyG}v6>-;j33PHRX9^BBoLF6z7IG%YYbh2c9wi}hfR?5s4N5jey9#M)AXN8B zy>{K^%xS7`xWZ5(+)v6z`}qApm%#5VTwgRQDKM-@BU(+^v;d2oJE0-<@>DA?UZKyQ z%%7Fz^&%YMG4i-OaILno*1!mbWW?Cx2h*N>IqGxzMOppWuqZ;^sQL*=nJraJ$oxSo z4C@Z>UD9H-H&Y2HW5W%Sks`-1O@Ww!-O%apT>jJ8FLzFro z!*7FRqcV>ZU^JGIlx6|p5-Xp6N0hBA<1v$B6UEmzPk*Q)>h7t6_|N&|fX?KLIE@f; zxs5M!U~L7>oMQR7Cfl&Y6s8 zZxTzW+O7=0+hgH)D9r?&bT{X2uX0@fnY+HKOy(8Dx25~M8wpuU|?sF#~)TgisE zYG%+QWTEZWBHWAy0YXDFXsfQK-Dopo@v(&}$Rqb7xug`iz#FozAbYjwXumXc!WP^K z_HW>lE|&U$5(&TB-JvX<53dNWyoaeshAU7VSQDGsoay8d4-vM5Jf zgu1f+ZcE%Wr6hA0CZsJdDS;ERtxf2j9E~Fq$XFn{hlhh^Z?P{{E`p{+nC2Rxy(JCC z3ea7_C0-Ve^87aTvCd6rSLwuj)<68oN@U=hQ)%j2B1KTHUrDzHW;1V;nX|rQf|8N2 z@zExagU%2zAiuv-h9D8C{`AD?FmB+!7+=QiZzy!@e6p}uomzh=lx zD8+}8)OM#91X=qB4Wr2m(FYLkNtvvDyEszYprJGf86xUO?nJO&iF0P$axz1&_|XFuP|L)EF6&_X$S@oWhYAT& zs=<#mx43GA-!SZ_t2bbAMGeTC-?2QxDby$ThNG(5LKM9%moso99rw#^;jjcR2HBU$2SOxqkWO<`&bliOnsSwabY 
z5K+@*$QO1_LD8agin$~_-#K*hOm|;TAMc~mpZ)GNtxbgF!hrb!MV_ zd_)Ss=D$$Yw*cy%6oNiE(BqRyDEh29^|xZf->yu(3C^$0did_yy&pQ`kUHU@7duZ( zj&%2T4=*x=9C3`FyU$gdc+MmvP+V2gwTUb;kOHy4#dG)hVS*bT9X@>~(L$dhZL}xD z+@4NEB^AozkhRe@Z1~*7>F)7$n4_1w-;IvoZVl{L*xr+};jKK5j2H?)NPnYpZPW1h zPx$$1sL-VPsGl?#^IiNl9QWaGD&-poOGl>xR1}DG3irnKq_@r6HxkI2fc` zui{}OJ3rJt7W#5D{HzYIG#q-P!@lGB(~uq&VcWxN){WMx%6qv#sqvsj> ze7;_rlUL_MZKL|z^V-QuO>VtAX&ZAl9Px(8!p1Z2J}Jy9pzXIv0Q)`Q7>z|l{1iLw z=R>TYzYSsgiDV^enbqZa`4yKI zE)WhmJJ~%B$zSZmj|u%ztB*}OU}sNH$;*}IN&|oLI{-Q>KLrc^w1%F=9fh^o2J@a9 zlJV*J?_7o93FHmvDc>tk)ns^_8>j3vBA%NX=gYC-GhSqkjm#MDYBR?p`8@XSsY!vR z;#cNh{xpR2r=aQUP#|MxM$irYJqM*WY)VWjobglR4i)<;rN>tq&G~wD^1V`-%_LHZ zf_rZu&4iz$i=7jaEV69klt>fP9401*l?oGUGwVcQa_Di<1`InVRzQiMZcNse0<9+( ztJT$Y?7~RtwfRbYc;YPh3Y;Y=4j~2j7_O~bS#lEK-0-CE*M%_OKQuTZ^qfB0zc@n$dE;V>5;!G>fk2ewP4Y_1xX`^8^exY?x-R`t{ zLW-oiYM^YwfMV4+^}G{d0^}M%%DsT1tVgk)U!GlDlbGf>fd}H%yIIA)61V2Ci0MfK zGne!^K%!5Tw4OD6ND+_GNg>$+4dTAHScRO1nXUw?*DLk)lG-P6Q4TD-P$GGCAlCl= z+JtU%UV@L5<`h3CHH+ayRGoR3s?CZO)l71oYXl7@DL7MTNn%kf^AcNJrrUi5cfgui zr8u(=qYHwxz*7J;Sz$k_4bEWm2KGgCAhv@u#7gx`KCvXobT^h`AZdG&on)LpXBZNi za1R?9&GwN|-x)tA+Bpf&j;7-T^t2xz8N9Z_?(o%z=g}O8a0{fwh?*WrIIFjmEfWu* z&fM{w*Px*yw6cAMh;|znhQ5jQs)VQ(9`#&EK+(cBkgRiIWvRM~A=kP^xtOIT%_##@ zJU?+pSb}y-8$%gNgJs~FhZ;NA>a|XfWGq+ctfNA#1)aBq)wqh2qCwk>og?+Iqk~ubLupSX;LppNK{he0b976{P+*0pv*9g2G2Jl3wdTP!_!+B z;t9>b5t84Xn`;S)>@75gM4nvjA(6uoHg_Z?a;B(!l5V$z?>uUh+-Z^|K(rFBai(be z$H|=-kF~`Nt0uDCc3VaXy9oNjYKOgLzr%`IqiuVF2vh75gTtZLszEJft}DoI&-mN! zpIlkIg+2;xN{Hncsa7qj!=3as>Y*WV&MY;Gtb^h&F|*{;J(C*dKDH)2MeXS{7R@xn zcSP>^O$$P&b5)89(2vMbCTw)XlreE*dx5U%ZhnD$LKjooG>T=+Ex7Swk4mlzCLK3$ zsQ%#q23~}@KFy)6ENR>!SySUfG z&7nN#QM_J-#vu;HLJJB6IZ^;g;iFS4fzRlV?V7bF~-XZIy@Y+!ef>&HV&Av75lacCrYPh|tx! 
zy)>ItU`VAoqDP{i`xu`ubCA6$4O93X_R4i1KaMZiTka@tvdv$wVo0FH91A&Fz{F15X)1U`x1!Oy%+U~VNu9z%IGnaQ z!L}o1JiQ`&UXm`5%$S*E!$45rwuNm~aK8!T#TM=Iwi_5ih{LR=65gh=ElO1SNZnyE zn%6+7M#nuhhRWjg$~vbkSwhI!QqDT-k^;p%5BY?Q%a>h#f&zbKI?ZwS_C*+v*Ks@s zeo9WZ>Q`)|6bFTM#VtIY=FM$O7N{J&JetSUb@!)DyG%8`(q70+(H^T%8A?Z?ifHsL z2!hn{olN!JJjmUH1zK-2)?oEND6$9bgV{ zzW2H%>4vX_$;3uB17R|61GoNg=Wnq(UzuHZTTz&%xpwBNhS3lh(!tflO>hXM8SDX2 zg>ustCgRMfxTD0?4aqGCfK3L9UW3#SrI6y4o5u`m!K3%!F1yLa9j4z`xyrB(4Q$&L zA~ z6CzKc!tqUPb7u(Rfu;cPt{&KbRPrsJBe+rK6{6eJv}juUZzaqmoo5TrzVD-j zy9ebAiopWNZ?PHw0b0|Nf3H1O1&*+mhaDe z{89P-osS>K_q!^Oe*7T55C8n|$M2K$&sqL2+V}6t_XKPXgy~u^K3XW8*>P{-6K7Y( zT-wWpyLP;3#~TZU4y1gqrXKv8_A81o}JN}~2 zV4?70PrXr}MVhn~35EOc_qXsDeFg~+-mojk`1u`pK>`t8C;p<(V4-j#di?IsJ4k^i zyI%N{w+|L>-9Z05`R5xu?<)=#p00kOaP#ZWj(%bM!p6=55^j!u;pvzEf=~D0>DjaY zdSmC~lJAwmv!nldeB;0=JZBv|q2`wk-(Tn*EUex2XD@x_6L%E~3m@JIoSp}tKDRh9SorKcUHkER9;-Ki>Hsg_@Voryzkbpf<-UB^yC8z_jLl;4`9#@{J}FZ|_; z5}*4Zaw9{>lRt3lGJ${fuq1ut#kIZ99N^1^uOsimm+{Z7pNFKlc=01IHoxz)qpy5> zV`Jk5NY}zw{s{j(bKnTDbkN?p@G=X1iG>!W&}Z)@th1OrT&C!=7_rM=m{x=~1 zg5bg?tbNbM&Y#7TweUFtzV#PC?4=jZzOPWY`Y4(MjPDtE@vCQ_JA$9jewYbAc97C| z?JIBnl~Zs3v48(2jQr+1@qHK4{>{@b{5abEmTR9M_{~Ru@>}@%)weu-qxqI=|D4a9 zaNhcNKY8<$Pwsf0=x^*?Ee;l*+}PN;i0_LRpZ{@C;rXA$KN~wgkCdDDH}U(K`!;s| zq&%(4({sPThr;tW@O10KU4_D}PXOhokG>5M3u|v)_$S}T@}EGai?`%4!?TxJH zA2WI2#g}e<0B^qX5B}rE#?6iMH^25{Un_%X&!6f;b@z|(-<^Z}_xq3V-vbBv@6rAE z_v%mWe7}6%KP6w^-z#7D?v=0IW%=sv!q?CK^u3Sc>*i?T`7`o;&-3H>es=V}=P&d5 zBQL+QvGLh^|2}Zrf&ZT4%jeJF>9h9^;@>ykQ+V;S_m0S03?mM&XFl>J6nX9*`T2+V zxv}$IXwLH=MZ%LiZXHL9EK|u3e-;7k)l{lo<+Z_aV@w>H+^ys?Jv{WQ`pesvy${=X#eK3#pqvTuA5`8E!aFHi=BUVe7;n9q7D-%HF}swCMnM z>l`Th%`d(4#czJ;;;Ac0zVN~;y9Nut&VL@e`K`mBX}tC38!%$;+W5lLpS|bsx9k6I z`U%N(51RPv{HJBEjoqIErk}@0>3sG+8(=#)xKP*7k-I->!|LA-{O+%EOkI8S)Z5?s zcRqFVQ&(PMLHqcAI(6jk&%O0mhTb-L;jO>&Bi}RdyN^9Pa^KrO^5@sydGkYWzIp1+ zhyQ*3jW-{^@9y6}{1^3(6HmWbKK#Y{t$`P(pSY9&r}#Yi?S)f7f1!$zGxr>bR2YBy zYxhB}3fuLKerw==1I@oa_dH)cQ@sUx^49`|!L0{DwWmLQ4+wJ}{L{^}aeXr`Z|=jJ 
z!#4-JK6&d0zKLmQhjqryvtOTk=6-^?2&8WQKOln0upI-xdlX{lMQJ)^nB^aMF^H%Gc2cxJKdfoJje(95AIub?St!N%IRZjQcUpr0B2mT%ol zaV?72dmV?oZDZ{L<=BP)>;aIAWAfIYqOzwy{dVCOH2ZE+Wk+G)&oA8ieR=a{H2?k! z{M`DKB)sj3moPr0AD{l(147}atKY(;{Yqi-vEMlNCxid&pJA|U9R6(c{*S!WeB(F2 z@H_|nSKoi(+0k!dxE?`MZ?3&Eu=cHuoo~kPfv<1ue89fnCf{fa^1Yq;M!zvI`t^aY zAO7ar8-9HZkiYt-kNnx%8*YB)lP^E;rmsJ9fDQQO7oR!63Z9t0^qUy9O%&VMeMA2J zp#1w``S-`=-;c__e@FiP`||G>@bCDX()`(xt~Wn4*Y##l`jS@i>wo&@#>Q9vh?$|O?4CjmH>bMpd-lYeZ@%aA1E0I~ zQM|tSo>%beC-Ccq#m>RPR~pC->i+fnyzqmC!f)a45ApYtdkTd*{$}uZ3V-|Y_r3W0 z59LDPPw@A<`1@u2eFA?U!{5j8j-UU4vM+Z*z26O3`1@NwR4B~h?;qjsA0nNfa;Z@G zH2%JbzgzhGHvaxLV3hIqF#i5~JfFh%`|#Jq-%sH0ZzJt3_$%P=WBB_|@wbS-C-66c zzmxbo#y{lk#rOY;zZVQA*7>mq2KgnQ$Nt7(;XlU&llO6J!n^Qt5q}@vg}=8#5P_{Z zz0P1UPQOC+VN?jG@&Ii$4KTmHOEtWS$&tz5`Ke!e;Fo`T$9sNZ*Pnm>t7qPa;a@1s z9=eQO>w4oduS&R#vy6;l&}>}3R%@(P7U9`|gWoH~{@%Xc%j0^oxp8@+hK+TYpI0wW z=n=z^(z7}@BiL2H4}Fm0~n+!@lp2EHm33xzIz+vgkLsgWXhB})4n=>G+z z<$eCD^7r4}@$PDK7>>g5+2wsRg~G4gJ()^5d>8UeHs?-ZOMMYJet2}^+~}Es{+_u- zu=)2Ejvwzj{_x28<6W?o)u(x6so5AD8EiD?1_v)3$Nj9r@+)t2CAMd>*kbdb6#}Vja@iur- z+SU&?20mveFAR@A+AG6g=Z@*hQe&P``d}ve2ZiZrbUBX-BeX-Kuy@DwtiJi+4$X44 zF#Y)KYV%}m5$-mH&)=hOe-7o~(}sH{t`+XNOMc?@QsLh4d%9XU0%&sN1UCj(D{~xO z7?%Gx%EH5s(jMo#5lN#CYaOtfdlLS{(^uze*9wJyfjq0zD2<>R9P9r|pRelElfZ46 zu20jeD?E}=FTAWUrxD*`np-az`>4C#oSv)Btj*)d!t?^J|0ooO3&)8kTHCA+4qhC3 z9Nk6gOc-_kgsf(Q24dGPO}}XqdJHClmTJBTG(}u z#GWbq)A;Q$BdXP_K2xFlo+;NP%#1wHQ7HTmA}!bIxIYD#U})m8d(JN_W~@PDOIf!T zcHRS7+Vb`9+;zG!VuT!554FP7-ME&vzCK>9i%A|Aungm>6ZXYbO5v&6>{F6ZDEwlC z%aky}>A6DTe~eNMLnM!h1YgIEJxfzKlrq!+f>r)1G|(TzH*QFXE|u@~fxP)|vox+3 z{^xi6#t7$VemIYF9`1sE^4-s09}2*(gU?-V>*K>z5f4HQW-34Wo`xnY?pvfUA--QCyUHK5ardR^LeggIE0K8_QUZ@q83-f?h!wR4Z$jgPR zg%#wX#5&TK z@T-A5b5duqa2?-Gtom09Yxpio{guK~@^&82#EiHU@mBv`08Bnr0dGm(S5T^mdX@pT zXpmL~9_yXKn^{0~5VUr!k2pSDcvIAt3j((RxULO-g}o@-oA{UI4i?^wa-@~wvW$8b z(I*vPg((MZssW!l{9YC;6(jP`_oCbxq|Zxjy@C_!nydaP1E80eg|T6c5)od}@bJ5Ae7l_#P_U4>;q1vx1t}K(l7l z8rBoqQ~H;O+`)c31}vge;cPH-)!se*-kG0JG3PZ@lLp 
z-*W`+u+=3xJKtDZTy7jiB%b9%8vCZvy;NgBrpXU3pNE|+?BPc_ zGPNIpr$B`3k$t|=N(0;k4_ybsgpPC4*? zwDYX%g`Nw;(65FkraE2Bh@+j4_Vo7j_x1KOFrYgQx>6tFwHQsr09&gA+Kqz`wgS{tv&-709KG8eZyWIPM-e-FMUhi|gztH=S zdVi_+h2CH5{Z#MgdcV;7<=&0n`}%&cue)!s?>&8EeN%nc`aaP2<9*NceXQ^2`+l+S zm-{}~_uGBH*Y^i~|7YKy_I;!8KlI(*-`W3Q|3Lqv{X_j{`=|S#>|gEw)&Ae;|7`!i z?Ek&~-|zq9{y**ifBV1D{}=sl8u*(7?-3pM1GfhLY+z&Hp8apz|Bn4Xuz&ad!~0L{ zzq$Ws_Ww8gpWFY5{r}7U&+Pw${eQCm+xy>kVAp{s4$L0-@dM8s_=N+f9zOf<^ur%| z_-7vedk+^L`JPAaf8@bOu0Qg@M?U$;mmm3ekNn3+?mc+_!FL=ydT{jM_`wSYFCUya zxNva!VDsRQ9{ljZpFQ}09lYyN2<{@~Gwi3mw|c+U`>wuXU$gH<-@E#Y{e%6N``_Qc z(BI@lg_#m=68Qd3`y4rr+eqDg@c8zJC$qOx($Glxi|a9XpeB0_rM^pW#}#qotzE66jt5V;N9(k2z*Rz-iKq8@kQ84upDQLQH-r7tJsQkg+`)@E^C zk{-xGZ2#Hoa|o`k*U`)Qu3{G!>`0OaT#k<0LiFfA(jLZj43^2_I)se8?3jzr<>sM7 zaa9VM^#Bw=<6mF7K}M2+!(ABe>hpBth$T)lfigO;g=X`PNJNsCPrA`CBA~bSE?3VWG^kHD_SX(zgy$22K+f03*OX?0<5zr;?1o7&TvRYdOl>KbkmsYuDZf z$Z~&yg#PCxJ95PWQ9+9=f4Z&R$9UsNS<1mKf-}c#dYW;{rSp1^EBry3U!3D`e9Vps zkw87XKK4V7q%9k4!S`MF;fP?|SOMWLNA|qBwNatjm8;Vo7x6NxM6lvwSFQ0FS^~E} zFoNgv#@bc6d($&psRtCp6&agM$o z=`_^sFUP3is8kt*o*tV--Oy*0%HY8a+rZXr$m@k-V19d!88NiazZOPJgTo;~1&0AF zWpaRAx$A<7&7rPMy(Bhn+Y*H35^uP$O_V5zHkq@>#+6BucoNr&AmJ4ZX z^^)*xmtI~r4$~dVRd1{=!Vp&;ADTK9U-g2*kO!IahdX#JNL1nB4w#SZT9XJcoLl7< zO!gKBSFJIPau~*_Rk0u#B?r?$40qv++-L;MAPxPUG{{U+Sb6j-_(@?M7VuUEv}HD4 ziGd2kz>Vs4S6Nu47=QTBR~@&_E;#Mjj6$SbX;1O^p`xQ1Nef!1=p5h@V?%WU{dq!RBMgcq4*c)9NG6?1P5*Gr3M>3i0ND9WK!ZkEXx2+5=X8jLbSzpO?+gNQ<}w z`iChWUegeTp*T7*0Zu5v2wytfp-E%srt(V>xxd`WZux-VgG z@il>C(R?qnOm!}uV`_Bb?CEnnJIzeq63=|TBCNjfve#iVIW?jP^+hR)b$>jMaI1i3 zcg*&O!|VdhBzl5Gik`4#Q(KOnO<{>rOmT@)xEVmBf0Tr^xu^}J*;lJdY}ysL)q$mc zR1VdH#K{D7LzGaZxqh~4lcw1HT}RNN`>Q(3-`8h`G^_6-uTfl&$O)dE`UgWG3>xdQ?b*@t& z8rDX|f=#!uCme3uGMbLVsj?{ihJqtGj9!I!} z)AWqhHnWBeFC6$phK|1ehb501W^nl04sDyizs?g-S&;D&$%sCXQ{6lbAYm!w-9ZOL zqAwFw=-WEdSxZN;um4fW0;0^Xt+LF3VlR6`^|e)~LrBwRymyV|p%~7sEv*8jx*#y1 ztvQXj^7HVT;E7eP3BY+$bU@(qR8e)`3@H7S?4KcULq)6GFYn#zq%wo}0~o}t^nkoM z0sauWA$n6qY*Re1gd6S=et#Ju4j8b2QLjM8r7DiCBTAEACLl1BiYIZ_Lm~lc-Bq_0 
z1&{>2j7Ve*bA}lEIO`6zkr8%e>~sncrSK0kC;(y#+E!k%wzO28t6@`baea?uQ(~XT z9%Hvdu1{Vli6^V`!oz|sVq_uWxY208k~~qepC$Jz-5&#H2z9Yi-mF&Zo%6kDVw;-bQoJ4tBT1em~TtBdaC{ne> zb;+dEoKmdljDy@5jA)#^R+^r`S+)5(HUR^Ql_oaw5G0VXpb-5P1hVjEFc_9Q5sySL zJfI}swET5P`lb|z^tAHYGw17w%7ZHa81brd4N;M~NRr!jxvMH;=YaOh@c8*6r9DDT zEMXLp1*?eA!L}UGjyVb0gckm2ZWbLSna(9dQlBOOH27Mr-lXRelfm2ZBg1U1;;Mr= zVd(?fxfpejh(am>WLba|7~p_*)A)*peb9&(_R3M8fmRw^#3G%`q~s^P+=v%VT_({u zc|tL6X*RIszD%5$C+yMYO(bk)LZF_y>+0IF2qd}}@SeaD`FWb_3{_Cut%;{_GSW#W z)n{;;hQX~cemt1Hi2TR^EI6@G0i0k3gvqNj3X40-FT>sz(rzUk5d&?CX#@ROZ2*Bn zy5Q&Yaq83fQ5IzwKU6Ol^T95f51@Tevvp>H*bo!1f{;ya7-S?hOkG_P^(R^IQd#2b ztnVpyAs5$#AwjZ>vg%hQiZkJApYg3>;kvTRlp&YQ4Sb0SiAw|Z2NwqpP)oH06=1%B zc9hJDp_0{2kayQs7A1NJ*OqRww_)^p;E5&RZO4FqA+ zvTA5@L&x`D35z4&N*IFKdr?{oeH@vvgmUNwoqaT|kz2;d_-Tcx0nZKF;wC{lFgyR- zmeoOfAyWBKuMSPK#<_JKhUL;K^T9mLY0R*R`KCp=Ud`{$)UQNlCKCeWb6OH%$-?l? zpmfPCe7NGgV%BiXPgd$W<|put7pr*{b>!95VJ(dz?F9i-c>$lPs6%a1ik~e4o#4aL z0tOtrnW(*JoXBSpDpCV-0u^$@1GnYW}3>FugOFY?vL9)P&Hrt#x+AO4HVvVpzttHr- zi~BAd8ar3qH!A-Q7dz3?C|6IZ6DW&1leov*W{gXvne|odNl(jKvUEi)mfD{!vY=j_ zIh78Y*Q7k&dF2KJF&H|)#8K~hmKJgDpP-5O-_Qa4?=ZW$hRDUgYq++{-4tx~3;ONU zvanTwTWpu1dLalh_B5=xsaMvS`Bnxq9ie-JNziCOyM-u3Y!DL6ktmReWxtKd86Egz|39O@yF$awl1iVg17XP zD$mPoi&!H_UN(v#p)vRJg>b%K>N;9E&IE1RF(%gwmQ~hah|tJBd{Dr zB6bl}V9mc;UAd~LhQ_08B{aA(RCqC~b+%<~ZvgfN%OGuJ1 zvM`8c^SdiAFSKSkX+bMA?2NE9!Llb|NNrm&qJxtuHkzD0(v4+U`)CdkVMflJeodtN zYb^Do^K-?Xk{xD5w2^X7MXk7wfYynvA`q$!s=|yzp$7W6wZ)whojRR{E><~y2ZX0L zAf+V&S$0Mq`*-!F(TRH+46#QMKH>S}w&?xU+K4RA(!;TT>}^?rwCN zF}jD_`ia4utQ@8pAV7UwM4_0r&Sh{dn+ETQ zF7v@ab8RCl9n;;D3a3(rgY^Gj^b#(Pn%0Kc&Zuz(Q#||_j2hlez^0@`|F~SGD@K@Z z81^}elGQ?JiDEBWmA*?4tZ2+Rpy+1Z ztlC8l(hB3I-+1 zfK%|LQXX{0gG2xy1v&>#RMkn>CnCbw5sgb;nqdxe%a`2OSkSlOS+44E zArh-lH*w@;dtIM5ZGa7>d7L)d(*wjKHcF4bqST-=4T_|R(TX*o62Wgk2ZoNWO$J|X z4=9w)>IC`eXAqI!W}n-vGS$p<6C*9o_}=kB6OVeUQE@8n%6H3h{GNqKMwesm+v!}Z4B{q1s^GR4+K?-3(G0kdZG)S% z!We26F13EFDRdheLc6b5EfSg_hs=;_3n|O?U|;X(!)k(a8cuSHZnK4Fc5M*rq7iGw 
z3j~Lx;G{Ze`(;8{r0UpHqeyI|OPRI&x=V99^D|cxep&)Cq;)Np97d0578Etx&oFrd z++{Ak8f|_nn>Hj`nkbRMN9>p3S#n!5qg1k!wKAjzTRn(H`(JW=wc&Ut3{r#(Oq(8j z@Z>kWy^9_-k^M~d|6&h_t&Xa##xFob%49hQP8A!$o5O*~9oGG#Gr%NAyhOW6p+Pxg zFbOB#go>IM0i$viJqD#ps2HkB(~eWL4(uFcZE%Oux-?6}fCB1ZxDn|zpJ+!&Ytd0E z3rvF!%xz~QUK2{44w{J@UWE$VjmOG#D+LUDf$U#|?TJ?2-D=}-wTu6bD$@ugUE!r) zH9A(}s?i#5TBH#U)w(?l<4ISTgIKHI4#6M84aL|*!#M-&-N0!xhw|di+-!W^;a%hP0~(JuF^o5yjh7!~uOmZv)1Fwv5|h5JLs3ZPwAPb(?du z$BoWg({8rBcIanYo?G4anl1tt8Ul~#!ye##ImKN*&sj*B3b(jXGC|71-~b3lmpjqS>XsF)R~3W`H8fg_<-rbWXaw9P zIu`3b*1o8a9^|KIL7I(~#&M@}6cM?En~x~p!CMSGt9xlUG>E-nxEMz+G~z?zx@knR zM#!i1JQ!Q)+5#EM^W5+VfPb{>tvo>5mewsXHPkg?D;hZ0Gl|AVDs*Tv9pq&DFsI3_ z3$WohEL45UHL7J+j1!%&z29G|ZU_m@5QcFZ;}2bRHYn!8 zu>Nf&b#sis#;$&YL^-burV^Q$h?VJ5sW&1SR%WI#R^dsDfsRA$H5^74yDmIQ!<~b{Y zL1f`KisL2g=E~%7keTl)PbV9XMBRTlT3#Xxm)h!#0#EypEn?J=n8TA&mM$f3bH*HG z;zFCb$z&K^fpT*ZbPR4aIR5zBI@G{JhhA|Bib5Ea;1f^#vur=%S^aK83-t)Cvr|Or zDJPup1}1dUJhZ8kcOs~np+QhS2_($zW!pg&{j zj`zjq8)X6HbI3ZxKiN{iKl`v1d-iL+e!@<2gtQQQR7tUy2sK%!Tl=0Gey*k{fE8x_hSNm|<6hgr* z<~&LPEU`C{MyFFPUk=D)bhci&V)h7G^XQ-<23)m4{0Th*=aRgWxEFS`$@O;Vf*h!)mvJ;1kua@Cp;3&h`;4rsWjP{eE}c&4#%d$www@H><&xjCrVQAFQwn-!+ZT7&b#vC` zsm!=yZ+;7+&jh3;P-1E%B#f8ZlEz3W72%&GB651EvRW=xaat43ggkGKT7&PBM?sEW zu!ROZj5N_MRl3?cKETr^Sh*f5c1@3-yMRq-HD`v^ZMvMumq>&<`bM#w#6XIv3px-= zdre4&lYg?w0nbJS5xuuHxrdNiprm}n`|v6bt{43@e462gnFN7xhicIoeIgab3nx{u z7DJq_jOVWOt%O5pybgFJSvX*oN+-X)wm7v{n~Z;4&n2{^lQ@?x5w11@X~@l)^Ds<__v#P9YDy~vFGk%N&gBCp{*F_M{lgEv8#d=j(EWsz`d|~H6M8YfUS7Vutn0@^mzSZ6 zCq9>#J9}J5}Li>c^uJ zb#`k3_9TcHLFOFxJb^eq6BlvT4@*LZoYH4Ll}odWHC;6F#pKZVX_<;kXd%kM9g*aMN%s1>;3q=%Ygu(}sxpud|l+l`yKGGG{7= znZZF|D$I9;vlTO&OyCT9B@2ww@S)2$Dod-Ymr=>(%W@pjHO@`xseZFCf+ZAy<{%Wn z#7l^`G3z2Y2=7(7Y(pF@nlIs9KGCi*ZlGfi*)pfqS~{o5m?($aDBvgh&B~~tyR2VZ zE@iEbS}z)hj?Yuo#Z}ATC5Ao6<8)SqqYiG!z_eu<=G?2bc|iFWhGUMRIiDR8O^ej*;t1&=OZmdo*Q);dZbeyt1ro%W?-tcA`5l2o`rO#L zQG!H790?9pB-*sHJ%08KjyHF})*x*ff$odbCenn`NUgEDSXpmqX1IS~+uIOwEoLv2 zj~qENI(D*y_#UBY^4M<6)x$RKKC({|UD4uP4p^P3t=J*RHG_mvRP@l`ez_=Q+l>G* 
zji$lv=-`S0F`sa5zv0{01Ts+@i! zC_+XPFue?|t}fQ}R9F8^HbyJr=}-{9;5K~+mM?BRIg@{_|8NZ+HLE;ZK_jFU=MqVw&l4X z$feB$wRE~pAWFw=6mY$?jUt|nHY+nGUAWx|Bx0-0Dxe^%)-UVVB;)3}k4`s6^>}K* zokosnu`6SnBw`hd9x;btsKsMUO6e0EAJLnm;W?86u2Am!GX7{#G1q zk;k8+LXGuh9>0oFBsSH0f(q@|c+zSrvXIb`+zCxJC0E!hpvD=s?UajRrbAk`HZ9EN zRnjBfo4Oj)q7%`i)J7z}e%GVIIn8jaY~ZFet^Ne zA8HUOA6|vsU#ump=RHpvHm35oniT^u|Et*LVMILY(*QSQaPwo9?J-+6xP(7ST2TCdj- zeTK@At7?y(K!RtbqyueiIbj>x4JDDw&WfYx=U`r=EwX~p7u#27V$$S7h7gB%|IzBJ zgdIqq9-r{pxeeGpo}Dqr_81Y*N(oGNQU zq>Dxboeg1S%E2rQJ2VG;EF$v-7;`CebYV7%13$~h(`HdWyJ(dw~x?V$!jsSya+oDHoBVfZfW>sUSG%rEc%Xvkt0MIzo)_0)} zHj8E_Mliyg&@WuOwzR>OOJ?#kdk-rD*EXp+i=!8>iNM)***- zqlh9zH`d=Nl~Xz@-6>?n01$Zim0mih8A)8-2|Qu~wr{QJv@Q5P+jjJB)OI#F9x>g= z61YdxN!Hv2NatYEx>juqFI8W%V{-Z?;w-rEsA^cdU74uJDbs zXV)}W^Q&)VPQN7=JYXvut8QLAht-PGfD@#>lqC!X48L2-?pz)ga9QqEK=BloJNAQN zGUg_N^RmuoK|jTrwufeH1I=O^adMKAa**aip)8Av2u-dKh#$YVhnRrNVx)jP{nB_~ zFpulbYjw^;klrH}D&VXyyI}5}tuCz4UoM=>>nM2(Ec5L~80(M{Y_cVZ3RuME{XvR`vyGN09;os=nlX~&LfrJyB=IiIT(281%lt zWP!+D$5JIQ7&8I@4Krw(m4)|;Mn%c*L!4*euR~!YfvOPpDd6-bB&{^4U+O+QFK|sqH#l`rjc>aLBI5(F?UcxrG>ve z*gMgTB+Q@!x4|t^FBHQl0mZh0XEb#tdEXDwgod*j-W;oK5aw&e)H2=~6w?A>6XM$Z z*alC_A*hDg-jl?O&(y>p0H@j|FE*=B3FES+AmlNb>J2uJLWOk;&4jSbm#~yZV);mo zr6dz_xI@}uIU`IRRUxyg4dlj&^{LAIIgGn)iai(q~hk(3rgx;xP4+CZoe>NmP`Adxb(#3 zLsuRuUp}Ni_B`0NkCI19a8ioQ*{!_Oyr3wV5loynG#n!0YSr{EISGlVV2Sc}ed}Ju zX0TgNZ?+>UhLhXe3Vx0Eb&asWG3d5-!gYL=MIThP8rgVH@+}wieQ0y2) ze-7s+6RCtYedxRyrRRF!5UqyW@n|?`)PT0@bVF=-v&Eu^wz-!}zJpEdAISdGN zvQq)rHMVwjAIox@TSex8bo&xA`28sM+Z(DPM(MWQ;nZH)1(Auw%vj==*47J$ON_8~ zqAgq9aoipa-`fmSE_uGf#QLh;b@FPKWy)q%V?xL_6deboWW|6LX=Vn>_Cn|=Z~KeH zfx0XNKfgR0Z00=Zsip3NaP#vu#ohBN+y*o7NQLdd<7jfjL~M(#8d zV3~tYRF^aaZ^)sc^UO~W3`X70!YP>6c}WAQ!&d;?BgwmA1emO00I+N@CAW&*kzcTtK@s3j%jwmuX;NAfm^cUfG4Tt?arIFgSay4- zYqlxzhE-GBp2{2s&K4zD%;8RGB>X3{a>%5ABy)_IUQ#&<<#R?XgRGI>%Q=zrhr{APwa^hWA^mH7jvY|2LfsUM&5ljlVk@@K=jikK}ijOunNQ%}o#p+sPp`2HH z)G&b-i8h6<0Wsuv058-{MN3#k)ZVSs5K)cVf)L5g-dd&TA4#4Hq!c7rFziAq;@*wQ 
zj@%O;Qwj{M6vcYKTA^>Dh+Aw1L(vFV!)Z=4LyF0kr!a5G#k}y9!@bgTljJaJNJIi2 zb9fR1?BaSE#(H)7inB&9BfUy1a$PswfXBbTpUxrCD+@wYA88$Q(eqfaL6~f|VmyRf z9ke$F{9IfID`GW+OMQrYv00hdOY5C4osOMU??Oo#RyQ=!HEz4-lHIB;y_AUOplN%$ z`=apf>5+h8GD;WsNw#1!t_!>qF?UErA4DS+0wPAyv3wc&KH@Xk6EXMb#c9dq|!SR&bmOOxaS4+6dTp#-I zFki=|1NzvH$92TFSh=px19+ZUT&wESempH<4{fB zIKiI5HQXNG2G{*DuKSa??oZ*mKY{E1G_Ly-xb9Ely1zZHY88oh=x5*yBP1YBYY;K@ z28O3LoHfrN1c{QH)tZo!0*~OCKI_%WqC6iI_ZCN{E{Zqp!a%#O8T4HyF{5V&Hp{M( z3nCFgEDWb=ss}m}4jpPe zY%gIcg1U7bzm>_sdFOOsmXIwi8k~)dU`As*%miPZJ0v?!J%}q{VKQ*TIc?VE-HknP zAknJ#b~w)m+8$gG9Nb;s*aG_4v2Yuex{Vqk!x(uKT3czplif0(W$Zp6Wy~Gh>#e|V z+wZMV>#zvfJJ4GZI7t-=5N|tIv(9{bR(+eps?Y4$!t}xW-zFH z*!mFA{M#A>hHW4w2fWzpM4?n&)vj#Kj(~JqgZ)c4zGKOz$fQN>HT5wi)8(st^_~=!q22=1)u4O?F z$!kjtH}7^{?j0Dow7apiyCJ`Nm*7(9XVAl#KryX?g4ml>ni9zEnRQPljVt7)nhUFz zYo7210yVU-K)ScvDLz4|%qbP-0#Y_om)e#Z6RJm?NSv}*<;f-oHwPDm+DH!{S0W2P z=5hANYq`X0v${+550(aq4{~yB;_T4WG=eJODyMT`;?fV6FXMB3&yn)J<9o`ND&0SN zh5z(+KRSK%aQBsm_8dP}F2hQ7>4_uEc;%rYZfZWh>j={J9hdAM+;?oxo*pFkl#d@d z%1lT06!$#1=Lc(z8*rBOD>b-&**;OU0?|di8%>k|e#IiMCTZ2k&|2rG5Rg*?C|HmZL}nl8iv*nQX5n7COEqK%BzL6j`2=gxw($ zXpxkS%S3lf_+&KFS*5tGMEgYv$COvk5UsSoP~S(bTn zkefgF_XV( z>_7upGL}?wK$;4PUFwmP*(Pe#f-U1-&OZs!5m*an6ZMn?hmq4JA_wh~3cQj?y^vKI zD$S3M*fuaS94VxbmvP&j0QbkJO#%ywAOKxo*?f&92!vp+V*Dm~aj8!(1W%G&W>m8> z$51Zys`4u;mJ3g>U#-V2phx_n;#4_x4HH71tgkF#4;{{f)-jhrIB*WJ3#&|> z);OBB*Dt30=Jr6`24Nwkzf~1uuR4CBqqu1ma(YJJa*wHi$uMdQ!ax{;KxA2wHNk;< z8t$#CR~&_+bQJo4+!OD zNNFS6%vogEuvRnJvR0Gd>@!=DF4V44{K)9D9xh#-=h$#V52Fd$wMud9!J5E~eU3XY zI$hVu*qUh%AlBohW)5!$AX%4F-s5asJvok({ui3q?2zfboFV7dnGT4uQev9rIVp6G zT(2S7i^qz+A%J$wcJWr1+q_fFk;ej9nE2f>g}>$UE2Y_!9%;$lN>4$@+b3NjL6nd! 
z53)!Ooq(NBtY1CKJCC5#FCusufT=mVT~;?A`JSZ7jrqO?$^qanRC3 z3%=Ex`7UW0IM`m3Ut7(78xEntzjteu2tGOzw^6{k3U5|KRngn@IyLt*g|Wq&5@ojB zJ;U4cTyxoZNWy+^hMp_J(-vDSLg>kGf$jOy^80bA&zm7;rA82DYGr;~MR0r$>&QmK z6$&HM{mQrqV?K2<97Y~F=o!ZZyk1$B6ZTw9+l1(|!>@B0Q?jR>f5A0kWI~_|M>3?L z#E)oNEa1>s)T7!3&L%aIDO)kO~_tI z69!PxF2>8KK%i7!2@xwn7KxG#0c{3mZFGQdy^2^xqQ@ATqMB${giG;9gQ-p6zr-iq z8}tGe6uby9mu}#y)*F>7(d!lQDzo=a^9zneHM->rM)HgK#O>Q8IvZ%ZZz!W6rBHy5 zQVAhtB`q9ULvB$YhJ`m!b#7&XPR&v;ILV-*KE%b^JTzL0Rfvr)kToWnD7XeQj%k#@ z(UU3a5`>|GtAZa_VRW3m+@?A2=ZGPauXwfr6ldHMw-iAx$ic32oNB zk^GK*)10fFoy)sU^YXpn=q-Z8i4Z@wY*3x!30q|zqQqu}h-AW3-N&GijaTZ(2v1AX z%2ht#+P=ziqsCK5)ZJb8CUi|Z5Ugv@ZG&H4*}Mjs3xp}Qk3QuZ&}I@Y$lF^;PAzb=~|=G#1Ra+zri>kV=EH!!V%@toG1mTk%W>8 zC*>ee5KZY~cn-y6#Ga_~wl|LSP@c94Ryv#g1Czl_i`kz8 z6?vS#lGF#8*~+2rgn)MG=AbWZ>}M(fv;mNT$7vkZnH{0k43nNx5`i+)vK@mp3^M;K z3JDx(gp^03bFyjvA)Y$Y(34{^W4YZD3=!<0w_Y$IV>c*qjCbbVMVN*}q8gq8t~r2@x|h$72VXo;?Iv>s<=oMR}kI@ zr(FXmoi`hC*dUxU1qbD;y^h8jSknrd{;SoB)HQp(`RHJovfR8{E)|QryZawy_$-N_ z1uW!xhwg3|qqVTeUYYv$gqTWj$&21jTEIb6dc5c$Bmsmh0SF9VsGshA5v)YC7q&Vm zKF)W6SLKY3;nES`Tbj)jp)`+x6NjnYdIb}qL1$KQQB5Vj(=nyof&rs_R?8mvFl#eH zEkmaB<$;gZQQLzHzEk37h9Sk+CKyRcm3Js^@`>d*zBVh5n!U`|wg4+V`hX8-4Zi@6?|kN-ONn<0 zP$bDqPnKv#=XeO!SS#dxE0*4BT{;Lz>cHW;nTWFh@a(aZa%;;q`1#laE_e5$P1|EJ zO1wDC3<0HStQqy`E|ktjx^dTn>@t(t}45fZLEH*6<8Jygb4sNNQA6V)Ke% zw|0jWYITp?az;f?1SPz0__)37Zd|* zeVa>e3{o;Uy|mbKl%YH?FOSsbitD)dZ3#Lj?gyzgjw8S)KAphJTYMbBatqmc7~DD1 zs78gch>eOl8lX;rE?pOztBkr8pC$)XOR&AJsY(Xd(b(?pV<&6{X3Pa4~^%nkJET8Os>t(tB)YWbD9U^VX0y!0kp66Ny~7kXE9mMbz4w_TdUR26rnFS4Yf{$(b?I3er8ym4@jAei}f3 z$_U!p^lcC!N(1MCLB3Pxa5vjhU2xOTYdD4LS~Vdo_P9a*8V$`Y%-(re*)YzNcsbd< z#EWL^^g@n4mqGqxgo;tX*3n_RJm~EmXiP;6RRXKB%mrijPWLCQs zy_q1x!4PhYO&P`Y_g#v_C_5H+%2_fH^k>T@^d9j{6+>p=`i`#awYla3z+!_cGeu&4 z#$lU?w|#Ec!S*h;K|uIew&R`;^0L8CR_Z2+tiVG@i~VkD00o3aY(dmkgKZ$B_^{b* zyj=uGWr@A$lJOXoBeX4=$}W%dCNL0oaqFJW-Q8Uu5d1&Hy%t{Yz{2>7+nU(^5G^JQ zQ#UuX+W&C#NNBlrHpZ;(a6v%u9)+a9X@jyicn@8x)a2g1ux#h(T=4ee|06HL!9uFNp{i5&d3^#m3N zEii>$LGo}ni~T1%0Wx-9J)^kmv#3^`ZiI 
zLfkNLzPiX=zJr&8+*n#9s?n_m{#KC2`qI^vWjM89q;zy4qqys5?iFdYxlX}GG-99= zM$T|^8304~Il^rtGKnx6xfmBAA+n>#XK9B+$Rt;%WZMxk01FW{STF_dy+eabq>Ha= zh>?lg5YA^5oc$Bqnd$}q&VfV=0_VBJG;#GR7f;a~6UepeBgD^|1h|Q<47}d)(*hbT^yOzyO z)Hh1Qnu|l8Ow~D8ZFoHWMH}n^10*$K!tz&l@P$RdrkvO^#eA20<2~P&W~Q~6+UIO_;`AFU?fYNNd~!yy ziePzj47Go>2pDs4k(_N#yF_0vxugwDd*zXE<)BA6P^2DcJhuW!UCmk(;rOEEHk?s* zH?Gj{f;R+Ai)TGtZpjIziyKw2x5ctdcPd}dC#I_PrP^|3@i8ic;`h<|9kh9L8nu?w z=ccC54n5AAa9}p*DX7B>9yuY`SY3n#Of#21G?ej8>zS_2#{1$Btxc(G;C=Jc_nNj zxBxymm~+rNKS$@_*V#$t$W0B%dcv^7>_H@9x?JgqL@eMjSj$G`%F+bS{HpR(MfkSy z!8HFzU2Qv5Fd*Qsp>f4ko@amuW!3u=;lu7gK;(;8Axc^U3$qRF6Fv0}%TWs$rp<_G*XGlVo_57zCq;t_kvhld{0d2g}O z;DEQKxn$;s>@9G}^9>>D>Vs9;xgg^~4u@%XidAJA$ zdR?0DJoCmo3k9@%XX>{#0H$+B6{cD#63DUZB$5%Xacv1+9n#7i+SOOCH+lry!##** z#k}^ouf~xdKyQO${aK3X*?P=c8Hyd&`g(|o`)!!KCMX2GUwfOPMQ`cRK5aWxht;Sj zf5-U0>d4gXPohkw|m)b4~W{*5g?9f5Hp;$qH7%3gNKz2sJ_E7l%X@{ zN2kw?og2k#yQV~ZQi?1C7ZtfD7h$_|E@0~Jj!(a|1-v32gDQlJmF4x#0K>;NI@Hn* zG-8h9^k<_=w_H`#7%A4NnXM2%=LX!W207kF3u`;%Pp;Hw zt3`^rqSB@nGL++w$KsIb%3*pFtm-K)PFQTD{9>GKdvgljX zEaNFY#_rOR=hT^+Yea?E~~R%=jz!6vxTZEvwZ{{2f;e_dmQrJF6?1H(F)fQVT{wv82-R{r!I3AtUReX{!=X;m&>c!1 zHUUCdD`#&#lKM2}%hlOM9C$-nqL7y)^Sw07<+NI`5DU`&>I#V(cqrRn2^HWwvA>pR zU!#WfSE+|GDo3|{9uoUc&qK#mVWcrA@gSBV@=(6+<21-i{a1QQH)vPT@JZkxWdz3x zaI&|nUfjy5*kV@+JQ1wS{@jEgmtgo{`EWK)vDDDonzkr+A$B>6+WIByCB!m&v6*kX z+RQF1DJ|x=tW;7kvlzBLR+m`dO4xdKMpx#4itUSBA$%cJ7BwLYl#Tfz2T68D4U}sw z00a##hgS01i}CCb9EzNs^W4cI1VLxJ2ftaC`Q>&~g0(mk3EGH@dv^!{p#$ zV2Yb7?w%_yA+`kIq;q!{`?()3`=^0zvV7C37qJ3gs?kxI@>9Ii=~+x^oOKTe2}R6L z%WX4~kKJN|%xwdUr>d;G#k#EUVGB~Yhje;ycfd&EUNPH9#(3gFXJptvW@GIGm+SpbY_(+Ct)pJuPZOTxMts0a`l$S^8jLtgv~bI3xoT0?jd z3XPD76-h3*>|>r6BRX^JDefx{6ye7k9HH@Aymkp503??PyttV#-Ql%3Goe-@f#JwX zb|stxSuPO4go02&q$gZX^w5OBZ`}xrTyvOUrmeRD1>+_JRJ2;bM1JNpP|}kvlvY+$ zXvfrGU&o5{H0lK>rLwT}Z3HmW&}68=J{pBd)(dX!iun**o(K-@R;pkI1VhoZvGFMw zhlb7|4PH8$c6xGpXkudMBGUE)vJ*Vg*-`W)>N(LPj8BXnlt@wa6r&^{D0rsb7&7sc z&@ZA!d3Oe<>T;1suYF{&3i0UVwI47VZiDx+@3ID886EstVEvVma8=;sxUktN_AXRNgntP 
zzVjCJ)~6w8kG4hV(5`|Z*|Uqpzd{e(8ld~MR?xFYHgm|7;;fv;M5yy{KoHiB&|tsF zU-!()d~Me6NtH{(m1Xh*s>WUSj-5raK;xLLt~TlGp^JaVNr8cdr?9pcl8FRYI@TIS zDDr-CL_}CQRzMs5jFcP@n(Xq)TCh!1hEn(waU7%)AozEFjqX>)r8S758JsV(H-WK1l%yLOUo0RtQDHno&ODK0lI&n4>wG)472;o7r?P={MBxu0 zpb~U-;F~z`Zpv4oX!BUcE?d0BmxE=MBMizNE(&X(BH_d#STX^29lwl?v{>%q$R;{r z6ECr-mMFM&?nplyTpk?<9WaVA-$?lWNO<3lxJtCWVU}=kyKqCJrciL4mBm!w^pb%~ z-h#1`Ge43K?QKB)I9ck!QolB4EHzhDd#O)S@KPA|<`GRAS7V;t5>uvO0I@aRF}1?Q zz}e+*UMLP=@VvRQA?8chRr&Z62iy;qi|Y&}uZlAqXyMp$d(MI$3E_Fm@}-3ltslai zM#d;9jeG@`@csq&N2c0;GRn0O3k<5>P-Muu4O1UzaD~TK>-4M_kHAX{9So=I54YrX zt<=EjrtYujbA@mtF3*a-KaVSF)$KXl`c&ud`~zZC&D@>ge>>hztHL+B31_EON>zRf zji7zIhFaENE$a&4raYY?dL2h+g~`t!&2UqDZA)?l#4>=9j+BUlQ3b`KM|}f1ceTG? z{rA1C?aj6I@6pW;t`4YP-o*t>xM}Gfu2K4g7Zu%^j6ZifxKsc+N@?}~{$gW$btQ%J zXmJvdCgql+&j5=*=E4iy$u#+Hmu>Ch)(u>QloatdD6`vPL*cKK^wjXvb0BT~5khRn zXNv&{D+EUV;$jNHoC|f=tvY1hZ5(Lj?ZIorrTmnZ3qJM#GaO=C>W6X-pe&COP^3Y1 zNh(RMhuR(Pp1s>0zT- zv)h@}fAn#3lB=Hl;u`{`zqpA63ny}mh6Eh_CAStXNIGpx@5grbJ-S+S%Qh`4I$WWs z;mCV51V^Zs5JSoaC`VD%z+C5paSMY&sc?s9qMAT%2r_@fH9|a4tbMn28f>{2XOfM8 z1Ql@`aHtCoZAInou$r8kn`4GG{^1b%csZXccY`ALHQkvf9=jM|o1aUFlTKwf*dHQW zA59}C-m|LwpnGHX3}iC|I`wS`J0;;x{o-*!)Ircw)v$gE!KCl1uc0K`!l5HOVKE7S zGNhF}!j`>pgmkPwF7`9@hBpbE>3CCHa_)4_CpR zS!7G|gc7gH&FE0i;W_D|7txhn1KotjO$0CLs>5yRZrX=2A?(QDt_H;OYYk%0fAL{a zY?ZH%5W^1LPevicWo{qfX0Oe#3_(wZVLx#nYM(LGA9q7Jvpf*;VeM0(kV?cIA2@g{MMQQ z>{JyQX!Lzie;qR25agsRx7X7y7y)bT_DAx?V?xM%=UR!nnf6UNr+bq6=4Iz5=8j?h;!9hwzoo^9Tyu+YMkrehUvA#wQuE?P!AcZ*TOcXEw6#3R3U#g+FxyDQc zLhl!#}AV^Uoyw57cDMO}O*IO>ADeRhxN*kzsELssI zsnnq&Y^WMPPLS9iEns1+6aGjRUCzb8wL%;cSc?pf+D7P zZW=(~gdSIr2tW)pgEWQ96gRP(Wv9az+z>|gR#$z`_UTET7+%nh01Lr}#(P7EB3XRV zIk;hgCB`YNcM6$oZZlqN%BiQq*8ohYDMH6Yuu)IPpLu3_e~3X;q7!-LO^0i-hPc=) zA5czEb4q9(g&`)hSW8)ZU~#!r6%Eig6Iv(LmMqi{8%sWe-oe|=5hoyc3t;UORMu8F zHFyk++zXLLQ#T|*jh9s@)ozHKnf?OP?fwLw=#*xF^bnGg4MY)c$n@MW%5jWS;qu~Pnr75er!dlz#|Avow3o%e z$jSvMCWoAXn{ixn_jR2mFv!<5B^wlkKSb42#+oH8g`>)3vYr*~aDyEbX9fV(UqYq`V6D)+39q`>ey&FitRnWHLR}RacHGxi7ud 
zst|?*PbrBbP}0mO;(3!kO5GduQ_r0%J93RO=-^g~>XcjWfQ_U`mFW>pEMIEJk;Mie zn-XhqKkr(l7%%7fF?Fmdy+Yb;WN&)aQ7ofsG$m9QzxyM?4}z^LQPM1d=? zcsPbw2Wo0!Y*^T*>D;1M?u1N;8RK(H&4Ie*6q-_es7a_eAcbopcQ~GbgOQVrV}cf>dcE$C3(R(>0Q3U4%e`A zgNFhYyP=`~WBR(vmZ%~e`Q<^>iKzb zv847zj~q(Y2OXdVx>OrcsBPLM1S1fvvE-Z1sWw(7gdTuI8t-gFYguB>rWplD%LZGn1q z;T)q?=%SfczsVF>_CYo#hshA9`0I|P*Xpn_(mF*STWyp70&``Y>eO5r-{06>8ZI~N zUWiu4@~>SHWTwNE?@?@+$vD_!W4PCKg!{*RE2A@CK8wpf#F%cO^vJyzpQEMd0{5`0 zRe%u>y$t4DWueLY+M-|TI@KMyfI8%X2F-gr zsjVycw~DYiTJDoI5#NYW#22(`~lv~e5B z5+`VmapASP6a+WxIuG!DdhvJi1dTV8DDLl(IAOkU327YiC|w!=H1QfQ8+oNNKygE2 zG4R~I*=7z`1|FjFO&V!0Kbk=UawVyKD`AlXByksIAJunpcDQ^jwsd}l>4#Wx{1wo@&2BHHX!(76)P3w`4t#gPuL#YXlg8OSgRI(l(D z1i3s8RB~p5qNav86j_BA!F444s90TI^dXlu1SyxyqqUW_Vl#D^^^sWWovGgTk-AUK zYMa+iuo87}1s@3)G$YauTf~bdE!jjzBDRht$&{?}6}b4uZcswEI@|W=?jhW-u=zQl z>A8W@Vnit2QZIpC9hQXYZh-|Z64mYbq17s2qr%iZsc?g3D=wl!HI1^BD&Rto!fA-K zxiduj?p=f8c$wZVm&uaerqEr2#eQTc2=y}QYC`P*k#)jOD>7s90S4jHlvG9tsr-$LVuK=n2!N(U48XQYD}}6CZBE1 zmvVAFDPUAEc&!vs!;)-l28iT6^z>3J)AvsD`h=iN-mI5XArmcX?OM*S6H!6L5vWTb z41jWekLV_IwaxwUTbPf7dDid^y10aX-9%fKk5GwCoJk!a8ch0|g6z!a?YhxoGpLPU zgC+`?wk1|liDWaUA+TeuCvzEgI3Ql~N%&wHn}*xCdW9}3X1H{=RVXse9<p9{XZ;T$%5^8kb^l!AgX zOQhG$0`j)f>Mqw;p?n(~glzCLR4P+S8{TeU&JVl89A(+s zPv`NrG4P5t-EmWPAD{N;;Emon$NNXfx&O3OXpT$6u*7>9(5`boxSD`@BRz)nc@J9=5kAjX1vRwpKJM@b?BVP@Is%=whb)U6-6jmwQBp!KN2 zk)l)^-*MCGWC!L~t@wuMElcXK;yYUYB`eq~v1L)>mZ#Qn?ZD z#B$?W*toH8!L4A&%03BVy#J}iJq1;~g(Rho^q7~b=#4oVP378=>TvHb+FfsJ*bNug znWyI`*llVzCwV^;by>2a-Hb}x33*AlO$jWNT~Hb*s^jMLROS)Hsr#Om8}bw8vXMgO zDGoCfcRKl)avlUBftBoCZYh7adLVk-(3b0_@E`C=1S4I)Eq^E(B0SGcjzbI%vu;TP zaGX&ZBRqAHWOm6izjQbdF=0!g9gB;E#p{tmk@RAux?^w+kF!~c@MqB`(F#E~(+ZvD zqNREt319N(0UUZ5j>#&Im%#5+aA?oLHA_6W4$JMI*Z~c-ljyX6~PNFrR%cvEg z$w~S;#Fb;{G*_3b4?*vbIN=|_kHd4OOPUT#*VG&oQdjk(y zft-U9=P2-FfK<#VnV4odjAWX~SczoX&|wxR*s7^a%2bWt|0tGfZfrb5ox)-7pUW** zVbTm~%hd4+n}@P$Dpff9cks4%kg`1IIE-pK{_n@9Szk&n)sN3ap^Ji?4O=XddZxsX zPYvB1xy}VRaCT&A@OS|IyWFou9-rV!`COn*P>(ad05QfMnTEY^M{fE{5KHF;(hGo6 
zdEt}D3*I})Cv=*Ny)cDPOpU4pq?A@r536RR=}n#uSN8HQO`PU{BM(33#k%N^tuT$b z;xraggMw6uwThcwUF(dy>^77Uc(iNay3N7K0NL999d&@e`-t*)nOM$!g2gVWiEyvFIG7cV4s z6hxgNt}#|CsE!?{6Rd<_5<7jZ$zaOA;bvk5IHvDfK)W6amse2JribjcOf(eU)~fD7 zQgpRYeL6y-RyKdd2}tJavGCPE(p9Iljoffb6htegkN`0pGPUI>H~HH^=ZpjCGh~We zYQQuBDRe?GFUAaGqNSXT(`v=^)Tp++t77km$}qgCwAzhQk-mfi1@a$5p=Fqv&XTKF zn?7X9tf>HN)>TT>no106%p+{w(F!asp2N|4iFqxE*yK&R znzT5gU2M09>rt5bJk5Ky?OIM~nJ?x(r?UALjErc$p_2NNon2m{)Zu^rHX5AiedIVH z(OVH^>=m0z%vArjg&RO$tgmh@FFogxl24bmepubYm14z*7wfp&b7fT^zE?^YWC!<= zR9jc4hl8`LU0h0Yj;w;S3;a484NiKyOYye8zg$yP@{4x2;(?pjRoJ+^z)>>Zls>i9 zd9JsQ?b^SOFZ(A4z2UW`kIz?s?d@M)@CdypIAfeu;-6-&Z5{VcPB8S&DpSxAenVlB zOopTv+!=S3ywU3hlNVSY*hIz(Zw5}@A|3hkJk4>#3tk(y1_H}9hzUw%5H3wmONIib zQp7P?YfTV<&E>o7VGuEaX1c?48o4EzbCyoq2^`c&!GA(O^VBl!RoL5i)jH8vC4V|2V|Ldo`sVdN7J_NU1uQ$oeYF(2_-xx@9x2# z5zUux>=Etx_=x5?>G8fIxhaT0bQhl1fp+NvSL!E}>J4R1rFo}~E2(E6`fb$q8l(3E zn(qxIS~Ycke9qCpAq^ILZKkPApl~Ho&j#;#U8rOT6(Fw1X4%6->3ulLI7e;F4$}Lv#_w;$3wQC#veRO0pyD zN-I=G+88At_b_0gf7IiK1zaU*z-Od}6&~R<7s`77rlRf49-rH_7*B}z=4znXwmcyNFlDv!_a`0wIKA#MQjPmV2~`)9|! 
zVgF+F>`i|-IOFo!kKJLPQ-H}T%0L*ND+bUAAk&l$S)jf?L)BO}W*<^}xJrc_>b>yT z+Sk2d!<3r5whcLox-ULHei&2r|?=%Uqu!8AAx$3M!p5HAG~nNt5Xo z_lfCM&5Ts1Oc6)szHS-^*fE^rA->MHa{1`P?3(1>@U(vhK{`-b#Zik=2oc2Y#pS4) zmKeU)qffz>r1_YG3VPc8C{9eyE3TO^52McI@T@WgG4WIH1pSiJ9eHk}DvaN(gBeW# zMkk%$JYOBZrhBv8f-4tpD8UIk!)eIAR; z-9IBD%J(x9IBGhSJb{EHC2>wAP{p2izUNKL-8bF-iJbClBRE3a+q{t)_Zza)(vPzJ zUsfVa)hwb{JZM?2o@X-#_?O%ML2c}GlnY~B+hCvC;Xw7aE?D{YBfj71(0tf?b$NJ* zSqDc6@hBDjfRoibO#9PWP~V*oph`P0mFd!A(Qpn4sCwW@RB^DV<#ZR~iTSST$ie`U zF^PAqFRqeZ(bga)(A_fI(H=mu>s6zo&6r50E$_27j!xFG5w^y`urLLx>D^!rR8w+^ znelMw9qy{cP4dA3%@OJu+yx8z{onkdc;OcTvbC_LN!myFT zkw@I#4PxjlAU0B>$cxzmV9t0FRw!<=s8T{{5Be+BLZ%Ge>ItPC+#zIQG`+RGvbN46 zs~0{8B>#paTxk^RM;HklWrX_=fdo(ILv$7%pz_UPHH7zZY9EWP(yHC6R zL!HoB)d>>X(q1FBY(*M-e*Vk zhIovi#CWImiC;jzs};g?IF+Eyadfvg)F>gK?d&gH^Q69{wwK-yrf)*Ssh^2B!>hpv zHfG{1LLXqU8}?_~P|35YS2z`*fPL(!*wxL)8(VAJKkuz?Y(86h3LSmW+aJ*WJwPFv z{4-S>WvZ!wf|X4;Hf>9mdgUz6_!gJ{%B_uvmSh+p)D-cASSt^g4XJ4x-{Pn;SU3fu zvK|m|tOFBQZpHgMok&z5g7BaZV@X^{@e?0}>7wy4j0u$z@5G8Mg^ZgU0PeuP=N8!Ki|9P9)$~Rf`rwviRf75b}`*Uc75vTi?i2fgSR+~UM4Md z9*+9B)mgkGdPnpp)ID;Qcrf@0o-2aIMm7{OrVw@tTpy+#SP7*!bq$OUns-g+eADkb zrV`}<_$qq2l0HI{>EG?}Lp2%)AB5s~(?HR)Sv{!pFxf)K=&?n4h%yu`x*6VUe@h96 zc3&M12SaUCVjN`==)bgxmJDFwO|ZU<{U1n_CqCbm)=oeq5U5~)tqZ7)Q^mTV4M^f1 z#uVfZ#Y!M0Ei$u4tAPK5!iEYd)!#?6Z>U?D?>Ov@2A4xzkz^=~)7ep6^u#@kX(+Wv z0S-l}6BA3iRY7#NX)>pTQ`_;Z*TdyxU3xem7S(SbdEbZqAv{OVjPU)Qrc13TbgTXX zh1rv6nnja`o^N=H+8DmwGQ*Qw8vB&+6Q-)B1AoFmU{C@i{0=Hq*wn8T8QTxxVUQk=sX_nTvus*o1p_2}Nh^63P;p?oPUb2kcN^Q2M#{+^Ri8xIE*S-D- ze#sSJ!3qD@^i{2+B*Gu6#BZao`az=?x)s#rTEayYofWgDnC4!7QcVN7i8oXQA4dV; zUJj;8TFuN1!j}NjKEQ}cf!SNJSPV|kv>#|WCR(>txWc|C2&R}|WL9phS5|AX+1pEl z-4c7ey5SS z;I`BR$uSCLr!y<1j!{d&W>Q3TrqPIk47JZ8Mx}!$Bxoq4_D@LxsGQZ*hd>AO7?A>P zqh4B9>6efV;{oRa5Ho^+snqQdn64Zejk3CEMD(UR8(W{>zfY^Vf^_X&GEH;3GqPlb z_cIWFz_Lji8;!HOehHle9`;uf6pX56w{WLL_*G3E3uwHgQ0dhbZ6Givp`v(oElr`E zXTf}6cgO4bz`dpa<~m+Adnl&J2i>lZkz#fEKNJUXtG6$iC{rW-5iM%`8eLq;K9~;I z`8s4OW7D{t{ORZ+H%fkc4?nQkixCj1#eV}te`6*|VK_DK 
zRbm?pr@N_xmrZdnyK{txMllrEvhOwk;TaN+DJKQ{4d8{cT3p25vo5_wFYYa58^)#- zVeA~Lc(E(~!4?<(BC*n%ax)6HlqJTuEQ@&=>{vrMReJxZ`W#SX{~C%YK^+$R7m8`x z=}5Q5A(7nnin3CYESj!aDv3x0U5<+suxp9=y1t_h5V5Z)D4k?MPq5?Ut4PU2Yh6-^ zjo68z0xL8`F#qg5O+lg48CQOaO-$g)lp(EAXHlh^aWu^zrfS%JA;&LcRS$i*GZoe? z(E(&Skrl?F%&~{PwJ1U<{Q6u<2R0(3y5m$2@aJ7^F=Bh8K zYvATXV;UbPK7=>62Jdyt9;SAx2#e1eiJC~J(_@)Yl5eT#_(uv4jtIv;Ja$&!3a?u1 zf)_98+Zk@6!Gwp6EiBJvZwd5ZZB1V&iCg9(OnWxlg*noj!{8bQI`M%%QL1qry*?#K_MszU?s zMreicZyz`)Hwo_r&B8FBBB5u_*jhLi6QC5a5)QHxHCq74jk+9p5ltD_k_sT2@%yy> zTX_~3^njQ#aWsv)sqzD(tI^p=5C@#h<;E6C?LSncbq=iu>)+EF+MD7 z(%Athqc=2IZ8*zFGM@?Q^Dsuv`}?nZ2cG8{2O=dh^#e7B-8*iDcC7C0kDkd!5ZyE! zw4<&q^ddPni*wp+gRsGYK(7 z0%4FYRI#y1Up`u_BrgnwNnx34B=aO|4cA#GXEkm?{FXeglDnNSAAxFUKoiZp8j%Jm_N#4kN6OmfF!yIwq$Rg@_TVr%^ndmtRvLF^!K}G z0~q2xu&Nab@`L&Ci7yH=1rhU%XwT`+@d-qOL8rpOvCpq?oW^UE=peoQq9?vS8n;|` zx%2K2W;)Wklkt^1C^0^|*fF^~|J7HMn$xiFLZv70r}LWo-Miv*;&waf;@2 zMw{Hd|KP#S{0dXyG1YN=?LtJP4T2*+0{!CvXXAv?<=iSM&>Wb{SJ0=^WDv`m4ZK05 z1ka??6)Nsmb70m6YRQ#|O03S3uIWte?<~CJh3=il$qT1mUJ8_(omcvz`w1BoWuj9f zH%nnjBwjTol*2(h(-~~qq=3E1RT85utf$DncQHO}@>7^T;VlWJL}b&0A(#W>{%!SO z!SSEmy>QyFkMz3={z$-KhtPelQ2`GzN4<|KelBJ>b?FP7-?8wXQIXsUMt28;0@jT`}#YkPNBuf8HGx$iK?g6p_XV zpk?4{BP1Zc5(uTZcfLJ7dDyAug17}dIojwMbj2L@Zr)=t3{ErbuT@)!_$F#I81|xA zJ$|LCH#g#)U_wW-BetlL{otwDw5iG=n1wa(2GcNBR*wg3UIzaA^=Lmzm!_sU1P@IbmxjXoiPa+{s1-_Qog1E zA}5nOXvk4c!W3?rfCWJkZuD>@fexC1V3Xk*a1okWolm?HRo=CgBAuHE{X_rYpm(;} z8)56~sCP3`qeCaJFw;Sho!0CpI!E7by2CBQKdr5=Z2Yw4TZi`^zgT;^qEVeG%+RR1 zw}+Mey*;wzy}3F5AFz=8lhHj~Id<<@Fq4^b4mD^wnA$%-?hY4K)0@KD+k3M1baih} zW|uE?m@ZDa{gt!N)o2e>`X0T6y@Nh>0N$d_{lWRwG%?1|Dn#l;?SZ|Jp((Xr!Uk%i z0};*7PlSqaM8{@o0&CJ8kxvrUPj!^eNXIVE5LZONH(&!cb7l(qM<;uYqR2W$-5$)^ zoKE?60+H!!jjE&@W^B1B75ZzorecwpJe8ZY!SqakU8iVDky5=3fl9&?*BGjHx0SFj zH%ur93~NS`FbvJaRjHOqRydmcGH4_o2Lyvke1iJ2(f>^>YcopX< zdU7_!pk%V88KslpRhMp?uoexUJ~MjW^m{t7c>ohMNHL#SP}$-9!9!3A1@zKd3|hz{2~swb(0{7xsoE@b=n_}LgT74R)g!J6JeS)B z7#f;~-Dj-WIgs!sbYp0^+!Y!%HL7v8mEek4{j!rsm0zNhG$qG_o`9Ra09S~RLmI^F 
z>!0+(!d9w^)e3_3eSzpEDmCh9QN|m8?Nkd7t9u8%H}{CzWD-`CkkfSMRTr}btkeJ0 ze_rc9oI><8Qjh;fz!D|!i`g9Ry)NuY-HHF~Ul zK4{cy=&{B%feME+_$0zMf>WF^VB3d>*;yaNz@Q3*Gx3IjU8)>JL zB}u%=@&_cED{EX9H=l<8(~-9l$iiUj7Ge{xeK-Mom~@76NQW3vVcJWSy>38m+$t>j zH|T&cl1}B?D-QGrF0II*hJ7Ns++hnHN&cR`3G+F|CuSzw(j15vMoT%RR6S6Kk4G4ec++q93f2R{82DmyC^(vMMS|8mIdct5@ODHd!##TnqQy2DSg zB+^k>M_PIRmV9JSj4Q2ql%`H!(2e;TjgC+8md3$T?+1X}wf z3wup%agFeEYtVmtb9{b~w8`f$n;Pa;cI{Epg!T%DI&>aAdW6K=34O7E7xTzC-jA=o z_y+IrhN)_}c-IPk$*S`e&OwPKAAEVAZ}E<0zq}s`e}NQ8DSnJonjFH{U+UZAAxpDv z_%y%pje?r54!YQF;Z?r8|M}Mf32#|RK;Ua6cCq6IIvE@-VgUOTrw=8{7o+F$VY_Ns zpk4VYED8WlMPh((A3t+=CLQu+ETC1aQ(pnIc4&n(UJuf>Z9B+9hOY;yOygO@NmrGX zAsd!ZJhrc!K?__WdkR4`tmgbj0xZ)KHWDlnfz}*85#TW~O30^aqokc6_4q`Q(BoZj zeilxQ)yV`OU<- zt8cL<2b*|P(7-^=?Q{m!DMQzAla3(#E+*o&^-1)x+m9noHc4HBRW_Bq7hVP-!OXPu zH7%>P@Bsu-07<)Ipc+->WEwR*d{j9?IJ%Gl16vPP z&Ip14_fR<|pvu4pi(CJ_BvwDAXEwA}oK8y)5sRJrA{F=0iherik8lyim2STTV)j>B z!Cs7q?Da>?+4)D1(Xj|*^b8*0V_j7hkofBM{-}3`qxHDKU3;J&I@91dhg%q$9-Zaw z03u>1*l=stbbR`oXS+Z}Cn^qn?~}3emc58ZrP0WTQQ~?Df5!&hM|Yy8 zsTIC9kbT2&saG%hg48YJO{wx=FhEgMcFC+xj3#9_z45YcqB42*4NMUu2v|kW43nu% zf#OydFbxWUXYL3IRbc4MsCalX7{IP|oIi7%KvJ=kiIoqFik|DQ-(=k>MZ#d^y$q8; z6U4F`aeSq58@jA#MGwy0%7VUT(u~bU zmZ|B*Vev^qo19=tDq4kksC@G~f{O7n`@KHyJdqZlCauB(T0q7K{x=uir9f2CJ2`lS zWkQbHgd8oUjn<`Ivzdfa4JlK<-k1E!WFDvoxv-+-rb`V%q7Ei^e_gmw+fG@m4u|XN zH2|okD8NQ{Pt&ZwfI6zR0{zbC?WDBMm8ErvFm!E^%~dN{=e9xnC$%H#m1%9irk=W; zMC*6g5Pspp`qB}coFf?0GZi*G;mk&!9t(R)gT`y$!+~4f`*C?ol7hCtZHPf;-&QuZ z!#AE<;e0W5s)@vyIm0WO=h^%pzYK&N%^!Qe?w?#rRdVgCd@X|8wBJHI$? 
z{=_g<^OwT`uTy)9v><*L(sROC9o2!3J=v8`q>?Bm4m1YhHA7oy!KSfAd; z5l;9Ql=as*!*2cQ&rHpeLwD9zw;&%uox&PzKt13L#{PgdnZh@&HR~?^DHW3lP5l5x zl_l9&L)tCFT;@6WgW>M_<>{;5Fuw4*WBM9fZ$d!4ddf&ba1oiN)>lw2ZCmQysOhrf zR8BD5RW_2QMQKUX{_z^{HNdLd8eyB-yAAjTlDT4NYQn#LU=xXYX=)|;e9MOF97x?< zTWJu5EugPuKNjWW-X>|}zH*FgioxAFP}<4h7gNvPX%EK$xCw6`XX2JMseQ|)Rrjq7 zE~rGX6*Ye*E@BTAuVd69=uWdr?0KxhbPqJI5)5(!@#pG)j8(d~A+UmkA>kdw&!TU9 zbNg-u+XTw=eY=&yXy6-TmU33Xr4p!UlW8Utmco-vwS`c`_z9VjeyLyp%@Xl1w`4ljetZ=yP~!uG>GRj_7D~|`^cRbZTN|6(dyjD*^wP7{_iB=} zWIB`HyvgmKpI_HxGZ`OWE$#En_1=Ma+jSax_r~;hX?b~dYYR7MZLeM1@wVZ7e1)LI z+0lofvHZi*`u8`*qKTy*bAMm|u>T=QJlj}Vd$M*@ENYw$!v;3eSl-qmcJha@{upj6 zJDQmADXS%~VL@wT_EwWbALnL-`GG$GW?C$994F0Z;0V{+Ld{ZuZa#jvC(n~X@hD&< zAd)OewI1{7DTQ*}=Oi{PR{$Ya=@sr9f+vB=7nwo07ebB#U~>{1f|4=0#;f2ohY$_9J&>;-h74qKnMX9GsYey0j|GxC7dk*$%3m{J;*7~GetjF) zgLaFc9oF`+3vL?8jX79vg>nwY z4IvqdUVC?i!LUu*A7DAKkAH}w9fG9>O}d;k!l1|4CdkY6C`dyC0x5tme2id>OA_~V z>sRb5HpL>rH(g)QH#hUz>Q=h)CA(a-8iGxYIK{7HaKa=_b6X|0bMKekxw*UdB+{Gm zs$o8s%&Qs>4FYiOcskHPaU+~WHnhXyj4-aKt ziwdhlQ(cRVHTb~_&07Ei=6 z_Zup{KN-BjOcefX9xQ_b7lX~-Q4gmn9-%y#4GTj3{fnpAW(wOCK^{PutLQ&EXkpf84s2sTy)$t>b}a99Q;TVI4nS5E8z0w@C4CmykpJ+)^d8x+vM`2 zBF{#VAYX)%EElUdsl`Pc%iEFWM`_6)kT>M z%}H_ zjwHH!H*%-QWCZ_v1W)HImdi;Sz;OpR>Yyp|Ouebu*$AR&YUVwfLLD^qj_et6#uW*H zqll((zDs0|HwG4^A%OlFQsj&+08u^t7C4!cRR2tJL_@hCH{>#QF0JWMgBI-k=@;Cf zEp_HPQ-5L?-b)zz(_EPem6ep>(%`{jL6n-nY zt&6bmIYtC*>LC&JALy)}A-{Mnge7DIg@rJH55q!t^-4m=8A9H8#)T`nZc@F;732wz4Hma-Y8}$w~YZ_x*}LzbYH{y z=vFwVN_NM{Syu2ax|M9(uU=ik51s9wRU%;BlOyEPTpY`#WVrI(1C24pSnB93Zb*Tb z04*fdCyc#|fd_4KU4UE8;b?(6mtD3EPGam9(i#OGA6>dNexF5mwF1feaZIQQPsYPZ zy-iVzU_?E`*|L2kfcZYERfSoQ=lloFFPNJOIus+dQi@ku`6US|-joJzN zDNO>E44U?h{Hf0&;`52Wn0?4|cXWY@4K#{*WKYNFBl61lVE7GK#uENGXw5^C4&gX1 zIj{vO1mhCR)WL)Fy0n;4JE`NV!n&|8T15HGA;_|6=TFnWyt{WNmqmwy$*uDzO62st zcT;!N`Tr=KNJBSrq1&L)4JHY?_1g}{*~F%m3oOt*M!bad1wqy8?eQgBV?LhZBZQ zGcyi*Z;{KdSuJh>`N)JLkba`iK|dCVw=|zax*h>%nv|%4)f7h;zc7R$Fp$|tkkdlfU}blD?zaaszdyRO(?1>k 
zdbzan;^k*k_n^aQN#Vbz>W^XJNFok?O!(=}JkFKIezkdOLgd^~5!zYr`fZk9)bzH> z^e0i0f@uEoCy8a-xwrG#qn*X>2~Ngdd|5Fl{nGG2)7nolVk5K;0{RRsNb`V>sCxoD zPT$^KVuLHig4jeJ50nesK+BTSdz*{Mb&W2}zfn@mt7=-x94DInH&|a4uLD}~V;YNx zw(bhnh*p)o6#muUu7H^l7~OUI#*a60$t}Sr^S*;GZF1&qtB=Lrf_OI-fzo8Yv3;6! zUUh+^!KpeYfiRH-fJl8+B(G^tnziVMy||fK1;< zf0$gbxL8wZA4RFzT3d4hNMI|Bdt%T5@BazCV3`f`kQ2or3y3>ws2j{=8A=kFi1974 zTP>1WcQD?^4`vH(f@7@ZD1Ah3VYlnKckRrAXctzDWPhUU6$DMfdQrJ+5#MS%d&2}= z&Jd|l6{hQG$_yT+>dubmPs05a7q+t+8`plAY{~ph_zw!~#!^WVp8&STU~a*UC_d6&r9< zcCv_o<;tS4Sg)GS`)41xIfFX+##~yF(!|cHUv;pJ zO}b&Fp&Fc>T)`$kKj&JZiZZjYEHF;v&N;$tux2xhF$mf;OlejtT&#(*=ONyXj+)-! zB1J;_X}97+QPE~_^DOX!R@C@HNPX+_6+g&&W%*Lg2f^jNRMK+DzR0h`fU2U@7{g#-MgJ{AHMwV(Yx*;rp0%oKDH#h(_`D- zArHNOa_13?&fR@<2mem}sZWoLaH)~>QnZpETGQvlj+~Xj*>2~e!eK-(t1oa_tloZ2R}i&LjeJbOQ#s4#q2N8~=p@-n zL0R#%Z>lr>D+LdV+Iq{GDt96sCfAYqSJtg-8GtZSX^mZJC3f$Z5aWWQ!k5^wVSd|9UV`m{ zNDh-F#Y8aE*EC_hfx3KU#w3k&7=3B&;!$E>M4-$25f&F~)uVloLLpbB&otmbM*AC z6db>**PM53gSX3`AxhGYwfuS~2fP0^8l2(uFzm>+m5CQ{4i0&lBS>_c+H}B27YED0 zKPD%L-gd}r>NyT6>^J@1v{JSA&zd%z|4Ucw-GL z^@r60?qB$~t&R09?36k?l2^Qv0Y_b~t$#0%yuE=X)Gn4+U*OgHi)W8lH{}s`Hdu+1 zlc%z1d?N3^l44Jv%EQ-I=<#51((7W7;3syq+&B2Nz4>BQ%FW-$)03sATlP3_^|rB? 
ztpyzAA9-%Hz9P!28&CAX5#D(9Hv~cS9WiDnmY8FxuZT5w+B~f0i70o);HRVctVeVB zDUNl}_r#F^^u4@WAf~*%6I3LYGy0S0?dVUU^-_O2QmZFD?(GhM7l_%Et~f|Z0_6&z zJOC_YCypd&hNSoI$!HI3qbs%(3q)X2Ms#v!;&XY@;ltyX@~C5l$FJ@2=~Fz+<5&jN zB<1;p103)ql;@)Y0#W(!m3;e+KNZC9__?4p7Wq?=nBmU{Qg(+w6~;^a%zA-UfP>3@ zddz7*&~fe2gY=e;FVcsv(g$4X5ij$|9_Dce$U?YsV?KlTk)D87(gL0yg!aG|$*+T@r?qhI5Pv=yp- zj%vy-;T3MYb%V7M_R=p8(${(CzHIrlF!L3$N_wSllvNe1y-5J_O@cx8KWgzjY4aa#I+8icd6)@#h4s*U8CiyY% zxUgselciLH_+X(_8=hq3934q7ir*JCczl!N zCvP%%nKWcy8is}N5|LS$X&5J#Dd6ChcBoR7@4c%Jk_<>jf%*`oBcyr&+AwL+v{6c0 zyb?(jb%Xi0S`d9R|4kK96Hxyky{6sZB=TPRET{PV%z_MyfWq*{E@ng5uGHr1oG!0& zR?=4q-N6kRvnLV_luF*ltBm!M(g5@&xhUfUyow!3nGk@|PdSIp*A=R~Dqz=DDoQ2I zC77VOvNT?$8a;=-z_~0J2)v503w-NJVZNjs$WvDfcokuekqTw}g?}o>f54o$_hS6m z<${d$d2hZx@#(9agVU++AFdG|(A4=*!sLQQHJCFfG-E=>ymWm6nNNi_2c%jjBPU+j z#dEzArQ&`j1nuOSW5z&ju}uREi06#3cSS$KoA;|! z=|sc#ESJ&T{g*qN+M+WlOoWkiH5z-|*|^0u{GEP;H)Zd|X%2mqD2d{!9(MR5=iP^3 zlKR@=BmJ7GD;&r1>jmD)xgS?oAVTkqd$)Kd2p(}gq&;9&o(Jf0-CDnHuWoK`Y~lei zfGIdz_=RYJqj-Wlf`@p;Rm!LQE!o_PrF@h9+H&P|q^mSJ*he+%sFegFCxiW>Bbu&@ zPyB*2OC(aj7UfdkDde-RGE}ftya(+hk;jVRIzcXfhHBmTQ_xpnS6A4S$S>$k46RI^ z$HJggd_b|SycukygG1kAZmu;9uNQ%f@I3MM7}2e2npqf(M!|Lca}qb6K)_?*#EAOPO#YYcC{j=C6_=<4}X^adom2H@j#= zyg$bV|H=UA60qXkTmXy0F=s`4`-!(8`vlpgIAl#%$Y$<~4Ji z?xdZsOh7eC$Kb}!)H%AsB%gDH@fU`}pD8hPpkFTU1sOQ&&1rCu$ z&uq);q=&h9Q-#BqcEd7?=YVTVCP}ikXxZX!J$ELtj)I2uy78plNna(w>LlbIm=Ond zfG6Z!3t#$ffl*$_162DL6%P)C)~v2@aULR-2Ne}4IA690B4-A{NbLQra44kS*<>F` zmXHyZ=BocTiOh^?=BV%dGWqK8Y7<(^@xeJfc( z5n)A|=}Tv_6FV?u&{!Rb9c#26xlNVS=3Ow;oHAqvxjWArVkQ@N&rqE~a_TyeH6M@B z@!;+A!KhCajNM+SAmzzrJ{BrKvA*$>Q@Hvqg+6WCF$x62qRcIsTy_16n??QY=}*LcIqVIMIzO<)U|$Q!l#LfoR(p(;L6;apFA zKpF~vXv?z32;+mV<2p@gmN|{8M@THPkJWJ(Y0w9raEI9nc&WLQ(@kH4KZ31Pe%A|* z0`Eg(mnBpv1O~{^R6y+;?^FW7CesPZWo{sl3^IxBz3yG@$vMtg5mU1)XufPN6%O>9 zo{WrYr_k>FKoVM?&j;tQefD2Z%XXw&*)W{-YG#|U*k7<}6m2}~rM@&v%i0&4sMSnz zgCXRgHB9+C27Qc4qC|cfO_GdFaSjf!MQ_|D^K&K=3k4Q}DJy-6!-1%(#&WUgT@m|5 zRopo2p;7>&3Kvg~`Z&A;dLFtQRxX$wI@Rn*+}DMJEhzi`TI?>BwUDBaU})J1^NXas 
zl1XVR_eLtGf}{0|Iv(i`x?sASbeI{EfUq}o4rU)h%2WhuEg4FfRWlY)Gnz8+Nx?Lr zB1`_`#aZ-JG(j=Keyy)trL(h~QryPQ`d4%cl&*<}k|5j}t=S|CGG6MAxJV|8Ry!sE zuu@)%r9wMh+#77kCwD~UWMi6n9Qy7KuP#IrG9s%y1IBDq*vApmzNDO|b;u#*TQ}7y9eun9W z?T~grZRC-RVK99-I{f1Jxw@D4QpBt2a_&Dik1igt>sJNM|V@nujoHcFy}jB$nW zP$2QU4GEk6Ql*mdvmkkAb^*eWq1{vn5)+U}I_HgsTuYV12arQ2U))+zzQOvq@AVGb zT~pf*);mV+7)QKHOzL7Ox`mUJlu?*~{KZ6z2t4B_k7bw&KvQEG>k8ZWF|W*2 zf1VOuA?H;<{wc8E`Fv(yAjt>^g^9(gl5-@PZ64{Y!Jra<&TiP2O|A5-raQmETcV$| zllXNn-|@}mPIvZjY4!<{j$i)v`S0TDsn(HEgxRic&l*k%!iCH{S7LD1p|_`PQpifB zr(eP-3YH6tA|U`Kw>mphxqyc=E4E#U_Aj%g8Q0J30WBBuWA?4+?&=>VW7N|)}eHqgS z3rZL-?U0QmmZc2RgkTnaOQFvl zjL(ZJ(diNNO9ZN93_Be-sGU9W#rAlBY}Ou16}&q85SBm-$m9}MCE{0g^iuFpe1H!3 zp#eFK_PBSbq3A!`eyFBwF^xXqwj`#$#(tuS*@wTSZ`kwSGiMm#XDYPb9~V+cLJ*TH zm**#a?11T2=(xmf5_E}Nn(|jywCZjLaper!Or)lix%VN`f#bW(doeIl(5nXnPQl^J zUv&gIkg)S{q&3X`@KLJWy-WRKKBR|Zi&2VI0FPd{b4CD#_nchP(UkG^f*B!e%=mSk z8CB#;q57s?Fl(}eyl}r>p)Sm&>@IK)N-#!G%z=q?DTS|)6OkX&2k`gITA~67R+Rbg zzEBk=;)V$dK`-vh;!GdrO?d;J3=^lG5gn8%PdWOF<&p1B#vE+~b;Ijd@%8@kaQbVI zTeL%G_b~s0ES2!-EPm3coK+d7?|FK;y}wA`U&QzKlK1!U+7=o@2aYdJPxg>j*uS{i zJ3zReVYfg3>DSKOiA2>r%ItK&G9#w(&V8O%^%r=wSmW)y*nTqmwQOu-e6B|65RsTV zOV-xJg2QkMpX`mG5yJJ1@<~9$8(FA6t5A{BkQ%7XHPt<`s;CtzsnOb8Nx!9)ntB0$ zT9U4nV}%bTMcn*s`7B-&-DF!H$#yuAsa5L=&jW(10fD0qJ*GYRn#MkgFJXofGt(Ue zsl_+9$yAo4WX-~W z%Xd5T_^`uz?5 zzl$I9hcrwlU*7p_5(^I#vdB;Dej{*Mz2?*Je3GSiKJD;HG7R~|Gj=8<%@9vBe2`Uh zJS_4-l8o>`158oY>_)s2bIaa29cawiEA867UwD|%gyuM+(j1?_p^@as1V4~id_v{P zwe_{_wWX(6^LglLyOS~!$(#E#$W@tnF!TA$7c*bZd^Pj+%r`SghMs526tj0`CML0~ z_T-Y+FG)4_0gbn{zf~lT%es4#6@H31L-Kflpnmcws-kzmp6Q-&RjVjrkBAJ3tDL=o z7o9$9)GDbV(J2QJw0Mxp#AWxh%Wmge2SIYF_KBm|*ADwCTQ!f_0zxG!1|yg0D-sKD z%_O&%Bw6jH(}gfp*M%4w(*gVyNfV10wTl;cl#Bwc6BtVd7T0*}PbchDH zGNj!r)_Twx_Q0z2x);-(Sro=I92}fZIOLUVF*qPC%z5$F7K(*SUoE4hw~zl!{(`xc z^G3lgId0M+gwTkd9TBEouu+sr$PvxHFf!Nhf%BqMm77?#hiHNEY$S!jvzeQNJ&Kq! 
zTvqhbT0KV*tcfjfRGP6a`b*3ySisz6h4aYw$Klxx(n6Y+!AVas9+NQ7CtMTuswb3KBk zF`}cYDCxh@CsqViJw|0FG2Uo6K$T~Fgh<5)>QPP>S~P2hYA0z$gB&QDs0A58dj%$# z@(DhQ)v_KCy2+WJ`cR8JO~Xx637?q8f!J~XyKpg)d_mcGF%iiiu_kHRy*4Sgqo8?R zk+}S#g^g9rEN5?G?L8bhhI@u~&*nbEL~3aSFx2r=s6uR=Jo-aCOBpdOKo->@2#WV^ zh@v?R(j^p1RilToSukCA1uPURL*BW5j1XlWyvr>>Z9(x2UJO&$^s9CDQP2|70}rNK zj}a4uWr_ca5h*logpaFET&88H=m3ea5HOcXhzSM$9pqLzDOa_B5{7k?Ai#s-Isz+t zt4?x@%sJ?$snn-kR~K5#>Onw)G^JU%E%OnagOTOaY4lg4O|6P^rn zHn233Ik?kYM>{&hQPId=f%==nB+R)_%jbzCz8fn^g_y)ft|aq;`iiSbb=i?-!D_UO zIWd*0_f_wRw>byOCLU#YCsvMz62iM?XzH5cSagCh!mC4yFRq^g41wE%7v}j~Sw?_H zqX33Jo{V64qX=hf9{D%24wlh``#Ok;G*PDD)b5)>{{YD=d)y0*D_u`c`Xg?egMR$^ z=e@1%rOj=j-OoSc_x36|-rpz725nHta1wYxAhPc9)d7u`NeCu_XiE@GU*gkB> zP>@PtmN5|@8(v`3MVu0`X&bHiuj~~9(EjRcdxa3Qy;^u+uVNIO=V=HmglN4KGHK-% z0%iV+8*teoLShacwz#~SN8kOO7n?^+%icK_^XR%4^ET&W2VjK%xb$>w1>qt(@3(Q^ z)pIW+q;XSzNQ(w9#&xc#gWd)5n&dFi!dT6=%Uh-whQlg=MbvM&lL%e>=EdN}HSNAP za3(%Q!;qd8#VVCV2LLD;o=p5ca#l6&9`^UZ0?(Q;oQX^;U>>}>FRvLw9U?5H` z3*z7}*meMgwXZ2G%;;s00vcTxm8Iv9$EUviBtfW}d8W-k_71~*l$sLp(cu@dm7~y5 z%lZbld?>qm<%lmoC#_GG$GV=X382#FxlTCs0%O*%h18Vx zvkz^dh9~ku|G}Dh{-Jpi!FcNgNBka6?8M*M>I~fWbu$QuYrW z0J}m(8xl1Xx6-e`MiB^T$*85|2&XAE6IXZPmpyI;Cksj5=N@3!@LR>p>%gTvRd>mp zt*&G3$WzyISdRCvxMcXCKKel+l=#m+gmJ?vt#oPNI@J~-iDTn1EtoK88T}6WBluZ^w>tLyv;Ku(L!Z;M6I8dpqJQXfyhzv>8avpCfC+VG10|rj-0leu z5sQ=~YlN!25m>Au5Y<~8?`KxO5GpM9E19scIFJ*?5y10)`lS539eIAsd51*5-INnK zU6~?2w{!l~1)B-$3C>M?o9QJfrg{k8T7UE-Cl=Jx(1Wq5=SoU#axdl~7t@1{GdZgk zbv&FIh-Riiww;4$@!0T-bJ4yA25|yRr&y!g#`j=klSiNNfLWjh%Wz9{sTJJyJQ;~W zgs;d<(vG76)C%Bn&~`D2JG>y&ri-~OE$F0`6Sf0K%d~E6DW#R^8c3pK^wr3~T!BK` zm~xar^+tP6S5nDLMUyV)YRugVClPQ}D?uHF0kf)YGRSgK?xL=U{7~2(I!x%W7(~*H zm@;fXa*=1_$VjU|Ir4rZ3<%5GriCPeYLa<~mGy%e+&PDId~6M%U&)b0ShYdBxs5{a zu$H{#JG>762b?4sJ|YG zIAOYnDTqSeS@Rh>m@;uVOadugBNPZ2VQux59(R+M<;xPLTAX<~w?ZA{1h!1Vjp>M( z=0#O<75&h-S|)kH+*)xoDN1=czA16+p5g4AKt3gSSc;B22X8NZSY12C;CAS` zk)KS}A(vs@MiT9kR)iiFMsXyYFy6K-;S=bQQKZSuU 
z#-NcB%P7Gv4By+$!qZEvD(Utm*oOknbSrl9iAGfd&OUTtqQjff>HbpAkDXYFJr5h@=C8(F=+I%6(EHms4i$H(O7RL`g$8-$Vte;51DLl65`RSHvRL<51)6!!4+Z80r~|;>@xa5hykw zW37l~#09M)$_@{49rbuydL`tG023_BnM}B&#>sf0UUv+&V56CM?kz(f8;b)u;^ZWA zxz`oEl*lcdXC=ovxH{&&Q4a^^bH8>Q4;ZWw3SeiN%mt1J4Qw01 zBK8z^7@SJ@bBUW)QZ9yp+VsInqnav3sZk|R?M2bARiOu3=zt(oEyEb~$5jDl$^ zZqA(YHr@V={%vW6hI$gQFlcG2eGZK)CAT!+>9GwDxnvMc_BDUUiIaa}N(MbwB_1SU zt^6Alk$n^Lpz~5vRD5$u_khE$dd>#?3+e2)l#ox1>>d z>1m`?#Xb45QZZ_w9|qoU%@lMQ%z=2>1uj3VF8_7!*~ZH1-ulMov!$n4C5q*hekiP* z2tpb=Uv6wlEbu&9cce!i>0J$zF=j$dPw|{=Vy0^TvwTZqS=# zK`bpj6&}>OtvBSXRAUe@q$vv}jx1*@c_un8W14p&Q`AH_0$HpbTWzyd^oAaK!z10o z9(cHH*9T<}7d0G(Xr1B&4HYFJQ zV1s1Z2;34yre*C6Ey-*vQbE6&sc#;I*O*LSCsrwe{0|u8MmCD=$z4XPraZGjZ0+TSt(o3Z!8LW$ zK_wBAe~K<{$H*s0c|#4})IkOG&(^)@BdMqLVSP-X{j+tnc~SPgB+^0svvpCUVHUyt zuz_??|7=}sTDpF(F;qbRfbNx3Z7?9AGi;*8I)_I$Tpi2YbX%iPb7;Ie>eL#R|* z)KJQqc-UgSn3J3y`IiJo+px?-s8;G0SXvylhGqBH@mMxeQ|+&aYHpPoU3tfh{k#rO z#Vz`RG=y!TV?>d1I?_ zxJ21USL9j_C2pz)u#r6+X!g3~mNY1NEo<#ey=JaF{S^Eo?B$sessw5cc8Ve6<_=Y^ zG9))(A2@p-%o&cP0Rm1Uy3^|JA$J{f2rrmD)n#RI9o36`EnXzopwsZSDM17$X}=@b z83NwPtD6M8JL=P`!sDg!SnYt8aVAl1jV(8?qpbSWq{@ol?%qvUrx?Rx<3XUl>57H{ zrN4XoP?K5}w4ZKkPgry?V20jb>kh~Guv4SQmKhuNbK8DoCI{&vet-5>@;gGf6YQnS z(4IQwIX|3)Y@!e~D5X+y32bBnF&ReOmTqSwn!iA|17zOo2 z{vPAdIP;%&f5kqrBjhe~VF_s_Nc<1$aNNllggz-5guF)7VAx$$rvogD=gZ&TmKb&W z$K4^0>%t|#)yV)y?55M7t7*u`X;;?G$03=UpkDGuNa9kf8kYSt1#w#}p=i+?oDjw} zrnp-UJLU8&LhFnbqpFhKbI#j|TBwmV@T%_c;&RgvX;Hsc4lJXyBXT`oA_U7T)0z|c zcee!NzlD-#NZc8V<}9rey>0qSg@FM zG;v=$QV*_qjGvoiWNC75zy-m*y_L1iy*(LWoL>ud#=IfE7M!69HF9=Om5ii8!K>aiBfO$1| zE0t$;k_h#Ee6_aIUudN*fb4(J@THJFqVPV3LN1yy#78m{0i%VVJrY)?}J?P z`!crRj*LgAq`>7&7>HHHV%o-D21ziu^F&;a=5r? 
zNI{U}FIn^-OFgb<8-{>`dmh5}qNE4s)P)*aa}M;f~fNYtyISPHdK9Li55h%p}OZp@N*jo zp$&(NI?8l_yR$hoHCNZEU$7-USf+#Qt}gc<3=xqK-KDT#F-I132Hl$iWU z_-5j3Egx6ru#gAsrk0kE&Ok;Yx7lhyW+rlc#eFZL3$;oBp4UnVSke!!=8KH&r>Al; zNU{_14;-TWjnGJCw`>MNQ8ZI$&Ng5rte|a3>T}p^z(gdKAhY=*lvHUI)=lRr9xBV- z1mqkAMV6Evl#M1+Ppz5cTWdoBaD&u^dD9du8xV!eJF`hV|k+0=sE~Ls# zv1nAjz9%J!O-<5ilcvC|NuEe3@d^<>JGxLM5s3Jvd-;fK$h1>6S#ZJK(TXYx&5Fp2 z2-yip22vA6;Y+a1i=m$1YmE{^iHaasSzej0M!9gv6U)q0aA9yO_Qoa-ELeTMw7Gv;!uEY1J_n$R!*olzAg|fhk##sSGAxejn4j?fk>%m@I z2O>hmx#KQaP;ka9t`$_niAoY<;s6teFb-&qL-=5wqFMCAm`Wu9`b$V0fwG!_!Vj-b zmJ5Miyl*BU-o|bUROf`7RUx7De^H6MKqPbJKN2|=Ru$4*i7}8OqN$AB>z;x~;m5t{ zA(cohjXh2-MtpK|Mj;hy2(gn)+Ypw-l0Hm*MaB=*iY!Y|yaq$~h)2`74+n2WUN6riEyx)G+uizmm{e3oEByVd zy_xP{Pc05%!{UIX_kh6#3dEF=Q;_ zEnF9uYKegIx*oy?idq3xG&G8d5&keHe|e0bb>|*tJ^)TzVs+j5c7%ga9hm=XKtDsG z;C&tl;_v<6d-u4eDdF8}djAr4HNoM;HB@qG6R6u8-pT4{OQ(OgmR{eYCGenk%Xu-O z;qPDuezswJ+CP;6k~3gA`k4Kvkz&Xp#{CcYD6S!EfJQL65lcc0FD<|&MBSY(1I+C6 zv+9fag=f+Z882%KOU<+MB5WBWWreu*1_(G2mcwQLq@x`fHN}LEEgNPYJ~94X{(E^Q zG5V}E=uUl>{_EU`{tYj6345RSK*3nS9cprdH6&|dDtQ3c(Pc|$kC1xK2mL&Nu z56-WK{iEXxFxDNMn1_4O*KsOKwS+hm8abP({%tTk=vR*iM@Rkc*34tp*uoC<^(QZ;ymGhfc+&;YCyonKAJ zegS|P4Gu5fa_wIFH5v`}`#5Uk0DCXM?ufAI>I1dd2$v%w^bK}RgmDQDMa%l_CI7RcPwe24^UTjw=vAMLq{d0wT zM3>fouKv2VzA{s-{`>RI)vc{+V{>Bd+4HAst9ZAzzWnsX%G&z()nn9K-`K96u031Z z2GH$|iVYg5wN=!6QaxMUT>b%&mmaS@UEBV7X5z`(_Bvrc!QG`x6^y#=wPj}eR?lB- zKHu0{Me{2FyS}#mWD~8dK3iSioHlD7m;^pI2V7B!5>8dt`PAxxOT6+ega_QO9_rz|q+5nWz2^LfAs-J#XR-PgI;cKhpIoVZLQ8!OPgz3B*>G^jb}3xBqwSBI)I?wIxJX1 zA*qExVMI`b1bcx_J1*7ADh`8M{~p7$o(&&H^Oygt_rLu=|LwPf|M&lV_uu}P|NE2w X@qhgI|NZa(>%aWVzkL7r+2j8oQTk#) diff --git a/site/google-code-prettify/lang-apollo.js b/site/google-code-prettify/lang-apollo.js deleted file mode 100644 index 9b5ef3c9d..000000000 --- a/site/google-code-prettify/lang-apollo.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["com",/^#[^\n\r]*/,null,"#"],["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r 
�\xa0"],["str",/^"(?:[^"\\]|\\[\S\s])*(?:"|$)/,null,'"']],[["kwd",/^(?:ADS|AD|AUG|BZF|BZMF|CAE|CAF|CA|CCS|COM|CS|DAS|DCA|DCOM|DCS|DDOUBL|DIM|DOUBLE|DTCB|DTCF|DV|DXCH|EDRUPT|EXTEND|INCR|INDEX|NDX|INHINT|LXCH|MASK|MSK|MP|MSU|NOOP|OVSK|QXCH|RAND|READ|RELINT|RESUME|RETURN|ROR|RXOR|SQUARE|SU|TCR|TCAA|OVSK|TCF|TC|TS|WAND|WOR|WRITE|XCH|XLQ|XXALQ|ZL|ZQ|ADD|ADZ|SUB|SUZ|MPY|MPR|MPZ|DVP|COM|ABS|CLA|CLZ|LDQ|STO|STQ|ALS|LLS|LRS|TRA|TSQ|TMI|TOV|AXT|TIX|DLY|INP|OUT)\s/, -null],["typ",/^(?:-?GENADR|=MINUS|2BCADR|VN|BOF|MM|-?2CADR|-?[1-6]DNADR|ADRES|BBCON|[ES]?BANK=?|BLOCK|BNKSUM|E?CADR|COUNT\*?|2?DEC\*?|-?DNCHAN|-?DNPTR|EQUALS|ERASE|MEMORY|2?OCT|REMADR|SETLOC|SUBRO|ORG|BSS|BES|SYN|EQU|DEFINE|END)\s/,null],["lit",/^'(?:-*(?:\w|\\[!-~])(?:[\w-]*|\\[!-~])[!=?]?)?/],["pln",/^-*(?:[!-z]|\\[!-~])(?:[\w-]*|\\[!-~])[!=?]?/],["pun",/^[^\w\t\n\r "'-);\\\xa0]+/]]),["apollo","agc","aea"]); diff --git a/site/google-code-prettify/lang-clj.js b/site/google-code-prettify/lang-clj.js deleted file mode 100644 index 542a2205f..000000000 --- a/site/google-code-prettify/lang-clj.js +++ /dev/null @@ -1,18 +0,0 @@ -/* - Copyright (C) 2011 Google Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ -var a=null; -PR.registerLangHandler(PR.createSimpleLexer([["opn",/^[([{]+/,a,"([{"],["clo",/^[)\]}]+/,a,")]}"],["com",/^;[^\n\r]*/,a,";"],["pln",/^[\t\n\r \xa0]+/,a,"\t\n\r \xa0"],["str",/^"(?:[^"\\]|\\[\S\s])*(?:"|$)/,a,'"']],[["kwd",/^(?:def|if|do|let|quote|var|fn|loop|recur|throw|try|monitor-enter|monitor-exit|defmacro|defn|defn-|macroexpand|macroexpand-1|for|doseq|dosync|dotimes|and|or|when|not|assert|doto|proxy|defstruct|first|rest|cons|defprotocol|deftype|defrecord|reify|defmulti|defmethod|meta|with-meta|ns|in-ns|create-ns|import|intern|refer|alias|namespace|resolve|ref|deref|refset|new|set!|memfn|to-array|into-array|aset|gen-class|reduce|map|filter|find|nil?|empty?|hash-map|hash-set|vec|vector|seq|flatten|reverse|assoc|dissoc|list|list?|disj|get|union|difference|intersection|extend|extend-type|extend-protocol|prn)\b/,a], -["typ",/^:[\dA-Za-z-]+/]]),["clj"]); diff --git a/site/google-code-prettify/lang-css.js b/site/google-code-prettify/lang-css.js deleted file mode 100644 index 041e1f590..000000000 --- a/site/google-code-prettify/lang-css.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\f\r ]+/,null," \t\r\n "]],[["str",/^"(?:[^\n\f\r"\\]|\\(?:\r\n?|\n|\f)|\\[\S\s])*"/,null],["str",/^'(?:[^\n\f\r'\\]|\\(?:\r\n?|\n|\f)|\\[\S\s])*'/,null],["lang-css-str",/^url\(([^"')]*)\)/i],["kwd",/^(?:url|rgb|!important|@import|@page|@media|@charset|inherit)(?=[^\w-]|$)/i,null],["lang-css-kw",/^(-?(?:[_a-z]|\\[\da-f]+ ?)(?:[\w-]|\\\\[\da-f]+ ?)*)\s*:/i],["com",/^\/\*[^*]*\*+(?:[^*/][^*]*\*+)*\//],["com", -/^(?:<\!--|--\>)/],["lit",/^(?:\d+|\d*\.\d+)(?:%|[a-z]+)?/i],["lit",/^#[\da-f]{3,6}/i],["pln",/^-?(?:[_a-z]|\\[\da-f]+ ?)(?:[\w-]|\\\\[\da-f]+ ?)*/i],["pun",/^[^\s\w"']+/]]),["css"]);PR.registerLangHandler(PR.createSimpleLexer([],[["kwd",/^-?(?:[_a-z]|\\[\da-f]+ ?)(?:[\w-]|\\\\[\da-f]+ ?)*/i]]),["css-kw"]);PR.registerLangHandler(PR.createSimpleLexer([],[["str",/^[^"')]+/]]),["css-str"]); diff --git 
a/site/google-code-prettify/lang-go.js b/site/google-code-prettify/lang-go.js deleted file mode 100644 index 5f03e77d2..000000000 --- a/site/google-code-prettify/lang-go.js +++ /dev/null @@ -1 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r �\xa0"],["pln",/^(?:"(?:[^"\\]|\\[\S\s])*(?:"|$)|'(?:[^'\\]|\\[\S\s])+(?:'|$)|`[^`]*(?:`|$))/,null,"\"'"]],[["com",/^(?:\/\/[^\n\r]*|\/\*[\S\s]*?\*\/)/],["pln",/^(?:[^"'/`]|\/(?![*/]))+/]]),["go"]); diff --git a/site/google-code-prettify/lang-hs.js b/site/google-code-prettify/lang-hs.js deleted file mode 100644 index 9d77b0838..000000000 --- a/site/google-code-prettify/lang-hs.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t-\r ]+/,null,"\t\n \r "],["str",/^"(?:[^\n\f\r"\\]|\\[\S\s])*(?:"|$)/,null,'"'],["str",/^'(?:[^\n\f\r'\\]|\\[^&])'?/,null,"'"],["lit",/^(?:0o[0-7]+|0x[\da-f]+|\d+(?:\.\d+)?(?:e[+-]?\d+)?)/i,null,"0123456789"]],[["com",/^(?:--+[^\n\f\r]*|{-(?:[^-]|-+[^}-])*-})/],["kwd",/^(?:case|class|data|default|deriving|do|else|if|import|in|infix|infixl|infixr|instance|let|module|newtype|of|then|type|where|_)(?=[^\d'A-Za-z]|$)/, -null],["pln",/^(?:[A-Z][\w']*\.)*[A-Za-z][\w']*/],["pun",/^[^\d\t-\r "'A-Za-z]+/]]),["hs"]); diff --git a/site/google-code-prettify/lang-lisp.js b/site/google-code-prettify/lang-lisp.js deleted file mode 100644 index 02a30e8d1..000000000 --- a/site/google-code-prettify/lang-lisp.js +++ /dev/null @@ -1,3 +0,0 @@ -var a=null; -PR.registerLangHandler(PR.createSimpleLexer([["opn",/^\(+/,a,"("],["clo",/^\)+/,a,")"],["com",/^;[^\n\r]*/,a,";"],["pln",/^[\t\n\r \xa0]+/,a,"\t\n\r \xa0"],["str",/^"(?:[^"\\]|\\[\S\s])*(?:"|$)/,a,'"']],[["kwd",/^(?:block|c[ad]+r|catch|con[ds]|def(?:ine|un)|do|eq|eql|equal|equalp|eval-when|flet|format|go|if|labels|lambda|let|load-time-value|locally|macrolet|multiple-value-call|nil|progn|progv|quote|require|return-from|setq|symbol-macrolet|t|tagbody|the|throw|unwind)\b/,a], 
-["lit",/^[+-]?(?:[#0]x[\da-f]+|\d+\/\d+|(?:\.\d+|\d+(?:\.\d*)?)(?:[de][+-]?\d+)?)/i],["lit",/^'(?:-*(?:\w|\\[!-~])(?:[\w-]*|\\[!-~])[!=?]?)?/],["pln",/^-*(?:[_a-z]|\\[!-~])(?:[\w-]*|\\[!-~])[!=?]?/i],["pun",/^[^\w\t\n\r "'-);\\\xa0]+/]]),["cl","el","lisp","scm"]); diff --git a/site/google-code-prettify/lang-lua.js b/site/google-code-prettify/lang-lua.js deleted file mode 100644 index 4f19862b5..000000000 --- a/site/google-code-prettify/lang-lua.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r �\xa0"],["str",/^(?:"(?:[^"\\]|\\[\S\s])*(?:"|$)|'(?:[^'\\]|\\[\S\s])*(?:'|$))/,null,"\"'"]],[["com",/^--(?:\[(=*)\[[\S\s]*?(?:]\1]|$)|[^\n\r]*)/],["str",/^\[(=*)\[[\S\s]*?(?:]\1]|$)/],["kwd",/^(?:and|break|do|else|elseif|end|false|for|function|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b/,null],["lit",/^[+-]?(?:0x[\da-f]+|(?:\.\d+|\d+(?:\.\d*)?)(?:e[+-]?\d+)?)/i], -["pln",/^[_a-z]\w*/i],["pun",/^[^\w\t\n\r \xa0][^\w\t\n\r "'+=\xa0-]*/]]),["lua"]); diff --git a/site/google-code-prettify/lang-ml.js b/site/google-code-prettify/lang-ml.js deleted file mode 100644 index 281523efc..000000000 --- a/site/google-code-prettify/lang-ml.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r �\xa0"],["com",/^#(?:if[\t\n\r 
\xa0]+(?:[$_a-z][\w']*|``[^\t\n\r`]*(?:``|$))|else|endif|light)/i,null,"#"],["str",/^(?:"(?:[^"\\]|\\[\S\s])*(?:"|$)|'(?:[^'\\]|\\[\S\s])(?:'|$))/,null,"\"'"]],[["com",/^(?:\/\/[^\n\r]*|\(\*[\S\s]*?\*\))/],["kwd",/^(?:abstract|and|as|assert|begin|class|default|delegate|do|done|downcast|downto|elif|else|end|exception|extern|false|finally|for|fun|function|if|in|inherit|inline|interface|internal|lazy|let|match|member|module|mutable|namespace|new|null|of|open|or|override|private|public|rec|return|static|struct|then|to|true|try|type|upcast|use|val|void|when|while|with|yield|asr|land|lor|lsl|lsr|lxor|mod|sig|atomic|break|checked|component|const|constraint|constructor|continue|eager|event|external|fixed|functor|global|include|method|mixin|object|parallel|process|protected|pure|sealed|trait|virtual|volatile)\b/], -["lit",/^[+-]?(?:0x[\da-f]+|(?:\.\d+|\d+(?:\.\d*)?)(?:e[+-]?\d+)?)/i],["pln",/^(?:[_a-z][\w']*[!#?]?|``[^\t\n\r`]*(?:``|$))/i],["pun",/^[^\w\t\n\r "'\xa0]+/]]),["fs","ml"]); diff --git a/site/google-code-prettify/lang-n.js b/site/google-code-prettify/lang-n.js deleted file mode 100644 index 6c2e85b98..000000000 --- a/site/google-code-prettify/lang-n.js +++ /dev/null @@ -1,4 +0,0 @@ -var a=null; -PR.registerLangHandler(PR.createSimpleLexer([["str",/^(?:'(?:[^\n\r'\\]|\\.)*'|"(?:[^\n\r"\\]|\\.)*(?:"|$))/,a,'"'],["com",/^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\n\r]*)/,a,"#"],["pln",/^\s+/,a," \r\n\t\xa0"]],[["str",/^@"(?:[^"]|"")*(?:"|$)/,a],["str",/^<#[^#>]*(?:#>|$)/,a],["str",/^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,a],["com",/^\/\/[^\n\r]*/,a],["com",/^\/\*[\S\s]*?(?:\*\/|$)/, 
-a],["kwd",/^(?:abstract|and|as|base|catch|class|def|delegate|enum|event|extern|false|finally|fun|implements|interface|internal|is|macro|match|matches|module|mutable|namespace|new|null|out|override|params|partial|private|protected|public|ref|sealed|static|struct|syntax|this|throw|true|try|type|typeof|using|variant|virtual|volatile|when|where|with|assert|assert2|async|break|checked|continue|do|else|ensures|for|foreach|if|late|lock|new|nolate|otherwise|regexp|repeat|requires|return|surroundwith|unchecked|unless|using|while|yield)\b/, -a],["typ",/^(?:array|bool|byte|char|decimal|double|float|int|list|long|object|sbyte|short|string|ulong|uint|ufloat|ulong|ushort|void)\b/,a],["lit",/^@[$_a-z][\w$@]*/i,a],["typ",/^@[A-Z]+[a-z][\w$@]*/,a],["pln",/^'?[$_a-z][\w$@]*/i,a],["lit",/^(?:0x[\da-f]+|(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d\+)(?:e[+-]?\d+)?)[a-z]*/i,a,"0123456789"],["pun",/^.[^\s\w"-$'./@`]*/,a]]),["n","nemerle"]); diff --git a/site/google-code-prettify/lang-proto.js b/site/google-code-prettify/lang-proto.js deleted file mode 100644 index f006ad8cf..000000000 --- a/site/google-code-prettify/lang-proto.js +++ /dev/null @@ -1 +0,0 @@ -PR.registerLangHandler(PR.sourceDecorator({keywords:"bytes,default,double,enum,extend,extensions,false,group,import,max,message,option,optional,package,repeated,required,returns,rpc,service,syntax,to,true",types:/^(bool|(double|s?fixed|[su]?int)(32|64)|float|string)\b/,cStyleComments:!0}),["proto"]); diff --git a/site/google-code-prettify/lang-scala.js b/site/google-code-prettify/lang-scala.js deleted file mode 100644 index 67ae9dc5c..000000000 --- a/site/google-code-prettify/lang-scala.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r 
�\xa0"],["str",/^"(?:""(?:""?(?!")|[^"\\]|\\.)*"{0,3}|(?:[^\n\r"\\]|\\.)*"?)/,null,'"'],["lit",/^`(?:[^\n\r\\`]|\\.)*`?/,null,"`"],["pun",/^[!#%&(--:-@[-^{-~]+/,null,"!#%&()*+,-:;<=>?@[\\]^{|}~"]],[["str",/^'(?:[^\n\r'\\]|\\(?:'|[^\n\r']+))'/],["lit",/^'[$A-Z_a-z][\w$]*(?![\w$'])/],["kwd",/^(?:abstract|case|catch|class|def|do|else|extends|final|finally|for|forSome|if|implicit|import|lazy|match|new|object|override|package|private|protected|requires|return|sealed|super|throw|trait|try|type|val|var|while|with|yield)\b/], -["lit",/^(?:true|false|null|this)\b/],["lit",/^(?:0(?:[0-7]+|x[\da-f]+)l?|(?:0|[1-9]\d*)(?:(?:\.\d+)?(?:e[+-]?\d+)?f?|l?)|\\.\d+(?:e[+-]?\d+)?f?)/i],["typ",/^[$_]*[A-Z][\d$A-Z_]*[a-z][\w$]*/],["pln",/^[$A-Z_a-z][\w$]*/],["com",/^\/(?:\/.*|\*(?:\/|\**[^*/])*(?:\*+\/?)?)/],["pun",/^(?:\.+|\/)/]]),["scala"]); diff --git a/site/google-code-prettify/lang-sql.js b/site/google-code-prettify/lang-sql.js deleted file mode 100644 index 110026680..000000000 --- a/site/google-code-prettify/lang-sql.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r 
�\xa0"],["str",/^(?:"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')/,null,"\"'"]],[["com",/^(?:--[^\n\r]*|\/\*[\S\s]*?(?:\*\/|$))/],["kwd",/^(?:add|all|alter|and|any|as|asc|authorization|backup|begin|between|break|browse|bulk|by|cascade|case|check|checkpoint|close|clustered|coalesce|collate|column|commit|compute|constraint|contains|containstable|continue|convert|create|cross|current|current_date|current_time|current_timestamp|current_user|cursor|database|dbcc|deallocate|declare|default|delete|deny|desc|disk|distinct|distributed|double|drop|dummy|dump|else|end|errlvl|escape|except|exec|execute|exists|exit|fetch|file|fillfactor|for|foreign|freetext|freetexttable|from|full|function|goto|grant|group|having|holdlock|identity|identitycol|identity_insert|if|in|index|inner|insert|intersect|into|is|join|key|kill|left|like|lineno|load|match|merge|national|nocheck|nonclustered|not|null|nullif|of|off|offsets|on|open|opendatasource|openquery|openrowset|openxml|option|or|order|outer|over|percent|plan|precision|primary|print|proc|procedure|public|raiserror|read|readtext|reconfigure|references|replication|restore|restrict|return|revoke|right|rollback|rowcount|rowguidcol|rule|save|schema|select|session_user|set|setuser|shutdown|some|statistics|system_user|table|textsize|then|to|top|tran|transaction|trigger|truncate|tsequal|union|unique|update|updatetext|use|user|using|values|varying|view|waitfor|when|where|while|with|writetext)(?=[^\w-]|$)/i, -null],["lit",/^[+-]?(?:0x[\da-f]+|(?:\.\d+|\d+(?:\.\d*)?)(?:e[+-]?\d+)?)/i],["pln",/^[_a-z][\w-]*/i],["pun",/^[^\w\t\n\r "'\xa0][^\w\t\n\r "'+\xa0-]*/]]),["sql"]); diff --git a/site/google-code-prettify/lang-tex.js b/site/google-code-prettify/lang-tex.js deleted file mode 100644 index 76e25ed2b..000000000 --- a/site/google-code-prettify/lang-tex.js +++ /dev/null @@ -1 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r 
�\xa0"],["com",/^%[^\n\r]*/,null,"%"]],[["kwd",/^\\[@-Za-z]+/],["kwd",/^\\./],["typ",/^[$&]/],["lit",/[+-]?(?:\.\d+|\d+(?:\.\d*)?)(cm|em|ex|in|pc|pt|bp|mm)/i],["pun",/^[()=[\]{}]+/]]),["latex","tex"]); diff --git a/site/google-code-prettify/lang-vb.js b/site/google-code-prettify/lang-vb.js deleted file mode 100644 index 237e30aa5..000000000 --- a/site/google-code-prettify/lang-vb.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0\u2028\u2029]+/,null,"\t\n\r �\xa0

"],["str",/^(?:["\u201c\u201d](?:[^"\u201c\u201d]|["\u201c\u201d]{2})(?:["\u201c\u201d]c|$)|["\u201c\u201d](?:[^"\u201c\u201d]|["\u201c\u201d]{2})*(?:["\u201c\u201d]|$))/i,null,'"“”'],["com",/^['\u2018\u2019].*/,null,"'‘’"]],[["kwd",/^(?:addhandler|addressof|alias|and|andalso|ansi|as|assembly|auto|boolean|byref|byte|byval|call|case|catch|cbool|cbyte|cchar|cdate|cdbl|cdec|char|cint|class|clng|cobj|const|cshort|csng|cstr|ctype|date|decimal|declare|default|delegate|dim|directcast|do|double|each|else|elseif|end|endif|enum|erase|error|event|exit|finally|for|friend|function|get|gettype|gosub|goto|handles|if|implements|imports|in|inherits|integer|interface|is|let|lib|like|long|loop|me|mod|module|mustinherit|mustoverride|mybase|myclass|namespace|new|next|not|notinheritable|notoverridable|object|on|option|optional|or|orelse|overloads|overridable|overrides|paramarray|preserve|private|property|protected|public|raiseevent|readonly|redim|removehandler|resume|return|select|set|shadows|shared|short|single|static|step|stop|string|structure|sub|synclock|then|throw|to|try|typeof|unicode|until|variant|wend|when|while|with|withevents|writeonly|xor|endif|gosub|let|variant|wend)\b/i, -null],["com",/^rem.*/i],["lit",/^(?:true\b|false\b|nothing\b|\d+(?:e[+-]?\d+[dfr]?|[dfilrs])?|(?:&h[\da-f]+|&o[0-7]+)[ils]?|\d*\.\d+(?:e[+-]?\d+)?[dfr]?|#\s+(?:\d+[/-]\d+[/-]\d+(?:\s+\d+:\d+(?::\d+)?(\s*(?:am|pm))?)?|\d+:\d+(?::\d+)?(\s*(?:am|pm))?)\s+#)/i],["pln",/^(?:(?:[a-z]|_\w)\w*|\[(?:[a-z]|_\w)\w*])/i],["pun",/^[^\w\t\n\r "'[\]\xa0\u2018\u2019\u201c\u201d\u2028\u2029]+/],["pun",/^(?:\[|])/]]),["vb","vbs"]); diff --git a/site/google-code-prettify/lang-vhdl.js b/site/google-code-prettify/lang-vhdl.js deleted file mode 100644 index 0c9f23f0e..000000000 --- a/site/google-code-prettify/lang-vhdl.js +++ /dev/null @@ -1,3 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0]+/,null,"\t\n\r 
�\xa0"]],[["str",/^(?:[box]?"(?:[^"]|"")*"|'.')/i],["com",/^--[^\n\r]*/],["kwd",/^(?:abs|access|after|alias|all|and|architecture|array|assert|attribute|begin|block|body|buffer|bus|case|component|configuration|constant|disconnect|downto|else|elsif|end|entity|exit|file|for|function|generate|generic|group|guarded|if|impure|in|inertial|inout|is|label|library|linkage|literal|loop|map|mod|nand|new|next|nor|not|null|of|on|open|or|others|out|package|port|postponed|procedure|process|pure|range|record|register|reject|rem|report|return|rol|ror|select|severity|shared|signal|sla|sll|sra|srl|subtype|then|to|transport|type|unaffected|units|until|use|variable|wait|when|while|with|xnor|xor)(?=[^\w-]|$)/i, -null],["typ",/^(?:bit|bit_vector|character|boolean|integer|real|time|string|severity_level|positive|natural|signed|unsigned|line|text|std_u?logic(?:_vector)?)(?=[^\w-]|$)/i,null],["typ",/^'(?:active|ascending|base|delayed|driving|driving_value|event|high|image|instance_name|last_active|last_event|last_value|left|leftof|length|low|path_name|pos|pred|quiet|range|reverse_range|right|rightof|simple_name|stable|succ|transaction|val|value)(?=[^\w-]|$)/i,null],["lit",/^\d+(?:_\d+)*(?:#[\w.\\]+#(?:[+-]?\d+(?:_\d+)*)?|(?:\.\d+(?:_\d+)*)?(?:e[+-]?\d+(?:_\d+)*)?)/i], -["pln",/^(?:[a-z]\w*|\\[^\\]*\\)/i],["pun",/^[^\w\t\n\r "'\xa0][^\w\t\n\r "'\xa0-]*/]]),["vhdl","vhd"]); diff --git a/site/google-code-prettify/lang-wiki.js b/site/google-code-prettify/lang-wiki.js deleted file mode 100644 index 7d715d16c..000000000 --- a/site/google-code-prettify/lang-wiki.js +++ /dev/null @@ -1,2 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\d\t a-gi-z\xa0]+/,null,"\t 
�\xa0abcdefgijklmnopqrstuvwxyz0123456789"],["pun",/^[*=[\]^~]+/,null,"=*~^[]"]],[["lang-wiki.meta",/(?:^^|\r\n?|\n)(#[a-z]+)\b/],["lit",/^[A-Z][a-z][\da-z]+[A-Z][a-z][^\W_]+\b/],["lang-",/^{{{([\S\s]+?)}}}/],["lang-",/^`([^\n\r`]+)`/],["str",/^https?:\/\/[^\s#/?]*(?:\/[^\s#?]*)?(?:\?[^\s#]*)?(?:#\S*)?/i],["pln",/^(?:\r\n|[\S\s])[^\n\r#*=A-[^`h{~]*/]]),["wiki"]); -PR.registerLangHandler(PR.createSimpleLexer([["kwd",/^#[a-z]+/i,null,"#"]],[]),["wiki.meta"]); diff --git a/site/google-code-prettify/lang-xq.js b/site/google-code-prettify/lang-xq.js deleted file mode 100644 index e323ae323..000000000 --- a/site/google-code-prettify/lang-xq.js +++ /dev/null @@ -1,3 +0,0 @@ -PR.registerLangHandler(PR.createSimpleLexer([["var pln",/^\$[\w-]+/,null,"$"]],[["pln",/^[\s=][<>][\s=]/],["lit",/^@[\w-]+/],["tag",/^<\/?[a-z](?:[\w-.:]*\w)?|\/?>$/i],["com",/^\(:[\S\s]*?:\)/],["pln",/^[(),/;[\]{}]$/],["str",/^(?:"(?:[^"\\{]|\\[\S\s])*(?:"|$)|'(?:[^'\\{]|\\[\S\s])*(?:'|$))/,null,"\"'"],["kwd",/^(?:xquery|where|version|variable|union|typeswitch|treat|to|then|text|stable|sortby|some|self|schema|satisfies|returns|return|ref|processing-instruction|preceding-sibling|preceding|precedes|parent|only|of|node|namespace|module|let|item|intersect|instance|in|import|if|function|for|follows|following-sibling|following|external|except|every|else|element|descending|descendant-or-self|descendant|define|default|declare|comment|child|cast|case|before|attribute|assert|ascending|as|ancestor-or-self|ancestor|after|eq|order|by|or|and|schema-element|document-node|node|at)\b/], -["typ",/^(?:xs:yearMonthDuration|xs:unsignedLong|xs:time|xs:string|xs:short|xs:QName|xs:Name|xs:long|xs:integer|xs:int|xs:gYearMonth|xs:gYear|xs:gMonthDay|xs:gDay|xs:float|xs:duration|xs:double|xs:decimal|xs:dayTimeDuration|xs:dateTime|xs:date|xs:byte|xs:boolean|xs:anyURI|xf:yearMonthDuration)\b/,null],["fun 
pln",/^(?:xp:dereference|xinc:node-expand|xinc:link-references|xinc:link-expand|xhtml:restructure|xhtml:clean|xhtml:add-lists|xdmp:zip-manifest|xdmp:zip-get|xdmp:zip-create|xdmp:xquery-version|xdmp:word-convert|xdmp:with-namespaces|xdmp:version|xdmp:value|xdmp:user-roles|xdmp:user-last-login|xdmp:user|xdmp:url-encode|xdmp:url-decode|xdmp:uri-is-file|xdmp:uri-format|xdmp:uri-content-type|xdmp:unquote|xdmp:unpath|xdmp:triggers-database|xdmp:trace|xdmp:to-json|xdmp:tidy|xdmp:subbinary|xdmp:strftime|xdmp:spawn-in|xdmp:spawn|xdmp:sleep|xdmp:shutdown|xdmp:set-session-field|xdmp:set-response-encoding|xdmp:set-response-content-type|xdmp:set-response-code|xdmp:set-request-time-limit|xdmp:set|xdmp:servers|xdmp:server-status|xdmp:server-name|xdmp:server|xdmp:security-database|xdmp:security-assert|xdmp:schema-database|xdmp:save|xdmp:role-roles|xdmp:role|xdmp:rethrow|xdmp:restart|xdmp:request-timestamp|xdmp:request-status|xdmp:request-cancel|xdmp:request|xdmp:redirect-response|xdmp:random|xdmp:quote|xdmp:query-trace|xdmp:query-meters|xdmp:product-edition|xdmp:privilege-roles|xdmp:privilege|xdmp:pretty-print|xdmp:powerpoint-convert|xdmp:platform|xdmp:permission|xdmp:pdf-convert|xdmp:path|xdmp:octal-to-integer|xdmp:node-uri|xdmp:node-replace|xdmp:node-kind|xdmp:node-insert-child|xdmp:node-insert-before|xdmp:node-insert-after|xdmp:node-delete|xdmp:node-database|xdmp:mul64|xdmp:modules-root|xdmp:modules-database|xdmp:merging|xdmp:merge-cancel|xdmp:merge|xdmp:md5|xdmp:logout|xdmp:login|xdmp:log-level|xdmp:log|xdmp:lock-release|xdmp:lock-acquire|xdmp:load|xdmp:invoke-in|xdmp:invoke|xdmp:integer-to-octal|xdmp:integer-to-hex|xdmp:http-put|xdmp:http-post|xdmp:http-options|xdmp:http-head|xdmp:http-get|xdmp:http-delete|xdmp:hosts|xdmp:host-status|xdmp:host-name|xdmp:host|xdmp:hex-to-integer|xdmp:hash64|xdmp:hash32|xdmp:has-privilege|xdmp:groups|xdmp:group-serves|xdmp:group-servers|xdmp:group-name|xdmp:group-hosts|xdmp:group|xdmp:get-session-field-names|xdmp:get-session-field|xdmp:get-respo
nse-encoding|xdmp:get-response-code|xdmp:get-request-username|xdmp:get-request-user|xdmp:get-request-url|xdmp:get-request-protocol|xdmp:get-request-path|xdmp:get-request-method|xdmp:get-request-header-names|xdmp:get-request-header|xdmp:get-request-field-names|xdmp:get-request-field-filename|xdmp:get-request-field-content-type|xdmp:get-request-field|xdmp:get-request-client-certificate|xdmp:get-request-client-address|xdmp:get-request-body|xdmp:get-current-user|xdmp:get-current-roles|xdmp:get|xdmp:function-name|xdmp:function-module|xdmp:function|xdmp:from-json|xdmp:forests|xdmp:forest-status|xdmp:forest-restore|xdmp:forest-restart|xdmp:forest-name|xdmp:forest-delete|xdmp:forest-databases|xdmp:forest-counts|xdmp:forest-clear|xdmp:forest-backup|xdmp:forest|xdmp:filesystem-file|xdmp:filesystem-directory|xdmp:exists|xdmp:excel-convert|xdmp:eval-in|xdmp:eval|xdmp:estimate|xdmp:email|xdmp:element-content-type|xdmp:elapsed-time|xdmp:document-set-quality|xdmp:document-set-property|xdmp:document-set-properties|xdmp:document-set-permissions|xdmp:document-set-collections|xdmp:document-remove-properties|xdmp:document-remove-permissions|xdmp:document-remove-collections|xdmp:document-properties|xdmp:document-locks|xdmp:document-load|xdmp:document-insert|xdmp:document-get-quality|xdmp:document-get-properties|xdmp:document-get-permissions|xdmp:document-get-collections|xdmp:document-get|xdmp:document-forest|xdmp:document-delete|xdmp:document-add-properties|xdmp:document-add-permissions|xdmp:document-add-collections|xdmp:directory-properties|xdmp:directory-locks|xdmp:directory-delete|xdmp:directory-create|xdmp:directory|xdmp:diacritic-less|xdmp:describe|xdmp:default-permissions|xdmp:default-collections|xdmp:databases|xdmp:database-restore-validate|xdmp:database-restore-status|xdmp:database-restore-cancel|xdmp:database-restore|xdmp:database-name|xdmp:database-forests|xdmp:database-backup-validate|xdmp:database-backup-status|xdmp:database-backup-purge|xdmp:database-backup-cancel|xdmp:data
base-backup|xdmp:database|xdmp:collection-properties|xdmp:collection-locks|xdmp:collection-delete|xdmp:collation-canonical-uri|xdmp:castable-as|xdmp:can-grant-roles|xdmp:base64-encode|xdmp:base64-decode|xdmp:architecture|xdmp:apply|xdmp:amp-roles|xdmp:amp|xdmp:add64|xdmp:add-response-header|xdmp:access|trgr:trigger-set-recursive|trgr:trigger-set-permissions|trgr:trigger-set-name|trgr:trigger-set-module|trgr:trigger-set-event|trgr:trigger-set-description|trgr:trigger-remove-permissions|trgr:trigger-module|trgr:trigger-get-permissions|trgr:trigger-enable|trgr:trigger-disable|trgr:trigger-database-online-event|trgr:trigger-data-event|trgr:trigger-add-permissions|trgr:remove-trigger|trgr:property-content|trgr:pre-commit|trgr:post-commit|trgr:get-trigger-by-id|trgr:get-trigger|trgr:document-scope|trgr:document-content|trgr:directory-scope|trgr:create-trigger|trgr:collection-scope|trgr:any-property-content|thsr:set-entry|thsr:remove-term|thsr:remove-synonym|thsr:remove-entry|thsr:query-lookup|thsr:lookup|thsr:load|thsr:insert|thsr:expand|thsr:add-synonym|spell:suggest-detailed|spell:suggest|spell:remove-word|spell:make-dictionary|spell:load|spell:levenshtein-distance|spell:is-correct|spell:insert|spell:double-metaphone|spell:add-word|sec:users-collection|sec:user-set-roles|sec:user-set-password|sec:user-set-name|sec:user-set-description|sec:user-set-default-permissions|sec:user-set-default-collections|sec:user-remove-roles|sec:user-privileges|sec:user-get-roles|sec:user-get-description|sec:user-get-default-permissions|sec:user-get-default-collections|sec:user-doc-permissions|sec:user-doc-collections|sec:user-add-roles|sec:unprotect-collection|sec:uid-for-name|sec:set-realm|sec:security-version|sec:security-namespace|sec:security-installed|sec:security-collection|sec:roles-collection|sec:role-set-roles|sec:role-set-name|sec:role-set-description|sec:role-set-default-permissions|sec:role-set-default-collections|sec:role-remove-roles|sec:role-privileges|sec:role-get-roles|sec
:role-get-description|sec:role-get-default-permissions|sec:role-get-default-collections|sec:role-doc-permissions|sec:role-doc-collections|sec:role-add-roles|sec:remove-user|sec:remove-role-from-users|sec:remove-role-from-role|sec:remove-role-from-privileges|sec:remove-role-from-amps|sec:remove-role|sec:remove-privilege|sec:remove-amp|sec:protect-collection|sec:privileges-collection|sec:privilege-set-roles|sec:privilege-set-name|sec:privilege-remove-roles|sec:privilege-get-roles|sec:privilege-add-roles|sec:priv-doc-permissions|sec:priv-doc-collections|sec:get-user-names|sec:get-unique-elem-id|sec:get-role-names|sec:get-role-ids|sec:get-privilege|sec:get-distinct-permissions|sec:get-collection|sec:get-amp|sec:create-user-with-role|sec:create-user|sec:create-role|sec:create-privilege|sec:create-amp|sec:collections-collection|sec:collection-set-permissions|sec:collection-remove-permissions|sec:collection-get-permissions|sec:collection-add-permissions|sec:check-admin|sec:amps-collection|sec:amp-set-roles|sec:amp-remove-roles|sec:amp-get-roles|sec:amp-doc-permissions|sec:amp-doc-collections|sec:amp-add-roles|search:unparse|search:suggest|search:snippet|search:search|search:resolve-nodes|search:resolve|search:remove-constraint|search:parse|search:get-default-options|search:estimate|search:check-options|prof:value|prof:reset|prof:report|prof:invoke|prof:eval|prof:enable|prof:disable|prof:allowed|ppt:clean|pki:template-set-request|pki:template-set-name|pki:template-set-key-type|pki:template-set-key-options|pki:template-set-description|pki:template-in-use|pki:template-get-version|pki:template-get-request|pki:template-get-name|pki:template-get-key-type|pki:template-get-key-options|pki:template-get-id|pki:template-get-description|pki:need-certificate|pki:is-temporary|pki:insert-trusted-certificates|pki:insert-template|pki:insert-signed-certificates|pki:insert-certificate-revocation-list|pki:get-trusted-certificate-ids|pki:get-template-ids|pki:get-template-certificate-authority|
pki:get-template-by-name|pki:get-template|pki:get-pending-certificate-requests-xml|pki:get-pending-certificate-requests-pem|pki:get-pending-certificate-request|pki:get-certificates-for-template-xml|pki:get-certificates-for-template|pki:get-certificates|pki:get-certificate-xml|pki:get-certificate-pem|pki:get-certificate|pki:generate-temporary-certificate-if-necessary|pki:generate-temporary-certificate|pki:generate-template-certificate-authority|pki:generate-certificate-request|pki:delete-template|pki:delete-certificate|pki:create-template|pdf:make-toc|pdf:insert-toc-headers|pdf:get-toc|pdf:clean|p:status-transition|p:state-transition|p:remove|p:pipelines|p:insert|p:get-by-id|p:get|p:execute|p:create|p:condition|p:collection|p:action|ooxml:runs-merge|ooxml:package-uris|ooxml:package-parts-insert|ooxml:package-parts|msword:clean|mcgm:polygon|mcgm:point|mcgm:geospatial-query-from-elements|mcgm:geospatial-query|mcgm:circle|math:tanh|math:tan|math:sqrt|math:sinh|math:sin|math:pow|math:modf|math:log10|math:log|math:ldexp|math:frexp|math:fmod|math:floor|math:fabs|math:exp|math:cosh|math:cos|math:ceil|math:atan2|math:atan|math:asin|math:acos|map:put|map:map|map:keys|map:get|map:delete|map:count|map:clear|lnk:to|lnk:remove|lnk:insert|lnk:get|lnk:from|lnk:create|kml:polygon|kml:point|kml:interior-polygon|kml:geospatial-query-from-elements|kml:geospatial-query|kml:circle|kml:box|gml:polygon|gml:point|gml:interior-polygon|gml:geospatial-query-from-elements|gml:geospatial-query|gml:circle|gml:box|georss:point|georss:geospatial-query|georss:circle|geo:polygon|geo:point|geo:interior-polygon|geo:geospatial-query-from-elements|geo:geospatial-query|geo:circle|geo:box|fn:zero-or-one|fn:years-from-duration|fn:year-from-dateTime|fn:year-from-date|fn:upper-case|fn:unordered|fn:true|fn:translate|fn:trace|fn:tokenize|fn:timezone-from-time|fn:timezone-from-dateTime|fn:timezone-from-date|fn:sum|fn:subtract-dateTimes-yielding-yearMonthDuration|fn:subtract-dateTimes-yielding-dayTimeDuration|fn:
substring-before|fn:substring-after|fn:substring|fn:subsequence|fn:string-to-codepoints|fn:string-pad|fn:string-length|fn:string-join|fn:string|fn:static-base-uri|fn:starts-with|fn:seconds-from-time|fn:seconds-from-duration|fn:seconds-from-dateTime|fn:round-half-to-even|fn:round|fn:root|fn:reverse|fn:resolve-uri|fn:resolve-QName|fn:replace|fn:remove|fn:QName|fn:prefix-from-QName|fn:position|fn:one-or-more|fn:number|fn:not|fn:normalize-unicode|fn:normalize-space|fn:node-name|fn:node-kind|fn:nilled|fn:namespace-uri-from-QName|fn:namespace-uri-for-prefix|fn:namespace-uri|fn:name|fn:months-from-duration|fn:month-from-dateTime|fn:month-from-date|fn:minutes-from-time|fn:minutes-from-duration|fn:minutes-from-dateTime|fn:min|fn:max|fn:matches|fn:lower-case|fn:local-name-from-QName|fn:local-name|fn:last|fn:lang|fn:iri-to-uri|fn:insert-before|fn:index-of|fn:in-scope-prefixes|fn:implicit-timezone|fn:idref|fn:id|fn:hours-from-time|fn:hours-from-duration|fn:hours-from-dateTime|fn:floor|fn:false|fn:expanded-QName|fn:exists|fn:exactly-one|fn:escape-uri|fn:escape-html-uri|fn:error|fn:ends-with|fn:encode-for-uri|fn:empty|fn:document-uri|fn:doc-available|fn:doc|fn:distinct-values|fn:distinct-nodes|fn:default-collation|fn:deep-equal|fn:days-from-duration|fn:day-from-dateTime|fn:day-from-date|fn:data|fn:current-time|fn:current-dateTime|fn:current-date|fn:count|fn:contains|fn:concat|fn:compare|fn:collection|fn:codepoints-to-string|fn:codepoint-equal|fn:ceiling|fn:boolean|fn:base-uri|fn:avg|fn:adjust-time-to-timezone|fn:adjust-dateTime-to-timezone|fn:adjust-date-to-timezone|fn:abs|feed:unsubscribe|feed:subscription|feed:subscribe|feed:request|feed:item|feed:description|excel:clean|entity:enrich|dom:set-pipelines|dom:set-permissions|dom:set-name|dom:set-evaluation-context|dom:set-domain-scope|dom:set-description|dom:remove-pipeline|dom:remove-permissions|dom:remove|dom:get|dom:evaluation-context|dom:domains|dom:domain-scope|dom:create|dom:configuration-set-restart-user|dom:configuration-s
et-permissions|dom:configuration-set-evaluation-context|dom:configuration-set-default-domain|dom:configuration-get|dom:configuration-create|dom:collection|dom:add-pipeline|dom:add-permissions|dls:retention-rules|dls:retention-rule-remove|dls:retention-rule-insert|dls:retention-rule|dls:purge|dls:node-expand|dls:link-references|dls:link-expand|dls:documents-query|dls:document-versions-query|dls:document-version-uri|dls:document-version-query|dls:document-version-delete|dls:document-version-as-of|dls:document-version|dls:document-update|dls:document-unmanage|dls:document-set-quality|dls:document-set-property|dls:document-set-properties|dls:document-set-permissions|dls:document-set-collections|dls:document-retention-rules|dls:document-remove-properties|dls:document-remove-permissions|dls:document-remove-collections|dls:document-purge|dls:document-manage|dls:document-is-managed|dls:document-insert-and-manage|dls:document-include-query|dls:document-history|dls:document-get-permissions|dls:document-extract-part|dls:document-delete|dls:document-checkout-status|dls:document-checkout|dls:document-checkin|dls:document-add-properties|dls:document-add-permissions|dls:document-add-collections|dls:break-checkout|dls:author-query|dls:as-of-query|dbk:convert|dbg:wait|dbg:value|dbg:stopped|dbg:stop|dbg:step|dbg:status|dbg:stack|dbg:out|dbg:next|dbg:line|dbg:invoke|dbg:function|dbg:finish|dbg:expr|dbg:eval|dbg:disconnect|dbg:detach|dbg:continue|dbg:connect|dbg:clear|dbg:breakpoints|dbg:break|dbg:attached|dbg:attach|cvt:save-converted-documents|cvt:part-uri|cvt:destination-uri|cvt:basepath|cvt:basename|cts:words|cts:word-query-weight|cts:word-query-text|cts:word-query-options|cts:word-query|cts:word-match|cts:walk|cts:uris|cts:uri-match|cts:train|cts:tokenize|cts:thresholds|cts:stem|cts:similar-query-weight|cts:similar-query-nodes|cts:similar-query|cts:shortest-distance|cts:search|cts:score|cts:reverse-query-weight|cts:reverse-query-nodes|cts:reverse-query|cts:remainder|cts:registered
-query-weight|cts:registered-query-options|cts:registered-query-ids|cts:registered-query|cts:register|cts:query|cts:quality|cts:properties-query-query|cts:properties-query|cts:polygon-vertices|cts:polygon|cts:point-longitude|cts:point-latitude|cts:point|cts:or-query-queries|cts:or-query|cts:not-query-weight|cts:not-query-query|cts:not-query|cts:near-query-weight|cts:near-query-queries|cts:near-query-options|cts:near-query-distance|cts:near-query|cts:highlight|cts:geospatial-co-occurrences|cts:frequency|cts:fitness|cts:field-words|cts:field-word-query-weight|cts:field-word-query-text|cts:field-word-query-options|cts:field-word-query-field-name|cts:field-word-query|cts:field-word-match|cts:entity-highlight|cts:element-words|cts:element-word-query-weight|cts:element-word-query-text|cts:element-word-query-options|cts:element-word-query-element-name|cts:element-word-query|cts:element-word-match|cts:element-values|cts:element-value-ranges|cts:element-value-query-weight|cts:element-value-query-text|cts:element-value-query-options|cts:element-value-query-element-name|cts:element-value-query|cts:element-value-match|cts:element-value-geospatial-co-occurrences|cts:element-value-co-occurrences|cts:element-range-query-weight|cts:element-range-query-value|cts:element-range-query-options|cts:element-range-query-operator|cts:element-range-query-element-name|cts:element-range-query|cts:element-query-query|cts:element-query-element-name|cts:element-query|cts:element-pair-geospatial-values|cts:element-pair-geospatial-value-match|cts:element-pair-geospatial-query-weight|cts:element-pair-geospatial-query-region|cts:element-pair-geospatial-query-options|cts:element-pair-geospatial-query-longitude-name|cts:element-pair-geospatial-query-latitude-name|cts:element-pair-geospatial-query-element-name|cts:element-pair-geospatial-query|cts:element-pair-geospatial-boxes|cts:element-geospatial-values|cts:element-geospatial-value-match|cts:element-geospatial-query-weight|cts:element-geospatial-quer
y-region|cts:element-geospatial-query-options|cts:element-geospatial-query-element-name|cts:element-geospatial-query|cts:element-geospatial-boxes|cts:element-child-geospatial-values|cts:element-child-geospatial-value-match|cts:element-child-geospatial-query-weight|cts:element-child-geospatial-query-region|cts:element-child-geospatial-query-options|cts:element-child-geospatial-query-element-name|cts:element-child-geospatial-query-child-name|cts:element-child-geospatial-query|cts:element-child-geospatial-boxes|cts:element-attribute-words|cts:element-attribute-word-query-weight|cts:element-attribute-word-query-text|cts:element-attribute-word-query-options|cts:element-attribute-word-query-element-name|cts:element-attribute-word-query-attribute-name|cts:element-attribute-word-query|cts:element-attribute-word-match|cts:element-attribute-values|cts:element-attribute-value-ranges|cts:element-attribute-value-query-weight|cts:element-attribute-value-query-text|cts:element-attribute-value-query-options|cts:element-attribute-value-query-element-name|cts:element-attribute-value-query-attribute-name|cts:element-attribute-value-query|cts:element-attribute-value-match|cts:element-attribute-value-geospatial-co-occurrences|cts:element-attribute-value-co-occurrences|cts:element-attribute-range-query-weight|cts:element-attribute-range-query-value|cts:element-attribute-range-query-options|cts:element-attribute-range-query-operator|cts:element-attribute-range-query-element-name|cts:element-attribute-range-query-attribute-name|cts:element-attribute-range-query|cts:element-attribute-pair-geospatial-values|cts:element-attribute-pair-geospatial-value-match|cts:element-attribute-pair-geospatial-query-weight|cts:element-attribute-pair-geospatial-query-region|cts:element-attribute-pair-geospatial-query-options|cts:element-attribute-pair-geospatial-query-longitude-name|cts:element-attribute-pair-geospatial-query-latitude-name|cts:element-attribute-pair-geospatial-query-element-name|cts:element-a
ttribute-pair-geospatial-query|cts:element-attribute-pair-geospatial-boxes|cts:document-query-uris|cts:document-query|cts:distance|cts:directory-query-uris|cts:directory-query-depth|cts:directory-query|cts:destination|cts:deregister|cts:contains|cts:confidence|cts:collections|cts:collection-query-uris|cts:collection-query|cts:collection-match|cts:classify|cts:circle-radius|cts:circle-center|cts:circle|cts:box-west|cts:box-south|cts:box-north|cts:box-east|cts:box|cts:bearing|cts:arc-intersection|cts:and-query-queries|cts:and-query-options|cts:and-query|cts:and-not-query-positive-query|cts:and-not-query-negative-query|cts:and-not-query|css:get|css:convert|cpf:success|cpf:failure|cpf:document-set-state|cpf:document-set-processing-status|cpf:document-set-last-updated|cpf:document-set-error|cpf:document-get-state|cpf:document-get-processing-status|cpf:document-get-last-updated|cpf:document-get-error|cpf:check-transition|alert:spawn-matching-actions|alert:rule-user-id-query|alert:rule-set-user-id|alert:rule-set-query|alert:rule-set-options|alert:rule-set-name|alert:rule-set-description|alert:rule-set-action|alert:rule-remove|alert:rule-name-query|alert:rule-insert|alert:rule-id-query|alert:rule-get-user-id|alert:rule-get-query|alert:rule-get-options|alert:rule-get-name|alert:rule-get-id|alert:rule-get-description|alert:rule-get-action|alert:rule-action-query|alert:remove-triggers|alert:make-rule|alert:make-log-action|alert:make-config|alert:make-action|alert:invoke-matching-actions|alert:get-my-rules|alert:get-all-rules|alert:get-actions|alert:find-matching-rules|alert:create-triggers|alert:config-set-uri|alert:config-set-trigger-ids|alert:config-set-options|alert:config-set-name|alert:config-set-description|alert:config-set-cpf-domain-names|alert:config-set-cpf-domain-ids|alert:config-insert|alert:config-get-uri|alert:config-get-trigger-ids|alert:config-get-options|alert:config-get-name|alert:config-get-id|alert:config-get-description|alert:config-get-cpf-domain-names|al
ert:config-get-cpf-domain-ids|alert:config-get|alert:config-delete|alert:action-set-options|alert:action-set-name|alert:action-set-module-root|alert:action-set-module-db|alert:action-set-module|alert:action-set-description|alert:action-remove|alert:action-insert|alert:action-get-options|alert:action-get-name|alert:action-get-module-root|alert:action-get-module-db|alert:action-get-module|alert:action-get-description|zero-or-one|years-from-duration|year-from-dateTime|year-from-date|upper-case|unordered|true|translate|trace|tokenize|timezone-from-time|timezone-from-dateTime|timezone-from-date|sum|subtract-dateTimes-yielding-yearMonthDuration|subtract-dateTimes-yielding-dayTimeDuration|substring-before|substring-after|substring|subsequence|string-to-codepoints|string-pad|string-length|string-join|string|static-base-uri|starts-with|seconds-from-time|seconds-from-duration|seconds-from-dateTime|round-half-to-even|round|root|reverse|resolve-uri|resolve-QName|replace|remove|QName|prefix-from-QName|position|one-or-more|number|not|normalize-unicode|normalize-space|node-name|node-kind|nilled|namespace-uri-from-QName|namespace-uri-for-prefix|namespace-uri|name|months-from-duration|month-from-dateTime|month-from-date|minutes-from-time|minutes-from-duration|minutes-from-dateTime|min|max|matches|lower-case|local-name-from-QName|local-name|last|lang|iri-to-uri|insert-before|index-of|in-scope-prefixes|implicit-timezone|idref|id|hours-from-time|hours-from-duration|hours-from-dateTime|floor|false|expanded-QName|exists|exactly-one|escape-uri|escape-html-uri|error|ends-with|encode-for-uri|empty|document-uri|doc-available|doc|distinct-values|distinct-nodes|default-collation|deep-equal|days-from-duration|day-from-dateTime|day-from-date|data|current-time|current-dateTime|current-date|count|contains|concat|compare|collection|codepoints-to-string|codepoint-equal|ceiling|boolean|base-uri|avg|adjust-time-to-timezone|adjust-dateTime-to-timezone|adjust-date-to-timezone|abs)\b/], 
-["pln",/^[\w:-]+/],["pln",/^[\t\n\r \xa0]+/]]),["xq","xquery"]); diff --git a/site/google-code-prettify/lang-yaml.js b/site/google-code-prettify/lang-yaml.js deleted file mode 100644 index c38729b6c..000000000 --- a/site/google-code-prettify/lang-yaml.js +++ /dev/null @@ -1,2 +0,0 @@ -var a=null; -PR.registerLangHandler(PR.createSimpleLexer([["pun",/^[:>?|]+/,a,":|>?"],["dec",/^%(?:YAML|TAG)[^\n\r#]+/,a,"%"],["typ",/^&\S+/,a,"&"],["typ",/^!\S*/,a,"!"],["str",/^"(?:[^"\\]|\\.)*(?:"|$)/,a,'"'],["str",/^'(?:[^']|'')*(?:'|$)/,a,"'"],["com",/^#[^\n\r]*/,a,"#"],["pln",/^\s+/,a," \t\r\n"]],[["dec",/^(?:---|\.\.\.)(?:[\n\r]|$)/],["pun",/^-/],["kwd",/^\w+:[\n\r ]/],["pln",/^\w+/]]),["yaml","yml"]); diff --git a/site/google-code-prettify/prettify-sunburst-theme.css b/site/google-code-prettify/prettify-sunburst-theme.less similarity index 100% rename from site/google-code-prettify/prettify-sunburst-theme.css rename to site/google-code-prettify/prettify-sunburst-theme.less diff --git a/site/google-code-prettify/prettify.css b/site/google-code-prettify/prettify.less similarity index 100% rename from site/google-code-prettify/prettify.css rename to site/google-code-prettify/prettify.less diff --git a/site/grunt/aliases.yaml b/site/grunt/aliases.yaml new file mode 100644 index 000000000..dec1a9032 --- /dev/null +++ b/site/grunt/aliases.yaml @@ -0,0 +1,15 @@ +compile: + - jade + - browserify + - less + +compile:min: + - compile + - uglify + - cssmin + +default: + - clean + - compile + - connect + - watch diff --git a/site/grunt/browserify.js b/site/grunt/browserify.js new file mode 100644 index 000000000..6b8a2f76a --- /dev/null +++ b/site/grunt/browserify.js @@ -0,0 +1,11 @@ +module.exports = { + options: { + //transform: [['jstify', { engine: 'lodash' }]], + watch: true, + browserifyOptions: { debug: true } + }, + dist: { + dest: 'htdocs/collatex.js', + src: [ 'google-code-prettify/prettify.js', 'collatex.js' ] + } +}; \ No newline at end of file diff --git 
a/site/grunt/clean.js b/site/grunt/clean.js new file mode 100644 index 000000000..925e05d81 --- /dev/null +++ b/site/grunt/clean.js @@ -0,0 +1,3 @@ +module.exports = { + dist: ['htdocs/**/*', '!htdocs/favicon.ico', '!htdocs/images/**'] +}; \ No newline at end of file diff --git a/site/grunt/connect.js b/site/grunt/connect.js new file mode 100644 index 000000000..42b407c8b --- /dev/null +++ b/site/grunt/connect.js @@ -0,0 +1,8 @@ +module.exports = { + dist: { + options: { + base: 'htdocs', + livereload: true + } + } +}; \ No newline at end of file diff --git a/site/grunt/cssmin.js b/site/grunt/cssmin.js new file mode 100644 index 000000000..1df2862a3 --- /dev/null +++ b/site/grunt/cssmin.js @@ -0,0 +1,6 @@ +module.exports = { + dist: { + src: '<%= less.dist.dest %>', + dest: '<%= less.dist.dest %>' + } +}; \ No newline at end of file diff --git a/site/grunt/jade.js b/site/grunt/jade.js new file mode 100644 index 000000000..ab46f9614 --- /dev/null +++ b/site/grunt/jade.js @@ -0,0 +1,14 @@ +module.exports = { + dist: { + expand: true, + cwd: 'templates', + src: ['**/*.jade', '!page.jade'], + dest: 'htdocs', + ext: '.html', + options: { + data: { + version: "1.5" + } + } + } +}; \ No newline at end of file diff --git a/site/grunt/less.js b/site/grunt/less.js new file mode 100644 index 000000000..d3dc046f8 --- /dev/null +++ b/site/grunt/less.js @@ -0,0 +1,9 @@ +module.exports = { + options: { + paths: ['node_modules'] + }, + dist: { + src: 'collatex.less', + dest: 'htdocs/collatex.css' + } +}; \ No newline at end of file diff --git a/site/grunt/uglify.js b/site/grunt/uglify.js new file mode 100644 index 000000000..9e5a7ddf7 --- /dev/null +++ b/site/grunt/uglify.js @@ -0,0 +1,6 @@ +module.exports = { + dist: { + src: '<%= browserify.dist.dest %>', + dest: '<%= browserify.dist.dest %>' + } +}; \ No newline at end of file diff --git a/site/grunt/watch.js b/site/grunt/watch.js new file mode 100644 index 000000000..9140daec3 --- /dev/null +++ b/site/grunt/watch.js @@ -0,0 
+1,14 @@ +module.exports = { + livereload: { + files: 'htdocs/*', + options: { livereload: true } + }, + templates: { + files: '**/*.jade', + tasks: 'jade' + }, + stylesheets: { + files: '**/*.less', + tasks: 'less' + } +}; \ No newline at end of file diff --git a/site/htdocs/about/index.html b/site/htdocs/about/index.html new file mode 100644 index 000000000..15c6afeec --- /dev/null +++ b/site/htdocs/about/index.html @@ -0,0 +1,35 @@ +CollateX – About the Project

    Project History & Agenda

    Development of CollateX started in 2010 as a project within the EU-funded initiative +Interedition, with the aim to create a successor of +Peter Robinson's Collate. While widely +used within the community and valued for its versatility in producing critical apparatuses for scholarly editions, +by then Collate was increasingly hard to deploy as it depends on a runtime environment whose support had been +phased out by its vendor. +

    CollateX was planned as a complete rewrite of Collate that was primarily addressing the architectural challenges +of its predecessor. Over the years though and with more and more participants contributing their requirements and +ideas, it developed a different agenda. On the one hand, Collate is a complete solution for producing a critical apparatus, +with features ranging from its very own algorithm for comparing versions of a text to a powerful graphical user interface that lets the user +control the collation process. On the other hand, CollateX has become a software component which can be embedded into other software or be made a part of +a software system. Its goal is the provision and advancement of current research in the field of computer-supported collation +involving natural language texts. To this end the developers of CollateX put an emphasis on its flexible applicability, be it in terms of +its runtime environment or be it in terms of the specific challenges CollateX has to cope with when applied to +textual traditions of varying language, encoding or publication settings. +

    Contact

    You can contact the team via e-mail (info@collatex.net) or individually (see below). +Some of the team members can also be found in Interedition's IRC channel #interedition +on freenode.net. +

    Development of CollateX takes place on GitHub. +Bug reports and/or +other contributions are welcome.

    +

    Fork me on GitHub \ No newline at end of file diff --git a/site/htdocs/collatex.css b/site/htdocs/collatex.css new file mode 100644 index 000000000..5c258fd10 --- /dev/null +++ b/site/htdocs/collatex.css @@ -0,0 +1 @@ +@charset "UTF-8";.pln{color:#000}@media screen{.str{color:#080}.kwd{color:#008}.com{color:#800}.typ{color:#606}.lit{color:#066}.clo,.opn,.pun{color:#660}.tag{color:#008}.atn{color:#606}.atv{color:#080}.dec,.var{color:#606}.fun{color:red}}@media print,projection{.str{color:#060}.kwd{color:#006;font-weight:700}.com{color:#600;font-style:italic}.typ{color:#404;font-weight:700}.lit{color:#044}.clo,.opn,.pun{color:#440}.tag{color:#006;font-weight:700}.atn{color:#404}.atv{color:#060}}pre.prettyprint{border:1px solid #888}li.L1,li.L3,li.L5,li.L7,li.L9{background:#eee}code .str,pre .str{color:#65B042}code .kwd,pre .kwd{color:#E28964}code .com,pre .com{color:#AEAEAE;font-style:italic}code .typ,pre .typ{color:#89bdff}code .lit,pre .lit{color:#3387CC}code .pln,code .pun,pre .pln,pre .pun{color:#fff}code .tag,pre .tag{color:#89bdff}code .atn,pre .atn{color:#bdb76b}code .atv,pre .atv{color:#65B042}code .dec,pre .dec{color:#3387CC}code.prettyprint,pre.prettyprint{background-color:#000;-moz-border-radius:8px;-webkit-border-radius:8px;-o-border-radius:8px;-ms-border-radius:8px;-khtml-border-radius:8px;border-radius:8px}pre.prettyprint{width:95%;margin:1em auto;padding:1em;white-space:pre-wrap}ol.linenums{margin-top:0;margin-bottom:0;color:#AEAEAE}li.L0,li.L1,li.L2,li.L3,li.L5,li.L6,li.L7,li.L8{list-style-type:none}@media print{code .str,pre .str{color:#060}code .kwd,pre .kwd{color:#006;font-weight:700}code .com,pre .com{color:#600;font-style:italic}code .typ,pre .typ{color:#404;font-weight:700}code .lit,pre .lit{color:#044}code .pun,pre .pun{color:#440}code .pln,pre .pln{color:#000}code .tag,pre .tag{color:#006;font-weight:700}code .atn,pre .atn{color:#404}code .atv,pre .atv{color:#060}}body{width:960px;margin:1em auto}h2{padding:.25em 
0;font-size:131%;font-variant:small-caps}a,a:active,a:link,a:visited{color:#060;font-weight:700;text-decoration:none}h2,h3,h4,h5,h6{margin-left:.5em;margin-right:.5em}dd,dl,li,p{text-align:justify;margin:.5em}ol{list-style:decimal inside}ul{list-style:disc inside}table{margin:1em .5em}td,th{border:1px solid #ccc;padding:.5em}a:hover{color:#060;font-weight:700;text-decoration:underline}#header{padding:1em;color:#fff;background:#060;border-radius:.5em}#header h1,#header p{margin:0}#header p{padding-top:.5em}#header a,#header a:active,#header a:hover,#header a:link,#header a:visited{color:inherit;text-decoration:none}#menu ol{list-style-type:none;margin:0;padding:.5em 0;width:100%;text-align:center}#menu ol li{display:inline-block;margin:0 2em 0 0;font-size:116%;font-weight:700}#menu ol li:before{content:"» "}#menu #menu a:active,#menu #menu a:hover,#menu #menu a:link,#menu #menu a:visited,#menu a{color:#333}#content{margin:1em}#footer{margin-top:3em;width:100%;color:#666;text-align:center}#under-construction{color:red;font-size:197%;width:100%;text-align:center;margin:1em auto}#project-team li{margin:0}#project-team a{color:inherit;font-weight:400}#project-team .institution{font-size:80%}#license pre{border:1px solid #eee;padding:1em}.quote{font-style:italic}.content{margin:.5em}.figure img{display:block;padding:.25em;margin:0 auto}.figure .caption{font-size:80%;text-align:center;font-style:italic}.float-left{float:left;margin-right:2em}.float-right{float:right;margin-left:2em}.clear{clear:both}.download-option{background:#eee;padding:.5em;border-radius:.5em}.download-link,.teaser{font-size:123.1%}#download-binary .prettyprint{width:90%}.toc{list-style:none;border-radius:.5em;padding:.5em;margin:0 2em .5em 0;background:#eee;width:33%}.toc li{margin:.5em 1em}.toc a{color:inherit;font-weight:400}.bibliography dt{float:left;margin-right:.5em}.bibliography dt:before{content:"["}.bibliography dt:after{content:"]"} \ No newline at end of file diff --git 
a/site/htdocs/collatex.js b/site/htdocs/collatex.js new file mode 100644 index 000000000..52751af71 --- /dev/null +++ b/site/htdocs/collatex.js @@ -0,0 +1 @@ +!function a(b,c,d){function e(g,h){if(!c[g]){if(!b[g]){var i="function"==typeof require&&require;if(!h&&i)return i(g,!0);if(f)return f(g,!0);var j=new Error("Cannot find module '"+g+"'");throw j.code="MODULE_NOT_FOUND",j}var k=c[g]={exports:{}};b[g][0].call(k.exports,function(a){var c=b[g][1][a];return e(c?c:a)},k,k.exports,a,b,c,d)}return c[g].exports}for(var f="function"==typeof require&&require,g=0;g0&&c>e;e++)b=b[b.length-1].children;b.push({node:a,title:a.get("text"),children:[]})});var e=function(b,c,d){return a.Array.each(c,function(c,f){var g=(d?d+".":"")+(f+1),h=g+". "+c.title,i=c.node.get("id")||"h"+g.replace(".","_");c.node.set("id",i),c.node.set("text",h);var j=b.appendChild(a.Node.create("
  • "));j.appendChild(a.Node.create("")).setAttrs({text:h,href:"#"+i}),c.children.length>0&&e(j,c.children,g)}),b};e(b,d)}if(c||b){var f=a.getLocation().hash;if(f){var g=a.one(f);g&&g.scrollIntoView(!0)}}})})},{}],"/home/gregor/Documents/collatex/site/google-code-prettify/prettify.js":[function(){var a=null;window.PR_SHOULD_USE_CONTINUATION=!0,function(){function b(a){function b(a){var b=a.charCodeAt(0);if(92!==b)return b;var c=a.charAt(1);return(b=l[c])?b:c>="0"&&"7">=c?parseInt(a.substring(1),8):"u"===c||"x"===c?parseInt(a.substring(2),16):a.charCodeAt(1)}function c(a){return 32>a?(16>a?"\\x0":"\\x")+a.toString(16):(a=String.fromCharCode(a),("\\"===a||"-"===a||"["===a||"]"===a)&&(a="\\"+a),a)}function d(a){for(var d=a.substring(1,a.length-1).match(/\\u[\dA-Fa-f]{4}|\\x[\dA-Fa-f]{2}|\\[0-3][0-7]{0,2}|\\[0-7]{1,2}|\\[\S\s]|[^\\]/g),a=[],e=[],f="^"===d[0],g=f?1:0,h=d.length;h>g;++g){var i=d[g];if(/\\[bdsw]/i.test(i))a.push(i);else{var j,i=b(i);h>g+2&&"-"===d[g+1]?(j=b(d[g+2]),g+=2):j=i,e.push([i,j]),65>j||i>122||(65>j||i>90||e.push([32|Math.max(65,i),32|Math.min(j,90)]),97>j||i>122||e.push([-33&Math.max(97,i),-33&Math.min(j,122)]))}}for(e.sort(function(a,b){return a[0]-b[0]||b[1]-a[1]}),d=[],i=[0/0,0/0],g=0;gh[0]&&(h[1]+1>h[0]&&e.push("-"),e.push(c(h[1])));return e.push("]"),e.join("")}function e(a){for(var b=a.source.match(/\[(?:[^\\\]]|\\[\S\s])*]|\\u[\dA-Fa-f]{4}|\\x[\dA-Fa-f]{2}|\\\d+|\\[^\dux]|\(\?[!:=]|[()^]|[^()[\\^]+/g),c=b.length,e=[],h=0,i=0;c>h;++h){var j=b[h];"("===j?++i:"\\"===j.charAt(0)&&(j=+j.substring(1))&&i>=j&&(e[j]=-1)}for(h=1;hh;++h)j=b[h],"("===j?(++i,void 0===e[i]&&(b[h]="(?:")):"\\"===j.charAt(0)&&(j=+j.substring(1))&&i>=j&&(b[h]="\\"+e[i]);for(i=h=0;c>h;++h)"^"===b[h]&&"^"!==b[h+1]&&(b[h]="");if(a.ignoreCase&&g)for(h=0;c>h;++h)j=b[h],a=j.charAt(0),j.length>=2&&"["===a?b[h]=d(j):"\\"!==a&&(b[h]=j.replace(/[A-Za-z]/g,function(a){return a=a.charCodeAt(0),"["+String.fromCharCode(-33&a,32|a)+"]"}));return b.join("")}for(var 
f=0,g=!1,h=!1,i=0,j=a.length;j>i;++i){var k=a[i];if(k.ignoreCase)h=!0;else if(/[a-z]/i.test(k.source.replace(/\\u[\da-f]{4}|\\x[\da-f]{2}|\\[^UXux]/gi,""))){g=!0,h=!1;break}}for(var l={b:8,t:9,n:10,v:11,f:12,r:13},m=[],i=0,j=a.length;j>i;++i){if(k=a[i],k.global||k.multiline)throw Error(""+k);m.push("(?:"+e(k)+")")}return RegExp(m.join("|"),h?"gi":"g")}function c(b){function c(a){switch(a.nodeType){case 1:if(e.test(a.className))break;for(var b=a.firstChild;b;b=b.nextSibling)c(b);b=a.nodeName,("BR"===b||"LI"===b)&&(f[i]="\n",h[i<<1]=g++,h[i++<<1|1]=a);break;case 3:case 4:b=a.nodeValue,b.length&&(b=j?b.replace(/\r\n?/g,"\n"):b.replace(/[\t\n\r ]+/g," "),f[i]=b,h[i<<1]=g,g+=b.length,h[i++<<1|1]=a)}}var d,e=/(?:^|\s)nocode(?:\s|$)/,f=[],g=0,h=[],i=0;b.currentStyle?d=b.currentStyle.whiteSpace:window.getComputedStyle&&(d=document.defaultView.getComputedStyle(b,a).getPropertyValue("white-space"));var j=d&&"pre"===d.substring(0,3);return c(b),{a:f.join("").replace(/\n$/,""),c:h}}function d(a,b,c,d){b&&(a={a:b,d:a},c(a),d.push.apply(d,a.e))}function e(c,e){function f(a){for(var b=a.d,c=[b,"pln"],k=0,l=a.a.match(g)||[],m={},n=0,o=l.length;o>n;++n){var p,q=l[n],r=m[q],s=void 0;if("string"==typeof r)p=!1;else{var t=h[q.charAt(0)];if(t)s=q.match(t[1]),r=t[0];else{for(p=0;j>p;++p)if(t=e[p],s=q.match(t[1])){r=t[0];break}s||(r="pln")}!(p=r.length>=5&&"lang-"===r.substring(0,5))||s&&"string"==typeof s[1]||(p=!1,r="src"),p||(m[q]=r)}if(t=k,k+=q.length,p){p=s[1];var u=q.indexOf(p),v=u+p.length;s[2]&&(v=q.length-s[2].length,u=v-p.length),r=r.substring(5),d(b+t,q.substring(0,u),f,c),d(b+t+u,p,i(r,p),c),d(b+t+v,q.substring(v),f,c)}else c.push(b+t,r)}a.e=c}var g,h={};!function(){for(var d=c.concat(e),f=[],i={},j=0,k=d.length;k>j;++j){var l=d[j],m=l[3];if(m)for(var n=m.length;--n>=0;)h[m.charAt(n)]=l;l=l[1],m=""+l,i.hasOwnProperty(m)||(f.push(l),i[m]=a)}f.push(/[\S\s]/),g=b(f)}();var j=e.length;return f}function f(b){var 
c=[],d=[];c.push(b.tripleQuotedStrings?["str",/^(?:'''(?:[^'\\]|\\[\S\s]|''?(?=[^']))*(?:'''|$)|"""(?:[^"\\]|\\[\S\s]|""?(?=[^"]))*(?:"""|$)|'(?:[^'\\]|\\[\S\s])*(?:'|$)|"(?:[^"\\]|\\[\S\s])*(?:"|$))/,a,"'\""]:b.multiLineStrings?["str",/^(?:'(?:[^'\\]|\\[\S\s])*(?:'|$)|"(?:[^"\\]|\\[\S\s])*(?:"|$)|`(?:[^\\`]|\\[\S\s])*(?:`|$))/,a,"'\"`"]:["str",/^(?:'(?:[^\n\r'\\]|\\.)*(?:'|$)|"(?:[^\n\r"\\]|\\.)*(?:"|$))/,a,"\"'"]),b.verbatimStrings&&d.push(["str",/^@"(?:[^"]|"")*(?:"|$)/,a]);var f=b.hashComments;return f&&(b.cStyleComments?(c.push(f>1?["com",/^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/,a,"#"]:["com",/^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\n\r]*)/,a,"#"]),d.push(["str",/^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,a])):c.push(["com",/^#[^\n\r]*/,a,"#"])),b.cStyleComments&&(d.push(["com",/^\/\/[^\n\r]*/,a]),d.push(["com",/^\/\*[\S\s]*?(?:\*\/|$)/,a])),b.regexLiterals&&d.push(["lang-regex",/^(?:^^\.?|[!+-]|!=|!==|#|%|%=|&|&&|&&=|&=|\(|\*|\*=|\+=|,|-=|->|\/|\/=|:|::|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|[?@[^]|\^=|\^\^|\^\^=|{|\||\|=|\|\||\|\|=|~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\s*(\/(?=[^*/])(?:[^/[\\]|\\[\S\s]|\[(?:[^\\\]]|\\[\S\s])*(?:]|$))+\/)/]),(f=b.types)&&d.push(["typ",f]),b=(""+b.keywords).replace(/^ | $/g,""),b.length&&d.push(["kwd",RegExp("^(?:"+b.replace(/[\s,]+/g,"|")+")\\b"),a]),c.push(["pln",/^\s+/,a," \r\n  "]),d.push(["lit",/^@[$_a-z][\w$@]*/i,a],["typ",/^(?:[@_]?[A-Z]+[a-z][\w$@]*|\w+_t\b)/,a],["pln",/^[$_a-z][\w$@]*/i,a],["lit",/^(?:0x[\da-f]+|(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d\+)(?:e[+-]?\d+)?)[a-z]*/i,a,"0123456789"],["pln",/^\\[\S\s]?/,a],["pun",/^.[^\s\w"-$'./@\\`]*/,a]),e(c,d)}function g(b,c){function d(a){switch(a.nodeType){case 1:if(g.test(a.className))break;if("BR"===a.nodeName)e(a),a.parentNode&&a.parentNode.removeChild(a);else for(a=a.firstChild;a;a=a.nextSibling)d(a);break;case 3:case 4:if(j){var 
b=a.nodeValue,c=b.match(h);if(c){var f=b.substring(0,c.index);a.nodeValue=f,(b=b.substring(c.index+c[0].length))&&a.parentNode.insertBefore(i.createTextNode(b),a.nextSibling),e(a),f||a.parentNode.removeChild(a)}}}}function e(a){function b(a,c){var d=c?a.cloneNode(!1):a,e=a.parentNode;if(e){var e=b(e,1),f=a.nextSibling;e.appendChild(d);for(var g=f;g;g=f)f=g.nextSibling,e.appendChild(g)}return d}for(;!a.nextSibling;)if(a=a.parentNode,!a)return;for(var c,a=b(a.nextSibling,0);(c=a.parentNode)&&1===c.nodeType;)a=c;k.push(a)}var f,g=/(?:^|\s)nocode(?:\s|$)/,h=/\r\n?|\n/,i=b.ownerDocument;b.currentStyle?f=b.currentStyle.whiteSpace:window.getComputedStyle&&(f=i.defaultView.getComputedStyle(b,a).getPropertyValue("white-space"));var j=f&&"pre"===f.substring(0,3);for(f=i.createElement("LI");b.firstChild;)f.appendChild(b.firstChild);for(var k=[f],l=0;ll;++l)f=k[l],f.className="L"+(l+n)%10,f.firstChild||f.appendChild(i.createTextNode(" ")),m.appendChild(f);b.appendChild(m)}function h(a,b){for(var c=b.length;--c>=0;){var d=b[c];u.hasOwnProperty(d)?window.console&&console.warn("cannot override language handler %s",d):u[d]=a}}function i(a,b){return a&&u.hasOwnProperty(a)||(a=/^\s*o;)l[o]!==l[o+2]?(l[n++]=l[o++],l[n++]=l[o++]):o+=2;for(m=n,o=n=0;m>o;){for(var p=l[o],q=l[o+1],r=o+2;m>=r+2&&l[r+1]===q;)r+=2;l[n++]=p,l[n++]=q,o=r}for(l.length=n;k>e;){var s,t=j[e+2]||h,u=l[a+2]||h,r=Math.min(t,u),v=j[e+1];if(1!==v.nodeType&&(s=g.substring(d,r))){f&&(s=s.replace(b,"\r")),v.nodeValue=s;var w=v.ownerDocument,x=w.createElement("SPAN");x.className=l[a+1];var y=v.parentNode;y.replaceChild(x,v),x.appendChild(v),t>d&&(j[e+1]=v=w.createTextNode(g.substring(r,t)),y.insertBefore(v,x.nextSibling))}d=r,d>=t&&(e+=2),d>=u&&(a+=2)}}catch(z){"console"in window&&console.log(z&&z.stack?z.stack:z)}}var 
k=["break,continue,do,else,for,if,return,while"],l=[[k,"auto,case,char,const,default,double,enum,extern,float,goto,int,long,register,short,signed,sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"],"catch,class,delete,false,import,new,operator,private,protected,public,this,throw,true,try,typeof"],m=[l,"alignof,align_union,asm,axiom,bool,concept,concept_map,const_cast,constexpr,decltype,dynamic_cast,explicit,export,friend,inline,late_check,mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast,template,typeid,typename,using,virtual,where"],n=[l,"abstract,boolean,byte,extends,final,finally,implements,import,instanceof,null,native,package,strictfp,super,synchronized,throws,transient"],o=[n,"as,base,by,checked,decimal,delegate,descending,dynamic,event,fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock,object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var"],l=[l,"debugger,eval,export,function,get,null,set,undefined,var,with,Infinity,NaN"],p=[k,"and,as,assert,class,def,del,elif,except,exec,finally,from,global,import,in,is,lambda,nonlocal,not,or,pass,print,raise,try,with,yield,False,True,None"],q=[k,"alias,and,begin,case,class,def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo,rescue,retry,self,super,then,true,undef,unless,until,when,yield,BEGIN,END"],k=[k,"case,done,elif,esac,eval,fi,function,in,local,set,then,until"],r=/^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)/,s=/\S/,t=f({keywords:[m,o,l,"caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END"+p,q,k],hashComments:!0,cStyleComments:!0,multiLineStrings:!0,regexLiterals:!0}),u={};h(t,["default-code"]),h(e([],[["pln",/^[^]*(?:>|$)/],["com",/^<\!--[\S\s]*?(?:--\>|$)/],["lang-",/^<\?([\S\s]+?)(?:\?>|$
)/],["lang-",/^<%([\S\s]+?)(?:%>|$)/],["pun",/^(?:<[%?]|[%?]>)/],["lang-",/^]*>([\S\s]+?)<\/xmp\b[^>]*>/i],["lang-js",/^]*>([\S\s]*?)(<\/script\b[^>]*>)/i],["lang-css",/^]*>([\S\s]*?)(<\/style\b[^>]*>)/i],["lang-in.tag",/^(<\/?[a-z][^<>]*>)/i]]),["default-markup","htm","html","mxml","xhtml","xml","xsl"]),h(e([["pln",/^\s+/,a," \r\n"],["atv",/^(?:"[^"]*"?|'[^']*'?)/,a,"\"'"]],[["tag",/^^<\/?[a-z](?:[\w-.:]*\w)?|\/?>$/i],["atn",/^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],["lang-uq.val",/^=\s*([^\s"'>]*(?:[^\s"'/>]|\/(?=\s)))/],["pun",/^[/<->]+/],["lang-js",/^on\w+\s*=\s*"([^"]+)"/i],["lang-js",/^on\w+\s*=\s*'([^']+)'/i],["lang-js",/^on\w+\s*=\s*([^\s"'>]+)/i],["lang-css",/^style\s*=\s*"([^"]+)"/i],["lang-css",/^style\s*=\s*'([^']+)'/i],["lang-css",/^style\s*=\s*([^\s"'>]+)/i]]),["in.tag"]),h(e([],[["atv",/^[\S\s]+/]]),["uq.val"]),h(f({keywords:m,hashComments:!0,cStyleComments:!0,types:r}),["c","cc","cpp","cxx","cyc","m"]),h(f({keywords:"null,true,false"}),["json"]),h(f({keywords:o,hashComments:!0,cStyleComments:!0,verbatimStrings:!0,types:r}),["cs"]),h(f({keywords:n,cStyleComments:!0}),["java"]),h(f({keywords:k,hashComments:!0,multiLineStrings:!0}),["bsh","csh","sh"]),h(f({keywords:p,hashComments:!0,multiLineStrings:!0,tripleQuotedStrings:!0}),["cv","py"]),h(f({keywords:"caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END",hashComments:!0,multiLineStrings:!0,regexLiterals:!0}),["perl","pl","pm"]),h(f({keywords:q,hashComments:!0,multiLineStrings:!0,regexLiterals:!0}),["rb"]),h(f({keywords:l,cStyleComments:!0,regexLiterals:!0}),["js"]),h(f({keywords:"all,and,by,catch,class,else,extends,false,finally,for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then,true,try,unless,until,when,while,yes",hashComments:3,cStyleComments:!0,multilineStrings:!0,tripleQuotedStrings:!0,regexLiterals:!0}),["coffee"]),h(e([],[["str",/^[\S\s]+/]]),["regex"]),w
indow.prettyPrintOne=function(a,b,c){var d=document.createElement("PRE");return d.innerHTML=a,c&&g(d,c),j({g:b,i:c,h:d}),d.innerHTML},window.prettyPrint=function(b){function c(){for(var a=window.PR_SHOULD_USE_CONTINUATION?k.now()+250:1/0;m=0){var h,i,f=f.match(n);if(i=!f){i=d;for(var o=void 0,p=i.firstChild;p;p=p.nextSibling)var q=p.nodeType,o=1===q?o?i:p:3===q&&s.test(p.nodeValue)?i:o;i=(h=o===i?void 0:o)&&"CODE"===h.tagName}for(i&&(f=h.className.match(n)),f&&(f=f[1]),i=!1,o=d.parentNode;o;o=o.parentNode)if(("pre"===o.tagName||"code"===o.tagName||"xmp"===o.tagName)&&o.className&&o.className.indexOf("prettyprint")>=0){i=!0;break}i||((i=(i=d.className.match(/\blinenums\b(?::(\d+))?/))?i[1]&&i[1].length?+i[1]:!0:!1)&&g(d,i),l={g:f,h:d,i:i},j(l))}}mh;++h)e.push(d[f][h]);var d=a,k=Date;k.now||(k={now:function(){return+new Date}});var l,m=0,n=/\blang(?:uage)?-([\w.]+)(?!\S)/;c()},window.PR={createSimpleLexer:e,registerLangHandler:h,sourceDecorator:f,PR_ATTRIB_NAME:"atn",PR_ATTRIB_VALUE:"atv",PR_COMMENT:"com",PR_DECLARATION:"dec",PR_KEYWORD:"kwd",PR_LITERAL:"lit",PR_NOCODE:"nocode",PR_PLAIN:"pln",PR_PUNCTUATION:"pun",PR_SOURCE:"src",PR_STRING:"str",PR_TAG:"tag",PR_TYPE:"typ"}}()},{}]},{},["/home/gregor/Documents/collatex/site/google-code-prettify/prettify.js","/home/gregor/Documents/collatex/site/collatex.js"]); \ No newline at end of file diff --git a/site/htdocs/doc/index.html b/site/htdocs/doc/index.html new file mode 100644 index 000000000..f7207d174 --- /dev/null +++ b/site/htdocs/doc/index.html @@ -0,0 +1,444 @@ +CollateX – Documentation

      To express textual variance, CollateX uses a graph-based data model +(Schmidt 2009). +On top of this model it supports several algorithms to progressively align multiple text versions. +

      The Gothenburg Model

      Developers of CollateX and +Juxta met for the first time in 2009 at a joint workshop of +COST Action 32 and +Interedition in Gothenburg. They started discussing how the different concerns +of computer-supported collation of texts could be separated such that these two as well as similar projects would have a common +understanding of its process and could thus collaborate more efficiently on the development of collation tools +as well as their components. As a first result of this ongoing discussion, the participants identified five distinct tasks +present in any computer-supported collation workflow. +

      CollateX is designed around this separation of concerns.

      Tokenization

      Tokenizer

      A tokenized text

      While computers can compare a text's versions on a character-by-character basis, in the more common use case each +version is first split up into parts – henceforth called tokens – so the comparison can be conducted +on a more coarse-grained level where the tokens to be compared ideally correspond to the text's units which carry meaning. +This pre-processing step is called tokenization and performed by a tokenizer; it can happen on any +level of granularity, i.e. on the level of syllables, words, lines, phrases, verses, paragraphs or text nodes in a +DOM. +

      Another service provided by tokenizers and of special value to the comparison of natural language texts relates to marked-up +text versions: As most collation software primarily compares text versions based on their textual content, embedded markup +would usually get in the way of this process and therefore needs to be discarded or “pushed in the background”, so the +collation tool does not have to be concerned about the specifics of a text's encoding. At the same time it might be valuable +to keep the markup context of every token for reference, for instance if one wanted to make use of it when comparing tokens. +

      The figure to the right depicts this process: The line on top shows a marked-up text, its content as the +characters "a", "b", "c" and "d" – each representing a token – and "e1", "e2" as examples of embedded markup elements. +A markup-aware tokenizer would not only split this version into 4 distinct tokens but transform it into a sequence of such tokens, +with each token referring to its markup context. +

      For now CollateX offers a simple tokenizer, mainly serving prototyping purposes by either

      • splitting plain text without any embedded markup on boundaries determined by whitespace, or
      • evaluating a configurable XPath 1.0 expression on an XML-encoded text version which yields a list of node values as textual tokens.

      While not offering a comprehensive tokenizer itself, CollateX can be combined with any such tool that suits your specific requirements. +CollateX only expects you then to provide text versions in pre-tokenized form and define a token comparator function which – when +called with any two tokens – evaluates to a match in case those two tokens shall be treated as equal, or a mismatch in case +this should not be assumed. Formally speaking, a token comparator function defines an +equivalence relation over all tokens for a +collation. In processing tokens on the level of their equivalence defined by such a relation, CollateX is agnostic with regard to what constitutes +a token in your specific use case, whether it is plain text, text with a markup context or not textual at all. +

      Detailed information about when and how to define your own notion of a token and its corresponding equivalence relation will be given in +the following sections on CollateX' usage. Its built-in tokenizer will provide for an easy start. Later on you can opt for a more versatile tokenizer +and/or token comparator function in order to enhance the accuracy of collation results. +

      Normalization/Regularization

      With a configurable equivalence relation between tokens (defined via the aforementioned comparator function), CollateX can compare +text versions which are comprised of arbitrary token sequences. For a larger number of use cases though, this flexibility of defining a +fully customized comparator function is not really needed. It might suffice to normalize the tokens' textual content +such that an exact matching of the normalized content yields the desired equivalence relation. For instance, in many cases all tokens +of the text versions are normalized to their lower-case equivalent before being compared, thereby making their comparison case insensitive. Other +examples would be the removal of punctuation, the rule-based normalization of orthographic differences or the +stemming of words. +

      Just as with the tokenizer included in CollateX, its normalization options are rather simple. Beyond the mentioned case normalization +and the removal of punctuation and/or whitespace characters, CollateX does not include any sophisticated normalization routines. Instead its +API and supported input formats provide the user with options to plug in their own components when needed. +

      Alignment

      Alignment

      An alignment of 3 versions

      After each version has been split into a sequence of tokens and each has been (optionally) normalized, the token +sequences will be aligned. The alignment process constitutes the core of CollateX' functionality and is generally conducted +by +

      1. finding a set of matching tokens determined by the token equivalence relation, and
      2. aligning them via the insertion of gaps such that the token sequences of all versions line up optimally.

      Looking at an example, assume that we have three versions: the first is comprised of the token sequence ["a", "b", +"c", "d"], the second reads ["a", "c", "d", "b"] and the third ["b", "c", "d"]. A collation tool may align these three +versions as depicted on the right. Each version occupies a column, matching tokens are aligned horizontally in a +row, gaps are inserted as needed during the alignment process and denoted via a hyphen. Depending on +which perspective one interprets this alignment table, one can say that the "b" in the second +row was omitted in the second version or that it has been added in the first and the third. A +similar statement can be made about the "b" in the last row, inverting the relationship of being added or +omitted. Basic edit operations (e.g. those underlying the concept +of edit distance) are thus +implicitly expressed in such an alignment and can be interpreted accordingly to make assumptions about how a +text has been changed. +

      The concept of sequence alignment and its tabular representation is well established in the field of Humanities +Computing; alignment tables like the one shown can be encoded with well-known apparatus encoding schemes. In +the parallel segmentation mode of TEI-P5's +apparatus encoding scheme, to pick just one possible representation, each row would be encoded as a segment, with empty readings +standing in for the gaps. Optionally, consecutive segments with matching readings for each version could be concatenated, so that for our example +a possible encoding capturing the alignment information reads: +

      <app>
      +  <rdg wit="#w1 #w2">a</rdg>
      +  <rdg wit="#w3" />
      +</app>
      +<app>
      +  <rdg wit="#w1 #w3">b</rdg>
      +  <rdg wit="#w2" />
      +</app>
      +<app>
      +  <rdg wit="#w1 #w2 #w3">cd</rdg>
      +</app>
      +<app>
      +  <rdg wit="#w2">b</rdg>
      +  <rdg wit="#w1 #w3" />
      +</app>
      +

      Also beyond the field of Humanities Computing, the technique of sequence alignment +has many application areas; Bioinformatics for example has addressed it as a computational problem thoroughly in recent years. +In this context and as part of the larger field of pattern matching, +extensive research exists on the topic. CollateX primarily strives to make the results of this research available to +textual scholars. For this import of computational methods it has to be noted though that – generally speaking – +the assessment of findings in the Humanities is based on interpretation. While it certainly can be supported by computational means, +it is not necessarily computable. As a concrete consequence of that difference in methodology, CollateX +offers its users not one algorithm optimized by specific criteria, but a choice between +several alignment algorithms so they can select the one that +supports their expected results best, always assuming that any computational heuristic may fail in the light of +subjective judgement. +

      Analysis/Feedback

      Alignment Analysis

      Analyzing an alignment

      As the heuristic approach to the problem of sequence alignment may not yield the desired result, a further analysis +of the alignment may be necessary. Echoing the example from the above section, evidence not accessible to the +collation tool (e.g. because it was not encoded in the text versions at hand) might support the assumption of token "b" +in row 2 and 5 as not only being added/omitted but transposed/moved (see figure to the right). +While heuristic algorithms may compute transpositions as part of the alignment process, the correctness of such a computation, +given external evidence and its heuristic nature, obviously cannot be ensured. +

      An additional (possibly manual) analysis of the alignment result therefore may alleviate that deficiency by introducing the +possibility of a feedback cycle, in which users edit the alignment and feed their knowledge back into the alignment process for +another run delivering enhanced results. The declaration of pre-determined alignments between specific tokens and the +parametrization of optimizing algorithms along the requirements of a specific use case would be such feedback information which +influences results substantially. +

      CollateX offers rudimentary support for tailoring alignment results to a user's specific requirements, mainly through +its Java API. It is an area in need for improvement, particularly with regard to its +ease of use. +

      Visualization

      The final concern of any collation workflow relates to the visualization of its results. As the broad variety +of building principles, layouts and notational conventions found in printed apparatuses already suggests, representing +textual variance is a complex problem on its own. Software like Juxta +has demonstrated the potential of digital media to cope with this complexity in innovative ways. For CollateX, the visualization +of results is deemed out of scope at the moment. Instead it provides several output formats which facilitate +the integration with software in charge of visualizing results, be it in printed or in digital form. +

      The Data Model: Variant Graphs

      The tabular representation of collation results as shown in the section on sequence alignment is popular, +in the Humanities and beyond. CollateX can output results in this representation but uses a different one internally +for modelling textual variance: variant graphs. +

      Variant graphs are the central data structure of CollateX. Any generated output from CollateX is a derivation, providing different views on it. +The idea of a graph-oriented model for expressing textual variance has been originally developed by Desmond Schmidt (Schmidt 2008, +Schmidt 2009, Schmidt 2009a) +and proved to be particularly well suited as a data model for computer-supported collation. The following figure taken from one of his publications +illustrates it: +

      Schmidt's Variant Graph Model

      Schmidt/Colomb's Variant Graph Model

      Variant graphs are in principle directed and acyclic. They are comprised at least of a +start and end node/vertex ("s" and "e" in the figure above) and can be traversed from the one to the other via labelled edges. The labels on each edge +contain content segments of compared text versions and a set of identifiers/sigils, denoting the versions which contain the respective content of an edge's label. +Thus +

      1. common segments of multiple text versions can be merged in a variant graph,
      2. differing segments result in the graph branching at nodes, while
      3. each version can still be retrieved from the graph by traversing it along the edges labeled with the appropriate identifier of that version.

      Following these principles, the depicted variant graph models three text versions A, B and C with the following content (markup omitted):

      AQueste è l'ultima traccia d'un antico acquedotto di sguardi, una orbita assorta e magica:
      BQueste è l'ultima cenno d'un antico acquedotto di sguardi, la sua curva sacra e muta:
      CQueste è l'ultima porta d'un antico acquedotto di sguardi, la sua curva sacra e solitaria:

      In order to account for the separation of concerns laid out above, CollateX' implementation of Schmidt's model +adjusted the latter slightly. Instead of labelling the edges of a variant graph with two attributes – the content as well as the sigils of text versions containing it – +the edges of variant graphs in CollateX are only labeled with sigil sets. The version's content segments – in the form of partial token sequences – have +been moved to the nodes/vertices. The ending of the example graph then looks like this (with sigils being mapped from A, B, C to W1, W2, W3): +

      CollateX's Variant Graph Model

      CollateX' Variant Graph Model

      The above illustration does not reveal the internal structure of the graph completely insofar as the nodes' labels in this figure are a simplification. +For instance, the second node in the graph (read from left to right) is labeled "sacra", with the two versions W2 and W3 "sharing some content". More precisely +though and in line with the above definition of tokens and their equivalence relation, W2 and W3 do not "share some content". Instead they +both contain a token with the content "sacra", both deemed to be equal according to the definition of a specific token comparator function. In the graphical +representation of the variant graph above, this subtle distinction is not made and both tokens are just represented via their common textual content. In CollateX' +data model though, this distinction is still relevant and represented: Each node/vertex in a variant graph is not modelled via textual content (as it would be the +case when translated directly from Schmidt's model) but as a set of tokens per node originating from one or more versions, with all tokens in such a +set belonging to the same equivalence class. +

      The described change to Schmidt's model serves mainly two purposes: Firstly, it allows for arbitrary tokens to be compared and their commonalities, differences as +well as their sequential order to be represented in a graph structure. Secondly, CollateX' graph model is easy to transform into the tabular representation +introduced further above by ranking the graph's nodes in topological order +and aligning tokens which belong to nodes of the same rank. +

      It has to be noted that also in this data model, the transposition of tokens still remains a problematic case. Like in Schmidt's model, CollateX represents the +transposition of a token (or more precisely: its content) as a link between nodes (in the case of Schmidt's model: edges) containing the transposed segment. The link is undirected +and does not form part of a variant graph's traversal scheme because transposition links would break the assertion of a variant graph's acyclic nature and +consequently the ability to sort it topologically. While the linking of nodes can represent transposed segments sufficiently, it is superimposed +on a variant graph, i.e. it does not integrate well with it. Future work in this area may yield a more concise representation. +

      Alignment Algorithms

      CollateX strives for maximum flexibility on the users' side when comparing text versions and adjusting the results to their requirements. One part of this +flexibility is rooted in the support of several alignment algorithms users can switch between and experiment with. +

      Currently three algorithms have been implemented. They all operate on variant graph structures and belong to the group of progressive alignment algorithms. +Instead of comparing all versions at once, they +

      1. start by comparing two versions,
      2. transform the result into a variant graph, then
      3. progressively compare another version against that graph, and
      4. merge the result of that comparison into the graph,
      5. repeating the procedure until all versions have been merged.

      On the one hand, the progressive approach is advantageous because it reduces the problem of comparing an arbitrary number of versions to the +simpler comparison of a single version with a variant graph representing several ones. The disadvantage on the other hand is the occasional dependence of +the result on the order in which versions are merged into the graph (Spencer 2004). +Adding tools to deal with this dependency, e.g. by performing a phylogenetic analysis +to determine an optimal order, is planned for a future version. +

      Dekker

      The most mature algorithm offered by CollateX thus far has been developed by Ronald Haentjens Dekker (Dekker 2011). +It aligns an arbitrary number of text versions, optimizes the local alignment of partial token sequences (phrases) and detects transpositions. +

      Needleman-Wunsch

      The Needleman-Wunsch algorithm +(Needleman 1970) is a well-known global alignment +algorithm broadly applied in Bioinformatics and the social sciences. Based on dynamic programming, +this algorithm searches for an optimal alignment of an arbitrary number of versions by consulting a scoring function which penalizes the insertion of gaps. It does not take +the possibility of transposed segments into account though. +

      The scoring function in CollateX' implementation cannot be freely configured at the moment; the gap penalty is assumed to be constant and equals +the score of a match. +

      MEDITE

      Only recently added to the code base, this algorithm takes its name from a pairwise +alignment algorithm developed by Julien Bourdaillet and Jean-Gabriel Ganascia (Bourdaillet 2007). +It is based on suffix trees for the search of maximal unique matches between +text versions and the A* algorithm for optimizing the alignment. +Like Dekker's algorithm, it takes transpositions into account while doing so. +

      CollateX' implementation of this algorithm is in an experimental stage. While it already delivers promising results, it has not been +fully optimized and – above all – not been extensively tested. In the case of issues with this algorithm, +the CollateX team would appreciate feedback. Alternatively users can download +the original version of MEDITE written by the algorithm's +authors. +

      Input

      This section describes the different input formats CollateX supports. Besides the contents of a text's versions to be compared, the +input may also include parameters, e.g. the alignment algorithm to be used. +

      Plain Text

      Like any collation tool, CollateX can process text versions provided as plain text. As CollateX is written for the Java Virtual Machine, +internally the comparison of plain text is based on the JVM's string type and thus on 16-bit +Unicode characters. +

      Depending on the way CollateX is used, plain text versions can also be provided in other +encodings supported by the Java Platform and will be converted to Unicode before comparison. The command line interface +is one such interface which supports character set conversions. +

      Plain text versions are always subject to tokenization and optional normalization of the resulting token sequence before they will be compared with each other. +

      JSON

      As a more flexible format, CollateX supports input in JavaScript Object Notation (JSON). A set of text versions +to be compared can be JSON encoded as follows: +

      {
      +  "witnesses" : [
      +    {
      +      "id" : "A",
      +      "content" : "A black cat in a black basket"
      +    },
      +    {
      +      "id" : "B",
      +      "content" : "A black cat in a black basket"
      +    },
      +    {
      +      "id" : "C",
      +      "content" : "A striped cat in a black basket"
      +    },
      +    {
      +      "id" : "D",
      +      "content" : "A striped cat in a white basket"
      +    }
      +  ]
      +}
      +

      JSON input always consists of a single root object wrapping input data. The root object has one required property containing the versions to be compared which +(for historical reasons) is named witnesses. The value of this property is an array (a list) of objects in turn, with each object representing a version. +The order of the array elements determines the order in which they are processed by an alignment algorithm, i.e. in which versions are merged into a variant graph. +

      Each object in the witnesses array must have a unique identifier in the required property id. This identifier will be used in the +output to reference a particular version. Besides the identifier each object must describe the content of the version. +The content can either be specified as a string property named content as shown above. In this case the version is treated like a plain text version +with tokenization and normalization taking place before the alignment. +

      Another option is to provide the content of versions in tokenized (and optionally normalized) form:

      {
      +  "witnesses" : [
      +    {
      +      "id" : "A",
      +      "tokens" : [
      +          { "t" : "A", "ref" : 123 },
      +          { "t" : "black" , "adj" : true },
      +          { "t" : "cat", "id" : "xyz" }
      +      ]
      +    },
      +    {
      +      "id" : "B",
      +      "tokens" : [
      +          { "t" : "A" },
      +          { "t" : "white" , "adj" : true },
      +          { "t" : "kitten.", "n" : "cat" }
      +      ]
      +    }
      +  ]
      +}
      +

      Instead of providing a property content for a version, one can provide a sequence of tokens via the property tokens. +The version's property value must be a list with one object per token. Each token object in turn must at least contain a property t, which defines +its content. Accordingly, in the example above, version "A" has the tokens ["A", "black", "cat"] whereas version "B" is comprised of the token sequence +["A", "white", "kitten"]. +

      Optionally a normalized reading of the token can be provided in the property n. +Again, in the example above, that means the last token of version "B" is normalized from the reading "kitten" to the reading "cat", thus facilitating +a match of "kitten" with the last token of version "A". +

      Apart from these 2 defined properties t and n, token objects can contain an arbitrary number of additional properties. +Additional properties will not be interpreted by CollateX but just be passed through, reappearing in +the output unchanged. Properties like ref, adj or id in the example would be such +additional properties of a token object. Users of the JSON input can make use of this pass-through mode e.g. in order to uniquely identify aligned tokens +independently of their (possibly non-unique) content. +

      When using interfaces like the HTTP service of CollateX, JSON encoded input can contain optional parameters +controlling the collation process. You can set the alignment algorithm for instance by providing a property algorithm in the root object with +one of the values "needleman-wunsch", "medite" or "dekker" (the default): +

      {
      +  "witnesses": [ … ],
      +  "algorithm": "needleman-wunsch"
      +}
      +

      There is also limited support for customizing the token comparator function. Via JSON, two functions are available:

      {
      +  "witnesses": [ … ],
      +  "algorithm": "…",
      +  "tokenComparator": { type: "equality" }
      +}
      +

      The default function, which can also be explicitly configured like shown above, tests for exact equality of the normalized token content. An alternative is the use of approximate matching via +a Levenshtein/edit distance threshold for matching tokens: +

      {
      +  "witnesses": [ … ],
      +  "tokenComparator": {
      +    "type": "levenshtein",
      +    "distance": 2
      +  }
      +}
      +

      For approximate matching, the type property of the token object describing the token comparator function must be assigned the value "levenshtein". +The optional property distance defines the maximum edit distance between two normalized token strings which is still considered to be a match. +An edit distance of 1 is the default. +

      XML

      XML-encoded text versions are also supported by CollateX, though the ability to preserve the markup context during the collation process is +fairly limited at the moment when not using the Java API. Out of the box, you can compare XML documents either via +the command line interface or collate them as part of a processing pipeline in Apache Cocoon. +

      On the command line, CollateX accepts XML documents with arbitrary tag sets. All it needs is an XPath 1.0 +expression that evaluates to a node set for each document. The +text content of each node in such a set equals a token. For example, the XPath expression "//w" would result in a text version for each XML document with the sequence +of text segments enclosed in <w/> elements as tokens. +

      When using CollateX' Apache Cocoon component, the collation will be performed on one or more XML fragments in an input document which adhere to the following schema:

      <collation xmlns="http://interedition.eu/collatex/ns/1.0" joined="true" editDistance="1" algorithm="medite" format="tei">
      +    <witness>Almost every aspect of what scholarly editors do may be changed</witness>
      +    <witness>Hardly any aspect of what stupid editors do in the privacy of their own home may be changed again and again</witness>
      +    <witness>very many aspects of what scholarly editors do in the livingrooms of their own home may not be changed</witness>
      +</collation>
      +

      CollateX will process any XML fragment enclosed in a <collation/> element from its namespace. In order to configure the collation, this element can have a number of +optional attributes: +

      algorithmthe collation algorithm to use: "dekker" (default), "needleman-wunsch" or "medite"
      formatthe desired output format: "table" for an alignment table in CollateX' custom schema (default), or "tei" for output in TEI P5's parallel segmentation format
      editDistancethe edit distance threshold for optional fuzzy matching of tokens; the default is exact matching
      joinedWhether consecutive alignments of tokens should be joined/merged in the output, or whether each alignment of tokens should occupy a separate cell/segment; the latter being the default.

      Each text version to be collated is given as an XML fragment enclosed in a <witness/> element. CollateX will compare all versions contained in a +<collation/> element against each other. Markup within <witness/> elements is ignored; only the text content is considered and tokenized/normalized +before collation as if it were originating from a plain text source. +

      Output

      CollateX supports several formats to represent collation results.

      JSON

      In conjunction with JSON being supported as an input format, collation results can be output in JSON as well. +The schema of such output resembles matrices commonly encountered in sequence alignment representations, +and looks as follows for the second example given in the section on JSON input (indentation/whitespace added for easier readability): +

      {
      +    "witnesses":["A","B"],
      +    "table":[
      +        [ [ {"t":"A","ref":123 } ], [ {"t":"A" } ] ],
      +        [ [ {"t":"black","adj":true } ], [ {"t":"white","adj":true } ] ],
      +        [ [ {"t":"cat","id":"xyz" } ], [ {"t":"kitten.","n":"cat" } ] ]
      +    ]
      +}
      +

      The root object always contains 2 properties. The value of property witnesses is a list of all compared text versions, represented by their sigils/identifiers. +The witness list's order is significant insofar as the contents of the second property table – which contains the actual alignment – is ordered accordingly. +

      The tabular alignment data is represented as a list of lists, with

      • the parent list containing one entry per aligned segment, and
      • each segment/ child list containing sets of tokens from each compared version.

      Because aligned segments can span multiple tokens, the aligned token sets of each text version are also represented as lists, we effectively have 3 levels:

      1. On the top-level, each list entry represents a set of aligned segments from different text versions.
      2. On the intermediate level, each list entry represents a set of tokens from a particular text version.
      3. On the lowest level, each list entry is an object representing a single token.

      Our example output thus is comprised of 3 segments, each containing exactly one token per text version. The order in which the token sets are listed equals the +order of the text versions as listed in the property witnesses. Thus "A" from text version A is aligned with "A" from text version B, +"black" from text version A is aligned with "white" from text version B, and so on. +

      Additions and omissions are expressed via empty token sets, e.g. an alignment of

      {
      +    "witnesses":["X","Y"],
      +    "table":[
      +        [ [ {"t":"A" } ], [ {"t":"A" } ] ],
      +        [ [ {"t":"brown" } ], [] ],
      +        [ [ {"t":"dog" } ], [ {"t":"dog" } ] ]
      +    ]
      +}
      +

      could be interpreted as "brown" being added in version "X" or omitted in version "Y".

      Please note that transpositions are not represented explicitly in tabular output formats like this one. While the detection of transpositions affects the alignment, +the links between tokens which are assumed to be transposed by the collation algorithm are not given in this output format. Support for transpositions in tabular +representations of collation results will be added in a future version of CollateX. +

      TEI P5

      The tabular representation of alignments described in the previous section can be encoded in a number of ways.

      <?xml version='1.0' encoding='UTF-8'?>
      +<cx:apparatus
      +  xmlns:cx="http://interedition.eu/collatex/ns/1.0"
      +  xmlns="http://www.tei-c.org/ns/1.0">
      +    A
      +    <app>
      +      <rdg wit="A">black</rdg>
      +      <rdg wit="B">white</rdg>
      +    </app>
      +    <app>
      +      <rdg wit="A">cat</rdg>
      +      <rdg wit="B">kitten.</rdg>
      +    </app>
      +</cx:apparatus>
      +

      XML

      <alignment xmlns="http://interedition.eu/collatex/ns/1.0">
      +    <row>
      +        <cell sigil="w1">Auch hier </cell>
      +        <cell sigil="w2">Ich </cell>
      +        <cell sigil="w3">Ich </cell>
      +    </row>
      +    <row>
      +        <cell sigil="w1">hab </cell>
      +        <cell sigil="w2">hab </cell>
      +        <cell sigil="w3">hab </cell>
      +    </row>
      +    <row>
      +        <cell sigil="w1">ich </cell>
      +        <cell sigil="w2">auch hier </cell>
      +        <cell sigil="w3">auch hier </cell>
      +    </row>
      +    <row>
      +        <cell sigil="w1">wieder ein Plätzchen</cell>
      +        <cell sigil="w2">wieder ein Pläzchen</cell>
      +        <cell sigil="w3">wieder ein Pläzchen</cell>
      +    </row>
      +</alignment>
      +

      GraphML

      The GraphML-formatted output of a variant graph is suitable for import of (possibly larger) graphs in tools +for complex graph analysis and visualization, e. g. Gephi. +For an example GraphML document, take a look at sample output from the +web console. +

      GraphViz DOT

      digraph G {
      +  v301 [label = ""];
      +  v303 [label = "A"];
      +  v304 [label = "black"];
      +  v306 [label = "white"];
      +  v305 [label = "cat"];
      +  v302 [label = ""];
      +  v301 -> v303 [label = "A, B"];
      +  v303 -> v304 [label = "A"];
      +  v303 -> v306 [label = "B"];
      +  v304 -> v305 [label = "A"];
      +  v306 -> v305 [label = "B"];
      +  v305 -> v302 [label = "A, B"];
      +}
      +

      The Command Line Interface

      usage: collatex [<options>]
      +                (<json_input> | <witness_1> <witness_2> [[<witness_3>] ...])
      +-a,--algorithm <arg>           progressive alignment algorithm to use
      +                                 'dekker' (default), 'medite',
      +                                 'needleman-wunsch'
      +-f,--format <arg>              result/output format: 'json', 'csv', 'dot',
      +                                 'graphml', 'tei'
      +-h,--help                      print usage instructions (which your are
      +                                 looking at right now)
      +-ie,--input-encoding <arg>     charset to use for decoding non-XML
      +                                 witnesses; default: UTF-8
      +-o,--output <arg>              output file; '-' for standard output
      +                                 (default)
      +-oe,--output-encoding <arg>    charset to use for encoding the output;
      +                                 default: UTF-8
      +-s,--script <arg>              ECMA/JavaScript resource with functions to be
      +                                 plugged into the alignment algorithm
      +-t,--tokenized                 consecutive matches of tokens will *not* be
      +                                 joined to segments
      +-xml,--xml-mode                witnesses are treated as XML documents
      +  -xp,--xpath <arg>              XPath 1.0 expression evaluating to tokens of
      +                                 XML witnesses; default: '//text()'
      +                                 

      ECMA/JavaScript Callbacks

      Apache Cocoon Integration

      <map:components>
      +...
      +<map:transformers>
      +    ...
      +    <map:transformer name="collatex" src="eu.interedition.collatex.cocoon.CollateXTransformer" />
      +    ...
      +</map:transformers>
      +...
      +</map:components>
      +
      <map:pipelines>
      +...
      +<map:pipeline>
      +  <map:match pattern="collatex-test">
      +    <map:generate src="resource/internal/collation.xml" />
      +    <map:transform type="collatex"/>
      +    <map:serialize type="xml"/>
      +  </map:match>
      +</map:pipeline>
      +...
      +</map:pipelines>
      +

      The RESTful Web Service

      This page documents the +Application Programming Interface (API) +of CollateX via which you can provide textual versions (“witnesses”) to be compared and get the collation result back in a number of formats. +

      The CollateX service is callable via +HTTP POST requests to +${cp}/collate. +

      It expects input formatted in JavaScript Object Notation (JSON) as the request body; +accordingly the content type of the HTTP request must be set to application/json by the client. +

      The output format of the collator, contained in the response to an HTTP POST request, can be chosen via +an Accept HTTP header in the request. The following output formats are supported: +

      application/json(per default) the tabular alignment of the witnesses' tokens, represented in JSON
      application/tei+xmlthe collation result as a list of critical apparatus entries, encoded in TEI P5 parallel segmentation mode
      application/graphml+xmlthe variant graph, represented in GraphML format
      text/plainthe variant graph, represented in Graphviz' DOT Language
      image/svg+xmlthe variant graph, rendered as an SVG vector graphics document

      For further examples, take a look at sample output from the web console.

      The HTTP-based JavaScript API

      Enables the use of CollateX' RESTful API via JavaScript … Based on YUI framework

      Requirements

      Add dependencies to header … YUI library plus CollateX module …

      <script type="text/javascript" src="http://yui.yahooapis.com/3.8.1/build/yui/yui-min.js"></script>
      +<script type="text/javascript" src="http://collatex.net/demo/collatex.js"></script>
      +

      Substitute URL prefix [ROOT] with the base URL of your installation, e.g. +this one for the installation you are currently looking at … +

      YUI module interedition-collate available now … supports cross-domain AJAX requests via +CORS … +

      Sample usage

      YUI().use("node", "collatex", function(Y) {
      +    new Y.CollateX({ serviceUrl: "http://collatex.net/demo/collate" }).toTable([{
      +        id: "A",
      +        content: "Hello World"
      +    }, {
      +        id: "B",
      +        tokens: [
      +            { "t": "Hallo", "n": "hello" },
      +            { "t": "Welt", "n": "world" }
      +        ]
      +    }], Y.one("#result"));
      +});
      +

      toTable() takes witness array as first parameter; second parameter is DOM node which serves as container for +the resulting HTML alignment table … +

      … generic collate(witnesses, callback) as well as methods for other formats available: +toSVG(), toTEI(), toGraphViz() … +

      … configuration of a collator instance via methods like withDekker(), withFuzzyMatching(maxDistance) … +

      API Documentation (Javadoc)

      here

      Resources/ Bibliography

      Bourdaillet 2007
      Bourdaillet J. and Ganascia J.-G., 2007. Practical block sequence alignment with moves. LATA 2007 - International Conference on Language and Automata Theory and Applications, 3/2007.
      Collate
      Robinson, P., 2000. Collate.
      Dekker 2011
      Dekker, R. H. and Middell, G., 2011. Computer-Supported Collation with CollateX: Managing Textual Variance in an Environment with Varying Requirements. Supporting Digital Humanities 2011. University of Copenhagen, Denmark. 17-18 November 2011.
      Juxta 2013
      Performant Software Solutions LLC, 2013. Juxta.
      Needleman 1970
      Needleman, Saul B. and Wunsch, Christian D., 1970. A general method applicable to the search for similarities in the amino acid sequence of two proteins. Journal of Molecular Biology 48 (3), 443–53.
      NMerge 2012
      Schmidt, D., 2012. NMerge. The nmerge Java library/commandline tool for making multi-version documents.
      Schmidt 2008
      Schmidt, D., 2008. What's a Multi-Version Document. Multi-Version Documents Blog.
      Schmidt 2009
      Schmidt, D. and Colomb, R., 2009. A data structure for representing multi-version texts online. International Journal of Human-Computer Studies, 67.6, 497-514.
      Schmidt 2009a
      Schmidt, D., 2009. Merging Multi-Version Texts: a Generic Solution to the Overlap Problem.” Presented at Balisage: +The Markup Conference 2009, Montréal, Canada, August 11 - 14, 2009. In Proceedings of Balisage: The Markup Conference 2009. +Balisage Series on Markup Technologies, vol. 3 (2009). doi:10.4242/BalisageVol3.Schmidt01. +
      Spencer 2004
      Spencer M., Howe and Christopher J., 2004. Collating Texts Using Progressive Multiple Alignment. Computers and the Humanities. 38/2004, 253–270.
      Stolz 2006
      Stolz, M. and Dimpel F. M., 2006. Computergestützte Kollationierung und ihre Integration in den editorischen Arbeitsfluss. 2006.
      Fork me on GitHub \ No newline at end of file diff --git a/site/twig/download.twig b/site/htdocs/download/index.html similarity index 81% rename from site/twig/download.twig rename to site/htdocs/download/index.html index 7d672ca2a..af04153c8 100644 --- a/site/twig/download.twig +++ b/site/htdocs/download/index.html @@ -1,102 +1,43 @@ -{% extends "page.twig" %} - -{% block content %} - -

      CollateX is open source software and made available for download in source as well as binary form.

      - -
      -

      Binary Distributions

      - -

      For usage instructions, please refer to the documentation. Please also - note the license terms under which CollateX is made available.

      - -
      - -
      -

      Requirements

      - -

      CollateX is written for the Java platform which is available - for all major operating systems. You need the Java Runtime Environment (JRE) version 6 or higher.

      -
      -
      -

      2. Core

      - -

      For embedding CollateX into your JVM-based application, please use Maven Central:

      - -
      <dependency>
      -    <groupId>eu.interedition</groupId>
      -    <artifactId>collatex-core</artifactId>
      -    <version>{{ version }}</version>
      +CollateX – Download

      CollateX is open source software and made available for download in source as well as binary form.

      Binary Distributions

      For usage instructions, please refer to the documentation. Please also +note the license terms under which CollateX is made available. +

      Requirements

      CollateX is written for the Java platform which is available +for all major operating systems. You need the Java Runtime Environment (JRE) version 6 or higher. +

      2. Core

      For embedding CollateX into your JVM-based application, please use Maven Central:

      <dependency>
      +  <groupId>eu.interedition</groupId>
      +  <artifactId>collatex-core</artifactId>
      +  <version>1.5</version>
       </dependency>
       <dependency>
      -    <groupId>net.sf.jung</groupId>
      -    <artifactId>jung-graph-impl</artifactId>
      -    <version>2.0.1</version>
      -</dependency>
      -
      -
      -

      3. Cocoon

      - -

      For adding CollateX' block to your Cocoon install, please use Maven Central as well:

      - -
      <dependency>
      -    <groupId>eu.interedition</groupId>
      -    <artifactId>collatex-cocoon</artifactId>
      -    <version>{{ version }}</version>
      +  <groupId>net.sf.jung</groupId>
      +  <artifactId>jung-graph-impl</artifactId>
      +  <version>2.0.1</version>
      +</dependency>
      +

      3. Cocoon

      For adding CollateX' block to your Cocoon install, please use Maven Central as well:

      <dependency>
      +  <groupId>eu.interedition</groupId>
      +  <artifactId>collatex-cocoon</artifactId>
      +  <version>1.5</version>
       </dependency>
       <dependency>
      -    <groupId>net.sf.jung</groupId>
      -    <artifactId>jung-graph-impl</artifactId>
      -    <version>2.0.1</version>
      -</dependency>
      -
      -
      - -

      Depending on how you would like to use CollateX, you can opt for three different ways to download it:

      - -
        -
      1. Should you be in need for a collation component that integrates with your software system independent of its - runtime and development environment, we offer a tool suite comprised of a - command-line tool and a self-contained RESTful web service. So whether your project is web-based or - runs in your local environment, whether you write in Ruby, Python, PHP, JavaScript or some other language: This is - probably the option to get started on.
      2. -
      3. Should you target the Java Virtual Machine as your runtime environment and would like to embed CollateX - in your application, we provide Apache-Maven-compatible artifacts of - CollateX' core for download.
      4. -
      5. Should you use Apache Cocoon as the framework for your - XML-oriented application, we offer a Cocoon block making collation functionality available - as a transformer for your XML processing pipelines.
      6. -
      -
      - -
      -

      Source Code

      - -
      -
      -

      We do not prepare source code bundles for download. Development of CollateX takes place on - GitHub where you can find - the latest version of CollateX' source as well as earlier versions.

      -
      - -
      -
      - -
      -

      License

      - -

      CollateX may be used under the terms of the GNU General Public License Version 3.

      - -
                          GNU GENERAL PUBLIC LICENSE
      +  <groupId>net.sf.jung</groupId>
      +  <artifactId>jung-graph-impl</artifactId>
      +  <version>2.0.1</version>
      +</dependency>
      +

      Depending on how you would like to use CollateX, you can opt for three different ways to download it:

      1. Should you be in need for a collation component that integrates with your software system independent of its +runtime and development environment, we offer a tool suite comprised of a +command-line tool and a self-contained RESTful web service. So whether your project is web-based or +runs in your local environment, whether you write in Ruby, Python, PHP, JavaScript or some other language: This is +probably the option to get started on.
      2. Should you target the Java Virtual Machine as your runtime environment and would like to embed CollateX +in your application, we provide Apache-Maven-compatible artifacts of +CollateX' core for download.
      3. Should you use Apache Cocoon as the framework for your +XML-oriented application, we offer a Cocoon block making collation functionality available +as a transformer for your XML processing pipelines. +

      Source Code

      We do not prepare source code bundles for download. Development of CollateX takes place on +GitHub where you can find +the latest version of CollateX' source as well as earlier versions. +

      License

      CollateX may be used under the terms of the GNU General Public License Version 3.

                          GNU GENERAL PUBLIC LICENSE
                              Version 3, 29 June 2007
       
      - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
      + Copyright (C) 2007 Free Software Foundation, Inc. 
        Everyone is permitted to copy and distribute verbatim copies
        of this license document, but changing it is not allowed.
       
      @@ -726,8 +667,8 @@ 

      License

      state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> + + Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -740,14 +681,14 @@

      License

      GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: - <program> Copyright (C) <year> <name of author> + Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. @@ -759,13 +700,12 @@

      License

      You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. +. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
      -
      -{% endblock %} \ No newline at end of file +. +
      Fork me on GitHub \ No newline at end of file diff --git a/site/favicon.ico b/site/htdocs/favicon.ico similarity index 100% rename from site/favicon.ico rename to site/htdocs/favicon.ico diff --git a/site/images/aligner.png b/site/htdocs/images/aligner.png similarity index 100% rename from site/images/aligner.png rename to site/htdocs/images/aligner.png diff --git a/site/images/analyzer.png b/site/htdocs/images/analyzer.png similarity index 100% rename from site/images/analyzer.png rename to site/htdocs/images/analyzer.png diff --git a/site/images/tokenizer.png b/site/htdocs/images/tokenizer.png similarity index 100% rename from site/images/tokenizer.png rename to site/htdocs/images/tokenizer.png diff --git a/site/images/variant-graph-collatex.png b/site/htdocs/images/variant-graph-collatex.png similarity index 100% rename from site/images/variant-graph-collatex.png rename to site/htdocs/images/variant-graph-collatex.png diff --git a/site/images/variant-graph-schmidt.png b/site/htdocs/images/variant-graph-schmidt.png similarity index 100% rename from site/images/variant-graph-schmidt.png rename to site/htdocs/images/variant-graph-schmidt.png diff --git a/site/images/variant-graph-snippet.png b/site/htdocs/images/variant-graph-snippet.png similarity index 100% rename from site/images/variant-graph-snippet.png rename to site/htdocs/images/variant-graph-snippet.png diff --git a/site/htdocs/index.html b/site/htdocs/index.html new file mode 100644 index 000000000..c1957efe9 --- /dev/null +++ b/site/htdocs/index.html @@ -0,0 +1,12 @@ +CollateX

      CollateX is software to

      1. read multiple (≥ 2) versions of a text, splitting each version into parts (tokens) to be compared,
      2. identify similarities of and differences between the versions (including moved/transposed segments) by aligning tokens, and
      3. output the alignment results in a variety of formats for further processing, for instance
      4. to support the production of a critical apparatus or the stemmatical analysis of a text's genesis.

      It resembles software used to compute differences between files (e.g. diff) +or tools for sequence alignment which are commonly used +in Bioinformatics. While CollateX shares some of the techniques and algorithms with those tools, it mainly aims for a flexible and configurable approach +to the problem of finding similarities and differences in texts, sometimes trading computational soundness or complexity for +the user's ability to influence results. +

      As such it is primarily designed for use cases in disciplines like +Philology or – more specifically – the field +of Textual Criticism where the assessment +of findings is based on interpretation and therefore can be supported by computational means but is not necessarily +computable. +

      Latest Version

      For alternative packages and license terms, please read the download section.

      Variant Graph

      Snippet of a Variant Graph produced by CollateX

      Please refer to the documentation for detailed information +about CollateX like its underlying concepts or usage instructions.

      Fork me on GitHub \ No newline at end of file diff --git a/site/index.php b/site/index.php deleted file mode 100644 index 0040e9337..000000000 --- a/site/index.php +++ /dev/null @@ -1,47 +0,0 @@ -transformMarkdown($input); -} - -\Twig_Autoloader::register(); - -$twig = new \Twig_Environment(new \Twig_Loader_Filesystem("twig")); -$twig->addGlobal("server", $_SERVER); -$twig->addGlobal("version", "1.3-SNAPSHOT"); -$twig->addFilter("markdown", new \Twig_Filter_Function('markdown', array('is_safe' => array('html')))); - -class TwigView extends \Slim\View -{ - public function render($template) - { - global $twig; - return $twig->loadTemplate($template)->render($this->data); - } -} - -// -------------------------------------------------------------------------------- Application Setup - -$app = new \Slim\Slim(array('templates.path' => "twig", 'view' => new TwigView())); - -$app->get("/", function() use ($app) { - $app->render("index.twig"); -}); -$app->get("/about/", function() use ($app) { - $app->render("project.twig", array("title" => "About the Project")); -}); -$app->get("/doc/", function() use ($app) { - $app->render("doc.twig", array("title" => "Documentation")); -}); -$app->get("/download/", function() use ($app) { - $app->render("download.twig", array("title" => "Download")); -}); -$app->run(); - -?> \ No newline at end of file diff --git a/site/package.json b/site/package.json new file mode 100644 index 000000000..e255e7824 --- /dev/null +++ b/site/package.json @@ -0,0 +1,28 @@ +{ + "name": "collatex-site", + "version": "0.0.0", + "description": "collatex.net", + "author": "Gregor Middell (http://gregor.middell.net/)", + "license": "GPLv3", + "private": true, + "devDependencies": { + "browserify": "^5.11.0", + "browserify-shim": "^3.8.0", + "collections": "^1.2.1", + "grunt-browserify": "^3.0.0", + "grunt-contrib-clean": "^0.6.0", + "grunt-contrib-connect": "^0.8.0", + "grunt-contrib-copy": "^0.7.0", + "grunt-contrib-cssmin": "^0.10.0", + 
"grunt-contrib-jade": "^0.12.0", + "grunt-contrib-less": "^0.11.4", + "grunt-contrib-uglify": "^0.5.1", + "grunt-contrib-watch": "^0.6.1", + "grunt-react": "^0.9.0", + "jit-grunt": "^0.8.0", + "jstify": "^0.7.0", + "load-grunt-config": "^0.13.1", + "marked": "^0.3.2", + "time-grunt": "^1.0.0" + } +} diff --git a/site/templates/about/index.jade b/site/templates/about/index.jade new file mode 100644 index 000000000..a07dfa545 --- /dev/null +++ b/site/templates/about/index.jade @@ -0,0 +1,103 @@ +extends ../page.jade + +block title + title CollateX – About the Project + +block content + #project-history + h2 Project History & Agenda + + p. + Development of CollateX started in 2010 as a project within the EU-funded initiative + Interedition, with the aim to create a successor of + Peter Robinson's Collate. While widely + used within the community and valued for its versatility in producing critical apparatuses for scholarly editions, + by then Collate was increasingly hard to deploy as it depends on a runtime environment whose support had been + phased out by its vendor. + + p. + CollateX was planned as a complete rewrite of Collate that was primarily addressing the architectural challenges + of its predecessor. Over the years though and with more and more participants contributing their requirements and + ideas, it developed a different agenda. On the one hand, Collate is a complete solution for producing a critical apparatus, + with features ranging from its very own algorithm for comparing versions of a text to a powerful graphical user interface that lets the user + control the collation process. On the other hand, CollateX has become a software component which can be embedded into other software or be made a part of + a software system. Its goal is the provision and advancement of current research in the field of computer-supported collation + involving natural language texts. 
To this end the developers of CollateX put an emphasis on its flexible applicability, be it in terms of + its runtime environment or be it in terms of the specific challenges CollateX has to cope with when applied to + textual traditions of varying language, encoding or publication settings. + + #project-contact + h2 Contact + + p. + You can contact the team via e-mail (info@collatex.net) or individually (see below). + Some of the team members can also be found in Interedition's IRC channel #interedition + on freenode.net. + + p. + Development of CollateX takes place on GitHub. + Bug reports and/or + other contributions are welcome.

      + + #project-team + + h2 Team + + h3 Project Leads + + ol.yui3-g + li.yui3-u-1-4 + .content. + Ronald Haentjens Dekker +
      (Huygens ING) + li.yui3-u-1-4 + .content. + Gregor Middell + + h3 Contributors + + ol.yui3-g + li.yui3-u-1-4 + .content. + Tara L. Andrews +
      (University of Leuven) + li.yui3-u-1-4 + .content. + Bram Buitendijk +
      (Huygens ING) + li.yui3-u-1-4 + .content. + Gertjan Filarski +
      (Huygens ING) + li.yui3-u-1-4 + .content. + Zeth Green +
      (University of Birmingham) + li.yui3-u-1-4 + .content. + Troy A. Griffitts +
      (University of Münster) + li.yui3-u-1-4 + .content. + Nicholas Laiacona +
      (Performant Software Solutions) + li.yui3-u-1-4 + .content. + Marcin Mielnicki +
      (Poznan Supercomputing & Networking Center) + li.yui3-u-1-4 + .content. + Leif-Jöran Olsson +
      (University of Gothenburg) + li.yui3-u-1-4 + .content. + Tomasz Parkola +
      (Poznan Supercomputing & Networking Center) + li.yui3-u-1-4 + .content. + Thorsten Vitt +
      (University of Würzburg) + li.yui3-u-1-4 + .content. + Joris van Zundert +
      (Huygens ING) \ No newline at end of file diff --git a/site/templates/doc/index.jade b/site/templates/doc/index.jade new file mode 100644 index 000000000..cba24adf8 --- /dev/null +++ b/site/templates/doc/index.jade @@ -0,0 +1,828 @@ +extends ../page.jade + +block title + title CollateX – Documentation +block content + ol.toc.float-left + + p. + To express textual variance, CollateX uses a graph-based data model + (Schmidt 2009). + On top of this model it supports several algorithms to progressively align multiple text versions. + + h2#gothenburg-model The Gothenburg Model + + p. + Developers of CollateX and + Juxta met for the first time in 2009 at a joint workshop of + COST Action 32 and + Interedition in Gothenburg. They started discussing, how the different concerns + of computer-supported collation of texts could be separated such that these two as well as similar projects would have a common + understanding of its process and could thus collaborate more efficiently on the development of collation tools + as well as their components. As a first result of this ongoing discussion, the participants identified five distinct tasks + present in any computer-supported collation workflow. + + p CollateX is designed around this separation of concerns. + + h3#tokenization Tokenization + + .figure.float-right + img(src="/images/tokenizer.png" alt="Tokenizer") + p.caption A tokenized text + + p. + While computers can compare a text's versions on a character-by-character basis, in the more common use case each + version is first split up into parts – henceforth called tokens – so the comparison can be conducted + on a more coarse-grained level where the tokens to be compared ideally correspond to the text's units which carry meaning. + This pre-processing step is called tokenization and performed by a tokenizer; it can happen on any + level of granularity, i.e. on the level of syllables, words, lines, phrases, verses, paragraphs or text nodes in a + DOM. + + p. 
+ Another service provided by tokenizers and of special value to the comparison of natural language texts relates to marked-up + text versions: As most collation software primarily compares text versions based on their textual content, embedded markup + would usually get in the way of this process and therefore needs to be discarded or “pushed in the background”, so the + collation tool does not have to be concerned about the specifics of a text's encoding. At the same time it might be valuable + to keep the markup context of every token for reference, for instance if one wanted to make use of it when comparing tokens. + + p. + The figure to the right depicts this process: The line on top shows a marked-up text, its content as the + characters "a", "b", "c" and "d" – each representing a token – and "e1", "e2" as examples of embedded markup elements. + A markup-aware tokenizer would not only split this version into 4 distinct tokens but transform it into a sequence of such tokens, + with each token referring to its markup context. + + p For now CollateX offers a simple tokenizer, mainly serving prototyping purposes by either + + ul + li splitting plain text without any embedded markup on boundaries determined by whitespace, or + li evaluating a configurable XPath 1.0 expression on an XML-encoded text version which yields a list of node values as textual tokens. + + p. + While not offering a comprehensive tokenizer itself, CollateX can be combined with any such tool that suits your specific requirements. + CollateX only expects you then to provide text versions in pre-tokenized form and define a token comparator function which – when + called with any two tokens – evaluates to a match in case those two tokens shall be treated as equal, or a mismatch in case + this should not be assumed. Formally speaking, a token comparator function defines an + equivalence relation over all tokens for a + collation. 
In processing tokens on the level of their equivalence defined by such a relation, CollateX is agnostic with regard to what constitutes + a token in your specific use case, whether it is plain text, text with a markup context or not textual at all. + + p. + Detailed information about when and how to define your own notion of a token and its corresponding equivalence relation will be given in + the following sections on CollateX' usage. Its built-in tokenizer will provide for an easy start. Later on you can opt for a more versatile tokenizer + and/or token comparator function in order to enhance the accuracy of collation results. + + h3#normalization Normalization/Regularization + + p. + With a configurable equivalence relation between tokens (defined via the aforementioned comparator function), CollateX can compare + text versions which are comprised of arbitrary tokens sequences. For a larger number of use cases though, this flexibility of defining a + fully customized comparator function is not really needed. It might suffice to normalize the tokens' textual content + such that an exact matching of the normalized content yields the desired equivalence relation. For instance, in many cases all tokens + of the text versions are normalized to their lower-case equivalent before being compared, thereby making their comparison case insensitive. Other + examples would be the removal of punctuation, the rule-based normalization of orthographic differences or the + stemming of words. + + p. + Just as with the tokenizer included in CollateX, its normalization options are rather simple. Beyond the mentioned case normalization + and the removal of punctuation and/or whitespace characters, CollateX does not include any sophisticated normalization routines. Instead its + API and supported input formats provide the user with options to plug in their own components when needed. 
+ + h3#alignment Alignment + + .figure.float-right + img(src="/images/aligner.png" alt="Alignment") + p.caption An alignment of 3 versions + + p. + After each version has been split into a sequence of tokens and each has been (optionally) normalized, the token + sequences will be aligned. The alignment process constitutes the core of CollateX' functionality and is generally conducted + by + + ol + li finding a set of matching tokens determined by the token equivalence relation, and + li aligning them via the insertion of gaps such that the token sequences of all versions line up optimally. + + p. + Looking at an example, assume that we have three versions: the first is comprised of the token sequence ["a", "b", + "c", "d"], the second reads ["a", "c", "d", "b"] and the third ["b", "c", "d"]. A collation tool may align these three + versions as depicted on the right. Each version occupies a column, matching tokens are aligned horizontally in a + row, gaps are inserted as needed during the alignment process and denoted via a hyphen. Depending from + which perspective one interprets this alignment table, one can say that the "b" in the second + row was omitted in the second version or that it has been added in the first and the third. A + similar statement can be made about the "b" in the last row, inverting the relationship of being added or + omitted. Basic edit operations (e.g. those underlying the concept + of edit distance) are thus + implicitly expressed in such an alignment and can be interpreted accordingly to make assumptions about how a + text has been changed. + + p. + The concept of sequence alignment and its tabular representation is well established in the field of Humanities + Computing; alignment tables like the one shown can be encoded with well-known apparatus encoding schemes. 
In + the parallel segmentation mode of TEI-P5's + apparatus encoding scheme, to pick just one possible representation, each row would be encoded as a segment, with empty readings + standing in for the gaps. Optionally, consecutive segments with matching readings for each version could be concatenated, so that for our example + a possible encoding capturing the alignment information reads: + + pre.prettyprint.clear. + <app> + <rdg wit="#w1 #w2">a</rdg> + <rdg wit="#w3" /> + </app> + <app> + <rdg wit="#w1 #w3">b</rdg> + <rdg wit="w2" /> + </app> + <app> + <rdg wit="#w1 #w2 #w3">cd</rdg> + </app> + <app> + <rdg wit="#w2">b</rdg> + <rdg wit="#w1 #w3" /> + </app> + + p. + Also beyond the field of Humanities Computing, the technique of sequence alignment + has many application areas; Bioinformatics for example has addressed it as a computational problem thoroughly in recent years. + In this context and as part of the larger field of pattern matching, + extensive research exists on the topic. CollateX primarily strives to make the results of this research available to + textual scholars. For this import of computational methods it has to be noted though that – generally speaking – + the assessment of findings in the Humanities is based on interpretation. While it certainly can be supported by computational means, + it is not necessarily computable. As a concrete consequence of that difference in methodology, CollateX + offers its users not one algorithm optimized by specific criteria, but a choice between + several alignment algorithms so they can select the one that + supports their expected results best, always assuming that any computational heuristic may fail in the light of + subjective judgement. + + h3#analysis-feedback Analysis/Feedback + + .figure.float-right + img(src="/images/analyzer.png" alt="Alignment Analysis") + p.caption Analyzing an alignment + + p. 
+ As the heuristic approach to the problem of sequence alignment may not yield the desired result, a further analysis + of the alignment may be necessary. Echoing the example from the above section, evidence not accessible to the + collation tool (e.g. because it was not encoded in the text versions at hand) might support the assumption of token "b" + in row 2 and 5 as not only being added/omitted but transposed/moved (see figure to the right). + While heuristic algorithms may compute transpositions as part of the alignment process, the correctness of such a computation, + given external evidence and its heuristic nature, obviously cannot be ensured. + + p. + An additional (possibly manual) analysis of the alignment result therefore may alleviate that deficiency by introducing the + possibility of a feedback cycle, in which users edit the alignment and feed their knowledge back into the alignment process for + another run delivering enhanced results. The declaration of pre-determined alignments between specific tokens and the + parametrization of optimizing algorithms along the requirements of a specific use case would be such feedback information which + influences results substantially. + + p. + CollateX offers rudimentary support for tailoring alignment results to a user's specific requirements, mainly through + its Java API. It is an area in need for improvement, particularly with regard to its + ease of use. + + h3#visualization Visualization + + p. + The final concern of any collation workflow relates to the visualization of its results. As the broad variety + of building principles, layouts and notational conventions found in printed apparatuses already suggests, representing + textual variance is a complex problem on its own. Software like Juxta + has demonstrated the potential of digital media to cope with this complexity in innovative ways. For CollateX, the visualization + of results is deemed out of scope at the moment. 
Instead it provides several output formats which facilitate + the integration with software in charge of visualizing results, be it in printed or in digital form. + + h2#variant-graphs.clear The Data Model: Variant Graphs + + p. + The tabular representation of collation results as shown in the section on sequence alignment is popular, + in the Humanities and beyond. CollateX can output results in this representation but uses a different one internally + for modelling textual variance: variant graphs. + + p. + Variant graphs are the central data structure of CollateX. Any generated output from CollateX is a derivation, providing different views on it. + The idea of a graph-oriented model for expressing textual variance has been originally developed by Desmond Schmidt (Schmidt 2008, + Schmidt 2009, Schmidt 2009a) + and proved to be particularly well suited as a data model for computer-supported collation. The following figure taken from one of his publications + illustrates it: + + .figure(style="margin-bottom: 1em") + img(src="/images/variant-graph-schmidt.png" alt="Schmidt's Variant Graph Model") + p.caption Schmidt/Colomb's Variant Graph Model + + p. + Variant graphs are in principle directed and acyclic. They are comprised at least of a + start and end node/vertex ("s" and "e" in the figure above) and can be traversed from the one to the other via labelled edges. The labels on each edge + contain content segments of compared text versions and a set of identifiers/sigils, denoting the versions which contain the respective content of an edge's label. + Thus + + ol + li common segments of multiple text versions can be merged in a variant graph, + li differing segments result in the graph branching at nodes, while + li each version can still be retrieved from the graph by traversing it along the edges labeled with the appropriate identifier of that version. 
+ + + p Following these principles, the depicted variant graph models three text versions A, B and C with the following content (markup omitted): + + table + tr + th A + td Queste è l'ultima traccia d'un antico acquedotto di sguardi, una orbita assorta e magica: + tr + th B + td Queste è l'ultima cenno d'un antico acquedotto di sguardi, la sua curva sacra e muta: + tr + th C + td Queste è l'ultima porta d'un antico acquedotto di sguardi, la sua curva sacra e solitaria: + + p. + In order to account for the separation of concerns laid out above, CollateX' implementation of Schmidt's model + adjusted the latter slightly. Instead of labelling the edges of a variant graph with two attributes – the content as well as the sigils of text versions containing it – + the edges of variant graphs in CollateX are only labeled with sigil sets. The version's content segments – in the form of partial token sequences – have + been moved to the nodes/vertices. The ending of the example graph then looks like this (with sigils being mapped from A, B, C to W1, W2, W3): + + .figure(style="margin-bottom: 1em") + img(src="/images/variant-graph-collatex.png" alt="CollateX's Variant Graph Model") + p.caption CollateX' Variant Graph Model + + p. + The above illustration does not reveal the internal structure of the graph completely insofar as the nodes' labels in this figure are a simplification. + For instance, the second node in the graph (read from left to right) is labeled "sacra", with the two versions W2 and W3 "sharing some content". More precisely + though and in line with the above definition of tokens and their equivalence relation, W2 and W3 do not "share some content". Instead they + both contain a token with the content "sacra", both deemed to be equal according to the definition of a specific token comparator function. 
In the graphical + representation of the variant graph above, this subtle distinction is not made and both tokens are just represented via their common textual content. In CollateX' + data model though, this distinction is still relevant and represented: Each node/vertex in a variant graph is not modelled via textual content (as it would be the + case when translated directly from Schmidt's model) but as a set of tokens per node originating from one or more versions, with all tokens in such a + set belonging to the same equivalence class. + + p. + The described change to Schmidt's model serves mainly two purposes: Firstly, it allows for arbitrary tokens to be compared and their commonalities, differences as + well as their sequential order to be represented in a graph structure. Secondly, CollateX' graph model is easy to transform into the tabular representation + introduced further above by ranking the graph's nodes in topological order + and aligning tokens which belong to nodes of the same rank. + + p. + It has to be noted that also in this data model, the transposition of tokens still remains a problematic case. Like in Schmidt's model, CollateX represents the + transposition of a token (or more precisely: it's content) as a link between nodes (in the case of Schmidt' model: edges) containing the transposed segment. The link is undirected + and does not form part of a variant graph's traversal scheme because transposition links would break the assertion of a variant graph's acyclic nature and + consequently the ability to sort it topologically. While the linking of nodes can represent transposed segments sufficiently, it is superimposed + on a variant graph, i.e. it does not integrate well with it. Future work in this area may yield a more concise representation. + + h2#alignment-algorithms Alignment Algorithms + + p. + CollateX strives for maximum flexibility on the users' side when comparing text versions and adjusting the results to their requirements. 
One part of this + flexibility is rooted in the support of several alignment algorithms users can switch between and experiment with. + + p. + Currently three algorithms have been implemented. They all operate on variant graph structures and belong to the group of progressive alignment algorithms. + Instead of comparing all versions at once, they + + ol + li start by comparing two versions, + li transform the result into a variant graph, then + li progressively compare another version against that graph, and + li merge the result of that comparison into the graph, + li repeating the procedure until all versions have been merged. + + p. + On the one hand, the progressive approach is advantageous because it reduces the problem of comparing an arbitrary number of versions to the + simpler comparison of a single version with a variant graph representing several ones. The disadvantage on the other hand is the occasional dependence of + the result on the order in which versions are merged into the graph (Spencer 2004). + Adding tools to deal with this dependency, e.g. by performing a phylogenetic analysis + to determine an optimal order, is planned for a future version. + + h3#dekker-algorithm Dekker + + p. + The most mature algorithm offered by CollateX thus far has been developed by Ronald Haentjens Dekker (Dekker 2011). + It aligns an arbitary number of text versions, optimizes the local alignment of partial tokens sequences (phrases) and detects transpositions. + + h3#needleman-wunsch-algorithm Needleman-Wunsch + + p. + The Needleman-Wunsch algorithm + (Needleman 1970) is a well-known global alignment + algorithm broadly applied in Bioinformatics and the social sciences. Based on dynamic programming, + this algorithm searches for an optimal alignment of an arbitrary number of versions by consulting a scoring function which penalizes the insertion of gaps. It does not take + the possibility of transposed segments into account though. + + p. 
+ The scoring function in CollateX' implementation cannot be freely configured at the moment; the gap penalty is assumed to be constant and equals + the score of a match. + + h3#medite-algorithm MEDITE + + p. + Only recently added to the code base, this algorithm takes its name from a pairwise + alignment algorithm developed by Julien Bourdaillet and Jean-Gabriel Ganascia (Bourdaillet 2007). + It is based on suffix trees for the search of maximal unique matches between + text versions and the A* algorithm for optimizing the alignment. + Like Dekker's algorithm, it takes transpositions into account while doing so. + + p. + CollateX' implementation of this algorithm is in an experimental stage. While it already delivers promising results, it has not been + fully optimized and – above all – not been extensively tested. In the case of issues with this algorithm, + the CollateX team would appreciate feedback. Alternatively users can download + the original version of MEDITE written by the algorithm's + authors. + + h2#input Input + + p. + This section describes the different input formats CollateX supports. Besides the contents of a text's versions to be compared, the + input may also include parameters, i.e. the alignment algorithm to be used. + + h3#text-input Plain Text + + p. + Like any collation tool, CollateX can process text versions provided as plain text. As CollateX is written for the Java Virtual Machine, + internally the comparison of plain text is based on the JVM's string type and thus on 16-bit + Unicode characters. + + p. + Depending on the way CollateX is used, plain text versions can also be provided in other + encodings supported by the Java Platform and will be converted to Unicode before comparison. The command line interface + is one such interface which supports character set conversions. + + p. 
+ Plain text versions are always subject to tokenization and optional normalization of the resulting token sequence before they will be compared with each other. + + h3#json-input JSON + + p. + As a more flexible format, CollateX supports input in JavaScript Object Notation (JSON). A set of text versions + to be compared can be JSON encoded as follows: + + pre.prettyprint. + { + "witnesses" : [ + { + "id" : "A", + "content" : "A black cat in a black basket" + }, + { + "id" : "B", + "content" : "A black cat in a black basket" + }, + { + "id" : "C", + "content" : "A striped cat in a black basket" + }, + { + "id" : "D", + "content" : "A striped cat in a white basket" + } + ] + } + + p. + JSON input always consists of a single root object wrapping input data. The root object has one required property containing the versions to be compared which + (for historical reasons) is named witnesses. The value of this property is an array (a list) of objects in turn, with each object representing a version. + The order of the array elements determines the order in which they are processed by an alignment algorithm, i.e. in which versions are merged into a variant graph. + + p. + Each object in the witnesses array must have a unique identifier in the required property id. This identifier will be used in the + output to reference a particular version. Besides the identifier each object must describe the content of the version. + The content can either be specified as a string property named content as shown above. In this case the version is treated like a plain text version + with tokenization and normalization taking place before the alignment. + + p Another option is to provide the content of versions in tokenized (and optionally normalized) form: + + pre.prettyprint. 
+ { + "witnesses" : [ + { + "id" : "A", + "tokens" : [ + { "t" : "A", "ref" : 123 }, + { "t" : "black" , "adj" : true }, + { "t" : "cat", "id" : "xyz" } + ] + }, + { + "id" : "B", + "tokens" : [ + { "t" : "A" }, + { "t" : "white" , "adj" : true }, + { "t" : "kitten.", "n" : "cat" } + ] + } + ] + } + + p. + Instead of providing a property content for a version, one can provide a sequence of tokens via the property tokens. + The version's property value must be a list with one object per token. Each token object in turn must at least contain a property t, which defines + its content. Accordingly, in the example above, version "A" has the tokens ["A", "black", "cat"] whereas version "B" is comprised of the token sequence + ["A", "white", "kitten"]. + + p. + Optionally a normalized reading of the token can be provided in the property n. + Again, in the example above, that means the last token of version "B" is normalized from the reading "kitten" to the reading "cat", thus facilitating + a match of "kitten" with the last token of version "A". + + p. + Apart from these 2 defined properties t and n, token objects can contain an arbitrary number of additional properties. + Additional properties will not be interpreted by CollateX but just be passed through, reappearing in + the output unchanged. Properties like ref, adj or id in the example would be such + additional properties of a token object. Users of the JSON input can make use of this pass-through mode e.g. in order to uniquely identify aligned tokens + independently of their (possibly non-unique) content. + + p. + When using interfaces like the HTTP service of CollateX, JSON encoded input can contain optional parameters + controlling the collation process. You can set the alignment algorithm for instance by providing a property algorithm in the root object with + one of the values "needleman-wunsch", "medite" or "dekker" (the default): + + pre.prettyprint. 
+ { + "witnesses": [ … ], + "algorithm": "needleman-wunsch" + } + + p There is also limited support for customizing the token comparator function. Via JSON, two functions are available: + + pre.prettyprint. + { + "witnesses": [ … ], + "algorithm": "…", + "tokenComparator": { type: "equality" } + } + + p. + The default function, which can also be explicitly configured as shown above, tests for exact equality of the normalized token content. An alternative is the use of approximate matching via + a Levenshtein/edit distance threshold for matching tokens: + + pre.prettyprint. + { + "witnesses": [ … ], + "tokenComparator": { + "type": "levenshtein", + "distance": 2 + } + } + + p. + For approximate matching, the type property of the token object describing the token comparator function must be assigned the value "levenshtein". + The optional property distance defines the maximum edit distance between two normalized token strings which is still considered to be a match. + An edit distance of 1 is the default. + + h3#xml-input XML + + p. + XML-encoded text versions are also supported by CollateX, though the ability to preserve the markup context during the collation process is + fairly limited at the moment when not using the Java API. Out of the box, you can compare XML documents either via + the command line interface or collate them as part of a processing pipeline in Apache Cocoon. + + p. + On the command line, CollateX accepts XML documents with arbitrary tag sets. All it needs is an XPath 1.0 + expression that evaluates to a node set for each document. The + text content of each node in such a set equals a token. For example, the XPath expression "//w" would result in a text version for each XML document with the sequence + of text segments enclosed in <w/> elements as tokens. + + p When using CollateX' Apache Cocoon component, the collation will be performed on one or more XML fragments in an input document which adhere to the following schema: + + pre.prettyprint. 
+ <collation xmlns="http://interedition.eu/collatex/ns/1.0" joined="true" editDistance="1" algorithm="medite" format="tei"> + <witness>Almost every aspect of what scholarly editors do may be changed</witness> + <witness>Hardly any aspect of what stupid editors do in the privacy of their own home may be changed again and again</witness> + <witness>very many aspects of what scholarly editors do in the livingrooms of their own home may not be changed</witness> + </collation> + + p. + CollateX will process any XML fragment enclosed in a <collation/> element from its namespace. In order to configure the collation, this element can have a number of + optional attributes: + + table + tr + th algorithm + td the collation algorithm to use: "dekker" (default), "needleman-wunsch" or "medite" + tr + th format + td the desired output format: "table" for an alignment table in CollateX' custom schema (default), or "tei" for output in TEI P5's parallel segmentation format + tr + th editDistance + td the edit distance threshold for optional fuzzy matching of tokens; the default is exact matching + tr + th joined + td Whether consecutive alignments of tokens should be joined/merged in the output, or whether each alignment of tokens should occupy a separate cell/segment; the latter being the default. + + p. + Each text version to be collated is given as an XML fragment enclosed in a <witness/> element. CollateX will compare all versions contained in a + <collation/> element against each other. Markup within <witness/> elements is ignored; only the text content is considered and tokenized/normalized + before collation as if it were originating from a plain text source. + + h2#output Output + + p CollateX supports several formats to represent collation results. + + h3#json-output JSON + + p. + In conjunction with JSON being supported as an input format, collation results can be output in JSON as well. 
+ The schema of such output resembles matrices commonly encountered in sequence alignment representations, + and looks as follows for the second example given in the section on JSON input (indentation/whitespace added for easier readability): + + pre.prettyprint. + { + "witnesses":["A","B"], + "table":[ + [ [ {"t":"A","ref":123 } ], [ {"t":"A" } ] ], + [ [ {"t":"black","adj":true } ], [ {"t":"white","adj":true } ] ], + [ [ {"t":"cat","id":"xyz" } ], [ {"t":"kitten.","n":"cat" } ] ] + ] + } + + p. + The root object always contains 2 properties. The value of property witnesses is a list of all compared text versions, represented by their sigils/identifiers. + The witness list's order is significant insofar as the contents of the second property table – which contains the actual alignment – is ordered accordingly. + + p The tabular alignment data is represented as a list of lists, with + + ul + li the parent list containing one entry per aligned segment, and + li each segment/ child list containing sets of tokens from each compared version. + + p Because aligned segments can span multiple tokens, the aligned token sets of each text version are also represented as lists, so we effectively have 3 levels: + + ol + li On the top-level, each list entry represents a set of aligned segments from different text versions. + li On the intermediate level, each list entry represents a set of tokens from a particular text version. + li On the lowest level, each list entry is an object representing a single token. + + p. + Our example output thus is comprised of 3 segments, each containing exactly one token per text version. The order in which the token sets are listed equals the + order of the text versions as listed in the property witnesses. Thus "A" from text version A is aligned with "A" from text version B, + "black" from text version A is aligned with "white" from text version B, and so on. + + p Additions and omissions are expressed via empty token sets, e.g. 
an alignment of + + pre.prettyprint. + { + "witnesses":["X","Y"], + "table":[ + [ [ {"t":"A" } ], [ {"t":"A" } ] ], + [ [ {"t":"brown" } ], [] ], + [ [ {"t":"dog" } ], [ {"t":"dog" } ] ] + ] + } + + p could be interpreted as "brown" being added in version "X" or omitted in version "Y". + + p. + Please note that transpositions are not represented explicitly in tabular output formats like this one. While the detection of transpositions affects the alignment, + the links between tokens which are assumed to be transposed by the collation algorithm are not given in this output format. Support for transpositions in tabular + representations of collation results will be added in a future version of CollateX. + + h3#tei-p5-output TEI P5 + + p The tabular representation of alignments described in the previous section can be encoded in a number of ways. + + pre.prettyprint. + <?xml version='1.0' encoding='UTF-8'?> + <cx:apparatus + xmlns:cx="http://interedition.eu/collatex/ns/1.0" + xmlns="http://www.tei-c.org/ns/1.0"> + A + <app> + <rdg wit="A">black</rdg> + <rdg wit="B">white</rdg> + </app> + <app> + <rdg wit="A">cat</rdg> + <rdg wit="B">kitten.</rdg> + </app> + </cx:apparatus> + + h3#xml-output XML + + pre.prettyprint. + <alignment xmlns="http://interedition.eu/collatex/ns/1.0"> + <row> + <cell sigil="w1">Auch hier </cell> + <cell sigil="w2">Ich </cell> + <cell sigil="w3">Ich </cell> + </row> + <row> + <cell sigil="w1">hab </cell> + <cell sigil="w2">hab </cell> + <cell sigil="w3">hab </cell> + </row> + <row> + <cell sigil="w1">ich </cell> + <cell sigil="w2">auch hier </cell> + <cell sigil="w3">auch hier </cell> + </row> + <row> + <cell sigil="w1">wieder ein Plätzchen</cell> + <cell sigil="w2">wieder ein Pläzchen</cell> + <cell sigil="w3">wieder ein Pläzchen</cell> + </row> + </alignment> + + h3#graphml-output GraphML + + p. 
+ The GraphML-formatted output of a variant graph is suitable for import of (possibly larger) graphs in tools + for complex graph analysis and visualization, e. g. Gephi. + For an example GraphML document, take a look at sample output from the + web console. + + h3#graphviz-dot-output GraphViz DOT + + pre.prettyprint. + digraph G { + v301 [label = ""]; + v303 [label = "A"]; + v304 [label = "black"]; + v306 [label = "white"]; + v305 [label = "cat"]; + v302 [label = ""]; + v301 -> v303 [label = "A, B"]; + v303 -> v304 [label = "A"]; + v303 -> v306 [label = "B"]; + v304 -> v305 [label = "A"]; + v306 -> v305 [label = "B"]; + v305 -> v302 [label = "A, B"]; + } + + h2#cli The Command Line Interface + + pre.prettyprint.lang-xml. + usage: collatex [<options>] + (<json_input> | <witness_1> <witness_2> [[<witness_3>] ...]) + -a,--algorithm <arg> progressive alignment algorithm to use + 'dekker' (default), 'medite', + 'needleman-wunsch' + -f,--format <arg> result/output format: 'json', 'csv', 'dot', + 'graphml', 'tei' + -h,--help print usage instructions (which your are + looking at right now) + -ie,--input-encoding <arg> charset to use for decoding non-XML + witnesses; default: UTF-8 + -o,--output <arg> output file; '-' for standard output + (default) + -oe,--output-encoding <arg> charset to use for encoding the output; + default: UTF-8 + -s,--script <arg> ECMA/JavaScript resource with functions to be + plugged into the alignment algorithm + -t,--tokenized consecutive matches of tokens will *not* be + joined to segments + -xml,--xml-mode witnesses are treated as XML documents + -xp,--xpath <arg> XPath 1.0 expression evaluating to tokens of + XML witnesses; default: '//text()' + + h3#cli-js-callbacks ECMA/JavaScript Callbacks + + h2#cocoon Apache Cocoon Integration + + pre.prettyprint. + <map:components> + ... + <map:transformers> + ... + <map:transformer name="collatex" src="eu.interedition.collatex.cocoon.CollateXTransformer" /> + ... + </map:transformers> + ... 
+ </map:components> + + pre.prettyprint. + <map:pipelines> + ... + <map:pipeline> + <map:match pattern="collatex-test"> + <map:generate src="resource/internal/collation.xml" /> + <map:transform type="collatex"/> + <map:serialize type="xml"/> + </map:match> + </map:pipeline> + ... + </map:pipelines> + + h2#rest-service The RESTful Web Service + + p. + This page documents the + Application Programming Interface (API) + of CollateX via which you can provide textual versions (“witnesses”) to be compared and get the collation result back in a number of formats. + + p. + The CollateX service is callable via + HTTP POST requests to + ${cp}/collate. + + p. + It expects input formatted in JavaScript Object Notation (JSON) as the request body; + accordingly the content type of the HTTP request must be set to application/json by the client. + + p. + The output format of the collator, contained in the response to an HTTP POST request, can be chosen via + an Accept HTTP header in the request. The following output formats are supported: + + table + tr + th application/json + td (per default) the tabular alignment of the witnesses' tokens, represented in JSON + tr + th application/tei+xml + td the collation result as a list of critical apparatus entries, encoded in TEI P5 parallel segmentation mode + tr + th application/graphml+xml + td the variant graph, represented in GraphML format + tr + th text/plain + td the variant graph, represented in Graphviz' DOT Language + tr + th image/svg+xml + td the variant graph, rendered as an SVG vector graphics document + + p For further examples, take a look at sample output from the web console. + + #js-apidocs + h2 The HTTP-based JavaScript API + + p Enables the use of CollateX' RESTful API via JavaScript … Based on YUI framework … + + h3 Requirements + + p Add dependencies to header … YUI library plus CollateX module … + + pre.prettyprint. 
+ <script type="text/javascript" src="http://yui.yahooapis.com/3.8.1/build/yui/yui-min.js"></script> + <script type="text/javascript" src="http://collatex.net/demo/collatex.js"></script> + + p. + Substitute URL prefix [ROOT] with the base URL of your installation, e.g. + this one for the installation you are currently looking at … + + p. + YUI module interedition-collate available now … supports cross-domain AJAX requests via + CORS … + + h3 Sample usage + + pre.prettyprint. + YUI().use("node", "collatex", function(Y) { + new Y.CollateX({ serviceUrl: "http://collatex.net/demo/collate" }).toTable([{ + id: "A", + content: "Hello World" + }, { + id: "B", + tokens: [ + { "t": "Hallo", "n": "hello" }, + { "t": "Welt", "n": "world" } + ] + }], Y.one("#result")); + }); + + p. + … toTable() takes witness array as first parameter; second parameter is DOM node which serves as container for + the resulting HTML alignment table … + + p. + … generic collate(witnesses, callback) as well as methods for other formats available: + toSVG(), toTEI(), toGraphViz() … + + p. + … configuration of a collator instance via methods like withDekker(), withFuzzyMatching(maxDistance) … + + h2#javadoc API Documentation (Javadoc) + + p here + + h2#bibliography Resources/ Bibliography + + dl.bibliography + dt#bib-bourdaillet-2007 Bourdaillet 2007 + dd Bourdaillet J. and Ganascia J.-G., 2007. Practical block sequence alignment with moves. LATA 2007 - International Conference on Language and Automata Theory and Applications, 3/2007. + + dt#bib-collate-2000 Collate + dd Robinson, P., 2000. Collate. + + dt#bib-dekker-2011 Dekker 2011 + dd Dekker, R. H. and Middell, G., 2011. Computer-Supported Collation with CollateX: Managing Textual Variance in an Environment with Varying Requirements. Supporting Digital Humanities 2011. University of Copenhagen, Denmark. 17-18 November 2011. + + dt#bib-juxta-2013 Juxta 2013 + dd Performant Software Solutions LLC, 2013. Juxta. 
+ + dt#bib-needleman-1970 Needleman 1970 + dd Needleman, Saul B. and Wunsch, Christian D., 1970. A general method applicable to the search for similarities in the amino acid sequence of two proteins. Journal of Molecular Biology 48 (3), 443–53. + + dt#bib-nmerge-2012 NMerge 2012 + dd Schmidt, D., 2012. NMerge. The nmerge Java library/commandline tool for making multi-version documents. + + dt#bib-schmidt-2008 Schmidt 2008 + dd Schmidt, D., 2008. What's a Multi-Version Document. Multi-Version Documents Blog. + + dt#bib-schmidt-2009 Schmidt 2009 + dd Schmidt, D. and Colomb, R., 2009. A data structure for representing multi-version texts online. International Journal of Human-Computer Studies, 67.6, 497-514. + + dt#bib-schmidt-2009a Schmidt 2009a + dd. + Schmidt, D., 2009. Merging Multi-Version Texts: a Generic Solution to the Overlap Problem.” Presented at Balisage: + The Markup Conference 2009, Montréal, Canada, August 11 - 14, 2009. In Proceedings of Balisage: The Markup Conference 2009. + Balisage Series on Markup Technologies, vol. 3 (2009). doi:10.4242/BalisageVol3.Schmidt01. + + dt#bib-spencer-2004 Spencer 2004 + dd Spencer M., Howe and Christopher J., 2004. Collating Texts Using Progressive Multiple Alignment. Computers and the Humanities. 38/2004, 253–270. + + dt#bib-stolz-2006 Stolz 2006 + dd Stolz, M. and Dimpel F. M., 2006. Computergestützte Kollationierung und ihre Integration in den editorischen Arbeitsfluss. 2006. \ No newline at end of file diff --git a/site/templates/download/index.jade b/site/templates/download/index.jade new file mode 100644 index 000000000..5448ed76d --- /dev/null +++ b/site/templates/download/index.jade @@ -0,0 +1,114 @@ +extends ../page.jade + +block title + title CollateX – Download + +block content + p CollateX is open source software and made available for download in source as well as binary form. + + #download-binary + + h2 Binary Distributions + + p. + For usage instructions, please refer to the documentation. 
Please also + note the license terms under which CollateX is made available. + + .yui3-g + .yui3-u-1-2 + .content.download-option + + h3 1. Tools + + p.download-link » collatex-tools-#{version}.zip + p.download-link » collatex-tools-#{version}.tar.bz2 + + .yui3-u-1-2 + .content + + h3 Requirements + + p. + CollateX is written for the Java platform which is available + for all major operating systems. You need the Java Runtime Environment (JRE) version 6 or higher. + + .yui3-u-1-2 + .content.download-option + + h3 2. Core + + p For embedding CollateX into your JVM-based application, please use Maven Central: + + pre.prettyprint. + <dependency> + <groupId>eu.interedition</groupId> + <artifactId>collatex-core</artifactId> + <version>#{version}</version> + </dependency> + <dependency> + <groupId>net.sf.jung</groupId> + <artifactId>jung-graph-impl</artifactId> + <version>2.0.1</version> + </dependency> + + .yui3-u-1-2 + .content.download-option + + h3 3. Cocoon + + p For adding CollateX' block to your Cocoon install, please use Maven Central as well: + + pre.prettyprint. + <dependency> + <groupId>eu.interedition</groupId> + <artifactId>collatex-cocoon</artifactId> + <version>#{version}</version> + </dependency> + <dependency> + <groupId>net.sf.jung</groupId> + <artifactId>jung-graph-impl</artifactId> + <version>2.0.1</version> + </dependency> + + p Depending on how you would like to use CollateX, you can opt for three different ways to download it: + + ol + li. + Should you be in need for a collation component that integrates with your software system independent of its + runtime and development environment, we offer a tool suite comprised of a + command-line tool and a self-contained RESTful web service. So whether your project is web-based or + runs in your local environment, whether you write in Ruby, Python, PHP, JavaScript or some other language: This is + probably the option to get started on. + li. 
+ Should you target the Java Virtual Machine as your runtime environment and would like to embed CollateX + in your application, we provide Apache-Maven-compatible artifacts of + CollateX' core for download. + li. + Should you use Apache Cocoon as the framework for your + XML-oriented application, we offer a Cocoon block making collation functionality available + as a transformer for your XML processing pipelines. + + #download-source + + h2 Source Code + + .yui3-g + .yui3-u-1-2 + .content + p. + We do not prepare source code bundles for download. Development of CollateX takes place on + GitHub where you can find + the latest version of CollateX' source as well as earlier versions. + + .yui3-u-1-2 + .content.download-option + p.download-link » github.com/interedition/collatex/ + + #license + + h2 License + + p CollateX may be used under the terms of the GNU General Public License Version 3. + + pre.content + include ../../../LICENSE.txt diff --git a/site/templates/index.jade b/site/templates/index.jade new file mode 100644 index 000000000..2d4a2b67b --- /dev/null +++ b/site/templates/index.jade @@ -0,0 +1,46 @@ +extends ./page.jade + +block title + title CollateX +block content + .yui3-g + .yui3-u-1-2 + .content + .teaser + p CollateX is a software to + + ol + li read multiple (≥ 2) versions of a text, splitting each version into parts (tokens) to be compared, + li identify similarities of and differences between the versions (including moved/transposed segments) by aligning tokens, and + li output the alignment results in a variety of formats for further processing, for instance + li to support the production of a critical apparatus or the stemmatical analysis of a text's genesis. + + p. + It resembles software used to compute differences between files (e.g. diff) + or tools for sequence alignment which are commonly used + in Bioinformatics. 
While CollateX shares some of the techniques and algorithms with those tools, it mainly aims for a flexible and configurable approach + to the problem of finding similarities and differences in texts, sometimes trading computational soundness or complexity for + the user's ability to influence results. + + p. + As such it is primarily designed for use cases in disciplines like + Philology or – more specifically – the field + of Textual Criticism where the assessment + of findings is based on interpretation and therefore can be supported by computational means but is not necessarily + computable. + + .yui3-u-1-2 + .content + .download-option + h3 Latest Version + p.download-link » collatex-tools-#{version}.zip + p.download-link » collatex-tools-#{version}.tar.bz2 + p For alternative packages and license terms, please read the download section. + + .figure(style="margin: 1em 0") + img(src="/images/variant-graph-snippet.png" alt="Variant Graph") + p.caption Snippet of a Variant Graph produced by CollateX + + p. + Please refer to the documentation for detailed information + about CollateX like its underlying concepts or usage instructions. 
\ No newline at end of file diff --git a/site/templates/page.jade b/site/templates/page.jade new file mode 100644 index 000000000..102d3cc91 --- /dev/null +++ b/site/templates/page.jade @@ -0,0 +1,30 @@ +doctype html +html(lang="en") + head + meta(http-equiv="content-type" content="text/html;charset=utf-8") + block title + link(rel="stylesheet", type="text/css", href="http://yui.yahooapis.com/combo?3.8.1/build/cssfonts/cssfonts-min.css&3.8.1/build/cssgrids/cssgrids-min.css&3.8.1/build/cssreset/cssreset-min.css&3.8.1/build/cssbase/cssbase-min.css") + link(rel="stylesheet" type="text/css" href="/collatex.css") + body(class="yui3-skin-sam") + #header + h1 CollateX – Software for Collating Textual Sources + p.subtitle “In a language, in the system of language, there are only differences.” – Jacques Derrida + + #menu + ol + li Home + li Download + li Documentation + li Demo + li About + #content + block content + + #footer Copyright © 2010-2013 The Interedition Development Group. All rights reserved. 
+ + a(href="https://github.com/interedition/collatex") + img(style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_gray_6d6d6d.png" alt="Fork me on GitHub") + + script(type="text/javascript" src="http://yui.yahooapis.com/combo?3.8.1/build/yui-base/yui-base-min.js&3.8.1/build/oop/oop-min.js&3.8.1/build/event-custom-base/event-custom-base-min.js&3.8.1/build/features/features-min.js&3.8.1/build/dom-core/dom-core-min.js&3.8.1/build/dom-base/dom-base-min.js&3.8.1/build/selector-native/selector-native-min.js&3.8.1/build/selector/selector-min.js&3.8.1/build/node-core/node-core-min.js&3.8.1/build/node-base/node-base-min.js&3.8.1/build/event-base/event-base-min.js&3.8.1/build/event-delegate/event-delegate-min.js&3.8.1/build/node-event-delegate/node-event-delegate-min.js&3.8.1/build/pluginhost-base/pluginhost-base-min.js&3.8.1/build/pluginhost-config/pluginhost-config-min.js&3.8.1/build/node-pluginhost/node-pluginhost-min.js&3.8.1/build/dom-style/dom-style-min.js&3.8.1/build/dom-screen/dom-screen-min.js&3.8.1/build/node-screen/node-screen-min.js&3.8.1/build/node-style/node-style-min.js&3.8.1/build/event-custom-complex/event-custom-complex-min.js&3.8.1/build/event-synthetic/event-synthetic-min.js") + script(type="text/javascript" src="http://yui.yahooapis.com/combo?3.8.1/build/event-mousewheel/event-mousewheel-min.js&3.8.1/build/event-mouseenter/event-mouseenter-min.js&3.8.1/build/event-key/event-key-min.js&3.8.1/build/event-focus/event-focus-min.js&3.8.1/build/event-resize/event-resize-min.js&3.8.1/build/event-hover/event-hover-min.js&3.8.1/build/event-outside/event-outside-min.js&3.8.1/build/event-touch/event-touch-min.js&3.8.1/build/event-move/event-move-min.js&3.8.1/build/event-flick/event-flick-min.js&3.8.1/build/event-valuechange/event-valuechange-min.js&3.8.1/build/event-tap/event-tap-min.js") + script(type="text/javascript" src="/collatex.js") diff --git a/site/twig/doc.twig b/site/twig/doc.twig deleted file 
mode 100644 index aaaaeef52..000000000 --- a/site/twig/doc.twig +++ /dev/null @@ -1,767 +0,0 @@ -{% extends "page.twig" %} - -{% block content %} - -
        - -

        To express textual variance, CollateX uses a graph-based data model - (Schmidt 2009). - On top of this model it supports several algorithms to progressively align multiple text versions.

        - - -

        The Gothenburg Model

        - -

        Developers of CollateX and - Juxta met for the first time in 2009 at a joint workshop of - COST Action 32 and - Interedition in Gothenburg. They started discussing, how the different concerns - of computer-supported collation of texts could be separated such that these two as well as similar projects would have a common - understanding of its process and could thus collaborate more efficiently on the development of collation tools - as well as their components. As a first result of this ongoing discussion, the participants identified five distinct tasks - present in any computer-supported collation workflow.

        - -

        CollateX is designed around this separation of concerns.

        - -

        Tokenization

        - -
        - Tokenizer -

        A tokenized text

        -
        - -

        While computers can compare a text's versions on a character-by-character basis, in the more common use case each - version is first split up into parts – henceforth called tokens – so the comparison can be conducted - on a more coarse-grained level where the tokens to be compared ideally correspond to the text's units which carry meaning. - This pre-processing step is called tokenization and performed by a tokenizer; it can happen on any - level of granularity, i.e. on the level of syllables, words, lines, phrases, verses, paragraphs or text nodes in a - DOM.

        - -

        Another service provided by tokenizers and of special value to the comparison of natural language texts relates to marked-up - text versions: As most collation software primarily compares text versions based on their textual content, embedded markup - would usually get in the way of this process and therefore needs to be discarded or “pushed in the background”, so the - collation tool does not have to be concerned about the specifics of a text's encoding. At the same time it might be valuable - to keep the markup context of every token for reference, for instance if one wanted to make use of it when comparing tokens.

        - -

        The figure to the right depicts this process: The line on top shows a marked-up text, its content as the - characters "a", "b", "c" and "d" – each representing a token – and "e1", "e2" as examples of embedded markup elements. - A markup-aware tokenizer would not only split this version into 4 distinct tokens but transform it into a sequence of such tokens, - with each token referring to its markup context.

        - -

        For now CollateX offers a simple tokenizer, mainly serving prototyping purposes by either

        - -
          -
        • splitting plain text without any embedded markup on boundaries determined by - whitespace, or
        • -
        • evaluating a configurable XPath 1.0 expression on an XML-encoded - text version which yields a list of node values as textual tokens.
        • -
        - -

        While not offering a comprehensive tokenizer itself, CollateX can be combined with any such tool that suits your specific requirements. - CollateX only expects you then to provide text versions in pre-tokenized form and define a token comparator function which – when - called with any two tokens – evaluates to a match in case those two tokens shall be treated as equal, or a mismatch in case - this should not be assumed. Formally speaking, a token comparator function defines an - equivalence relation over all tokens for a - collation. In processing tokens on the level of their equivalence defined by such a relation, CollateX is agnostic with regard to what constitutes - a token in your specific use case, whether it is plain text, text with a markup context or not textual at all.

        - -

        Detailed information about when and how to define your own notion of a token and its corresponding equivalence relation will be given in - the following sections on CollateX' usage. Its built-in tokenizer will provide for an easy start. Later on you can opt for a more versatile tokenizer - and/or token comparator function in order to enhance the accuracy of collation results.

        - -

        Normalization/Regularization

        - -

        With a configurable equivalence relation between tokens (defined via the aforementioned comparator function), CollateX can compare - text versions which are comprised of arbitrary tokens sequences. For a larger number of use cases though, this flexibility of defining a - fully customized comparator function is not really needed. It might suffice to normalize the tokens' textual content - such that an exact matching of the normalized content yields the desired equivalence relation. For instance, in many cases all tokens - of the text versions are normalized to their lower-case equivalent before being compared, thereby making their comparison case insensitive. Other - examples would be the removal of punctuation, the rule-based normalization of orthographic differences or the - stemming of words.

        - -

        Just as with the tokenizer included in CollateX, its normalization options are rather simple. Beyond the mentioned case normalization - and the removal of punctuation and/or whitespace characters, CollateX does not include any sophisticated normalization routines. Instead its - API and supported input formats provide the user with options to plug in their own components when needed.

        - -

        Alignment

        - -
        - Alignment -

        An alignment of 3 versions

        -
        - -

        After each version has been split into a sequence of tokens and each has been (optionally) normalized, the token - sequences will be aligned. The alignment process constitutes the core of CollateX' functionality and is generally conducted - by

          -
        1. finding a set of matching tokens determined by the token equivalence relation, and
        2. -
        3. aligning them via the insertion of gaps such that the token sequences of all versions line up optimally.
        4. -

        - -

        Looking at an example, assume that we have three versions: the first is comprised of the token sequence ["a", "b", - "c", "d"], the second reads ["a", "c", "d", "b"] and the third ["b", "c", "d"]. A collation tool may align these three - versions as depicted on the right. Each version occupies a column, matching tokens are aligned horizontally in a - row, gaps are inserted as needed during the alignment process and denoted via a hyphen. Depending from - which perspective one interprets this alignment table, one can say that the "b" in the second - row was omitted in the second version or that it has been added in the first and the third. A - similar statement can be made about the "b" in the last row, inverting the relationship of being added or - omitted. Basic edit operations (e.g. those underlying the concept - of edit distance) are thus - implicitly expressed in such an alignment and can be interpreted accordingly to make assumptions about how a - text has been changed. -

        - -

        The concept of sequence alignment and its tabular representation is well established in the field of Humanities - Computing; alignment tables like the one shown can be encoded with well-known apparatus encoding schemes. In - the parallel segmentation mode of TEI-P5's - apparatus encoding scheme, to pick just one possible representation, each row would be encoded as a segment, with empty readings - standing in for the gaps. Optionally, consecutive segments with matching readings for each version could be concatenated, so that for our example - a possible encoding capturing the alignment information reads: -

        - -
        <app>
        -  <rdg wit="#w1 #w2">a</rdg>
        -  <rdg wit="#w3" />
        -</app>
        -<app>
        -  <rdg wit="#w1 #w3">b</rdg>
        -  <rdg wit="w2" />
        -</app>
        -<app>
        -  <rdg wit="#w1 #w2 #w3">cd</rdg>
        -</app>
        -<app>
        -  <rdg wit="#w2">b</rdg>
        -  <rdg wit="#w1 #w3" />
        -</app>
        - -

        Also beyond the field of Humanities Computing, the technique of sequence alignment - has many application areas; Bioinformatics for example has addressed it as a computational problem thoroughly in recent years. - In this context and as part of the larger field of pattern matching, - extensive research exists on the topic. CollateX primarily strives to make the results of this research available to - textual scholars. For this import of computational methods it has to be noted though that – generally speaking – - the assessment of findings in the Humanities is based on interpretation. While it certainly can be supported by computational means, - it is not necessarily computable. As a concrete consequence of that difference in methodology, CollateX - offers its users not one algorithm optimized by specific criteria, but a choice between - several alignment algorithms so they can select the one that - supports their expected results best, always assuming that any computational heuristic may fail in the light of - subjective judgement. -

        - -

        Analysis/Feedback

        - -
        - Alignment Analysis -

        Analyzing an alignment

        -
        - -

        As the heuristic approach to the problem of sequence alignment may not yield the desired result, a further analysis - of the alignment may be necessary. Echoing the example from the above section, evidence not accessible to the - collation tool (e.g. because it was not encoded in the text versions at hand) might support the assumption of token "b" - in row 2 and 5 as not only being added/omitted but transposed/moved (see figure to the right). - While heuristic algorithms may compute transpositions as part of the alignment process, the correctness of such a computation, - given external evidence and its heuristic nature, obviously cannot be ensured.

        - -

        An additional (possibly manual) analysis of the alignment result therefore may alleviate that deficiency by introducing the - possibility of a feedback cycle, in which users edit the alignment and feed their knowledge back into the alignment process for - another run delivering enhanced results. The declaration of pre-determined alignments between specific tokens and the - parametrization of optimizing algorithms along the requirements of a specific use case would be such feedback information which - influences results substantially.

        - -

        CollateX offers rudimentary support for tailoring alignment results to a user's specific requirements, mainly through - its Java API. It is an area in need for improvement, particularly with regard to its - ease of use.

        - -

        Visualization

        - -

        The final concern of any collation workflow relates to the visualization of its results. As the broad variety - of building principles, layouts and notational conventions found in printed apparatuses already suggests, representing - textual variance is a complex problem on its own. Software like Juxta - has demonstrated the potential of digital media to cope with this complexity in innovative ways. For CollateX, the visualization - of results is deemed out of scope at the moment. Instead it provides several output formats which facilitate - the integration with software in charge of visualizing results, be it in printed or in digital form.

        - -

        The Data Model: Variant Graphs

        - -

        The tabular representation of collation results as shown in the section on sequence alignment is popular, - in the Humanities and beyond. CollateX can output results in this representation but uses a different one internally - for modelling textual variance: variant graphs.

        - -

        Variant graphs are the central data structure of CollateX. Any generated output from CollateX is a derivation, providing different views on it. - The idea of a graph-oriented model for expressing textual variance has been originally developed by Desmond Schmidt (Schmidt 2008, - Schmidt 2009, Schmidt 2009a) - and proved to be particularly well suited as a data model for computer-supported collation. The following figure taken from one of his publications - illustrates it:

        - -
        - Schmidt's Variant Graph Model -

        Schmidt/Colomb's Variant Graph Model

        -
        - -

        Variant graphs are in principle directed and acyclic. They are comprised at least of a - start and end node/vertex ("s" and "e" in the figure above) and can be traversed from the one to the other via labelled edges. The labels on each edge - contain content segments of compared text versions and a set of identifiers/sigils, denoting the versions which contain the respective content of an edge's label. - Thus -

          -
        1. common segments of multiple text versions can be merged in a variant graph,
        2. -
        3. differing segments result in the graph branching at nodes, while
        4. -
        5. each version can still be retrieved from the graph by traversing it along the edges labeled with the appropriate identifier of that version.
        6. -

        - -

        - -

        Following these principles, the depicted variant graph models three text versions A, B and C with the following content (markup omitted):

        - - - - - -
        AQueste è l'ultima traccia d'un antico acquedotto di sguardi, una orbita assorta e magica:
        BQueste è l'ultima cenno d'un antico acquedotto di sguardi, la sua curva sacra e muta:
        CQueste è l'ultima porta d'un antico acquedotto di sguardi, la sua curva sacra e solitaria:
        - -

        In order to account for the separation of concerns laid out above, CollateX' implementation of Schmidt's model - adjusted the latter slightly. Instead of labelling the edges of a variant graph with two attributes – the content as well as the sigils of text versions containing it – - the edges of variant graphs in CollateX are only labeled with sigil sets. The version's content segments – in the form of partial token sequences – have - been moved to the nodes/vertices. The ending of the example graph then looks like this (with sigils being mapped from A, B, C to W1, W2, W3):

        - -
        - CollateX's Variant Graph Model -

        CollateX' Variant Graph Model

        -
        - -

        The above illustration does not reveal the internal structure of the graph completely insofar as the nodes' labels in this figure are a simplification. - For instance, the second node in the graph (read from left to right) is labeled "sacra", with the two versions W2 and W3 "sharing some content". More precisely - though and in line with the above definition of tokens and their equivalence relation, W2 and W3 do not "share some content". Instead they - both contain a token with the content "sacra", both deemed to be equal according to the definition of a specific token comparator function. In the graphical - representation of the variant graph above, this subtle distinction is not made and both tokens are just represented via their common textual content. In CollateX' - data model though, this distinction is still relevant and represented: Each node/vertex in a variant graph is not modelled via textual content (as it would be the - case when translated directly from Schmidt's model) but as a set of tokens per node originating from one or more versions, with all tokens in such a - set belonging to the same equivalence class.

        - -

        The described change to Schmidt's model serves mainly two purposes: Firstly, it allows for arbitrary tokens to be compared and their commonalities, differences as - well as their sequential order to be represented in a graph structure. Secondly, CollateX' graph model is easy to transform into the tabular representation - introduced further above by ranking the graph's nodes in topological order - and aligning tokens which belong to nodes of the same rank.

        - -

        It has to be noted that also in this data model, the transposition of tokens still remains a problematic case. Like in Schmidt's model, CollateX represents the - transposition of a token (or more precisely: its content) as a link between nodes (in the case of Schmidt's model: edges) containing the transposed segment. The link is undirected - and does not form part of a variant graph's traversal scheme because transposition links would break the assertion of a variant graph's acyclic nature and - consequently the ability to sort it topologically. While the linking of nodes can represent transposed segments sufficiently, it is superimposed - on a variant graph, i.e. it does not integrate well with it. Future work in this area may yield a more concise representation.

        - -

        Alignment Algorithms

        - -

        CollateX strives for maximum flexibility on the users' side when comparing text versions and adjusting the results to their requirements. One part of this - flexibility is rooted in the support of several alignment algorithms users can switch between and experiment with.

        - -

        Currently three algorithms have been implemented. They all operate on variant graph structures and belong to the group of progressive alignment algorithms. - Instead of comparing all versions at once, they

          -
        1. start by comparing two versions,
        2. -
        3. transform the result into a variant graph, then
        4. -
        5. progressively compare another version against that graph, and
        6. -
        7. merge the result of that comparison into the graph,
        8. -
        9. repeating the procedure until all versions have been merged.
        10. -

        - -

        On the one hand, the progressive approach is advantageous because it reduces the problem of comparing an arbitrary number of versions to the - simpler comparison of a single version with a variant graph representing several ones. The disadvantage on the other hand is the occasional dependence of - the result on the order in which versions are merged into the graph (Spencer 2004). - Adding tools to deal with this dependency, e.g. by performing a phylogenetic analysis - to determine an optimal order, is planned for a future version.

        - -

        Dekker

        - -

        The most mature algorithm offered by CollateX thus far has been developed by Ronald Haentjens Dekker (Dekker 2011). - It aligns an arbitrary number of text versions, optimizes the local alignment of partial token sequences (phrases) and detects transpositions.

        - -

        Needleman-Wunsch

        - -

        The Needleman-Wunsch algorithm - (Needleman 1970) is a well-known global alignment - algorithm broadly applied in Bioinformatics and the social sciences. Based on dynamic programming, - this algorithm searches for an optimal alignment of an arbitrary number of versions by consulting a scoring function which penalizes the insertion of gaps. It does not take - the possibility of transposed segments into account though.

        - -

        The scoring function in CollateX' implementation cannot be freely configured at the moment; the gap penalty is assumed to be constant and equals - the score of a match.

        - -

        MEDITE

        - -

        Only recently added to the code base, this algorithm takes its name from a pairwise - alignment algorithm developed by Julien Bourdaillet and Jean-Gabriel Ganascia (Bourdaillet 2007). - It is based on suffix trees for the search of maximal unique matches between - text versions and the A* algorithm for optimizing the alignment. - Like Dekker's algorithm, it takes transpositions into account while doing so.

        - -

        CollateX' implementation of this algorithm is in an experimental stage. While it already delivers promising results, it has not been - fully optimized and – above all – not been extensively tested. In the case of issues with this algorithm, - the CollateX team would appreciate feedback. Alternatively users can download - the original version of MEDITE written by the algorithm's - authors.

        - -

        Input

        - -

        This section describes the different input formats CollateX supports. Besides the contents of a text's versions to be compared, the - input may also include parameters, i. e. the alignment algorithm to be used.

        - -

        Plain Text

        - -

        Like any collation tool, CollateX can process text versions provided as plain text. As CollateX is written for the Java Virtual Machine, - internally the comparison of plain text is based on the JVM's string type and thus on 16-bit - Unicode characters.

        - -

        Depending on the way CollateX is used, plain text versions can also be provided in other - encodings supported by the Java Platform and will be converted to Unicode before comparison. The command line interface - is one such interface which supports character set conversions.

        - -

        Plain text versions are always subject to tokenization and optional normalization of the resulting token sequence before they will be compared with each other.

        - -

        JSON

        - -

        As a more flexible format, CollateX supports input in JavaScript Object Notation (JSON). A set of text versions - to be compared can be JSON encoded as follows:

        - -
        {
        -  "witnesses" : [
        -    {
        -      "id" : "A",
        -      "content" : "A black cat in a black basket"
        -    },
        -    {
        -      "id" : "B",
        -      "content" : "A black cat in a black basket"
        -    },
        -    {
        -      "id" : "C",
        -      "content" : "A striped cat in a black basket"
        -    },
        -    {
        -      "id" : "D",
        -      "content" : "A striped cat in a white basket"
        -    }
        -  ]
        -}
        - -

        JSON input always consists of a single root object wrapping input data. The root object has one required property containing the versions to be compared which - (for historical reasons) is named witnesses. The value of this property is an array (a list) of objects in turn, with each object representing a version. - The order of the array elements determines the order in which they are processed by an alignment algorithm, i.e. in which versions are merged into a variant graph.

        - -

        Each object in the witnesses array must have a unique identifier in the required property id. This identifier will be used in the - output to reference a particular version. Besides the identifier each object must describe the content of the version. - The content can either be specified as a string property named content as shown above. In this case the version is treated like a plain text version - with tokenization and normalization taking place before the alignment.

        - -

        Another option is to provide the content of versions in tokenized (and optionally normalized) form:

        - -
        {
        -  "witnesses" : [
        -    {
        -      "id" : "A",
        -      "tokens" : [
        -          { "t" : "A", "ref" : 123 },
        -          { "t" : "black" , "adj" : true },
        -          { "t" : "cat", "id" : "xyz" }
        -      ]
        -    },
        -    {
        -      "id" : "B",
        -      "tokens" : [
        -          { "t" : "A" },
        -          { "t" : "white" , "adj" : true },
        -          { "t" : "kitten.", "n" : "cat" }
        -      ]
        -    }
        -  ]
        -}
        - -

        Instead of providing a property content for a version, one can provide a sequence of tokens via the property tokens. - The version's property value must be a list with one object per token. Each token object in turn must at least contain a property t, which defines - its content. Accordingly, in the example above, version "A" has the tokens ["A", "black", "cat"] whereas version "B" is comprised of the token sequence - ["A", "white", "kitten"].

        - -

        Optionally a normalized reading of the token can be provided in the property n. - Again, in the example above, that means the last token of version "B" is normalized from the reading "kitten" to the reading "cat", thus facilitating - a match of "kitten" with the last token of version "A".

        - -

        Apart from these 2 defined properties t and n, token objects can contain an arbitrary number of additional properties. - Additional properties will not be interpreted by CollateX but just be passed through, reappearing in - the output unchanged. Properties like ref, adj or id in the example would be such - additional properties of a token object. Users of the JSON input can make use of this pass-through mode e.g. in order to uniquely identify aligned tokens - independently of their (possibly non-unique) content.

        - -

        When using interfaces like the HTTP service of CollateX, JSON encoded input can contain optional parameters - controlling the collation process. You can set the alignment algorithm for instance by providing a property algorithm in the root object with - one of the values "needleman-wunsch", "medite" or "dekker" (the default):

        - -
        {
        -  "witnesses": [ … ],
        -  "algorithm": "needleman-wunsch"
        -}
        - -

        There is also limited support for customizing the token comparator function. Via JSON, two functions are available:

        - -
        {
        -  "witnesses": [ … ],
        -  "algorithm": "…",
        -  "tokenComparator": { type: "equality" }
        -}
        - -

        The default function, which can also be explicitly configured like shown above, tests for exact equality of the normalized token content. An alternative is the use of approximate matching via - a Levenshtein/edit distance threshold for matching tokens:

        - -
        {
        -  "witnesses": [ … ],
        -  "tokenComparator": {
        -    "type": "levenshtein",
        -    "distance": 2
        -  }
        -}
        - -

        For approximate matching, the type property of the token object describing the token comparator function must be assigned the value "levenshtein". - The optional property distance defines the maximum edit distance between two normalized token strings which is still considered to be a match. - An edit distance of 1 is the default.

        - -

        XML

        - -

        XML-encoded text versions are also supported by CollateX, though the ability to preserve the markup context during the collation process is - fairly limited at the moment when not using the Java API. Out of the box, you can compare XML documents either via - the command line interface or collate them as part of a processing pipeline in Apache Cocoon.

        - -

        On the command line, CollateX accepts XML documents with arbitrary tag sets. All it needs is an XPath 1.0 - expression that evaluates to a node set for each document. The - text content of each node in such a set equals a token. For example, the XPath expression "//w" would result in a text version for each XML document with the sequence - of text segments enclosed in <w/> elements as tokens.

        - -

        When using CollateX' Apache Cocoon component, the collation will be performed on one or more XML fragments in an input document which adhere to the following schema:

        - -
        <collation xmlns="http://interedition.eu/collatex/ns/1.0" joined="true" editDistance="1" algorithm="medite" format="tei">
        -    <witness>Almost every aspect of what scholarly editors do may be changed</witness>
        -    <witness>Hardly any aspect of what stupid editors do in the privacy of their own home may be changed again and again</witness>
        -    <witness>very many aspects of what scholarly editors do in the livingrooms of their own home may not be changed</witness>
        -</collation>
        - -

        CollateX will process any XML fragment enclosed in a <collation/> element from its namespace. In order to configure the collation, this element can have a number of - optional attributes:

        - - - - - - -
        algorithmthe collation algorithm to use: "dekker" (default), "needleman-wunsch" or "medite"
        formatthe desired output format: "table" for an alignment table in CollateX' custom schema (default), or - "tei" for output in TEI P5's parallel segmentation format
        editDistancethe edit distance threshold for optional fuzzy matching of tokens; the default is exact matching
        joinedWhether consecutive alignments of tokens should be joined/merged in the output, or whether each alignment of tokens should - occupy a separate cell/segment; the latter being the default.
        - -

        Each text version to be collated is given as an XML fragment enclosed in a <witness/> element. CollateX will compare all versions contained in a - <collation/> element against each other. Markup within <witness/> elements is ignored; only the text content is considered and tokenized/normalized - before collation as if it were originating from a plain text source.

        - -

        Output

        - -

        CollateX supports several formats to represent collation results.

        - -

        JSON

        - -

        In conjunction with JSON being supported as an input format, collation results can be output in JSON as well. - The schema of such output resembles matrices commonly encountered in sequence alignment representations, - and looks as follows for the second example given in the section on JSON input (indentation/whitespace added for easier readability):

        - -
        {
        -    "witnesses":["A","B"],
        -    "table":[
        -        [ [ {"t":"A","ref":123 } ], [ {"t":"A" } ] ],
        -        [ [ {"t":"black","adj":true } ], [ {"t":"white","adj":true } ] ],
        -        [ [ {"t":"cat","id":"xyz" } ], [ {"t":"kitten.","n":"cat" } ] ]
        -    ]
        -}
        - -

        The root object always contains 2 properties. The value of property witnesses is a list of all compared text versions, represented by their sigils/identifiers. - The witness list's order is significant insofar as the contents of the second property table – which contains the actual alignment – is ordered accordingly.

        - -

        The tabular alignment data is represented as a list of lists, with

          -
        • the parent list containing one entry per aligned segment, and
        • -
        • each segment/ child list containing sets of tokens from each compared version.
        • -

        - -

        Because aligned segments can span multiple tokens, the aligned token sets of each text version are also represented as lists, we effectively have 3 levels:

        - -
          -
        1. On the top-level, each list entry represents a set of aligned segments from different text versions.
        2. -
        3. On the intermediate level, each list entry represents a set of tokens from a particular text version.
        4. -
        5. On the lowest level, each list entry is an object representing a single token.
        6. -
        - -

        Our example output thus is comprised of 3 segments, each containing exactly one token per text version. The order in which the token sets are listed equals the - order of the text versions as listed in the property witnesses. Thus "A" from text version A is aligned with "A" from text version B, - "black" from text version A is aligned with "white" from text version B, and so on.

        - -

        Additions and omissions are expressed via empty token sets, e.g. an alignment of

        - -
        {
        -    "witnesses":["X","Y"],
        -    "table":[
        -        [ [ {"t":"A" } ], [ {"t":"A" } ] ],
        -        [ [ {"t":"brown" } ], [] ],
        -        [ [ {"t":"dog" } ], [ {"t":"dog" } ] ]
        -    ]
        -}
        - -

        could be interpreted as "brown" being added in version "X" or omitted in version "Y".

        - -

        Please note that transpositions are not represented explicitly in tabular output formats like this one. While the detection of transpositions affects the alignment, - the links between tokens which are assumed to be transposed by the collation algorithm are not given in this output format. Support for transpositions in tabular - representations of collation results will be added in a future version of CollateX.

        - -

        TEI P5

        - -

        The tabular representation of alignments described in the previous section can be encoded in a number of ways.

        -
        <?xml version='1.0' encoding='UTF-8'?>
        -<cx:apparatus
        -  xmlns:cx="http://interedition.eu/collatex/ns/1.0"
        -  xmlns="http://www.tei-c.org/ns/1.0">
        -    A
        -    <app>
        -      <rdg wit="A">black</rdg>
        -      <rdg wit="B">white</rdg>
        -    </app>
        -    <app>
        -      <rdg wit="A">cat</rdg>
        -      <rdg wit="B">kitten.</rdg>
        -    </app>
        -</cx:apparatus>
        - -

        XML

        - -
        <alignment xmlns="http://interedition.eu/collatex/ns/1.0">
        -    <row>
        -        <cell sigil="w1">Auch hier </cell>
        -        <cell sigil="w2">Ich </cell>
        -        <cell sigil="w3">Ich </cell>
        -    </row>
        -    <row>
        -        <cell sigil="w1">hab </cell>
        -        <cell sigil="w2">hab </cell>
        -        <cell sigil="w3">hab </cell>
        -    </row>
        -    <row>
        -        <cell sigil="w1">ich </cell>
        -        <cell sigil="w2">auch hier </cell>
        -        <cell sigil="w3">auch hier </cell>
        -    </row>
        -    <row>
        -        <cell sigil="w1">wieder ein Plätzchen</cell>
        -        <cell sigil="w2">wieder ein Pläzchen</cell>
        -        <cell sigil="w3">wieder ein Pläzchen</cell>
        -    </row>
        -</alignment>
        - -

        GraphML

        - -

        The GraphML-formatted output of a variant graph is suitable for import of (possibly larger) graphs in tools - for complex graph analysis and visualization, e. g. Gephi. - For an example GraphML document, take a look at sample output from the - web console.

        - -

        GraphViz DOT

        - -
        digraph G {
        -  v301 [label = ""];
        -  v303 [label = "A"];
        -  v304 [label = "black"];
        -  v306 [label = "white"];
        -  v305 [label = "cat"];
        -  v302 [label = ""];
        -  v301 -> v303 [label = "A, B"];
        -  v303 -> v304 [label = "A"];
        -  v303 -> v306 [label = "B"];
        -  v304 -> v305 [label = "A"];
        -  v306 -> v305 [label = "B"];
        -  v305 -> v302 [label = "A, B"];
        -}
        - -

        The Command Line Interface

        - -
        usage: collatex [<options>]
        -                (<json_input> | <witness_1> <witness_2> [[<witness_3>] ...])
        -  -a,--algorithm <arg>           progressive alignment algorithm to use
        -                                 'dekker' (default), 'medite',
        -                                 'needleman-wunsch'
        -  -f,--format <arg>              result/output format: 'json', 'csv', 'dot',
        -                                 'graphml', 'tei'
        -  -h,--help                      print usage instructions (which your are
        -                                 looking at right now)
        -  -ie,--input-encoding <arg>     charset to use for decoding non-XML
        -                                 witnesses; default: UTF-8
        -  -o,--output <arg>              output file; '-' for standard output
        -                                 (default)
        -  -oe,--output-encoding <arg>    charset to use for encoding the output;
        -                                 default: UTF-8
        -  -s,--script <arg>              ECMA/JavaScript resource with functions to be
        -                                 plugged into the alignment algorithm
        -  -t,--tokenized                 consecutive matches of tokens will *not* be
        -                                 joined to segments
        -  -xml,--xml-mode                witnesses are treated as XML documents
        -  -xp,--xpath <arg>              XPath 1.0 expression evaluating to tokens of
        -                                 XML witnesses; default: '//text()'
        - -

        ECMA/JavaScript Callbacks

        - -

        Apache Cocoon Integration

        - -
        <map:components>
        -...
        -<map:transformers>
        -    ...
        -    <map:transformer name="collatex" src="eu.interedition.collatex.cocoon.CollateXTransformer" />
        -    ...
        -</map:transformers>
        -...
        -</map:components>
        - -
        <map:pipelines>
        -...
        -<map:pipeline>
        -  <map:match pattern="collatex-test">
        -    <map:generate src="resource/internal/collation.xml" />
        -    <map:transform type="collatex"/>
        -    <map:serialize type="xml"/>
        -  </map:match>
        -</map:pipeline>
        -...
        -</map:pipelines>
        -

        The RESTful Web Service

        - -

        - This page documents the - Application Programming Interface (API) - of CollateX via which you can provide textual versions (“witnesses”) to be compared and get the collation result back in a number of formats. -

        - -

        - The CollateX service is callable via - HTTP POST requests to - ${cp}/collate.

        - -

        It expects input formatted in JavaScript Object Notation (JSON) as the request body; - accordingly the content type of the HTTP request must be set to application/json by the client.

        - -

        - The output format of the collator, contained in the response to an HTTP POST request, can be chosen via - an Accept HTTP header in the request. The following output formats are supported: -

        - - - - - - - - - - - - - - - - - - - - - - -
        application/json(per default) the tabular alignment of the witnesses' tokens, represented in JSON
        application/tei+xmlthe collation result as a list of critical apparatus entries, encoded in TEI P5 parallel segmentation mode
        application/graphml+xmlthe variant graph, represented in GraphML format
        text/plainthe variant graph, represented in Graphviz' DOT Language
        image/svg+xmlthe variant graph, rendered as an SVG vector graphics document
        - -

        For further examples, take a look at sample output from the - web console.

        - -
        -

        The HTTP-based JavaScript API

        - -

        Enables the use of CollateX' RESTful API via JavaScript … Based on YUI framework

        - -

        Requirements

        - -

        Add dependencies to header … YUI library plus CollateX module …

        - -
        <script type="text/javascript" src="http://yui.yahooapis.com/3.8.1/build/yui/yui-min.js"></script>
        -<script type="text/javascript" src="http://collatex.net/demo/collatex.js"></script>
        -
        - -

        Substitute URL prefix [ROOT] with the base URL of your installation, e.g. - this one for the installation you are currently looking at …

        - -

        YUI module interedition-collate available now … supports cross-domain AJAX requests via - CORS

        - -

        Sample usage

        - -
        YUI().use("node", "collatex", function(Y) {
        -    new Y.CollateX({ serviceUrl: "http://collatex.net/demo/collate" }).toTable([{
        -        id: "A",
        -        content: "Hello World"
        -    }, {
        -        id: "B",
        -        tokens: [
        -            { "t": "Hallo", "n": "hello" },
        -            { "t": "Welt", "n": "world" }
        -        ]
        -    }], Y.one("#result"));
        -});
        - -

        toTable() takes witness array as first parameter; second parameter is DOM node which serves as container for - the resulting HTML alignment table …

        - -

        … generic collate(witnesses, callback) as well as methods for other formats available: - toSVG(), toTEI(), toGraphViz()

        - -

        … configuration of a collator instance via methods like withDekker(), withFuzzyMatching(maxDistance)

        -
        - -

        API Documentation (Javadoc)

        - -

        here

        - -

        Resources/ Bibliography

        - -
        -
        Bourdaillet 2007
        -
        Bourdaillet J. and Ganascia J.-G., 2007. Practical block sequence alignment with moves. - LATA 2007 - International Conference on Language and Automata Theory and Applications, 3/2007.
        - -
        Collate
        -
        Robinson, P., 2000. Collate.
        - -
        Dekker 2011
        -
        Dekker, R. H. and Middell, G., 2011. Computer-Supported Collation with CollateX: Managing Textual Variance in an Environment with Varying Requirements. - Supporting Digital Humanities 2011. University of Copenhagen, Denmark. 17-18 November 2011.
        - -
        Juxta 2013
        -
        Performant Software Solutions LLC, 2013. Juxta.
        - -
        Needleman 1970
        -
        Needleman, Saul B. and Wunsch, Christian D., 1970. A general method applicable to the search for similarities in the amino acid sequence of two proteins. - Journal of Molecular Biology 48 (3), 443–53.
        - -
        NMerge 2012
        -
        Schmidt, D., 2012. NMerge. The nmerge Java library/commandline tool for making multi-version documents.
        - -
        Schmidt 2008
        -
        Schmidt, D., 2008. What's a Multi-Version Document. Multi-Version Documents Blog.
        - -
        Schmidt 2009
        -
        Schmidt, D. and Colomb, R., 2009. A data structure for representing multi-version texts online. - International Journal of Human-Computer Studies, 67.6, 497-514.
        - -
        Schmidt 2009a
        -
        Schmidt, D., 2009. Merging Multi-Version Texts: a Generic Solution to the Overlap Problem.” Presented at Balisage: - The Markup Conference 2009, Montréal, Canada, August 11 - 14, 2009. In Proceedings of Balisage: The Markup Conference 2009. - Balisage Series on Markup Technologies, vol. 3 (2009). doi:10.4242/BalisageVol3.Schmidt01.
        - -
        Spencer 2004
        -
        Spencer M., Howe and Christopher J., 2004. Collating Texts Using Progressive Multiple Alignment. Computers and - the Humanities. 38/2004, 253–270.
        - -
        Stolz 2006
        -
        Stolz, M. and Dimpel F. M., 2006. Computergestützte Kollationierung und ihre Integration in den editorischen - Arbeitsfluss. 2006.
        -
        -{% endblock %} \ No newline at end of file diff --git a/site/twig/index.twig b/site/twig/index.twig deleted file mode 100644 index f1b108915..000000000 --- a/site/twig/index.twig +++ /dev/null @@ -1,46 +0,0 @@ -{% extends "page.twig" %} - -{% block content %} -
        -
        -
        -

        CollateX is a software to

        - -
          -
        1. read multiple (≥ 2) versions of a text, splitting each version into parts (tokens) to be compared,
        2. -
        3. identify similarities of and differences between the versions (including moved/transposed segments) by aligning tokens, and
        4. -
        5. output the alignment results in a variety of formats for further processing, for instance
        6. -
        7. to support the production of a critical apparatus or the stemmatical analysis of a text's genesis.
        8. -
        -
        - -

        It resembles software used to compute differences between files (e.g. diff) - or tools for sequence alignment which are commonly used - in Bioinformatics. While CollateX shares some of the techniques and algorithms with those tools, it mainly aims for a flexible and configurable approach - to the problem of finding similarities and differences in texts, sometimes trading computational soundness or complexity for - the user's ability to influence results.

        - -

        As such it is primarily designed for use cases in disciplines like - Philology or – more specifically – the field - of Textual Criticism where the assessment - of findings is based on interpretation and therefore can be supported by computational means but is not necessarily - computable.

        -
        -
        -
        -

        Latest Version

        - - -

        For alternative packages and license terms, please read the download section.

        -
        - -
        - Variant Graph -

        Snippet of a Variant Graph produced by CollateX

        -
        - -

        Please refer to the documentation for detailed information - about CollateX like its underlying concepts or usage instructions.

        -
        -
        -{% endblock %} \ No newline at end of file diff --git a/site/twig/page.twig b/site/twig/page.twig deleted file mode 100644 index aaaaf19b0..000000000 --- a/site/twig/page.twig +++ /dev/null @@ -1,38 +0,0 @@ - - - - - CollateX{% if title %} – {{ title }}{% endif %} - - - - - - - - {% block head %}{% endblock %} - - - - -
        {% block content %}{% endblock %}
        -{% block appendix %} - -{% endblock %} - - - \ No newline at end of file diff --git a/site/twig/project.twig b/site/twig/project.twig deleted file mode 100644 index 9e9f3635e..000000000 --- a/site/twig/project.twig +++ /dev/null @@ -1,102 +0,0 @@ -{% extends "page.twig" %} - -{% block content %} -
        -

        Project History & Agenda

        - -

        Development of CollateX started in 2010 as a project within the EU-funded initiative - Interedition, with the aim to create a successor of - Peter Robinson's Collate. While widely - used within the community and valued for its versatility in producing critical apparatuses for scholarly editions, - by then Collate was increasingly hard to deploy as it depends on a runtime environment whose support had been - phased out by its vendor. -

        - -

        CollateX was planned as a complete rewrite of Collate that was primarily addressing the architectural challenges - of its predecessor. Over the years though and with more and more participants contributing their requirements and - ideas, it developed a different agenda. On the one hand, Collate is a complete solution for producing a critical apparatus, - with features ranging from its very own algorithm for comparing versions of a text to a powerful graphical user interface that lets the user - control the collation process. On the other hand, CollateX has become a software component which can be embedded into other software or be made a part of - a software system. Its goal is the provision and advancement of current research in the field of computer-supported collation - involving natural language texts. To this end the developers of CollateX put an emphasis on its flexible applicability, be it in terms of - its runtime environment or be it in terms of the specific challenges CollateX has to cope with when applied to - textual traditions of varying language, encoding or publication settings.

        -
        - -
        -

        Contact

        - -

        You can contact the team via e-mail (info@collatex.net) or individually (see below). - Some of the team members can also be found in Interedition's IRC channel #interedition - on freenode.net.

        - -

        Development of CollateX takes place on GitHub. - Bug reports and/or - other contributions are welcome.

        -
        - - -{% endblock %} \ No newline at end of file From af4ce3a5f317acfac76328315e27a6b95aab850a Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sun, 4 Jan 2015 21:30:19 +0100 Subject: [PATCH 04/52] Site: static generation of site (cleanup) --- site/Gruntfile.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/Gruntfile.js b/site/Gruntfile.js index bde87230d..940657854 100644 --- a/site/Gruntfile.js +++ b/site/Gruntfile.js @@ -1,5 +1,5 @@ module.exports = function(grunt) { require('time-grunt')(grunt); require('jit-grunt')(grunt); - require('load-grunt-config')(grunt, { loadGruntTasks: false, data: { site: 'htdocs', version: '2.0a' } }); + require('load-grunt-config')(grunt, { loadGruntTasks: false }); }; \ No newline at end of file From 648ab66530c05cab026521cf2ad76dd75baa4743 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 07:43:07 +0100 Subject: [PATCH 05/52] Tools: packaging server and command line tool together in a single shaded JAR --- collatex-tools/pom.xml | 61 ++++------ .../eu/interedition/collatex/http/Server.java | 109 ------------------ .../{cli/Engine.java => tools/CollateX.java} | 91 +++++++++++++-- .../CollatorService.java} | 6 +- .../{http => tools}/JsonProcessor.java | 2 +- .../collatex/{cli => tools}/NodeToken.java | 2 +- .../collatex/{cli => tools}/PluginScript.java | 2 +- .../collatex/{cli => tools}/TextWitness.java | 2 +- .../collatex/{cli => tools}/URLWitness.java | 2 +- 9 files changed, 111 insertions(+), 166 deletions(-) delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java rename collatex-tools/src/main/java/eu/interedition/collatex/{cli/Engine.java => tools/CollateX.java} (72%) rename collatex-tools/src/main/java/eu/interedition/collatex/{http/Collator.java => tools/CollatorService.java} (98%) rename collatex-tools/src/main/java/eu/interedition/collatex/{http => tools}/JsonProcessor.java (99%) rename 
collatex-tools/src/main/java/eu/interedition/collatex/{cli => tools}/NodeToken.java (96%) rename collatex-tools/src/main/java/eu/interedition/collatex/{cli => tools}/PluginScript.java (99%) rename collatex-tools/src/main/java/eu/interedition/collatex/{cli => tools}/TextWitness.java (96%) rename collatex-tools/src/main/java/eu/interedition/collatex/{cli => tools}/URLWitness.java (98%) diff --git a/collatex-tools/pom.xml b/collatex-tools/pom.xml index eee38d53a..a00f35ea4 100644 --- a/collatex-tools/pom.xml +++ b/collatex-tools/pom.xml @@ -37,53 +37,38 @@ - org.apache.maven.plugins - maven-deploy-plugin - 2.7 - - true - - - - org.codehaus.mojo - appassembler-maven-plugin - 1.2.2 + maven-shade-plugin + 2.3 - flat - lib - true - -Xmx512m - - - eu.interedition.collatex.cli.Engine - collatex - - - eu.interedition.collatex.http.Server - collatex-server - - + false - app-assemble - package + shade - assemble + shade + + + + eu.interedition.collatex.tools.CollateX + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + - - maven-assembly-plugin - 2.3 - - false - - src/main/assembly/app.xml - - - diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java b/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java deleted file mode 100644 index fb2c63452..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/Server.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.http; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.glassfish.grizzly.http.CompressionConfig; -import org.glassfish.grizzly.http.server.CLStaticHttpHandler; -import org.glassfish.grizzly.http.server.HttpHandler; -import org.glassfish.grizzly.http.server.HttpServer; -import org.glassfish.grizzly.http.server.NetworkListener; -import org.glassfish.grizzly.http.server.Request; -import org.glassfish.grizzly.http.server.Response; -import org.glassfish.grizzly.http.server.StaticHttpHandler; - -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * @author Gregor Middell - */ -public class Server { - - public static void main(String... args) { - try { - final CommandLine commandLine = new GnuParser().parse(OPTIONS, args); - if (commandLine.hasOption("h")) { - new HelpFormatter().printHelp("collatex-server [ ...]\n", OPTIONS); - return; - } - - - final Collator collator = new Collator( - Integer.parseInt(commandLine.getOptionValue("mpc", "2")), - Integer.parseInt(commandLine.getOptionValue("mcs", "0")), - commandLine.getOptionValue("dot", null) - ); - final String staticPath = System.getProperty("collatex.static.path", ""); - final HttpHandler httpHandler = staticPath.isEmpty() ? 
new CLStaticHttpHandler(Server.class.getClassLoader(), "/static/") { - @Override - protected void onMissingResource(Request request, Response response) throws Exception { - collator.service(request, response); - } - } : new StaticHttpHandler(staticPath.replaceAll("/+$", "") + "/") { - @Override - protected void onMissingResource(Request request, Response response) throws Exception { - collator.service(request, response); - } - }; - - - final NetworkListener httpListener = new NetworkListener("http", "0.0.0.0", Integer.parseInt(commandLine.getOptionValue("p", "7369"))); - - final CompressionConfig compressionConfig = httpListener.getCompressionConfig(); - compressionConfig.setCompressionMode(CompressionConfig.CompressionMode.ON); - compressionConfig.setCompressionMinSize(860); // http://webmasters.stackexchange.com/questions/31750/what-is-recommended-minimum-object-size-for-gzip-performance-benefits - compressionConfig.setCompressableMimeTypes("application/javascript", "application/json", "application/xml", "text/css", "text/html", "text/javascript", "text/plain", "text/xml"); - - final HttpServer httpServer = new HttpServer(); - httpServer.addListener(httpListener); - httpServer.getServerConfiguration().addHttpHandler(httpHandler, commandLine.getOptionValue("cp", "").replaceAll("/+$", "") + "/*"); - - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - if (LOG.isLoggable(Level.INFO)) { - LOG.info("Stopping HTTP server"); - } - httpServer.shutdown(); - })); - - httpServer.start(); - - Thread.sleep(Long.MAX_VALUE); - } catch (Throwable t) { - LOG.log(Level.SEVERE, "Error while parsing command line", t); - System.exit(1); - } - } - - static final Logger LOG = Logger.getLogger(Server.class.getName()); - static final Options OPTIONS = new Options(); - - static { - OPTIONS.addOption("cp", "context-path", true, "URL base/context path of the service, default: '/'"); - OPTIONS.addOption("dot", "dot-path", true, "path to Graphviz 'dot', auto-detected by default"); - 
OPTIONS.addOption("h", "help", false, "prints usage instructions"); - OPTIONS.addOption("p", "port", true, "HTTP port to bind server to, default: 7369"); - OPTIONS.addOption("mpc", "max-parallel-collations", true, "maximum number of collations to perform in parallel, default: 2"); - OPTIONS.addOption("mcs", "max-collation-size", true, "maximum number of characters (counted over all witnesses) to perform collations on, default: unlimited"); - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java similarity index 72% rename from collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java index 252098356..892d803e6 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/Engine.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java @@ -17,7 +17,7 @@ * along with CollateX. If not, see . 
*/ -package eu.interedition.collatex.cli; +package eu.interedition.collatex.tools; import com.google.common.base.Function; import com.google.common.base.Objects; @@ -29,7 +29,6 @@ import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.http.JsonProcessor; import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleCollation; @@ -43,6 +42,14 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.glassfish.grizzly.http.CompressionConfig; +import org.glassfish.grizzly.http.server.CLStaticHttpHandler; +import org.glassfish.grizzly.http.server.HttpHandler; +import org.glassfish.grizzly.http.server.HttpServer; +import org.glassfish.grizzly.http.server.NetworkListener; +import org.glassfish.grizzly.http.server.Request; +import org.glassfish.grizzly.http.server.Response; +import org.glassfish.grizzly.http.server.StaticHttpHandler; import org.xml.sax.SAXException; import javax.script.ScriptException; @@ -65,11 +72,13 @@ import java.nio.charset.Charset; import java.util.Comparator; import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; /** * @author Gregor Middell */ -public class Engine implements Closeable { +public class CollateX implements Closeable { Charset inputCharset; boolean xmlMode; @@ -91,7 +100,7 @@ public class Engine implements Closeable { PrintWriter log = new PrintWriter(System.err); boolean errorOccurred = false; - Engine configure(CommandLine commandLine) throws XPathExpressionException, ParseException, ScriptException, IOException { + CollateX configure(CommandLine commandLine) throws XPathExpressionException, ParseException, ScriptException, IOException { this.inputCharset = 
Charset.forName(commandLine.getOptionValue("ie", "UTF-8")); this.xmlMode = commandLine.hasOption("xml"); this.tokenXPath = XPathFactory.newInstance().newXPath().compile(commandLine.getOptionValue("xp", "//text()")); @@ -158,7 +167,7 @@ Engine configure(CommandLine commandLine) throws XPathExpressionException, Parse return this; } - Engine read() throws IOException, XPathExpressionException, SAXException { + CollateX read() throws IOException, XPathExpressionException, SAXException { if (inputResources.size() < 2) { try (InputStream inputStream = inputResources.get(0).openStream()) { this.witnesses = JsonProcessor.read(inputStream).getWitnesses(); @@ -174,7 +183,7 @@ Engine read() throws IOException, XPathExpressionException, SAXException { return this; } - Engine collate() { + CollateX collate() { new SimpleCollation(witnesses, collationAlgorithm, joined).collate(variantGraph); return this; } @@ -212,7 +221,53 @@ void write() throws IOException { } } - Engine log(String str) { + CollateX serve(CommandLine commandLine) { + final CollatorService collator = new CollatorService( + Integer.parseInt(commandLine.getOptionValue("mpc", "2")), + Integer.parseInt(commandLine.getOptionValue("mcs", "0")), + commandLine.getOptionValue("dot", null) + ); + final String staticPath = System.getProperty("collatex.static.path", ""); + final HttpHandler httpHandler = staticPath.isEmpty() ? 
new CLStaticHttpHandler(CollateX.class.getClassLoader(), "/static/") { + @Override + protected void onMissingResource(Request request, Response response) throws Exception { + collator.service(request, response); + } + } : new StaticHttpHandler(staticPath.replaceAll("/+$", "") + "/") { + @Override + protected void onMissingResource(Request request, Response response) throws Exception { + collator.service(request, response); + } + }; + + final NetworkListener httpListener = new NetworkListener("http", "0.0.0.0", Integer.parseInt(commandLine.getOptionValue("p", "7369"))); + + final CompressionConfig compressionConfig = httpListener.getCompressionConfig(); + compressionConfig.setCompressionMode(CompressionConfig.CompressionMode.ON); + compressionConfig.setCompressionMinSize(860); // http://webmasters.stackexchange.com/questions/31750/what-is-recommended-minimum-object-size-for-gzip-performance-benefits + compressionConfig.setCompressableMimeTypes("application/javascript", "application/json", "application/xml", "text/css", "text/html", "text/javascript", "text/plain", "text/xml"); + + final HttpServer httpServer = new HttpServer(); + httpServer.addListener(httpListener); + httpServer.getServerConfiguration().addHttpHandler(httpHandler, commandLine.getOptionValue("cp", "").replaceAll("/+$", "") + "/*"); + + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + if (LOG.isLoggable(Level.INFO)) { + LOG.info("Stopping HTTP server"); + } + httpServer.shutdown(); + })); + + try { + httpServer.start(); + Thread.sleep(Long.MAX_VALUE); + } catch (IOException | InterruptedException e) { + error(e.getMessage(), e); + } + return this; + } + + CollateX log(String str) { log.write(str); return this; } @@ -240,14 +295,18 @@ URL argumentToResource(String arg) throws ParseException { } public static void main(String... 
args) { - final Engine engine = new Engine(); + final CollateX engine = new CollateX(); try { final CommandLine commandLine = new GnuParser().parse(OPTIONS, args); if (commandLine.hasOption("h")) { engine.help(); return; } - engine.configure(commandLine).read().collate().write(); + if (commandLine.hasOption("srv")) { + engine.serve(commandLine); + } else { + engine.configure(commandLine).read().collate().write(); + } } catch (ParseException e) { engine.error("Error while parsing command line arguments", e); engine.log("\n").help(); @@ -264,15 +323,17 @@ public static void main(String... args) { } finally { try { Closeables.close(engine, false); - } catch (IOException e) { + } catch (IOException ignored) { } } } + static final Logger LOG = Logger.getLogger(CollateX.class.getName()); static final Options OPTIONS = new Options(); static { - OPTIONS.addOption("h", "help", false, "print usage instructions (which your are looking at right now)"); + OPTIONS.addOption("h", "help", false, "print usage instructions"); + OPTIONS.addOption("o", "output", true, "output file; '-' for standard output (default)"); OPTIONS.addOption("ie", "input-encoding", true, "charset to use for decoding non-XML witnesses; default: UTF-8"); OPTIONS.addOption("oe", "output-encoding", true, "charset to use for encoding the output; default: UTF-8"); @@ -282,6 +343,14 @@ public static void main(String... 
args) { OPTIONS.addOption("t", "tokenized", false, "consecutive matches of tokens will *not* be joined to segments"); OPTIONS.addOption("f", "format", true, "result/output format: 'json', 'csv', 'dot', 'graphml', 'tei'"); OPTIONS.addOption("s", "script", true, "ECMA/JavaScript resource with functions to be plugged into the alignment algorithm"); + + OPTIONS.addOption("srv", "server", false, "start RESTful HTTP server"); + OPTIONS.addOption("cp", "context-path", true, "URL base/context path of the service, default: '/'"); + OPTIONS.addOption("dot", "dot-path", true, "path to Graphviz 'dot', auto-detected by default"); + OPTIONS.addOption("p", "port", true, "HTTP port to bind server to, default: 7369"); + OPTIONS.addOption("mpc", "max-parallel-collations", true, "maximum number of collations to perform in parallel, default: 2"); + OPTIONS.addOption("mcs", "max-collation-size", true, "maximum number of characters (counted over all witnesses) to perform collations on, default: unlimited"); + } @Override diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java similarity index 98% rename from collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java index 215ff832c..f3444142f 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/Collator.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java @@ -1,4 +1,4 @@ -package eu.interedition.collatex.http; +package eu.interedition.collatex.tools; import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.simple.SimpleCollation; @@ -37,14 +37,14 @@ /** * @author Gregor Middell */ -public class Collator { +public class CollatorService { private final int maxCollationSize; private final String dotPath; private final ExecutorService 
collationThreads; private final ExecutorService processThreads = Executors.newCachedThreadPool(); - public Collator(int maxParallelCollations, int maxCollationSize, String dotPath) { + public CollatorService(int maxParallelCollations, int maxCollationSize, String dotPath) { this.collationThreads = Executors.newFixedThreadPool(maxParallelCollations, new ThreadFactory() { private final AtomicLong counter = new AtomicLong(); diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java similarity index 99% rename from collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java index 1b6e73323..f087f0c96 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/http/JsonProcessor.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java @@ -1,4 +1,4 @@ -package eu.interedition.collatex.http; +package eu.interedition.collatex.tools; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.CollationAlgorithmFactory; diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/NodeToken.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java similarity index 96% rename from collatex-tools/src/main/java/eu/interedition/collatex/cli/NodeToken.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java index 9ff4fc205..6f1ffcc19 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/NodeToken.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java @@ -17,7 +17,7 @@ * along with CollateX. If not, see . 
*/ -package eu.interedition.collatex.cli; +package eu.interedition.collatex.tools; import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleWitness; diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/PluginScript.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java similarity index 99% rename from collatex-tools/src/main/java/eu/interedition/collatex/cli/PluginScript.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java index 9a4699c80..d1d7323a0 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/PluginScript.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java @@ -17,7 +17,7 @@ * along with CollateX. If not, see . */ -package eu.interedition.collatex.cli; +package eu.interedition.collatex.tools; import com.google.common.base.Function; import com.google.common.base.Preconditions; diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/TextWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java similarity index 96% rename from collatex-tools/src/main/java/eu/interedition/collatex/cli/TextWitness.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java index d2687706d..1f3d36153 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/TextWitness.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java @@ -17,7 +17,7 @@ * along with CollateX. If not, see . 
*/ -package eu.interedition.collatex.cli; +package eu.interedition.collatex.tools; import eu.interedition.collatex.Witness; diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/cli/URLWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java similarity index 98% rename from collatex-tools/src/main/java/eu/interedition/collatex/cli/URLWitness.java rename to collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java index 03bb3a236..717482ef3 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/cli/URLWitness.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java @@ -17,7 +17,7 @@ * along with CollateX. If not, see . */ -package eu.interedition.collatex.cli; +package eu.interedition.collatex.tools; import com.google.common.base.Function; import com.google.common.collect.Lists; From 0eb93e3155bb5c7aaba66bcfdfd0f95310c97feb Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 07:47:49 +0100 Subject: [PATCH 06/52] Upgrade Grizzly HTTP server component --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 86c8d4094..0d9c8b8cf 100644 --- a/pom.xml +++ b/pom.xml @@ -115,7 +115,7 @@ org.glassfish.grizzly grizzly-http-server - 2.3.8 + 2.3.17 From e0d11bce288285549c4ca1fd11dc7ec8d103bd05 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 07:54:50 +0100 Subject: [PATCH 07/52] Remove Cocoon and "no-deps" modules (focus on plain library) --- collatex-cocoon/pom.xml | 97 --- collatex-cocoon/rcl.properties | 17 - .../collatex/cocoon/CollateXTransformer.java | 249 -------- .../src/main/resources/COB-INF/sitemap.xmap | 29 - collatex-nodeps/dependency-reduced-pom.xml | 50 -- collatex-nodeps/pom.xml | 44 -- collatex-python/collatex.py | 577 ------------------ pom.xml | 2 - 8 files changed, 1065 deletions(-) delete mode 100644 collatex-cocoon/pom.xml delete mode 100644 
collatex-cocoon/rcl.properties delete mode 100644 collatex-cocoon/src/main/java/eu/interedition/collatex/cocoon/CollateXTransformer.java delete mode 100644 collatex-cocoon/src/main/resources/COB-INF/sitemap.xmap delete mode 100644 collatex-nodeps/dependency-reduced-pom.xml delete mode 100644 collatex-nodeps/pom.xml delete mode 100644 collatex-python/collatex.py diff --git a/collatex-cocoon/pom.xml b/collatex-cocoon/pom.xml deleted file mode 100644 index 2fa0dece1..000000000 --- a/collatex-cocoon/pom.xml +++ /dev/null @@ -1,97 +0,0 @@ - - - 4.0.0 - - eu.interedition - collatex - 1.6-SNAPSHOT - - collatex-cocoon - 1.6-SNAPSHOT - CollateX Cocoon Block - Apache Cocoon block exposing CollateX' functionality as a transformer. - - - javax.servlet - servlet-api - - - eu.interedition - collatex-core - - - net.sf.jung - jung-graph-impl - - - org.apache.cocoon - cocoon-core - 2.2.0 - - - org.apache.cocoon - cocoon-servlet-service-components - 1.0.0 - - - org.apache.cocoon - cocoon-template-impl - 1.1.0 - - - org.apache.cocoon - cocoon-flowscript-impl - 1.0.0 - - - - - - org.apache.cocoon - cocoon-maven-plugin - 1.0.0-M2 - - - prepare - compile - - prepare - - - - - - org.mortbay.jetty - maven-jetty-plugin - 6.1.7 - - - - 8888 - 30000 - - - ${project.build.directory}/rcl/webapp - / - - - org.apache.cocoon.mode - dev - - - - - - maven-jar-plugin - 2.1 - - - - ${project.artifactId} - - - - - - - diff --git a/collatex-cocoon/rcl.properties b/collatex-cocoon/rcl.properties deleted file mode 100644 index 7eeeaec94..000000000 --- a/collatex-cocoon/rcl.properties +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -eu.interedition.collatex.collatex-cocoon.service%classes-dir=./target/classes \ No newline at end of file diff --git a/collatex-cocoon/src/main/java/eu/interedition/collatex/cocoon/CollateXTransformer.java b/collatex-cocoon/src/main/java/eu/interedition/collatex/cocoon/CollateXTransformer.java deleted file mode 100644 index 2b4861f56..000000000 --- a/collatex-cocoon/src/main/java/eu/interedition/collatex/cocoon/CollateXTransformer.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.cocoon; - -import com.google.common.base.Objects; -import com.google.common.base.Throwables; -import com.google.common.collect.Iterables; -import com.google.common.collect.LinkedHashMultimap; -import com.google.common.collect.Lists; -import com.google.common.collect.Ordering; -import com.google.common.collect.RowSortedTable; -import com.google.common.collect.SetMultimap; -import eu.interedition.collatex.CollationAlgorithm; -import eu.interedition.collatex.CollationAlgorithmFactory; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.jung.JungVariantGraph; -import eu.interedition.collatex.matching.EditDistanceTokenComparator; -import eu.interedition.collatex.matching.EqualityTokenComparator; -import eu.interedition.collatex.simple.SimpleCollation; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.simple.SimpleWitness; -import eu.interedition.collatex.util.ParallelSegmentationApparatus; -import eu.interedition.collatex.util.VariantGraphRanking; -import org.apache.avalon.framework.configuration.Configuration; -import org.apache.avalon.framework.configuration.ConfigurationException; -import org.apache.cocoon.ProcessingException; -import org.apache.cocoon.transformation.AbstractSAXTransformer; -import org.apache.cocoon.xml.AttributesImpl; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; - -import java.io.IOException; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; - -/** - * @author Gregor Middell - */ -public class CollateXTransformer extends AbstractSAXTransformer { - - private static final String TEI_NS = "http://www.tei-c.org/ns/1.0"; - public static final String COLLATEX_NS = "http://interedition.eu/collatex/ns/1.0"; - - private enum Format { - ALIGNMENT_TABLE, TEI_APPARATUS - } - - 
private Format format = Format.ALIGNMENT_TABLE; - private CollationAlgorithm algorithm; - private boolean joined; - private final List witnesses = Lists.newArrayList(); - private String sigil; - - @Override - public void configure(Configuration configuration) throws ConfigurationException { - super.configure(configuration); - this.defaultNamespaceURI = COLLATEX_NS; - } - - @Override - public void startTransformingElement(String uri, String name, String raw, Attributes attr) throws ProcessingException, IOException, SAXException { - if (!COLLATEX_NS.equals(uri)) { - return; - } - if ("collation".equals(name)) { - final String format = Objects.firstNonNull(attributeValue(attr, "format"), "table").trim().toLowerCase(); - if ("tei".equals(format)) { - this.format = Format.TEI_APPARATUS; - } else { - this.format = Format.ALIGNMENT_TABLE; - } - - Comparator tokenComparator = new EqualityTokenComparator(); - try { - final int editDistance = Integer.parseInt(Objects.firstNonNull(attributeValue(attr, "editDistance"), "0")); - if (editDistance > 0) { - tokenComparator = new EditDistanceTokenComparator(editDistance); - } - } catch (NumberFormatException e) { - } - - final String algorithm = Objects.firstNonNull(attributeValue(attr, "algorithm"), "dekker").trim().toLowerCase(); - if (algorithm.equals("medite")) { - this.algorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); - } else if (algorithm.equals("needleman-wunsch")) { - this.algorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); - } else if (algorithm.equals("gst")) { - this.algorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); - } else { - this.algorithm = CollationAlgorithmFactory.dekker(tokenComparator); - } - - this.joined = "true".equals(Objects.firstNonNull(attributeValue(attr, "joined"), "true").trim().toLowerCase()); - - sigil = null; - witnesses.clear(); - } else if ("witness".equals(name)) { - sigil = 
Objects.firstNonNull(attributeValue(attr, "sigil"), "w" + (witnesses.size() + 1)); - startTextRecording(); - } - } - - @Override - public void endTransformingElement(String uri, String name, String raw) throws ProcessingException, IOException, SAXException { - if (!COLLATEX_NS.equals(uri)) { - return; - } - if ("collation".equals(name) && !witnesses.isEmpty()) { - ignoreHooksCount++; - final VariantGraph graph = new SimpleCollation(witnesses, algorithm, joined).collate(new JungVariantGraph()); - switch (format) { - case TEI_APPARATUS: - sendTeiApparatus(graph); - break; - default: - sendAlignmentTable(graph); - break; - } - ignoreHooksCount--; - } else if ("witness".equals(name)) { - witnesses.add(new SimpleWitness(sigil, endTextRecording())); - } - } - - private void sendAlignmentTable(VariantGraph graph) throws SAXException { - startPrefixMapping("", COLLATEX_NS); - startElement(COLLATEX_NS, "alignment", "alignment", EMPTY_ATTRIBUTES); - final Set witnesses = graph.witnesses(); - final RowSortedTable> table = VariantGraphRanking.of(graph).asTable(); - - for (Integer rowIndex : table.rowKeySet()) { - final Map> row = table.row(rowIndex); - startElement(COLLATEX_NS, "row", "row", EMPTY_ATTRIBUTES); - for (Witness witness : witnesses) { - final AttributesImpl cellAttrs = new AttributesImpl(); - cellAttrs.addCDATAAttribute("sigil", witness.getSigil()); - startElement(COLLATEX_NS, "cell", "cell", cellAttrs); - if (row.containsKey(witness)) { - for (SimpleToken token : Ordering.natural().immutableSortedCopy(Iterables.filter(row.get(witness), SimpleToken.class))) { - sendTextEvent(token.getContent()); - } - } - endElement(COLLATEX_NS, "cell", "cell"); - - } - endElement(COLLATEX_NS, "row", "row"); - } - endElement(COLLATEX_NS, "alignment", "alignment"); - endPrefixMapping(""); - } - - private void sendTeiApparatus(VariantGraph graph) throws SAXException { - try { - ParallelSegmentationApparatus.generate(VariantGraphRanking.of(graph), new 
ParallelSegmentationApparatus.GeneratorCallback() { - @Override - public void start() { - try { - startPrefixMapping("cx", COLLATEX_NS); - startPrefixMapping("", TEI_NS); - startElement(COLLATEX_NS, "apparatus", "cx:apparatus", EMPTY_ATTRIBUTES); - } catch (SAXException e) { - throw Throwables.propagate(e); - } - } - - @Override - public void segment(SortedMap> contents) { - final SetMultimap segments = LinkedHashMultimap.create(); - for (Map.Entry> cell : contents.entrySet()) { - final StringBuilder sb = new StringBuilder(); - for (SimpleToken token : Ordering.natural().immutableSortedCopy(Iterables.filter(cell.getValue(), SimpleToken.class))) { - sb.append(token.getContent()); - } - segments.put(sb.toString(), cell.getKey()); - } - - final Set segmentContents = segments.keySet(); - try { - if (segmentContents.size() == 1) { - sendTextEvent(Iterables.getOnlyElement(segmentContents)); - } else { - startElement(TEI_NS, "app", "app", EMPTY_ATTRIBUTES); - for (String segment : segmentContents) { - final StringBuilder witnesses = new StringBuilder(); - for (Witness witness : segments.get(segment)) { - witnesses.append(witness.getSigil()).append(" "); - } - - final AttributesImpl attributes = new AttributesImpl(); - attributes.addCDATAAttribute("wit", witnesses.toString().trim()); - startElement(TEI_NS, "rdg", "rdg", attributes); - sendTextEvent(segment); - endElement(TEI_NS, "rdg", "rdg"); - } - endElement(TEI_NS, "app", "app"); - } - } catch (SAXException e) { - throw Throwables.propagate(e); - } - } - - @Override - public void end() { - try { - endElement(COLLATEX_NS, "apparatus", "cx:apparatus"); - endPrefixMapping(""); - endPrefixMapping("cx"); - } catch (SAXException e) { - throw Throwables.propagate(e); - } - } - }); - } catch (Throwable t) { - Throwables.propagateIfInstanceOf(Throwables.getRootCause(t), SAXException.class); - throw Throwables.propagate(t); - } - } - - static String attributeValue(Attributes attr, String localName) { - for (int ac = 0, al = 
attr.getLength(); ac < al; ac++) { - if (localName.equals(attr.getLocalName(ac))) { - return attr.getValue(ac); - } - } - return null; - } -} \ No newline at end of file diff --git a/collatex-cocoon/src/main/resources/COB-INF/sitemap.xmap b/collatex-cocoon/src/main/resources/COB-INF/sitemap.xmap deleted file mode 100644 index 9a9bf133c..000000000 --- a/collatex-cocoon/src/main/resources/COB-INF/sitemap.xmap +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/collatex-nodeps/dependency-reduced-pom.xml b/collatex-nodeps/dependency-reduced-pom.xml deleted file mode 100644 index 227719c4e..000000000 --- a/collatex-nodeps/dependency-reduced-pom.xml +++ /dev/null @@ -1,50 +0,0 @@ - - - - collatex - eu.interedition - 1.6-SNAPSHOT - - 4.0.0 - collatex-nodeps - CollateX NoDeps - 1.6-SNAPSHOT - CollateX Uber Jar for use with the Python bindings - - - - maven-shade-plugin - 2.2 - - - package - - shade - - - - - - - - - com.google.code.findbugs - jsr305 - 2.0.2 - provided - - - junit - junit - 4.10 - test - - - hamcrest-core - org.hamcrest - - - - - - diff --git a/collatex-nodeps/pom.xml b/collatex-nodeps/pom.xml deleted file mode 100644 index 8dffcfbbc..000000000 --- a/collatex-nodeps/pom.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - 4.0.0 - - eu.interedition - collatex - 1.6-SNAPSHOT - - collatex-nodeps - 1.6-SNAPSHOT - CollateX NoDeps - CollateX Uber Jar for use with the Python bindings - - - eu.interedition - collatex-core - - - net.sf.jung - jung-graph-impl - - - org.codehaus.jackson - jackson-mapper-asl - - - - - - org.apache.maven.plugins - maven-shade-plugin - 2.2 - - - package - - shade - - - - - - - diff --git a/collatex-python/collatex.py b/collatex-python/collatex.py deleted file mode 100644 index 59b032585..000000000 --- a/collatex-python/collatex.py +++ /dev/null @@ -1,577 +0,0 @@ -#!/usr/bin/env python -"""Using CollateX in Python. 
- -Collatex is a Java library for collating textual sources, -for example, to produce an apparatus. You don't need to know -anything about Java at all to use the bindings (that is the point, -of course). - -Collatex was developed by Ronald Haentjens Dekker and Gregor Middell -amongst other people. - -These Python bindings were by me, Zeth, and any errors or omissions are -mine alone. - -The main interface is the Collation class. - -Use it as follows: - ->>> collation = Collation() ->>> collation.add_witness('one', 'The cat in the hat') ->>> collation.add_witness('two', 'The hat in the cat') ->>> apparatus = collation.get_apparatus() - -For all the available classes and methods see the documentation -provided in the Collation class. - -The general approach is to provide something easy to use within Python, -with special emphasis to make it easy for people using template engines, -such as the one provided by Django or standalone engines such as Mako, -Genshi, Jinja2 and so on. - -Therefore where it makes sense to do so, I have provided human readable -properties rather than method names, (thus following the MVC pattern of -isolating application logic from presentation, i.e. so an HTML-aware -non-developer could edit an HTML template without being scared -or breaking anything). - -Whether this has an effect on performance remains to be seen (a little more -work is performed up front rather than lazily), but sooner or later most of the -apparatus will be in RAM either way, and generating apparati tends to be -a batch process anyway, rather than in real time. So who cares about -performance right? If you cared about performance you would be doing -this directly in Collatex anyway :-) Only kidding, I will make sure it is -performant as possible by using it and tweaking it as I go. - -I hope to add some easy premade default output formats for inclusion in -templates, so the Python bindings can at least do whatever the Java API -can. 
-""" - -# Fill out the following line with where the collatex jar file is located, -# which you downloaded from the project site. -JAR_PATH = "interedition/trunk/collatex/collatex-nodeps/target/" - -# The following is to manually specify the location of the JVM library -# You only need this if we fail to find your JVM automatically. -JVM_LOCATION = "" -import sys -import os - -try: - import jpype -except ImportError: - print "You need to install jpype.""" - print """Visit http://sourceforge.net/projects/jpype/files/ """ - sys.exit() - - -def get_jvm(): - """Get the JVM location. - - We start by trying to find the default JVM path, - this is in most cases the environment variable JAVA_HOME - which is normally set by SUN's JDK. - - For Linux users, this might not be set. If so we try to use - the location used by the openjdk package, one location for 64bit, - one location for 32bit. - - """ - - if JVM_LOCATION: - return JVM_LOCATION - - try: - jvm_location = jpype.getDefaultJVMPath() - except TypeError: - pass - else: - return jvm_location - - if os.name == 'posix': - if os.uname()[4] == 'x86_64': - return "/usr/lib/jvm/java-6-openjdk/jre/lib/amd64/server/libjvm.so" - else: - return \ - "/usr/lib/debug/usr/lib/jvm/java-6-openjdk/jre/lib/i386/server/libjvm.so" - - if os.name == 'mac': - return \ - "/System/Library/Frameworks/JavaVM.framework/Libraries/libjvm_compat.dylib" - - # No JVM has been found automatically - print "You need to manually specify the location of your JVM library." - print "Please set the JVM_LOCATION configuration within collatex.py." - sys.exit() - -# You can override these manually if the defaults are not working. 
-JVM = get_jvm() -HOME_PATH = os.path.expanduser('~') -COLLATE_JAR_PATH = os.path.join(HOME_PATH, JAR_PATH) - -# Start the Java Virtual Machine -jpype.startJVM(JVM, "-Djava.ext.dirs=%s" % COLLATE_JAR_PATH) - -# Java Classes -_ENGINE = jpype.JClass(\ - 'eu.interedition.collatex2.implementation.CollateXEngine') -_ALIGNMENT_TABLE = jpype.JClass(\ - 'eu.interedition.collatex2.interfaces.IAlignmentTable') -_COLUMN = jpype.JClass('eu.interedition.collatex2.interfaces.IColumn') -_ROW = jpype.JClass('eu.interedition.collatex2.interfaces.IRow') -_CELL = jpype.JClass('eu.interedition.collatex2.interfaces.ICell') -_VARIANT = jpype.JClass(\ - 'eu.interedition.collatex2.interfaces.INormalizedToken') -_WITNESS = jpype.JClass('eu.interedition.collatex2.interfaces.IWitness') -_PARALLEL_SEGMENTATION_APPARATUS = \ - jpype.JClass(\ - 'eu.interedition.collatex2.output.ParallelSegmentationApparatus') -_APPARATUS_ENTRY = \ - jpype.JClass('eu.interedition.collatex2.output.ApparatusEntry') -_PHRASE = jpype.JClass('eu.interedition.collatex2.interfaces.IPhrase') - - -DEFAULT_TOKENIZER = jpype.JClass(\ -'eu.interedition.collatex2.implementation.tokenization.WhitespaceTokenizer') -DEFAULT_NORMALIZER = jpype.JClass(\ -'eu.interedition.collatex2.implementation.tokenization.DefaultTokenNormalizer') - - -class Collation(object): - """A collation. 
- - The collation object has the following methods: - - collation.add_witness - add a witness into the witness list - collation.get_alignment_table - align the witnesses together - - There are also the following more advanced configuration methods: - - collation.set_tokenizer - allows you to use your own tokenizer - collation.set_normalizer - allows you to use your own normaliser - - """ - witnesses = [] - - def __init__(self): - self.engine = _ENGINE() - - def _get_raw_witnesses(self): - """Get the raw witness objects.""" - return [witness.get_raw_witness() for witness in self.witnesses] - - def add_witness(self, sigil, content): - """Add a witness into the witness list. - - This method requires the following arguments: - - `sigil` is the name of the witness. - `content` is the textual content that you which to collate - - >>> collation = Collation() - >>> collation.witnesses = [] # Reset because of docttest quirk - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> len(collation.witnesses) - 2 - >>> for witness in collation.witnesses: - ... print witness.sigil - one - two - - """ - witness = Witness(self.engine.createWitness(sigil, content)) - self.witnesses.append(witness) - - def get_alignment_table(self): - """Align the witnesses together into a table. - - See the AlignmentTable class for more details. - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> len(table.columns) - 6 - - """ - java_alignment_object = self.engine.align(self._get_raw_witnesses()) - table = AlignmentTable(java_alignment_object) - return table - - def get_apparatus(self, table = None): - """Get an apparatus object, see the Apparatus class for more details. 
- - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> apparatus = collation.get_apparatus() - - """ - alignment_table = table if table else self.get_alignment_table() - apparatus = self.engine.createApparatus(\ - alignment_table.get_raw_table()) - return Apparatus(apparatus) - - def set_tokenizer(self, tokenizer): - """Specify your own tokenizer to use instead of the default. - - >>> collation = Collation() - >>> tokenizer = DEFAULT_TOKENIZER() - >>> collation.set_tokenizer(tokenizer) - >>> collation.add_witness('one', 'The cat in the hat') - >>> print collation.witnesses[0].tokens[0] - The - - """ - self.engine.setTokenizer(tokenizer) - - def set_normalizer(self, normalizer): - """Specify your own normalizer to use instead of the default. - >>> collation = Collation() - >>> normaliser = DEFAULT_NORMALIZER() - >>> collation.set_normalizer(normaliser) - >>> collation.add_witness('one', 'The cat in the hat') - >>> print collation.witnesses[0].tokens[0].normalised - the - - """ - self.engine.setTokenNormalizer(normalizer) - - -class AlignmentTable(object): - """Variants stored in rows and columns. - - - When using any position numbers to get out objects from lists, - remember that position numbers count from 1, while Python counts from 0. 
- - An alignment table object has the following properties: - - alignment_table.columns - a list of the columns in the table - alignment_table.rows - a list of the rows in the table - alignment_table.repeating_tokens - tokens that are repeated - - An alignment table object also has the following methods: - - alignment_table.to_html - provides an HTML representation of the table - - """ - - def __init__(self, table = None): - self._table = table if table else _ALIGNMENT_TABLE() - self.columns = [Column(column) for column in self._table.getColumns()] - self.repeating_tokens = [token \ - for token in \ - self._table.findRepeatingTokens()] - - self.rows = [Row(row) for row in self._table.getRows()] - - def to_html(self): - """Provides an HTML representation of the alignment table. - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> print table.to_html().splitlines()[0] -

        Alignment Table:

        - - """ - return self._table.alignmentTableToHTML() - - def get_raw_table(self): - """Get the unbound table, not recommended as - the underlying API may change.""" - return self._table - - -class Row(object): - """A row of aligned words. - - A row object will have the following properties: - - row.cells - a list of the child cells of the row - row.sigil - the witness represented by the row - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> row = table.rows[0] - >>> print row.sigil - one - >>> len(row.cells) - 6 - - """ - - def __init__(self, row = None): - self._row = row if row else _ROW() - self.cells = [Cell(cell) for cell in self._row.cells] - self.sigil = self._row.getSigil() - - -class Cell(object): - """A single cell in the alignment table. - - A cell object will have the following properties: - - cell.empty - whether the cell is empty or not - cell.token - the token object (if there is one), i.e. 
the variant reading - cell.column - the column object within which the cell resides - cell.position - the position in the row - cell.sigil - the witness represented by the cell - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> row = table.rows[0] - >>> cell = row.cells[1] - >>> print cell.empty - False - >>> print cell.token - The - >>> print cell.column - The,hat - >>> print cell.position - 2 - >>> print cell.sigil - one - - """ - - def __init__(self, cell = None): - self._cell = cell if cell else _CELL() - self.empty = True if self._cell.isEmpty() else False - self.token = Variant(self._cell.getToken()) if not self.empty else None - self.column = Column(self._cell.column) - self.position = self._cell.getPosition() - self.sigil = self._cell.sigil - - -class Column(object): - """A column of aligned words. - - A column object will have the following properties: - - column.variants - the list of variants in the column - column.position - the position of the column in the alignment table - column.sigli - the source documents of the variants within the column - column.state - the state (i.e. type) of the variation - - A column object will also have the following methods: - - column.get_variant_by_sigi - get a particular witness' reading. - column.contains_witness - See if the column contains a witness. 
- - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> column = table.columns[0] - >>> print column.variants[0].content - The - >>> print column.position - 1 - >>> print column.sigli[0] - two - >>> print column.state - MATCH - - """ - - def __init__(self, column = None): - self._column = column if column else _COLUMN() - self.variants = [Variant(variant) \ - for variant in self._column.getVariants()] - self.position = self._column.getPosition() - self.sigli = [sigli for sigli in self._column.getSigli()] - self.state = str(self._column.getState()) - - def get_variant_by_sigil(self, sigil): - """Get a particular witness' reading. - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> column = table.columns[0] - >>> variant = column.get_variant_by_sigil('two') - >>> print variant.content - The - - """ - - return self._column.getToken(sigil) - - def contains_witness(self, sigil): - """See if the column contains a witness. - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> column = table.columns[0] - >>> column.contains_witness('one') - False - >>> column.contains_witness('two') - True - - """ - - if self._column.containsWitness(sigil): - return True - else: - return False - - def __str__(self): - return self._column.__str__() - - -class Witness(object): - """A reading of a textual work. - A reading contains tokens (i.e. 
words).""" - - def __init__(self, witness = None): - self._witness = witness if witness else _WITNESS() - self.sigil = self._witness.getSigil() - self.tokens = [Variant(token) for token in self._witness.getTokens()] - - def _get_token_objects(self): - """Return the raw token objects.""" - return self._witness.getTokens() - - def get_raw_witness(self): - """Return the unbound witness object, - not recommended as underlying API may change.""" - return self._witness - - def __str__(self): - return self.sigil - - -class Variant(object): - """An aligned token, e.g. a word. - - A variant object will have the following properties: - - variant.content - the input form of the variant - variant.normalised - the normalised form of the variant - variant.position - the position in the alignment table - variant.sigil - the source document of the variant - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> table = collation.get_alignment_table() - >>> column = table.columns[0] - >>> variant = column.variants[0] - >>> print variant - The - >>> print variant.content - The - >>> print variant.normalised - the - >>> print variant.position - 1 - >>> print variant.sigil - two - - """ - - def __init__(self, variant = None): - self._variant = variant if variant else _VARIANT() - self.content = self._variant.getContent() - self.normalised = self._variant.getNormalized() - self.position = self._variant.getPosition() - self.sigil = self._variant.getSigil() - - def __str__(self): - return self.content - - -class Apparatus(object): - """A Parallel Segmentation Apparatus - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> apparatus = collation.get_apparatus() - >>> print apparatus.sigli - [u'one', u'two'] - >>> print apparatus.entries[0].get_phrase('two') - The - - """ - - def __init__(self, 
apparatus = None): - self._apparatus = apparatus if apparatus \ - else _PARALLEL_SEGMENTATION_APPARATUS() - self.entries = [ApparatusEntry(entry) for \ - entry in self._apparatus.getEntries()] - self.sigli = [sigil for sigil in self._apparatus.getSigli()] - - -class ApparatusEntry(object): - """An entry in the Parallel Segmentation Apparatus. - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> apparatus = collation.get_apparatus() - >>> entry = apparatus.entries[0] - - """ - - def __init__(self, entry = None): - self._entry = entry if entry else _APPARATUS_ENTRY() - self.sigli = [sigil for sigil in self._entry.getSigli()] - - def get_phrase(self, sigil): - """Get a phrase by sigil.""" - return Phrase(self._entry.getPhrase(sigil)) - - -class Phrase(object): - """A sequence of tokens. - - >>> collation = Collation() - >>> collation.add_witness('one', 'The cat in the hat') - >>> collation.add_witness('two', 'The hat in the cat') - >>> apparatus = collation.get_apparatus() - >>> entry = apparatus.entries[0] - >>> print entry.get_phrase('two') - The - - """ - - def __init__(self, phrase = None): - self._phrase = phrase if phrase else _PHRASE() - self.normalised = self._phrase.getNormalized() - self.sigil = self._phrase.getSigil() - self.content = self._phrase.getContent() - self.begin_position = self._phrase.getBeginPosition() - self.end_position = self._phrase.getEndPosition() - - def get_first_token(self): - """Get the first token in the phrase.""" - return Variant(self._phrase.getFirstToken()) - - def get_last_token(self): - """Get the last token in the phrase.""" - return Variant(self._phrase.getLastToken()) - - def get_all_tokens(self): - """Get the whole phrase as a list of tokens.""" - return [Variant(token) for token in self._phrase.getTokens()] - - def __str__(self): - return self.content - - -if __name__ == "__main__": - import doctest - doctest.testmod() diff 
--git a/pom.xml b/pom.xml index 0d9c8b8cf..760d4ff69 100644 --- a/pom.xml +++ b/pom.xml @@ -23,9 +23,7 @@ collatex-core - collatex-cocoon collatex-tools - collatex-nodeps From e175a409fe31f4f76f1bdc343c73b2786705882b Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 08:00:41 +0100 Subject: [PATCH 08/52] Remove some unused code --- collatex-tools/src/main/assembly/app.xml | 26 ------------------------ pom.xml | 25 ----------------------- 2 files changed, 51 deletions(-) delete mode 100644 collatex-tools/src/main/assembly/app.xml diff --git a/collatex-tools/src/main/assembly/app.xml b/collatex-tools/src/main/assembly/app.xml deleted file mode 100644 index 072825b14..000000000 --- a/collatex-tools/src/main/assembly/app.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - app - - zip - tar.bz2 - - - - target/appassembler - - 0755 - - bin/** - - - - target/appassembler - - - bin/** - - - - \ No newline at end of file diff --git a/pom.xml b/pom.xml index 760d4ff69..151d54437 100644 --- a/pom.xml +++ b/pom.xml @@ -61,7 +61,6 @@ 2.0.1 - 1.15 @@ -92,24 +91,6 @@ ${project.version} - - javax.servlet - servlet-api - 2.5 - provided - - - - com.sun.jersey - jersey-server - ${jersey.version} - - - com.sun.jersey - jersey-grizzly2 - ${jersey.version} - - org.glassfish.grizzly grizzly-http-server @@ -122,12 +103,6 @@ 2.6 - - org.codehaus.jackson - jackson-mapper-asl - 1.9.10 - - org.neo4j neo4j From 7d194bc4971670d82f226ccace6a7dd7c7628660 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 08:07:26 +0100 Subject: [PATCH 09/52] Remove Neo4J-based graph implementation; focus on in-memory model with the library --- collatex-core/pom.xml | 6 - .../collatex/dekker/PhraseMatchDetector.java | 5 - .../neo4j/Neo4jGraphRelationships.java | 29 --- .../collatex/neo4j/Neo4jVariantGraph.java | 211 ------------------ .../neo4j/Neo4jVariantGraphAdapter.java | 40 ---- .../collatex/neo4j/Neo4jVariantGraphEdge.java | 118 ---------- 
.../neo4j/Neo4jVariantGraphTransposition.java | 91 -------- .../neo4j/Neo4jVariantGraphVertex.java | 148 ------------ .../collatex/neo4j/package-info.java | 25 --- .../collatex/dekker/SpencerHoweTest.java | 1 - .../collatex/dekker/VariantGraphTest.java | 1 - pom.xml | 31 --- 12 files changed, 706 deletions(-) delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jGraphRelationships.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraph.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphAdapter.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphEdge.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphTransposition.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphVertex.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/neo4j/package-info.java diff --git a/collatex-core/pom.xml b/collatex-core/pom.xml index 9d8b08342..a1598213e 100644 --- a/collatex-core/pom.xml +++ b/collatex-core/pom.xml @@ -11,15 +11,9 @@ CollateX Core A Java library for collating textual sources, for example, to produce an apparatus. 
- - org.neo4j - neo4j - true - net.sf.jung jung-graph-impl - true net.sf.jung diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java index 3a1abbcb0..a92ebfef8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java @@ -23,11 +23,6 @@ import com.google.common.collect.Sets; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.neo4j.Neo4jGraphRelationships; -import eu.interedition.collatex.neo4j.Neo4jVariantGraphVertex; -import org.neo4j.graphdb.Direction; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Relationship; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jGraphRelationships.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jGraphRelationships.java deleted file mode 100644 index 5685fee9d..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jGraphRelationships.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.neo4j; - -import org.neo4j.graphdb.RelationshipType; - -/** - * @author Gregor Middell - */ -public enum Neo4jGraphRelationships implements RelationshipType { - PATH, TRANSPOSITION; -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraph.java deleted file mode 100644 index b08b74db7..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraph.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.neo4j; - -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.util.VariantGraphTraversal; -import org.neo4j.graphdb.GraphDatabaseService; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Relationship; - -import java.util.Collections; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; - -import static eu.interedition.collatex.neo4j.Neo4jGraphRelationships.PATH; -import static java.util.Collections.singleton; - -/** - * @author Gregor Middell - */ -public class Neo4jVariantGraph implements VariantGraph { - private static final Logger LOG = Logger.getLogger(Neo4jVariantGraph.class.getName()); - - final GraphDatabaseService database; - final Neo4jVariantGraphAdapter adapter; - - final Neo4jVariantGraphVertex start; - final Neo4jVariantGraphVertex end; - - public Neo4jVariantGraph(GraphDatabaseService database, Neo4jVariantGraphAdapter adapter) { - this(database, database.createNode(), database.createNode(), adapter); - connect(start, end, Collections.emptySet()); - } - - public Neo4jVariantGraph(GraphDatabaseService database, Node start, Node end, Neo4jVariantGraphAdapter adapter) { - this.database = database; - this.adapter = adapter; - this.start = (Neo4jVariantGraphVertex) vertexWrapper.apply(start); - this.end = (Neo4jVariantGraphVertex) vertexWrapper.apply(end); - } - - @Override - public Vertex getStart() { - return start; - } - - @Override - public Vertex getEnd() { - return end; - } - - @Override - public Set transpositions() { - final Set transpositions = Sets.newHashSet(); - for (Vertex v : vertices()) { - Iterables.addAll(transpositions, v.transpositions()); - } - return transpositions; - } - - 
@Override - public Iterable vertices() { - return vertices(null); - } - - @Override - public Iterable vertices(final Set witnesses) { - return VariantGraphTraversal.of(this, witnesses); - } - - @Override - public Iterable edges() { - return edges(null); - } - - @Override - public Iterable edges(final Set witnesses) { - return VariantGraphTraversal.of(this, witnesses).edges(); - } - - @Override - public Neo4jVariantGraphVertex add(Token token) { - if (LOG.isLoggable(Level.FINER)) { - LOG.log(Level.FINER, "Creating new vertex with {0}", token); - } - return new Neo4jVariantGraphVertex(this, singleton(token)); - } - - @Override - public Edge connect(VariantGraph.Vertex from, VariantGraph.Vertex to, Set witnesses) { - Preconditions.checkArgument(!from.equals(to)); - - if (from.equals(start)) { - final Edge startEndEdge = edgeBetween(start, end); - if (startEndEdge != null) { - if (to.equals(end)) { - witnesses = Sets.newHashSet(witnesses); - witnesses.addAll(startEndEdge.witnesses()); - } - startEndEdge.delete(); - } - } - - for (Edge e : from.outgoing()) { - if (to.equals(e.to())) { - return e.add(witnesses); - } - } - return new Neo4jVariantGraphEdge(this, (Neo4jVariantGraphVertex) from, (Neo4jVariantGraphVertex) to, witnesses); - } - - @Override - public Edge register(Witness witness) { - return connect(start, end, Collections.singleton(witness)); - } - - @Override - public Transposition transpose(Set vertices) { - Preconditions.checkArgument(!vertices.isEmpty()); - for (Transposition transposition : vertices.iterator().next().transpositions()) { - if (Sets.newHashSet(transposition).equals(vertices)) { - return transposition; - } - } - return new Neo4jVariantGraphTransposition(this, vertices); - } - - @Override - public Edge edgeBetween(Vertex a, Vertex b) { - final Node aNode = ((Neo4jVariantGraphVertex)a).getNode(); - final Node bNode = ((Neo4jVariantGraphVertex)b).getNode(); - for (Relationship r : aNode.getRelationships(PATH)) { - if 
(r.getOtherNode(aNode).equals(bNode)) { - return new Neo4jVariantGraphEdge(this, r); - } - } - return null; - } - - @Override - public Set witnesses() { - final Set witnesses = Sets.newHashSet(); - for (Edge e : start.outgoing()) { - witnesses.addAll(e.witnesses()); - } - return witnesses; - } - - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Neo4jVariantGraph) { - return start.equals(((Neo4jVariantGraph) obj).start); - } - return super.equals(obj); - } - - @Override - public int hashCode() { - return start.hashCode(); - } - - @Override - public String toString() { - return Iterables.toString(witnesses()); - } - - final Function vertexWrapper = new Function() { - @Override - public VariantGraph.Vertex apply(Node input) { - return new Neo4jVariantGraphVertex(Neo4jVariantGraph.this, input); - } - }; - - final Function edgeWrapper = new Function() { - @Override - public VariantGraph.Edge apply(Relationship input) { - return new Neo4jVariantGraphEdge(Neo4jVariantGraph.this, input); - } - }; - - final Function transpositionWrapper = new Function() { - @Override - public VariantGraph.Transposition apply(Node input) { - return new Neo4jVariantGraphTransposition(Neo4jVariantGraph.this, input); - } - }; -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphAdapter.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphAdapter.java deleted file mode 100644 index 2daff43ca..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.neo4j; - -import eu.interedition.collatex.Token; -import eu.interedition.collatex.Witness; - -import java.util.Set; - -/** - * @author Gregor Middell - */ -public interface Neo4jVariantGraphAdapter { - - Set getTokens(Neo4jVariantGraphVertex vertex, Set witnesses); - - void setTokens(Neo4jVariantGraphVertex vertex, Set tokens); - - Set getWitnesses(Neo4jVariantGraphEdge edge); - - void setWitnesses(Neo4jVariantGraphEdge edge, Set witnesses); - -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphEdge.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphEdge.java deleted file mode 100644 index 8c03d0a3d..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphEdge.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.neo4j; - -import com.google.common.base.Predicate; -import com.google.common.collect.Sets; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import org.neo4j.graphdb.Relationship; - -import java.util.Set; - -/** - * @author Gregor Middell - */ -public class Neo4jVariantGraphEdge implements VariantGraph.Edge { - protected final Neo4jVariantGraph graph; - protected final Relationship relationship; - - public Neo4jVariantGraphEdge(Neo4jVariantGraph graph, Relationship relationship) { - this.graph = graph; - this.relationship = relationship; - } - - public Neo4jVariantGraphEdge(Neo4jVariantGraph graph, Neo4jVariantGraphVertex from, Neo4jVariantGraphVertex to, Set witnesses) { - this(graph, from.getNode().createRelationshipTo(to.getNode(), Neo4jGraphRelationships.PATH)); - graph.adapter.setWitnesses(this, witnesses); - } - - public boolean traversableWith(Set witnesses) { - if (witnesses == null || witnesses.isEmpty()) { - return true; - } - final Set edgeWitnesses = witnesses(); - for (Witness witness : witnesses) { - if (edgeWitnesses.contains(witness)) { - return true; - } - } - return false; - } - - @Override - public VariantGraph.Edge add(Set witnesses) { - graph.adapter.setWitnesses(this, Sets.union(witnesses(), witnesses)); - return this; - } - - @Override - public Set witnesses() { - return graph.adapter.getWitnesses(this); - } - - public static Predicate createTraversableFilter(final Set witnesses) { - return new Predicate() { - - @Override - public boolean apply(VariantGraph.Edge input) { - return ((Neo4jVariantGraphEdge) input).traversableWith(witnesses); - } - }; - } - - @Override - public VariantGraph graph() { - return graph; - } - - @Override - public VariantGraph.Vertex from() { - return graph.vertexWrapper.apply(relationship.getStartNode()); - } - - @Override - public VariantGraph.Vertex to() { - return graph.vertexWrapper.apply(relationship.getEndNode()); - } - - @Override 
- public void delete() { - relationship.delete(); - } - - @Override - public int hashCode() { - return relationship.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof VariantGraph.Edge) { - return relationship.equals(((Neo4jVariantGraphEdge) obj).relationship); - } - return super.equals(obj); - } - - @Override - public String toString() { - return new StringBuilder(from().toString()).append(" -> ").append(to().toString()).toString(); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphTransposition.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphTransposition.java deleted file mode 100644 index 486fd4c85..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphTransposition.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.neo4j; - -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; -import eu.interedition.collatex.VariantGraph; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Relationship; - -import com.google.common.base.Objects; - -import javax.annotation.Nullable; -import java.util.Iterator; -import java.util.Set; - -/** - * @author Gregor Middell - */ -public class Neo4jVariantGraphTransposition implements VariantGraph.Transposition { - - private final Neo4jVariantGraph graph; - private final Node node; - - public Neo4jVariantGraphTransposition(Neo4jVariantGraph graph, Node node) { - this.graph = graph; - this.node = node; - } - - public Neo4jVariantGraphTransposition(Neo4jVariantGraph graph, Set vertices) { - this(graph, graph.database.createNode()); - for (Neo4jVariantGraphVertex vertex : Iterables.filter(vertices, Neo4jVariantGraphVertex.class)) { - this.node.createRelationshipTo(vertex.node, Neo4jGraphRelationships.TRANSPOSITION); - } - } - - @Override - public Iterator iterator() { - return Iterators.transform(node.getRelationships(Neo4jGraphRelationships.TRANSPOSITION).iterator(), new Function() { - @Override - public VariantGraph.Vertex apply(@Nullable Relationship relationship) { - return graph.vertexWrapper.apply(relationship.getEndNode()); - } - }); - } - - @Override - public void delete() { - for (Relationship r : node.getRelationships(Neo4jGraphRelationships.TRANSPOSITION)) { - r.delete(); - } - node.delete(); - } - - @Override - public int hashCode() { - return node.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Neo4jVariantGraphTransposition) { - return node.equals(((Neo4jVariantGraphTransposition) obj).node); - } - return super.equals(obj); - } - - @Override - public String toString() { - return 
Objects.toStringHelper(this).addValue(node).toString(); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphVertex.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphVertex.java deleted file mode 100644 index 73a9f20b0..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/Neo4jVariantGraphVertex.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.neo4j; - -import static com.google.common.collect.Iterables.*; -import static org.neo4j.graphdb.Direction.*; - -import java.util.Set; - -import javax.annotation.Nullable; - -import eu.interedition.collatex.VariantGraph; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Relationship; - -import com.google.common.base.Function; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; - -import eu.interedition.collatex.Token; -import eu.interedition.collatex.Witness; - -/** - * @author Gregor Middell - */ -public class Neo4jVariantGraphVertex implements VariantGraph.Vertex { - protected final Neo4jVariantGraph graph; - protected final Node node; - - public Neo4jVariantGraphVertex(Neo4jVariantGraph graph, Node node) { - this.graph = graph; - this.node = node; - } - - public Neo4jVariantGraphVertex(Neo4jVariantGraph graph, Set tokens) { - this(graph, graph.database.createNode()); - setTokens(tokens); - } - - @Override - public Iterable incoming() { - return incoming(null); - } - - @Override - public Iterable incoming(Set witnesses) { - return Iterables.filter(transform(node.getRelationships(Neo4jGraphRelationships.PATH, INCOMING), graph.edgeWrapper), Neo4jVariantGraphEdge.createTraversableFilter(witnesses)); - } - - @Override - public Iterable outgoing() { - return outgoing(null); - } - - @Override - public Iterable outgoing(Set witnesses) { - return Iterables.filter(transform(node.getRelationships(Neo4jGraphRelationships.PATH, OUTGOING), graph.edgeWrapper), Neo4jVariantGraphEdge.createTraversableFilter(witnesses)); - } - - @Override - public Iterable transpositions() { - return transform(node.getRelationships(Neo4jGraphRelationships.TRANSPOSITION), new Function() { - @Override - public VariantGraph.Transposition apply(@Nullable Relationship relationship) { - return graph.transpositionWrapper.apply(relationship.getStartNode()); - } - }); - } - - @Override - public Set tokens() { - return 
tokens(null); - } - - @Override - public Set tokens(Set witnesses) { - return graph.adapter.getTokens(this, witnesses); - } - - @Override - public Set witnesses() { - final Set witnesses = Sets.newHashSet(); - for (Token token : tokens()) { - witnesses.add(token.getWitness()); - } - return witnesses; - } - - @Override - public void add(Iterable tokens) { - final Set tokenSet = Sets.newHashSet(tokens()); - Iterables.addAll(tokenSet, tokens); - setTokens(tokenSet); - } - - public void setTokens(Set tokens) { - graph.adapter.setTokens(this, tokens); - } - - @Override - public String toString() { - return Iterables.toString(tokens()); - } - - @Override - public VariantGraph graph() { - return graph; - } - - public Node getNode() { - return node; - } - - @Override - public void delete() { - node.delete(); - } - - @Override - public int hashCode() { - return node.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Neo4jVariantGraphVertex) { - return node.equals(((Neo4jVariantGraphVertex) obj).node); - } - return super.equals(obj); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/neo4j/package-info.java deleted file mode 100644 index d3ac49706..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/neo4j/package-info.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -/** - * Persistent implementation of variant graphs based on the Neo4j Graph - * Database. - * - */ -package eu.interedition.collatex.neo4j; \ No newline at end of file diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java index 94560e4f3..a9c36d9c9 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java @@ -25,7 +25,6 @@ import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; import eu.interedition.collatex.Token; -import eu.interedition.collatex.neo4j.Neo4jVariantGraphVertex; import eu.interedition.collatex.simple.SimpleWitness; import eu.interedition.collatex.util.VariantGraphRanking; import org.junit.Test; diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java index 9056ff8ce..b68ce9609 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java @@ -24,7 +24,6 @@ import java.util.List; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.neo4j.Neo4jVariantGraphVertex; import org.junit.Assert; import org.junit.Test; diff --git a/pom.xml b/pom.xml index 151d54437..9769987f8 100644 --- a/pom.xml +++ b/pom.xml @@ -103,37 +103,6 @@ 2.6 - - org.neo4j - neo4j - 1.8 - - - org.neo4j - neo4j-cypher - - - org.neo4j - neo4j-graph-matching - - - org.neo4j - neo4j-graph-algo - - - org.neo4j - neo4j-jmx - - - org.neo4j - neo4j-lucene-index - - - org.neo4j - 
neo4j-udc - - - net.sf.jung jung-graph-impl From 9175927b88ad8da4cbb8d5e48e6614e1eb3c1522 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 09:21:14 +0100 Subject: [PATCH 10/52] Turned VariantGraph interfaces into concrete JUNG-based implementation --- .../interedition/collatex/VariantGraph.java | 265 +++++++++++++++--- .../collatex/jung/JungVariantGraph.java | 154 ---------- .../collatex/jung/JungVariantGraphEdge.java | 78 ------ .../jung/JungVariantGraphTransposition.java | 62 ---- .../collatex/jung/JungVariantGraphVertex.java | 129 --------- .../collatex/jung/package-info.java | 25 -- .../interedition/collatex/AbstractTest.java | 3 +- .../collatex/dekker/AlignmentTest.java | 3 +- .../dekker/matrix/MatchTableTest.java | 3 +- .../collatex/lab/CollateXLaboratory.java | 7 +- .../lab/VariantGraphLayoutAdapter.java | 14 +- .../collatex/lab/VariantGraphPanel.java | 33 +-- .../collatex/neo4j/VariantGraphTest.java | 3 +- .../interedition/collatex/tools/CollateX.java | 3 +- .../collatex/tools/CollatorService.java | 4 +- 15 files changed, 260 insertions(+), 526 deletions(-) delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraph.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphEdge.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphTransposition.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphVertex.java delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/jung/package-info.java diff --git a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index a5c78ac1d..56fa5277b 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -20,100 +20,293 @@ package 
eu.interedition.collatex; import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; +import com.google.common.collect.HashMultimap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.collect.Multimap; import com.google.common.collect.Sets; +import edu.uci.ics.jung.graph.DirectedSparseGraph; +import eu.interedition.collatex.util.VariantGraphTraversal; import javax.annotation.Nullable; import java.util.ArrayDeque; +import java.util.Collections; import java.util.Deque; +import java.util.Iterator; import java.util.List; import java.util.Set; /** * @author Gregor Middell */ -public interface VariantGraph { - Vertex getStart(); +public class VariantGraph extends DirectedSparseGraph { + final VariantGraph.Vertex start; + final VariantGraph.Vertex end; + final Multimap transpositionIndex = HashMultimap.create(); + + public VariantGraph() { + super(); + addVertex(this.start = new VariantGraph.Vertex(this, Collections.emptySet())); + addVertex(this.end = new VariantGraph.Vertex(this, Collections.emptySet())); + connect(this.start, this.end, Collections.emptySet()); + } - Vertex getEnd(); + public Vertex getStart() { + return start; + } - Set transpositions(); + public Vertex getEnd() { + return end; + } + + public Set transpositions() { + return Sets.newHashSet(transpositionIndex.values()); + } - Iterable vertices(); + public Iterable vertices() { + return vertices(null); + } - Iterable vertices(Set witnesses); + public Iterable vertices(Set witnesses) { + return VariantGraphTraversal.of(this, witnesses); + } - Iterable edges(); + public Iterable edges() { + return edges(null); + } - Iterable edges(Set witnesses); + public Iterable edges(Set witnesses) { + return VariantGraphTraversal.of(this, witnesses).edges(); + } - Vertex add(Token token); + public Vertex add(Token token) { + final VariantGraph.Vertex 
vertex = new VariantGraph.Vertex(this, Collections.singleton(token)); + addVertex(vertex); + return vertex; + } - Edge connect(Vertex from, Vertex to, Set witnesses); + public Edge connect(Vertex from, Vertex to, Set witnesses) { + Preconditions.checkArgument(!from.equals(to)); - Edge register(Witness witness); + if (from.equals(start)) { + final Edge startEndEdge = edgeBetween(start, end); + if (startEndEdge != null) { + if (to.equals(end)) { + witnesses = Sets.newHashSet(witnesses); + witnesses.addAll(startEndEdge.witnesses()); + } + startEndEdge.delete(); + } + } - Transposition transpose(Set vertices); + for (Edge e : from.outgoing()) { + if (to.equals(e.to())) { + return e.add(witnesses); + } + } - Edge edgeBetween(Vertex a, Vertex b); + final VariantGraph.Edge edge = new VariantGraph.Edge(this, witnesses); + addEdge(edge, from, to); + return edge; + } - Set witnesses(); + public Edge register(Witness witness) { + return connect(start, end, Collections.singleton(witness)); + } + + public Transposition transpose(Set vertices) { + Preconditions.checkArgument(!vertices.isEmpty()); + for (Transposition transposition : vertices.iterator().next().transpositions()) { + if (Sets.newHashSet(transposition).equals(vertices)) { + return transposition; + } + } + return new VariantGraph.Transposition(this, vertices); + } + + public Edge edgeBetween(Vertex a, Vertex b) { + return findEdge(a, b); + } + + public Set witnesses() { + Set witnesses = Sets.newHashSet(); + for (Edge edge : start.outgoing()) { + witnesses.addAll(edge.witnesses()); + } + return witnesses; + } + + @Override + public String toString() { + return Iterables.toString(witnesses()); + } /** * @author Gregor Middell */ - interface Edge { + public static class Edge { - VariantGraph graph(); + final VariantGraph graph; + final Set witnesses; - Edge add(Set witnesses); + public Edge(VariantGraph graph, Set witnesses) { + this.graph = graph; + this.witnesses = Sets.newHashSet(witnesses); + } - Set witnesses(); + 
public VariantGraph.Edge add(Set witnesses) { + this.witnesses.addAll(witnesses); + return this; + } - Vertex from(); + public Set witnesses() { + return Collections.unmodifiableSet(witnesses); + } - Vertex to(); + public VariantGraph graph() { + return graph; + } + + public VariantGraph.Vertex from() { + return graph.getEndpoints(this).getFirst(); + } + + public VariantGraph.Vertex to() { + return graph.getEndpoints(this).getSecond(); + } + + public void delete() { + graph.removeEdge(this); + } + + @Override + public String toString() { + return Iterables.toString(witnesses); + } - void delete(); } /** * @author Gregor Middell */ - interface Vertex { - Iterable incoming(); + public static class Vertex { + private final VariantGraph graph; + private final Set tokens; + + public Vertex(VariantGraph graph, Set tokens) { + this.graph = graph; + this.tokens = Sets.newHashSet(tokens); + } - Iterable incoming(Set witnesses); + public Iterable incoming() { + return incoming(null); + } - Iterable outgoing(); + public Iterable incoming(final Set witnesses) { + return paths(graph.getInEdges(this), witnesses); + } - Iterable outgoing(Set witnesses); + public Iterable outgoing() { + return outgoing(null); + } - Iterable transpositions(); + public Iterable outgoing(Set witnesses) { + return paths(graph.getOutEdges(this), witnesses); + } - Set tokens(); + public Iterable transpositions() { + return graph.transpositionIndex.get(this); + } - Set tokens(Set witnesses); + public Set tokens() { + return tokens(null); + } + + public Set tokens(final Set witnesses) { + return Collections.unmodifiableSet(Sets.filter(tokens, witnesses == null ? 
Predicates.alwaysTrue() : new Predicate() { + @Override + public boolean apply(@Nullable Token token) { + return witnesses.contains(token.getWitness()); + } + })); + } + + public Set witnesses() { + final Set witnesses = Sets.newHashSet(); + for (VariantGraph.Edge edge : incoming()) { + witnesses.addAll(edge.witnesses()); + } + return witnesses; + } + + public void add(Iterable tokens) { + Iterables.addAll(this.tokens, tokens); + } - Set witnesses(); + public VariantGraph graph() { + return graph; + } - void add(Iterable tokens); + public void delete() { + graph.removeVertex(this); + } - VariantGraph graph(); + public String toString() { + return Iterables.toString(tokens); + } - void delete(); + protected static Iterable paths(final Iterable edges, final Set witnesses) { + return Iterables.filter(edges, (witnesses == null ? Predicates.alwaysTrue() : new Predicate() { + @Override + public boolean apply(@Nullable VariantGraph.Edge edge) { + for (Witness edgeWitness : edge.witnesses()) { + if (witnesses.contains(edgeWitness)) { + return true; + } + } + return false; + } + })); + } } /** * @author Gregor Middell */ - interface Transposition extends Iterable { - void delete(); + public static class Transposition implements Iterable { + private final VariantGraph graph; + private final Set vertices; + + public Transposition(VariantGraph graph, Set vertices) { + this.graph = graph; + this.vertices = Sets.newHashSet(vertices); + for (VariantGraph.Vertex vertex : this.vertices) { + graph.transpositionIndex.put(vertex, this); + } + } + + public void delete() { + for (VariantGraph.Vertex vertex : this.vertices) { + graph.transpositionIndex.remove(vertex, this); + } + } + + @Override + public Iterator iterator() { + return vertices.iterator(); + } + + @Override + public String toString() { + return Iterables.toString(vertices); + } } - final Function JOIN = new Function() { + public static final Function JOIN = new Function() { @Override public VariantGraph apply(@Nullable 
VariantGraph graph) { final Set processed = Sets.newHashSet(); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraph.java deleted file mode 100644 index 5d1e73a72..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraph.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.jung; - -import com.google.common.base.Preconditions; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; -import edu.uci.ics.jung.graph.DirectedSparseGraph; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.util.VariantGraphTraversal; - -import java.util.Collections; -import java.util.Set; - -/** - * @author Gregor Middell - */ -public class JungVariantGraph extends DirectedSparseGraph implements VariantGraph { - - final JungVariantGraphVertex start; - final JungVariantGraphVertex end; - final Multimap transpositionIndex = HashMultimap.create(); - - public JungVariantGraph() { - super(); - addVertex(this.start = new JungVariantGraphVertex(this, Collections.emptySet())); - addVertex(this.end = new JungVariantGraphVertex(this, Collections.emptySet())); - connect(this.start, this.end, Collections.emptySet()); - } - - @Override - public Vertex getStart() { - return start; - } - - @Override - public Vertex getEnd() { - return end; - } - - @Override - public Set transpositions() { - return Sets.newHashSet(transpositionIndex.values()); - } - - @Override - public Iterable vertices() { - return vertices(null); - } - - @Override - public Iterable vertices(Set witnesses) { - return VariantGraphTraversal.of(this, witnesses); - } - - @Override - public Iterable edges() { - return edges(null); - } - - @Override - public Iterable edges(Set witnesses) { - return VariantGraphTraversal.of(this, witnesses).edges(); - } - - @Override - public Vertex add(Token token) { - final JungVariantGraphVertex vertex = new JungVariantGraphVertex(this, Collections.singleton(token)); - addVertex(vertex); - return vertex; - } - - @Override - public Edge connect(Vertex from, Vertex to, Set witnesses) { - 
Preconditions.checkArgument(!from.equals(to)); - - if (from.equals(start)) { - final Edge startEndEdge = edgeBetween(start, end); - if (startEndEdge != null) { - if (to.equals(end)) { - witnesses = Sets.newHashSet(witnesses); - witnesses.addAll(startEndEdge.witnesses()); - } - startEndEdge.delete(); - } - } - - for (Edge e : from.outgoing()) { - if (to.equals(e.to())) { - return e.add(witnesses); - } - } - - final JungVariantGraphEdge edge = new JungVariantGraphEdge(this, witnesses); - addEdge(edge, (JungVariantGraphVertex) from, (JungVariantGraphVertex) to); - return edge; - } - - @Override - public Edge register(Witness witness) { - return connect(start, end, Collections.singleton(witness)); - } - - @Override - public Transposition transpose(Set vertices) { - Preconditions.checkArgument(!vertices.isEmpty()); - for (Transposition transposition : vertices.iterator().next().transpositions()) { - if (Sets.newHashSet(transposition).equals(vertices)) { - return transposition; - } - } - return new JungVariantGraphTransposition(this, vertices); - } - - @Override - public Edge edgeBetween(Vertex a, Vertex b) { - return findEdge((JungVariantGraphVertex) a, (JungVariantGraphVertex) b); - } - - @Override - public Set witnesses() { - Set witnesses = Sets.newHashSet(); - for (Edge edge : start.outgoing()) { - witnesses.addAll(edge.witnesses()); - } - return witnesses; - } - - @Override - public String toString() { - return Iterables.toString(witnesses()); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphEdge.java b/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphEdge.java deleted file mode 100644 index 54d50ab7d..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphEdge.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. 
- * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.jung; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; - -import java.util.Collections; -import java.util.Set; - -/** - * @author Gregor Middell - */ -public class JungVariantGraphEdge implements VariantGraph.Edge { - - final JungVariantGraph graph; - final Set witnesses; - - public JungVariantGraphEdge(JungVariantGraph graph, Set witnesses) { - this.graph = graph; - this.witnesses = Sets.newHashSet(witnesses); - } - - @Override - public VariantGraph.Edge add(Set witnesses) { - this.witnesses.addAll(witnesses); - return this; - } - - @Override - public Set witnesses() { - return Collections.unmodifiableSet(witnesses); - } - - @Override - public VariantGraph graph() { - return graph; - } - - @Override - public VariantGraph.Vertex from() { - return graph.getEndpoints(this).getFirst(); - } - - @Override - public VariantGraph.Vertex to() { - return graph.getEndpoints(this).getSecond(); - } - - @Override - public void delete() { - graph.removeEdge(this); - } - - @Override - public String toString() { - return Iterables.toString(witnesses); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphTransposition.java 
b/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphTransposition.java deleted file mode 100644 index 6618bd99e..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphTransposition.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.jung; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; -import com.google.common.collect.Sets; -import eu.interedition.collatex.VariantGraph; - -import java.util.Iterator; -import java.util.Set; - -/** - * @author Gregor Middell - */ -public class JungVariantGraphTransposition implements VariantGraph.Transposition { - - private final JungVariantGraph graph; - private final Set vertices; - - public JungVariantGraphTransposition(JungVariantGraph graph, Set vertices) { - this.graph = graph; - this.vertices = Sets.newHashSet(vertices); - for (VariantGraph.Vertex vertex : this.vertices) { - graph.transpositionIndex.put(vertex, this); - } - } - - @Override - public void delete() { - for (VariantGraph.Vertex vertex : this.vertices) { - graph.transpositionIndex.remove(vertex, this); - } - } - - @Override - public Iterator iterator() { - return vertices.iterator(); - } - - @Override - public String toString() { - 
return Iterables.toString(vertices); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphVertex.java b/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphVertex.java deleted file mode 100644 index b136a19a7..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/jung/JungVariantGraphVertex.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.jung; - -import com.google.common.base.Predicate; -import com.google.common.base.Predicates; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; - -import javax.annotation.Nullable; -import java.util.Collections; -import java.util.Set; - - -/** - * @author Gregor Middell - */ -public class JungVariantGraphVertex implements VariantGraph.Vertex { - private final JungVariantGraph graph; - private final Set tokens; - - public JungVariantGraphVertex(JungVariantGraph graph, Set tokens) { - this.graph = graph; - this.tokens = Sets.newHashSet(tokens); - } - - @Override - public Iterable incoming() { - return incoming(null); - } - - @Override - public Iterable incoming(final Set witnesses) { - return paths(graph.getInEdges(this), witnesses); - } - - @Override - public Iterable outgoing() { - return outgoing(null); - } - - @Override - public Iterable outgoing(Set witnesses) { - return paths(graph.getOutEdges(this), witnesses); - } - - @Override - public Iterable transpositions() { - return graph.transpositionIndex.get(this); - } - - @Override - public Set tokens() { - return tokens(null); - } - - @Override - public Set tokens(final Set witnesses) { - return Collections.unmodifiableSet(Sets.filter(tokens, witnesses == null ? 
Predicates.alwaysTrue() : new Predicate() { - @Override - public boolean apply(@Nullable Token token) { - return witnesses.contains(token.getWitness()); - } - })); - } - - @Override - public Set witnesses() { - final Set witnesses = Sets.newHashSet(); - for (VariantGraph.Edge edge : incoming()) { - witnesses.addAll(edge.witnesses()); - } - return witnesses; - } - - @Override - public void add(Iterable tokens) { - Iterables.addAll(this.tokens, tokens); - } - - @Override - public VariantGraph graph() { - return graph; - } - - @Override - public void delete() { - graph.removeVertex(this); - } - - @Override - public String toString() { - return Iterables.toString(tokens); - } - - protected static Iterable paths(final Iterable edges, final Set witnesses) { - return Iterables.filter(edges, (witnesses == null ? Predicates.alwaysTrue() : new Predicate() { - @Override - public boolean apply(@Nullable JungVariantGraphEdge edge) { - for (Witness edgeWitness : edge.witnesses()) { - if (witnesses.contains(edgeWitness)) { - return true; - } - } - return false; - } - })); - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/jung/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/jung/package-info.java deleted file mode 100644 index 4b419d7a7..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/jung/package-info.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -/** - * In-memory implementation of variant graphs based on the Java - * Universal Network/Graph Framework (JUNG). - * - */ -package eu.interedition.collatex.jung; \ No newline at end of file diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 2d71bed5b..52ec8513d 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -30,7 +30,6 @@ import com.google.common.collect.Sets; import eu.interedition.collatex.dekker.DekkerAlgorithm; import eu.interedition.collatex.dekker.Match; -import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleWitness; @@ -74,7 +73,7 @@ protected SimpleWitness[] createWitnesses(String... contents) { } protected VariantGraph collate(SimpleWitness... 
witnesses) { - final VariantGraph graph = new JungVariantGraph(); + final VariantGraph graph = new VariantGraph(); collate(graph, witnesses); return graph; } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java index 525494b14..1e5ba0022 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java @@ -38,7 +38,6 @@ import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; -import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; @@ -151,7 +150,7 @@ public void testOrderIndependenceTroy() throws XMLStreamException { witnesses.add(new SimpleWitness("w3", "Y A X ")); setCollationAlgorithm(CollationAlgorithmFactory.dekker(new EqualityTokenComparator())); - VariantGraph graph = new JungVariantGraph(); + VariantGraph graph = new VariantGraph(); collationAlgorithm.collate(graph, witnesses.toArray(new SimpleWitness[0])); StringWriter writer = new StringWriter(); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java index b60df399d..8cc1006dc 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java @@ -33,7 +33,6 @@ import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.jung.JungVariantGraph; import 
eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleWitness; @@ -62,7 +61,7 @@ private void assertVectorEquals(int x, int y, int length, Island island) { @Test public void testTableCreationEmptyGraph() { - final VariantGraph graph = new JungVariantGraph(); + final VariantGraph graph = new VariantGraph(); SimpleWitness[] witnesses = createWitnesses("a b"); MatchTable table = MatchTable.create(graph, witnesses[0]); assertEquals(1, table.columnList().size()); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java index fb1583a1a..43ca40d8c 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java @@ -24,7 +24,6 @@ import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.dekker.matrix.MatchTable; -import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.matching.StrictEqualityTokenComparator; import eu.interedition.collatex.simple.SimpleToken; @@ -81,7 +80,7 @@ public CollateXLaboratory() { this.algorithm.setMaximumSize(new Dimension(200, this.algorithm.getMaximumSize().height)); this.tabbedPane = new JTabbedPane(); - this.tabbedPane.addTab("Variant Graph", variantGraphPanel = new VariantGraphPanel(new JungVariantGraph())); + this.tabbedPane.addTab("Variant Graph", variantGraphPanel = new VariantGraphPanel(new VariantGraph())); this.tabbedPane.addTab("Match Table", new JScrollPane(matchMatrixTable)); matchMatrixTable.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); matchMatrixTable.setShowGrid(true); @@ -164,7 +163,7 @@ public void actionPerformed(ActionEvent e) { final EqualityTokenComparator comparator = new 
EqualityTokenComparator(); - final JungVariantGraph variantGraph = new JungVariantGraph(); + final VariantGraph variantGraph = new VariantGraph(); final CollationAlgorithm collator; if ("Dekker".equals(algorithm.getSelectedItem())) { @@ -205,7 +204,7 @@ public void actionPerformed(ActionEvent e) { } final StrictEqualityTokenComparator comparator = new StrictEqualityTokenComparator(); - final VariantGraph vg = new JungVariantGraph(); + final VariantGraph vg = new VariantGraph(); int outlierTranspositionsSizeLimit = 3; for (int i = 0; i <= w.size() - 2; i++) { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java index 6dbfd4d28..15c058a3b 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java @@ -20,9 +20,7 @@ package eu.interedition.collatex.lab; import edu.uci.ics.jung.algorithms.layout.AbstractLayout; -import eu.interedition.collatex.jung.JungVariantGraph; -import eu.interedition.collatex.jung.JungVariantGraphEdge; -import eu.interedition.collatex.jung.JungVariantGraphVertex; +import eu.interedition.collatex.VariantGraph; import java.util.List; @@ -40,7 +38,7 @@ * * @author C. 
Schanck (chris at schanck dot net) */ -public class VariantGraphLayoutAdapter extends AbstractLayout { +public class VariantGraphLayoutAdapter extends AbstractLayout { public static enum Orientation { TOP, LEFT @@ -52,7 +50,7 @@ public static enum Orientation { private boolean executed = false; - public VariantGraphLayoutAdapter(JungVariantGraph g, Orientation orientation, int horzSpacing, int vertSpacing) { + public VariantGraphLayoutAdapter(VariantGraph g, Orientation orientation, int horzSpacing, int vertSpacing) { super(g); this.orientation = orientation; this.horzSpacing = horzSpacing; @@ -61,17 +59,17 @@ public VariantGraphLayoutAdapter(JungVariantGraph g, Orientation orientation, in public void initialize() { if (!executed) { - for (List level : VariantGraphLayout.of((JungVariantGraph) getGraph())) { + for (List level : VariantGraphLayout.of((VariantGraph) getGraph())) { for (VariantGraphLayout.Cell cell : level) { if (orientation.equals(Orientation.TOP)) { double xCoordinate = 10.0 + (cell.x * horzSpacing); double yCoordinate = 10.0 + (cell.y * vertSpacing); - setLocation((JungVariantGraphVertex) cell.vertex, xCoordinate, yCoordinate); + setLocation(cell.vertex, xCoordinate, yCoordinate); } else { double yCoordinate = 10.0 + (cell.x * vertSpacing); double xCoordinate = 10.0 + (cell.y * horzSpacing); - setLocation((JungVariantGraphVertex) cell.vertex, xCoordinate, yCoordinate); + setLocation(cell.vertex, xCoordinate, yCoordinate); } } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java index e272f64d2..594d1ba88 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java @@ -29,9 +29,6 @@ import com.google.common.collect.Maps; import edu.uci.ics.jung.algorithms.layout.StaticLayout; import 
eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.jung.JungVariantGraph; -import eu.interedition.collatex.jung.JungVariantGraphEdge; -import eu.interedition.collatex.jung.JungVariantGraphVertex; import eu.interedition.collatex.util.VariantGraphRanking; import org.apache.commons.collections15.Transformer; import org.apache.commons.lang.StringEscapeUtils; @@ -54,22 +51,22 @@ /** * @author Gregor Middell */ -public class VariantGraphPanel extends VisualizationViewer { +public class VariantGraphPanel extends VisualizationViewer { private VariantGraph variantGraph; private VariantGraphRanking ranking; private Map transpositionColors; - public VariantGraphPanel(JungVariantGraph vg) { - super(new StaticLayout(new JungVariantGraph())); + public VariantGraphPanel(VariantGraph vg) { + super(new StaticLayout<>(new VariantGraph())); setBackground(Color.WHITE); setGraphMouse(new DefaultModalGraphMouse()); - final RenderContext rc = getRenderContext(); - rc.setVertexLabelTransformer(new Transformer() { + final RenderContext rc = getRenderContext(); + rc.setVertexLabelTransformer(new Transformer() { @Override - public String transform(JungVariantGraphVertex variantGraphVertexModel) { + public String transform(VariantGraph.Vertex variantGraphVertexModel) { final Multimap tokens = Multimaps.index(variantGraphVertexModel.tokens(), Token.TO_WITNESS); final StringBuilder label = new StringBuilder(); for (Witness witness : Ordering.from(Witness.SIGIL_COMPARATOR).sortedCopy(tokens.keySet())) { @@ -88,9 +85,9 @@ public String transform(JungVariantGraphVertex variantGraphVertexModel) { return "" + htmllabel + ""; } }); - rc.setEdgeLabelTransformer(new Transformer() { + rc.setEdgeLabelTransformer(new Transformer() { @Override - public String transform(JungVariantGraphEdge variantGraphEdgeModel) { + public String transform(VariantGraph.Edge variantGraphEdgeModel) { return Joiner.on(", ").join(Iterables.transform(variantGraphEdgeModel.witnesses(), new Function() { 
@Override @@ -100,9 +97,9 @@ public String apply(Witness input) { })); } }); - rc.setVertexFillPaintTransformer(new Transformer() { + rc.setVertexFillPaintTransformer(new Transformer() { @Override - public Paint transform(JungVariantGraphVertex v) { + public Paint transform(VariantGraph.Vertex v) { final VariantGraph.Transposition transposition = Iterables.getFirst(v.transpositions(), null); return (v.tokens().isEmpty() ? Color.BLACK : (transposition == null @@ -111,15 +108,15 @@ public Paint transform(JungVariantGraphVertex v) { )); } }); - rc.setEdgeStrokeTransformer(new Transformer() { + rc.setEdgeStrokeTransformer(new Transformer() { @Override - public Stroke transform(JungVariantGraphEdge variantGraphEdgeModel) { + public Stroke transform(VariantGraph.Edge variantGraphEdgeModel) { return variantGraphEdgeModel.witnesses().isEmpty() ? CollateXLaboratory.DASHED_STROKE : CollateXLaboratory.SOLID_STROKE; } }); - rc.setEdgeDrawPaintTransformer(new Transformer() { + rc.setEdgeDrawPaintTransformer(new Transformer() { @Override - public Paint transform(JungVariantGraphEdge jungVariantGraphEdge) { + public Paint transform(VariantGraph.Edge jungVariantGraphEdge) { return Color.GRAY; } }); @@ -127,7 +124,7 @@ public Paint transform(JungVariantGraphEdge jungVariantGraphEdge) { setVariantGraph(vg); } - public void setVariantGraph(JungVariantGraph variantGraph) { + public void setVariantGraph(VariantGraph variantGraph) { this.variantGraph = variantGraph; this.ranking = VariantGraphRanking.of(variantGraph); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java index fa5f67288..38758589a 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java @@ -25,7 +25,6 @@ import eu.interedition.collatex.AbstractTest; import 
eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; -import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; import org.junit.Assert; @@ -53,7 +52,7 @@ public void emptyGraph() { @Test public void reconnectingVerticesYieldsSameEdge() { final SimpleWitness witness = createWitnesses("hello world")[0]; - final VariantGraph graph = new JungVariantGraph(); + final VariantGraph graph = new VariantGraph(); final VariantGraph.Vertex helloVertex = graph.add(witness.getTokens().get(0)); final VariantGraph.Vertex worldVertex = graph.add(witness.getTokens().get(1)); final VariantGraph.Edge edge = graph.connect(helloVertex, worldVertex, Collections. singleton(witness)); diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java index 892d803e6..c4cc33ce8 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java @@ -29,7 +29,6 @@ import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.jung.JungVariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleCollation; import eu.interedition.collatex.simple.SimplePatternTokenizer; @@ -134,7 +133,7 @@ CollateX configure(CommandLine commandLine) throws XPathExpressionException, Par break; } - this.variantGraph = new JungVariantGraph(); + this.variantGraph = new VariantGraph(); this.joined = !commandLine.hasOption("t"); diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java index 
f3444142f..c1e42b877 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java @@ -1,6 +1,6 @@ package eu.interedition.collatex.tools; -import eu.interedition.collatex.jung.JungVariantGraph; +import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.simple.SimpleCollation; import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; @@ -84,7 +84,7 @@ public void service(Request request, Response response) throws Exception { response.suspend(60, TimeUnit.SECONDS, new EmptyCompletionHandler<>()); collationThreads.submit(() -> { try { - final JungVariantGraph graph = new JungVariantGraph(); + final VariantGraph graph = new VariantGraph(); collation.collate(graph); // CORS support From a0bd15a8dfbbb0a422c18f974b40607e1ac29881 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 10:21:00 +0100 Subject: [PATCH 11/52] Remove some Google Guava usages --- .../interedition/collatex/VariantGraph.java | 114 ++++++++---------- 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index 56fa5277b..390346ddd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -19,25 +19,23 @@ package eu.interedition.collatex; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.base.Predicates; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; import 
edu.uci.ics.jung.graph.DirectedSparseGraph; import eu.interedition.collatex.util.VariantGraphTraversal; -import javax.annotation.Nullable; import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; /** * @author Gregor Middell @@ -45,7 +43,7 @@ public class VariantGraph extends DirectedSparseGraph { final VariantGraph.Vertex start; final VariantGraph.Vertex end; - final Multimap transpositionIndex = HashMultimap.create(); + final Map> transpositionIndex = new HashMap<>(); public VariantGraph() { super(); @@ -63,7 +61,7 @@ public Vertex getEnd() { } public Set transpositions() { - return Sets.newHashSet(transpositionIndex.values()); + return transpositionIndex.values().stream().flatMap(Set::stream).collect(Collectors.toSet()); } public Iterable vertices() { @@ -89,13 +87,15 @@ public Vertex add(Token token) { } public Edge connect(Vertex from, Vertex to, Set witnesses) { - Preconditions.checkArgument(!from.equals(to)); + if (from.equals(to)) { + throw new IllegalArgumentException(); + } if (from.equals(start)) { final Edge startEndEdge = edgeBetween(start, end); if (startEndEdge != null) { if (to.equals(end)) { - witnesses = Sets.newHashSet(witnesses); + witnesses = new HashSet<>(witnesses); witnesses.addAll(startEndEdge.witnesses()); } startEndEdge.delete(); @@ -118,9 +118,11 @@ public Edge register(Witness witness) { } public Transposition transpose(Set vertices) { - Preconditions.checkArgument(!vertices.isEmpty()); + if (vertices.isEmpty()) { + throw new IllegalArgumentException(); + } for (Transposition transposition : vertices.iterator().next().transpositions()) { - if (Sets.newHashSet(transposition).equals(vertices)) { + if 
(transposition.vertices.equals(vertices)) { return transposition; } } @@ -132,7 +134,7 @@ public Edge edgeBetween(Vertex a, Vertex b) { } public Set witnesses() { - Set witnesses = Sets.newHashSet(); + Set witnesses = new HashSet<>(); for (Edge edge : start.outgoing()) { witnesses.addAll(edge.witnesses()); } @@ -141,7 +143,7 @@ public Set witnesses() { @Override public String toString() { - return Iterables.toString(witnesses()); + return witnesses().toString(); } @@ -155,7 +157,7 @@ public static class Edge { public Edge(VariantGraph graph, Set witnesses) { this.graph = graph; - this.witnesses = Sets.newHashSet(witnesses); + this.witnesses = new HashSet<>(witnesses); } public VariantGraph.Edge add(Set witnesses) { @@ -185,7 +187,7 @@ public void delete() { @Override public String toString() { - return Iterables.toString(witnesses); + return witnesses.toString(); } } @@ -199,27 +201,27 @@ public static class Vertex { public Vertex(VariantGraph graph, Set tokens) { this.graph = graph; - this.tokens = Sets.newHashSet(tokens); + this.tokens = new HashSet<>(tokens); } - public Iterable incoming() { + public Collection incoming() { return incoming(null); } - public Iterable incoming(final Set witnesses) { + public Collection incoming(final Set witnesses) { return paths(graph.getInEdges(this), witnesses); } - public Iterable outgoing() { + public Collection outgoing() { return outgoing(null); } - public Iterable outgoing(Set witnesses) { + public Collection outgoing(Set witnesses) { return paths(graph.getOutEdges(this), witnesses); } - public Iterable transpositions() { - return graph.transpositionIndex.get(this); + public Collection transpositions() { + return graph.transpositionIndex.getOrDefault(this, Collections.emptySet()); } public Set tokens() { @@ -227,16 +229,11 @@ public Set tokens() { } public Set tokens(final Set witnesses) { - return Collections.unmodifiableSet(Sets.filter(tokens, witnesses == null ? 
Predicates.alwaysTrue() : new Predicate() { - @Override - public boolean apply(@Nullable Token token) { - return witnesses.contains(token.getWitness()); - } - })); + return Collections.unmodifiableSet(witnesses == null ? tokens :tokens.stream().filter(t -> witnesses.contains(t.getWitness())).collect(Collectors.toSet())); } public Set witnesses() { - final Set witnesses = Sets.newHashSet(); + final Set witnesses = new HashSet<>(); for (VariantGraph.Edge edge : incoming()) { witnesses.addAll(edge.witnesses()); } @@ -244,7 +241,7 @@ public Set witnesses() { } public void add(Iterable tokens) { - Iterables.addAll(this.tokens, tokens); + tokens.forEach(this.tokens::add); } public VariantGraph graph() { @@ -256,21 +253,14 @@ public void delete() { } public String toString() { - return Iterables.toString(tokens); + return tokens.toString(); } - protected static Iterable paths(final Iterable edges, final Set witnesses) { - return Iterables.filter(edges, (witnesses == null ? Predicates.alwaysTrue() : new Predicate() { - @Override - public boolean apply(@Nullable VariantGraph.Edge edge) { - for (Witness edgeWitness : edge.witnesses()) { - if (witnesses.contains(edgeWitness)) { - return true; - } - } - return false; - } - })); + protected static Collection paths(final Collection edges, final Set witnesses) { + if (witnesses == null) { + return edges; + } + return Arrays.asList(edges.stream().filter(edge -> edge.witnesses().stream().anyMatch(witnesses::contains)).toArray(Edge[]::new)); } } @@ -283,15 +273,15 @@ public static class Transposition implements Iterable { public Transposition(VariantGraph graph, Set vertices) { this.graph = graph; - this.vertices = Sets.newHashSet(vertices); + this.vertices = new HashSet<>(vertices); for (VariantGraph.Vertex vertex : this.vertices) { - graph.transpositionIndex.put(vertex, this); + graph.transpositionIndex.computeIfAbsent(vertex, v -> new HashSet<>()).add(this); } } public void delete() { for (VariantGraph.Vertex vertex : 
this.vertices) { - graph.transpositionIndex.remove(vertex, this); + graph.transpositionIndex.getOrDefault(vertex, Collections.emptySet()).remove(this); } } @@ -302,43 +292,43 @@ public Iterator iterator() { @Override public String toString() { - return Iterables.toString(vertices); + return vertices.toString(); } } public static final Function JOIN = new Function() { @Override - public VariantGraph apply(@Nullable VariantGraph graph) { - final Set processed = Sets.newHashSet(); + public VariantGraph apply(VariantGraph graph) { + final Set processed = new HashSet<>(); final Vertex end = graph.getEnd(); - final Deque queue = new ArrayDeque(); + final Deque queue = new ArrayDeque<>(); for (VariantGraph.Edge startingEdges : graph.getStart().outgoing()) { queue.push(startingEdges.to()); } while (!queue.isEmpty()) { final Vertex vertex = queue.pop(); - final Set transpositions = Sets.newHashSet(vertex.transpositions()); - final List outgoingEdges = Lists.newArrayList(vertex.outgoing()); + final Set transpositions = new HashSet<>(vertex.transpositions()); + final List outgoingEdges = new ArrayList<>(vertex.outgoing()); if (outgoingEdges.size() == 1) { final Edge joinCandidateEdge = outgoingEdges.get(0); final Vertex joinCandidateVertex = joinCandidateEdge.to(); - final Set joinCandidateTranspositions = Sets.newHashSet(joinCandidateVertex.transpositions()); + final Set joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); boolean canJoin = !end.equals(joinCandidateVertex) && // - Iterables.size(joinCandidateVertex.incoming()) == 1 && // + joinCandidateVertex.incoming().size() == 1 && // transpositions.equals(joinCandidateTranspositions); if (canJoin) { vertex.add(joinCandidateVertex.tokens()); - for (Transposition t : Sets.newHashSet(joinCandidateVertex.transpositions())) { - final Set transposed = Sets.newHashSet(t); + for (Transposition t : new HashSet<>(joinCandidateVertex.transpositions())) { + final Set transposed = new 
HashSet<>(t.vertices); transposed.remove(joinCandidateVertex); transposed.add(vertex); t.delete(); graph.transpose(transposed); } - for (Edge e : Lists.newArrayList(joinCandidateVertex.outgoing())) { + for (Edge e : new ArrayList<>(joinCandidateVertex.outgoing())) { final Vertex to = e.to(); final Set witnesses = e.witnesses(); e.delete(); From a5160af9848bf7a779c088af6d1f55a36ecb2d1b Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 10:59:26 +0100 Subject: [PATCH 12/52] Remove some Google Guava usages --- .../collatex/util/VariantGraphTraversal.java | 63 ++++++++++--------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java index 1480b885f..d9cc289fd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java @@ -19,21 +19,17 @@ package eu.interedition.collatex.util; -import com.google.common.base.Objects; -import com.google.common.collect.AbstractIterator; -import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; import java.util.ArrayDeque; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.Optional; import java.util.Queue; import java.util.Set; -import static java.util.Collections.singleton; - /** * @author Gregor Middell */ @@ -56,21 +52,25 @@ public static VariantGraphTraversal of(VariantGraph graph) { @Override public Iterator iterator() { - return new AbstractIterator() { - private final Map encountered = Maps.newHashMap(); - private final Queue queue = new ArrayDeque(singleton(graph.getStart())); + return new Iterator() { + + private final Map encountered = new HashMap<>(); + 
private final Queue queue = new ArrayDeque<>(); + private Optional next = Optional.of(graph.getStart()); @Override - protected VariantGraph.Vertex computeNext() { - if (queue.isEmpty()) { - return endOfData(); - } - final VariantGraph.Vertex next = queue.remove(); + public boolean hasNext() { + return next.isPresent(); + } + + @Override + public VariantGraph.Vertex next() { + final VariantGraph.Vertex next = this.next.get(); for (VariantGraph.Edge edge : next.outgoing(witnesses)) { final VariantGraph.Vertex end = edge.to(); - final int endEncountered = Objects.firstNonNull(encountered.get(end), 0); - final int endIncoming = Iterables.size(end.incoming(witnesses)); + final int endEncountered = Optional.ofNullable(encountered.get(end)).orElse(0); + final int endIncoming = end.incoming(witnesses).size(); if (endIncoming == endEncountered) { throw new IllegalStateException(String.format("Encountered cycle traversing %s to %s", edge, end)); @@ -80,30 +80,31 @@ protected VariantGraph.Vertex computeNext() { encountered.put(end, endEncountered + 1); } + this.next = Optional.ofNullable(queue.poll()); return next; } }; } public Iterable edges() { - return new Iterable() { + return () -> new Iterator() { + + private final Iterator vertexIt = VariantGraphTraversal.this.iterator(); + private final Queue queue = new ArrayDeque<>(); @Override - public Iterator iterator() { - return new AbstractIterator() { - private final Iterator vertexIt = VariantGraphTraversal.this.iterator(); - private final Queue queue = new ArrayDeque(); - - @Override - protected VariantGraph.Edge computeNext() { - if (queue.isEmpty()) { - if (vertexIt.hasNext()) { - Iterables.addAll(queue, vertexIt.next().outgoing(witnesses)); - } - } - return (queue.isEmpty() ? 
endOfData() : queue.remove()); + public boolean hasNext() { + if (queue.isEmpty()) { + if (vertexIt.hasNext()) { + vertexIt.next().outgoing(witnesses).forEach(queue::add); } - }; + } + return !queue.isEmpty(); + } + + @Override + public VariantGraph.Edge next() { + return queue.remove(); } }; } From 45dffb666881cf01457f75dd2944ac48b6e89e1b Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 14:12:10 +0100 Subject: [PATCH 13/52] Remove some Google Guava usages --- .../collatex/CollationAlgorithmFactory.java | 2 +- .../medite/AlignmentDecisionGraph.java | 4 +-- .../interedition/collatex/medite/Matches.java | 8 ++--- .../collatex/medite/MediteAlgorithm.java | 6 ++-- .../simple/SimplePatternTokenizer.java | 4 +-- .../collatex/simple/SimpleToken.java | 2 +- .../simple/SimpleWitnessTeiBuilder.java | 5 ++- .../collatex/util/VertexMatch.java | 34 ++++++------------- 8 files changed, 24 insertions(+), 41 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java index b95755d17..aba1fee03 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java @@ -19,7 +19,6 @@ package eu.interedition.collatex; -import com.google.common.base.Function; import eu.interedition.collatex.dekker.DekkerAlgorithm; import eu.interedition.collatex.dekker.matrix.MatchTableLinker; import eu.interedition.collatex.util.GreedyStringTilingAlgorithm; @@ -29,6 +28,7 @@ import java.util.Comparator; import java.util.SortedSet; +import java.util.function.Function; /** * @author Gregor Middell diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java index a2553b0ac..dac70eef9 100644 --- 
a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.medite; -import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,6 +30,7 @@ import java.util.Map; import java.util.PriorityQueue; import java.util.SortedSet; +import java.util.function.Function; /** * @author Gregor Middell @@ -45,7 +45,7 @@ public class AlignmentDecisionGraph { AlignmentDecisionGraph(List> matches, Function, Integer> matchEvaluator) { this.matches = matches; this.matchEvaluator = matchEvaluator; - this.bestPaths = new PriorityQueue(matches.size(), PATH_COST_COMPARATOR); + this.bestPaths = new PriorityQueue<>(matches.size(), PATH_COST_COMPARATOR); this.minCosts = Maps.newHashMap(); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java index 8b732e958..bf7e9dea2 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.medite; -import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.HashMultimap; @@ -28,11 +27,9 @@ import com.google.common.collect.Multimap; import com.google.common.collect.Range; import com.google.common.collect.Sets; -import com.google.common.collect.SortedSetMultimap; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.util.IntegerRangeSet; -import eu.interedition.collatex.util.VariantGraphRanking; import eu.interedition.collatex.util.VertexMatch; import 
java.util.ArrayList; @@ -43,6 +40,7 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.function.Function; /** * @author Gregor Middell @@ -82,7 +80,7 @@ public static Matches between(VariantGraph.Vertex[][] vertices, SuffixTree phrase = new TreeSet(); + final SortedSet phrase = new TreeSet<>(); phrase.add(new VertexMatch.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate)); threadPhrases.add(phrase); } else { @@ -160,7 +158,7 @@ public SortedSet> findMaximalUniqueMatches } Preconditions.checkState(maximalUniqueMatches.add(nextMum), "Duplicate MUM"); - Iterables.removeIf(allMatches, VertexMatch.filter( + allMatches.removeIf(VertexMatch.filter( new IntegerRangeSet(Range.closed(nextMum.first().vertexRank, nextMum.last().vertexRank)), new IntegerRangeSet(Range.closed(nextMum.first().token, nextMum.last().token)) )); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java index ad3ed02d8..32f00b517 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.medite; -import com.google.common.base.Function; import com.google.common.collect.Iterables; import com.google.common.collect.Range; import com.google.common.collect.Sets; @@ -33,6 +32,7 @@ import java.util.Comparator; import java.util.SortedSet; import java.util.TreeSet; +import java.util.function.Function; /** * @author Gregor Middell @@ -76,7 +76,7 @@ public void collate(VariantGraph graph, Iterable witness) { tokenFilter.add(Range.closed(firstMatch.token, lastMatch.token)); } - Iterables.removeIf(matchCandidates, VertexMatch.filter(rankFilter, tokenFilter)); + matchCandidates.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); } merge(graph, vertices, 
tokens, matches); @@ -94,7 +94,7 @@ static class MatchEvaluatorWrapper implements Function input) { - final SortedSet tokenPhrase = new TreeSet(); + final SortedSet tokenPhrase = new TreeSet<>(); for (VertexMatch.WithTokenIndex match : input) { tokenPhrase.add(tokenResolver.apply(match)); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java index f30e9aedd..ef15579be 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java @@ -20,9 +20,9 @@ package eu.interedition.collatex.simple; import com.google.common.base.Function; -import com.google.common.collect.Lists; import javax.annotation.Nullable; +import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -42,7 +42,7 @@ public SimplePatternTokenizer(Pattern pattern) { @Override public Iterable apply(@Nullable String input) { final Matcher matcher = pattern.matcher(input); - final List tokens = Lists.newLinkedList(); + final List tokens = new LinkedList<>(); while (matcher.find()) { tokens.add(input.substring(matcher.start(), matcher.end())); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java index cbb06bb61..7d9c760dc 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.simple; -import com.google.common.base.Function; import com.google.common.collect.Iterables; import eu.interedition.collatex.Token; import eu.interedition.collatex.Witness; @@ -27,6 +26,7 @@ import javax.annotation.Nullable; import 
java.util.SortedSet; +import java.util.function.Function; public class SimpleToken implements Token, Comparable { private final SimpleWitness witness; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java index a79d4c880..23d7d7a04 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java @@ -20,6 +20,7 @@ package eu.interedition.collatex.simple; import java.io.InputStream; +import java.util.ArrayList; import java.util.List; import javax.xml.namespace.QName; @@ -28,8 +29,6 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.XMLEvent; -import com.google.common.collect.Lists; - // we are going to use stax // previous version was build on DOM, which is not particularly well suited to parsing // large TEI files, with segments and expansions @@ -41,7 +40,7 @@ public class SimpleWitnessTeiBuilder { public SimpleWitness read(InputStream input) throws XMLStreamException { SimpleWitness witness = new SimpleWitness("id"); - List tokenContents = Lists.newArrayList(); + List tokenContents = new ArrayList<>(); XMLInputFactory factory = XMLInputFactory.newInstance(); XMLEventReader reader = factory.createXMLEventReader(input); XMLEvent event = null; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java index 8712ef99b..5311b9e9f 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java @@ -19,14 +19,13 @@ package eu.interedition.collatex.util; -import com.google.common.base.Function; -import com.google.common.base.Predicate; import eu.interedition.collatex.Token; 
import eu.interedition.collatex.VariantGraph; -import javax.annotation.Nullable; import java.util.Comparator; import java.util.SortedSet; +import java.util.function.Function; +import java.util.function.Predicate; /** * @author Gregor Middell @@ -59,12 +58,7 @@ public int hashCode() { } public static Comparator> setComparator() { - return new Comparator>() { - @Override - public int compare(SortedSet o1, SortedSet o2) { - return o1.first().compareTo(o2.first()); - } - }; + return (o1, o2) -> o1.first().compareTo(o2.first()); } /** @@ -104,25 +98,17 @@ public String toString() { } public static Function tokenResolver(final Token[] tokens) { - return new Function() { - @Override - public WithToken apply(@Nullable WithTokenIndex input) { - return new WithToken(input.vertex, input.vertexRank, tokens[input.token]); - } - }; + return input -> new WithToken(input.vertex, input.vertexRank, tokens[input.token]); } - public static final Predicate> filter(final IntegerRangeSet rankFilter, final IntegerRangeSet tokenFilter) { - return new Predicate>() { - @Override - public boolean apply(@Nullable SortedSet input) { - for (WithTokenIndex match : input) { - if (tokenFilter.apply(match.token) || rankFilter.apply(match.vertexRank)) { - return true; - } + public static Predicate> filter(final IntegerRangeSet rankFilter, final IntegerRangeSet tokenFilter) { + return input -> { + for (WithTokenIndex match : input) { + if (tokenFilter.apply(match.token) || rankFilter.apply(match.vertexRank)) { + return true; } - return false; } + return false; }; } } From b5626d6b02a8140ed857508a27f2395e77b52e13 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 14:16:03 +0100 Subject: [PATCH 14/52] Remove some Google Guava usages --- .../util/GreedyStringTilingAlgorithm.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java 
b/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java index 65b574f3c..d822afb0a 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java @@ -19,8 +19,6 @@ package eu.interedition.collatex.util; -import com.google.common.base.Objects; -import com.google.common.collect.Iterables; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; @@ -35,6 +33,7 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.stream.StreamSupport; /** @@ -58,7 +57,7 @@ public class GreedyStringTilingAlgorithm extends CollationAlgorithm.Base { public boolean isEqual(VariantGraph.Vertex[] a, Token b) { for (VariantGraph.Vertex vertex : a) { final Set tokens = vertex.tokens(); - if (!tokens.isEmpty() && comparator.compare(Iterables.getFirst(tokens, null), b) == 0) { + if (!tokens.isEmpty() && comparator.compare(tokens.stream().findFirst().get(), b) == 0) { return true; } } @@ -74,11 +73,11 @@ public GreedyStringTilingAlgorithm(Comparator comparator, int minimumTile @Override public void collate(VariantGraph graph, Iterable witness) { final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray(); - final Token[] tokens = Iterables.toArray(witness, Token.class); + final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); - final SortedSet> matches = new TreeSet>(VertexMatch.setComparator()); + final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator()); for (Match match : match(vertices, tokens, equality, minimumTileLength)) { - final SortedSet phrase = new TreeSet(); + final SortedSet phrase = new TreeSet<>(); for (int mc = 0, ml = match.length; mc < ml; mc++) { final int rank = match.left + mc; phrase.add(new 
VertexMatch.WithTokenIndex(vertices[rank][0], rank, match.right + mc)); @@ -96,8 +95,8 @@ public static SortedSet match(A[] left, B[] right, Equality e Arrays.fill(markedLeft, false); Arrays.fill(markedRight, false); - final SortedSet matches = new TreeSet(); - final Map> matchesByLength = new HashMap>(); + final SortedSet matches = new TreeSet<>(); + final Map> matchesByLength = new HashMap<>(); int maxMatchLength; do { @@ -116,7 +115,7 @@ public static SortedSet match(A[] left, B[] right, Equality e if (matchLength >= maxMatchLength) { List theMatches = matchesByLength.get(matchLength); if (theMatches == null) { - matchesByLength.put(matchLength, theMatches = new ArrayList()); + matchesByLength.put(matchLength, theMatches = new ArrayList<>()); } theMatches.add(new Match(lc, rc)); } @@ -127,7 +126,7 @@ public static SortedSet match(A[] left, B[] right, Equality e } } - for (Match match : Objects.firstNonNull(matchesByLength.get(maxMatchLength), Collections.emptyList())) { + for (Match match : matchesByLength.getOrDefault(maxMatchLength, Collections.emptyList())) { boolean occluded = false; for (int tc = 0; tc < maxMatchLength; tc++) { From ad9d49bfd992b644433ae6db4d129e3c2be09490 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 14:28:47 +0100 Subject: [PATCH 15/52] Remove some Google Guava usages --- .../interedition/collatex/medite/Matches.java | 14 +++-- .../collatex/medite/MediteAlgorithm.java | 12 ++-- .../collatex/util/IntegerRangeSet.java | 55 ------------------- .../collatex/util/VertexMatch.java | 12 +--- 4 files changed, 17 insertions(+), 76 deletions(-) delete mode 100644 collatex-core/src/main/java/eu/interedition/collatex/util/IntegerRangeSet.java diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java index bf7e9dea2..e723d6fe2 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java +++ 
b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java @@ -25,14 +25,13 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; -import com.google.common.collect.Range; import com.google.common.collect.Sets; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.util.IntegerRangeSet; import eu.interedition.collatex.util.VertexMatch; import java.util.ArrayList; +import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.LinkedList; @@ -158,10 +157,13 @@ public SortedSet> findMaximalUniqueMatches } Preconditions.checkState(maximalUniqueMatches.add(nextMum), "Duplicate MUM"); - allMatches.removeIf(VertexMatch.filter( - new IntegerRangeSet(Range.closed(nextMum.first().vertexRank, nextMum.last().vertexRank)), - new IntegerRangeSet(Range.closed(nextMum.first().token, nextMum.last().token)) - )); + final BitSet rankFilter = new BitSet(); + final BitSet tokenFilter = new BitSet(); + + rankFilter.set(nextMum.first().vertexRank, nextMum.last().vertexRank + 1); + tokenFilter.set(nextMum.first().token, nextMum.last().token + 1); + + allMatches.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); } return maximalUniqueMatches; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java index 32f00b517..9b0b31660 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java @@ -20,19 +20,19 @@ package eu.interedition.collatex.medite; import com.google.common.collect.Iterables; -import com.google.common.collect.Range; import com.google.common.collect.Sets; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.Token; 
import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.util.IntegerRangeSet; import eu.interedition.collatex.util.VariantGraphRanking; import eu.interedition.collatex.util.VertexMatch; +import java.util.BitSet; import java.util.Comparator; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Function; +import java.util.stream.IntStream; /** * @author Gregor Middell @@ -64,16 +64,16 @@ public void collate(VariantGraph graph, Iterable witness) { break; } - final IntegerRangeSet rankFilter = new IntegerRangeSet(); - final IntegerRangeSet tokenFilter = new IntegerRangeSet(); + final BitSet rankFilter = new BitSet(); + final BitSet tokenFilter = new BitSet(); for (SortedSet phrase : AlignmentDecisionGraph.filter(maximalUniqueMatches, matchEvaluator)) { final VertexMatch.WithTokenIndex firstMatch = phrase.first(); final VertexMatch.WithTokenIndex lastMatch = phrase.last(); matches.add(phrase); - rankFilter.add(Range.closed(firstMatch.vertexRank, lastMatch.vertexRank)); - tokenFilter.add(Range.closed(firstMatch.token, lastMatch.token)); + IntStream.range(firstMatch.vertexRank, lastMatch.vertexRank + 1).forEach(rankFilter::set); + IntStream.range(firstMatch.token, lastMatch.token + 1).forEach(tokenFilter::set); } matchCandidates.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/IntegerRangeSet.java b/collatex-core/src/main/java/eu/interedition/collatex/util/IntegerRangeSet.java deleted file mode 100644 index d6042e7ae..000000000 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/IntegerRangeSet.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. 
- * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.util; - -import com.google.common.base.Predicate; -import com.google.common.collect.Range; - -import javax.annotation.Nullable; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; - -/** - * @author Gregor Middell - */ -public class IntegerRangeSet extends HashSet> implements Predicate { - - public IntegerRangeSet() { - } - - public IntegerRangeSet(Range c) { - this(Collections.singleton(c)); - } - - public IntegerRangeSet(Collection> c) { - super(c); - } - - @Override - public boolean apply(@Nullable Integer input) { - for (Range range : this) { - if (range.contains(input)) { - return true; - } - } - return false; - } -} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java index 5311b9e9f..7d6fa7760 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java @@ -22,6 +22,7 @@ import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; +import java.util.BitSet; import java.util.Comparator; import java.util.SortedSet; import java.util.function.Function; @@ -101,14 +102,7 @@ public static Function tokenResolver(final Token[] to return input -> 
new WithToken(input.vertex, input.vertexRank, tokens[input.token]); } - public static Predicate> filter(final IntegerRangeSet rankFilter, final IntegerRangeSet tokenFilter) { - return input -> { - for (WithTokenIndex match : input) { - if (tokenFilter.apply(match.token) || rankFilter.apply(match.vertexRank)) { - return true; - } - } - return false; - }; + public static Predicate> filter(final BitSet rankFilter, final BitSet tokenFilter) { + return input -> input.stream().anyMatch(match -> tokenFilter.get(match.token) || rankFilter.get(match.vertexRank)); } } From dbf59b6200a6f029294991b68e38ea6d329cc3b8 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 15:36:21 +0100 Subject: [PATCH 16/52] Remove some Google Guava usages --- .../eu/interedition/collatex/Witness.java | 20 ++---- .../simple/SimplePatternTokenizer.java | 45 ++++++-------- .../simple/SimpleTokenNormalizers.java | 61 ++++++++----------- .../collatex/simple/SimpleWitness.java | 40 ++++++------ .../simple/SimpleWitnessTeiBuilder.java | 3 +- .../collatex/suffixtree/SuffixTree.java | 17 +++--- .../collatex/suffixtree/Utils.java | 7 +-- .../interedition/collatex/tools/CollateX.java | 12 ++-- .../collatex/tools/PluginScript.java | 11 ++-- .../collatex/tools/URLWitness.java | 15 ++--- 10 files changed, 98 insertions(+), 133 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java index 75f2adb1f..5f95a3cab 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java @@ -19,11 +19,9 @@ package eu.interedition.collatex; -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.collect.Ordering; - import java.util.Comparator; +import java.util.function.Function; +import java.util.stream.Collectors; /** * IWitness @@ -35,17 +33,7 @@ public interface Witness { 
String getSigil(); - final Comparator SIGIL_COMPARATOR = new Comparator() { - @Override - public int compare(Witness o1, Witness o2) { - return o1.getSigil().compareTo(o2.getSigil()); - } - }; + final Comparator SIGIL_COMPARATOR = Comparator.comparing(Witness::getSigil); - final Function TO_SIGILS = new Function() { - @Override - public String apply(VariantGraph.Edge input) { - return Joiner.on(", ").join(Ordering.from(SIGIL_COMPARATOR).sortedCopy(input.witnesses())); - } - }; + final Function TO_SIGILS = input -> input.witnesses().stream().sorted(SIGIL_COMPARATOR).map(Object::toString).collect(Collectors.joining(", ")); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java index ef15579be..1677df3a9 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java @@ -19,47 +19,36 @@ package eu.interedition.collatex.simple; -import com.google.common.base.Function; - -import javax.annotation.Nullable; import java.util.LinkedList; import java.util.List; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Stream; /** * @author Gregor Middell * @author Ronald Haentjens Dekker */ -public class SimplePatternTokenizer implements Function> { - - private final Pattern pattern; +public class SimplePatternTokenizer { - public SimplePatternTokenizer(Pattern pattern) { - this.pattern = pattern; - } + static final String PUNCT = Pattern.quote(".?!,;:"); - @Override - public Iterable apply(@Nullable String input) { - final Matcher matcher = pattern.matcher(input); - final List tokens = new LinkedList<>(); - while (matcher.find()) { - tokens.add(input.substring(matcher.start(), matcher.end())); - } - return tokens; + static Function> 
tokenizer(Pattern pattern) { + return input -> { + final Matcher matcher = pattern.matcher(input); + final List tokens = new LinkedList<>(); + while (matcher.find()) { + tokens.add(input.substring(matcher.start(), matcher.end())); + } + return tokens.stream(); + }; } - public static final SimplePatternTokenizer BY_WHITESPACE = new SimplePatternTokenizer( - Pattern.compile("\\s*?\\S+\\s*]") - ); + public static final Function> BY_WHITESPACE = tokenizer(Pattern.compile("\\s*?\\S+\\s*]")); - static final String PUNCT = Pattern.quote(".?!,;:"); - - public static final SimplePatternTokenizer BY_WS_AND_PUNCT = new SimplePatternTokenizer( - Pattern.compile("[\\s" + PUNCT + "]*?[^\\s" + PUNCT + "]+[\\s" + PUNCT + "]*") - ); + public static final Function> BY_WS_AND_PUNCT = tokenizer(Pattern.compile("[\\s" + PUNCT + "]*?[^\\s" + PUNCT + "]+[\\s" + PUNCT + "]*")); - public static final SimplePatternTokenizer BY_WS_OR_PUNCT = new SimplePatternTokenizer( - Pattern.compile("[" + PUNCT + "]+[\\s]*|[^" + PUNCT + "\\s]+[\\s]*") - ); + public static final Function> BY_WS_OR_PUNCT = tokenizer(Pattern.compile("[" + PUNCT + "]+[\\s]*|[^" + PUNCT + "\\s]+[\\s]*")); + } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java index 4e489b5be..8e5193015 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java @@ -19,10 +19,7 @@ package eu.interedition.collatex.simple; -import com.google.common.base.Function; -import com.google.common.base.Functions; - -import javax.annotation.Nullable; +import java.util.function.Function; /** * @author Gregor Middell @@ -30,45 +27,37 @@ */ public class SimpleTokenNormalizers { - public static final Function LOWER_CASE = new Function() { - @Override - public 
String apply(@Nullable String input) { - return input.toLowerCase(); - } - }; + public static final Function LOWER_CASE = String::toLowerCase; + + public static final Function TRIM_WS = String::trim; - public static final Function TRIM_WS = new Function() { - @Override - public String apply(@Nullable String input) { - return input.trim(); + public static final Function TRIM_WS_PUNCT = input -> { + int start = 0; + int end = input.length() - 1; + while (start <= end && isWhitespaceOrPunctuation(input.charAt(start))) { + start++; } + while (end >= start && isWhitespaceOrPunctuation(input.charAt(end))) { + end--; + } + return input.substring(start, end + 1); }; - public static final Function TRIM_WS_PUNCT = new Function() { - - @Override - public String apply(@Nullable String input) { - int start = 0; - int end = input.length() - 1; - while (start <= end && isWhitespaceOrPunctuation(input.charAt(start))) { - start++; - } - while (end >= start && isWhitespaceOrPunctuation(input.charAt(end))) { - end--; - } - return input.substring(start, end + 1); + public static boolean isWhitespaceOrPunctuation(char c) { + if (Character.isWhitespace(c)) { + return true; } - - boolean isWhitespaceOrPunctuation(char c) { - if (Character.isWhitespace(c)) { + switch (Character.getType(c)) { + case Character.START_PUNCTUATION: + case Character.END_PUNCTUATION: + case Character.OTHER_PUNCTUATION: return true; - } - final int type = Character.getType(c); - return (Character.START_PUNCTUATION == type || Character.END_PUNCTUATION == type || Character.OTHER_PUNCTUATION == type); + default: + return false; } - }; + } - public static final Function LC_TRIM_WS_PUNCT = Functions.compose(LOWER_CASE, TRIM_WS_PUNCT); + public static final Function LC_TRIM_WS_PUNCT = LOWER_CASE.andThen(TRIM_WS_PUNCT); - public static final Function LC_TRIM_WS = Functions.compose(LOWER_CASE, TRIM_WS); + public static final Function LC_TRIM_WS = LOWER_CASE.andThen(TRIM_WS); } diff --git 
a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java index d6532f258..51f6f81be 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java @@ -19,24 +19,23 @@ package eu.interedition.collatex.simple; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; -import com.google.common.collect.Lists; import eu.interedition.collatex.Token; import eu.interedition.collatex.Witness; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.function.Function; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; public class SimpleWitness implements Iterable, Witness, Comparator { private final String sigil; - private final List tokens = new ArrayList(); + private final List tokens = new ArrayList<>(); public SimpleWitness(String sigil) { this.sigil = sigil; @@ -48,7 +47,7 @@ public SimpleWitness(String sigil, String content) { public SimpleWitness(String sigil, String content, - Function> tokenizer, + Function> tokenizer, Function normalizer) { this(sigil); setTokenContents(tokenizer.apply(content), normalizer); @@ -58,12 +57,8 @@ public List getTokens() { return tokens; } - public void setTokenContents(Iterable tokenContents, Function normalizer) { - final List tokens = Lists.newArrayListWithExpectedSize(Iterables.size(tokenContents)); - for (String content : tokenContents) { - tokens.add(new SimpleToken(this, content, normalizer.apply(content))); - } - setTokens(tokens); + public void setTokenContents(Stream tokenContents, Function normalizer) { + setTokens(tokenContents.map(content -> 
new SimpleToken(SimpleWitness.this, content, normalizer.apply(content))).collect(Collectors.toList())); } public void setTokens(List tokens) { @@ -78,7 +73,7 @@ public String getSigil() { @Override public Iterator iterator() { - return Iterators.unmodifiableIterator(tokens.iterator()); + return Collections.unmodifiableList(tokens).iterator(); } @Override @@ -90,19 +85,20 @@ public String toString() { public int compare(SimpleToken o1, SimpleToken o2) { final int o1Index = tokens.indexOf(o1); final int o2Index = tokens.indexOf(o2); - Preconditions.checkArgument(o1Index >= 0, o1); - Preconditions.checkArgument(o2Index >= 0, o2); + if (o1Index < 0) { + throw new IllegalArgumentException(o1.toString()); + } + if (o2Index < 0) { + throw new IllegalArgumentException(); + } return (o1Index - o2Index); } public static final Pattern PUNCT = Pattern.compile("\\p{Punct}"); - public static final Function TOKEN_NORMALIZER = new Function() { - @Override - public String apply(String input) { - final String normalized = PUNCT.matcher(input.trim().toLowerCase()).replaceAll(""); - return (normalized == null || normalized.length() == 0 ? input : normalized); - } + public static final Function TOKEN_NORMALIZER = input -> { + final String normalized = PUNCT.matcher(input.trim().toLowerCase()).replaceAll(""); + return (normalized == null || normalized.length() == 0 ? 
input : normalized); }; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java index 23d7d7a04..b1dbe0a37 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.List; +import java.util.stream.StreamSupport; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; @@ -58,7 +59,7 @@ public SimpleWitness read(InputStream input) throws XMLStreamException { reader.next(); } } - witness.setTokenContents(tokenContents, SimpleTokenNormalizers.LC_TRIM_WS_PUNCT); + witness.setTokenContents(tokenContents.stream(), SimpleTokenNormalizers.LC_TRIM_WS_PUNCT); return witness; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java index ab237da00..1c12fea7b 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java @@ -28,9 +28,9 @@ public class SuffixTree> { * Constructs an empty suffix tree. */ public SuffixTree(){ - sequence = new Sequence(); - root = new Node(null, this.sequence, this); - activePoint = new ActivePoint(root); + sequence = new Sequence<>(); + root = new Node<>(null, this.sequence, this); + activePoint = new ActivePoint<>(root); } /** @@ -40,13 +40,12 @@ public SuffixTree(){ * @param sequenceArray * the array of items for which we are going to generate a suffix * tree. 
- * @throws Exception */ public SuffixTree(S sequenceArray) { - sequence = new Sequence(sequenceArray); - root = new Node(null, this.sequence, this); - activePoint = new ActivePoint(root); - suffix = new Suffix(0, 0, this.sequence); + sequence = new Sequence<>(sequenceArray); + root = new Node<>(null, this.sequence, this); + activePoint = new ActivePoint<>(root); + suffix = new Suffix<>(0, 0, this.sequence); extendTree(0,sequence.getLength()); } @@ -58,7 +57,7 @@ public SuffixTree(S sequenceArray) { public void add(S sequence){ int start = currentEnd; this.sequence.add(sequence); - suffix = new Suffix(currentEnd,currentEnd,this.sequence); + suffix = new Suffix<>(currentEnd,currentEnd,this.sequence); activePoint.setPosition(root, null, 0); extendTree(start, this.sequence.getLength()); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java index 4906fefba..168343a91 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java @@ -3,7 +3,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; -import java.util.List; import java.util.Map; /** @@ -50,9 +49,9 @@ static > String printTreeForGraphViz(SuffixTree tre * tree. 
*/ static > String printTreeForGraphViz(SuffixTree tree, boolean printSuffixLinks) { - LinkedList> stack = new LinkedList>(); + LinkedList> stack = new LinkedList<>(); stack.add(tree.getRoot()); - Map, Integer> nodeMap = new HashMap, Integer>(); + Map, Integer> nodeMap = new HashMap<>(); nodeMap.put(tree.getRoot(), 0); int nodeId = 1; @@ -60,7 +59,7 @@ static > String printTreeForGraphViz(SuffixTree tre "\ndigraph suffixTree{\n node [shape=circle, label=\"\", fixedsize=true, width=0.1, height=0.1]\n"); while (stack.size() > 0) { - LinkedList> childNodes = new LinkedList>(); + LinkedList> childNodes = new LinkedList<>(); for (Node node : stack) { // List edges = node.getEdges(); diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java index c4cc33ce8..2e40d0c02 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.tools; -import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.collect.Lists; import com.google.common.io.Closeables; @@ -71,8 +70,11 @@ import java.nio.charset.Charset; import java.util.Comparator; import java.util.List; +import java.util.Optional; +import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Stream; /** * @author Gregor Middell @@ -85,7 +87,7 @@ public class CollateX implements Closeable { List witnesses; XPathExpression tokenXPath; - Function> tokenizer; + Function> tokenizer; Function normalizer; Comparator comparator; CollationAlgorithm collationAlgorithm; @@ -110,9 +112,9 @@ CollateX configure(CommandLine commandLine) throws XPathExpressionException, Par ? 
PluginScript.read("", new StringReader("")) : PluginScript.read(argumentToResource(script))); - this.tokenizer = Objects.firstNonNull(pluginScript.tokenizer(), SimplePatternTokenizer.BY_WS_OR_PUNCT); - this.normalizer = Objects.firstNonNull(pluginScript.normalizer(), SimpleTokenNormalizers.LC_TRIM_WS); - this.comparator = Objects.firstNonNull(pluginScript.comparator(), new EqualityTokenComparator()); + this.tokenizer = Optional.ofNullable(pluginScript.tokenizer()).orElse(SimplePatternTokenizer.BY_WS_OR_PUNCT); + this.normalizer = Optional.ofNullable(pluginScript.normalizer()).orElse(SimpleTokenNormalizers.LC_TRIM_WS); + this.comparator = Optional.ofNullable(pluginScript.comparator()).orElse(new EqualityTokenComparator()); } catch (IOException e) { throw new ParseException("Failed to read script '" + script + "' - " + e.getMessage()); } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java index d1d7323a0..e1525cf1d 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.tools; -import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.io.Closeables; @@ -41,6 +40,8 @@ import java.nio.charset.Charset; import java.util.Comparator; import java.util.List; +import java.util.function.Function; +import java.util.stream.Stream; /** * @author Gregor Middell @@ -84,10 +85,10 @@ public static PluginScript read(String filename, Reader source) throws ScriptExc comparator = hasFunction(COMPARATOR_FUNCTION, "", ""); } - Function> tokenizer() { - return (tokenizer ? new Function>() { + Function> tokenizer() { + return (tokenizer ? 
new Function>() { @Override - public Iterable apply(@Nullable String input) { + public Stream apply(@Nullable String input) { final Object result = invoke(TOKENIZER_FUNCTION, input); if (!(result instanceof Iterable)) { throw new PluginScriptExecutionException("Wrong result type of " + @@ -107,7 +108,7 @@ public Iterable apply(@Nullable String input) { tokens.add((String) token); } - return tokens; + return tokens.stream(); } } : null); } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java index 717482ef3..85374c37f 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.tools; -import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.io.CharStreams; import com.google.common.io.Closeables; @@ -43,6 +42,9 @@ import java.net.URL; import java.nio.charset.Charset; import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * @author Gregor Middell @@ -57,7 +59,7 @@ public URLWitness(String sigil, URL url) { } public URLWitness read( - Function> tokenizer, + Function> tokenizer, Function normalizer, Charset charset, XPathExpression tokenXPath) @@ -79,11 +81,10 @@ public URLWitness read( } setTokens(tokens); } else { - final List tokens = Lists.newLinkedList(); - for (String tokenText : tokenizer.apply(CharStreams.toString(new InputStreamReader(stream, charset)))) { - tokens.add(new SimpleToken(this, tokenText, normalizer.apply(tokenText))); - } - setTokens(tokens); + setTokens(tokenizer.apply(CharStreams.toString(new InputStreamReader(stream, charset))) + .map(tokenText -> new SimpleToken(this, tokenText, normalizer.apply(tokenText))) + .collect(Collectors.toList()) + ); } } 
catch (ParserConfigurationException e) { throw new SAXException(e); From 0e6f4f87a3359f6e0d875bfc93f3b9948a906821 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 24 Jan 2015 22:59:31 +0100 Subject: [PATCH 17/52] Remove some Google Guava usages --- .../medite/AlignmentDecisionGraph.java | 24 +++++------- .../interedition/collatex/medite/Matches.java | 39 +++++++++---------- .../interedition/collatex/tools/CollateX.java | 7 ++-- 3 files changed, 31 insertions(+), 39 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java index dac70eef9..96d815b4d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java @@ -19,17 +19,17 @@ package eu.interedition.collatex.medite; -import com.google.common.base.Objects; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import eu.interedition.collatex.util.VertexMatch; +import java.util.ArrayList; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.PriorityQueue; import java.util.SortedSet; +import java.util.TreeSet; import java.util.function.Function; /** @@ -45,14 +45,14 @@ public class AlignmentDecisionGraph { AlignmentDecisionGraph(List> matches, Function, Integer> matchEvaluator) { this.matches = matches; this.matchEvaluator = matchEvaluator; - this.bestPaths = new PriorityQueue<>(matches.size(), PATH_COST_COMPARATOR); - this.minCosts = Maps.newHashMap(); + this.bestPaths = new PriorityQueue<>(matches.size(), Comparator.comparingInt(n -> n.cost)); + this.minCosts = new HashMap<>(); } static SortedSet> filter(SortedSet> matches, Function, Integer> matchEvaluator) { - final 
SortedSet> alignments = Sets.newTreeSet(VertexMatch.setComparator()); + final SortedSet> alignments = new TreeSet<>(VertexMatch.setComparator()); - final List> matchList = Lists.newArrayList(matches); + final List> matchList = new ArrayList<>(matches); Node optimal = new AlignmentDecisionGraph(matchList, matchEvaluator).findBestPath(); while (optimal.matchIndex >= 0) { if (optimal.aligned) { @@ -145,14 +145,8 @@ public boolean equals(Object obj) { @Override public int hashCode() { - return Objects.hashCode(matchIndex, aligned); + return Objects.hash(matchIndex, aligned); } } - static final Comparator PATH_COST_COMPARATOR = new Comparator() { - @Override - public int compare(Node o1, Node o2) { - return (o1.cost - o2.cost); - } - }; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java index e723d6fe2..563cd1912 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java @@ -19,27 +19,24 @@ package eu.interedition.collatex.medite; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.util.VertexMatch; import java.util.ArrayList; +import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Function; +import java.util.stream.Collectors; /** * @author 
Gregor Middell @@ -52,27 +49,27 @@ public Matches(int initialCapacity) { public static Matches between(VariantGraph.Vertex[][] vertices, SuffixTree suffixTree, Function, Integer> matchEvaluator) { - final Multimap matchThreads = HashMultimap.create(); + final Map> matchThreads = new HashMap<>(); for (int rank = 0; rank < vertices.length; rank++) { for (VariantGraph.Vertex vertex : vertices[rank]) { final MatchThreadElement matchThreadElement = new MatchThreadElement(suffixTree).advance(vertex, rank); if (matchThreadElement != null) { - matchThreads.put(rank, matchThreadElement); + matchThreads.computeIfAbsent(rank, r -> new LinkedList<>()).add(matchThreadElement); } } for (MatchThreadElement matchThreadElement : matchThreads.get(rank - 1)) { for (VariantGraph.Vertex vertex : vertices[rank]) { final MatchThreadElement advanced = matchThreadElement.advance(vertex, rank); if (advanced != null) { - matchThreads.put(rank, advanced); + matchThreads.computeIfAbsent(rank, r -> new LinkedList<>()).add(advanced); } } } } final Matches matches = new Matches(matchThreads.size()); - for (MatchThreadElement matchThreadElement : matchThreads.values()) { - final List> threadPhrases = Lists.newArrayList(); + matchThreads.values().stream().flatMap(List::stream).forEach(matchThreadElement -> { + final List> threadPhrases = new ArrayList<>(); boolean firstElement = true; for (MatchThreadElement threadElement : matchThreadElement.thread()) { final SuffixTree.EquivalenceClass equivalenceClass = threadElement.cursor.matchedClass(); @@ -93,7 +90,7 @@ public static Matches between(VariantGraph.Vertex[][] vertices, SuffixTree o1, SortedSet> findMaximalUniqueMatches() { - final List> allMatches = Lists.newArrayList(this); - final SortedSet> maximalUniqueMatches = Sets.newTreeSet(VertexMatch.setComparator()); + final List> allMatches = new ArrayList<>(this); + final SortedSet> maximalUniqueMatches = new TreeSet<>(VertexMatch.setComparator()); while (true) { SortedSet nextMum = null; @@ 
-150,12 +147,14 @@ public SortedSet> findMaximalUniqueMatches candidate = successor; } if (nextMum == null) { - nextMum = Iterables.getFirst(allMatches, null); + nextMum = allMatches.stream().findFirst().orElse(null); } if (nextMum == null) { break; } - Preconditions.checkState(maximalUniqueMatches.add(nextMum), "Duplicate MUM"); + if (!maximalUniqueMatches.add(nextMum)) { + throw new IllegalStateException("Duplicate MUM"); + } final BitSet rankFilter = new BitSet(); final BitSet tokenFilter = new BitSet(); @@ -192,7 +191,7 @@ static class MatchThreadElement { MatchThreadElement advance(VariantGraph.Vertex vertex, int vertexRank) { final Set tokens = vertex.tokens(); if (!tokens.isEmpty()) { - final SuffixTree.Cursor next = cursor.move(Iterables.get(tokens, 0)); + final SuffixTree.Cursor next = cursor.move(tokens.stream().findFirst().get()); if (next != null) { return new MatchThreadElement(this, vertex, vertexRank, next); } @@ -201,7 +200,7 @@ MatchThreadElement advance(VariantGraph.Vertex vertex, int vertexRank) { } List thread() { - final LinkedList thread = Lists.newLinkedList(); + final LinkedList thread = new LinkedList<>(); MatchThreadElement current = this; while (current.vertex != null) { thread.addFirst(current); @@ -212,7 +211,7 @@ List thread() { @Override public String toString() { - return "[" + Joiner.on(", ").join(vertexRank, vertex, cursor.matchedClass()) + "]"; + return "[" + Arrays.asList(vertexRank, vertex, cursor.matchedClass()).stream().map(Object::toString).collect(Collectors.joining(", ")) + "]"; } } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java index 2e40d0c02..e952b3880 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java @@ -19,8 +19,6 @@ package eu.interedition.collatex.tools; -import 
com.google.common.base.Objects; -import com.google.common.collect.Lists; import com.google.common.io.Closeables; import com.google.common.io.Closer; import com.google.common.io.Files; @@ -68,6 +66,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.Optional; @@ -156,7 +155,7 @@ CollateX configure(CommandLine commandLine) throws XPathExpressionException, Par final String[] witnessSpecs = commandLine.getArgs(); - this.inputResources = Lists.newArrayListWithExpectedSize(witnessSpecs.length); + this.inputResources = new ArrayList<>(witnessSpecs.length); for (String witnessSpec : witnessSpecs) { inputResources.add(argumentToResource(witnessSpec)); } @@ -174,7 +173,7 @@ CollateX read() throws IOException, XPathExpressionException, SAXException { this.witnesses = JsonProcessor.read(inputStream).getWitnesses(); } } else { - this.witnesses = Lists.newArrayListWithExpectedSize(inputResources.size()); + this.witnesses = new ArrayList<>(inputResources.size()); //noinspection Convert2streamapi for (URL witnessURL : inputResources) { this.witnesses.add(new URLWitness("w" + (witnesses.size() + 1), witnessURL) From b199d132697ed7b3f09dce8b818d3812480aa1bc Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 13:38:42 +0100 Subject: [PATCH 18/52] Medite Matching: Fix NPE while accessing former MultiMap --- .../src/main/java/eu/interedition/collatex/medite/Matches.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java index 563cd1912..69bb91267 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java @@ -57,7 +57,7 @@ public static Matches 
between(VariantGraph.Vertex[][] vertices, SuffixTree new LinkedList<>()).add(matchThreadElement); } } - for (MatchThreadElement matchThreadElement : matchThreads.get(rank - 1)) { + for (MatchThreadElement matchThreadElement : matchThreads.getOrDefault(rank - 1, Collections.emptyList())) { for (VariantGraph.Vertex vertex : vertices[rank]) { final MatchThreadElement advanced = matchThreadElement.advance(vertex, rank); if (advanced != null) { From 1bd1c5c1a6bb2de62d94012888dc9761edac7c30 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 14:09:39 +0100 Subject: [PATCH 19/52] Remove some Google Guava usages --- .../util/ParallelSegmentationApparatus.java | 38 +++++++++---------- .../collatex/util/VariantGraphRanking.java | 19 ++++------ 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java index 7b5313287..020e004f4 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java @@ -19,25 +19,20 @@ package eu.interedition.collatex.util; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.LinkedHashMultimap; -import com.google.common.collect.Maps; -import com.google.common.collect.Multimap; -import com.google.common.collect.Ordering; -import com.google.common.collect.SetMultimap; -import com.google.common.collect.Sets; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; -import javax.xml.stream.XMLStreamWriter; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; 
import java.util.Map; import java.util.Set; import java.util.SortedMap; +import java.util.TreeMap; /** * @author Gregor Middell @@ -61,41 +56,42 @@ public static void generate(VariantGraphRanking ranking, GeneratorCallback callb for (Iterator>> rowIt = ranking.getByRank().asMap().entrySet().iterator(); rowIt.hasNext(); ) { final Map.Entry> row = rowIt.next(); final int rank = row.getKey(); - final Collection vertices = row.getValue(); + final Collection verticesOfRank = row.getValue(); - if (vertices.size() == 1 && Iterables.getOnlyElement(vertices).tokens().isEmpty()) { + + if (verticesOfRank.size() == 1 && verticesOfRank.stream().findFirst().map(VariantGraph.Vertex::tokens).map(Set::isEmpty).orElse(false)) { // skip start and end vertex continue; } // spreading vertices with same rank according to their registered transpositions - final Multimap verticesByTranspositionRank = HashMultimap.create(); - for (VariantGraph.Vertex v : vertices) { + final SortedMap> verticesByTranspositionRank = new TreeMap<>(); + for (VariantGraph.Vertex v : verticesOfRank) { int transpositionRank = 0; for (VariantGraph.Transposition transposition : v.transpositions()) { for (VariantGraph.Vertex tv : transposition) { transpositionRank += (ranking.apply(tv).intValue() - rank); } } - verticesByTranspositionRank.put(transpositionRank, v); + verticesByTranspositionRank.computeIfAbsent(transpositionRank, r -> new LinkedList<>()).add(v); } // render segments - for (Iterator transpositionRankIt = Ordering.natural().immutableSortedCopy(verticesByTranspositionRank.keySet()).iterator(); transpositionRankIt.hasNext() ;) { - final Multimap tokensByWitness = HashMultimap.create(); - for (VariantGraph.Vertex v : verticesByTranspositionRank.get(transpositionRankIt.next())) { + verticesByTranspositionRank.values().forEach(vertices -> { + final Map> tokensByWitness = new HashMap<>(); + for (VariantGraph.Vertex v : vertices) { for (Token token : v.tokens()) { - tokensByWitness.put(token.getWitness(), 
token); + tokensByWitness.computeIfAbsent(token.getWitness(), w -> new LinkedList<>()).add(token); } } - final SortedMap> cellContents = Maps.newTreeMap(Witness.SIGIL_COMPARATOR); + final SortedMap> cellContents = new TreeMap<>(Witness.SIGIL_COMPARATOR); for (Witness witness : allWitnesses) { - cellContents.put(witness, tokensByWitness.containsKey(witness) ? Iterables.unmodifiableIterable(tokensByWitness.get(witness)) : Collections.emptySet()); + cellContents.put(witness, Collections.unmodifiableCollection(tokensByWitness.getOrDefault(witness, Collections.emptyList()))); } callback.segment(cellContents); - } + }); } callback.end(); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java index 7bd280d30..b3c6648ff 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java @@ -19,15 +19,6 @@ package eu.interedition.collatex.util; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; - -import javax.annotation.Nullable; - import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.base.Preconditions; @@ -40,12 +31,18 @@ import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeBasedTable; import com.google.common.collect.TreeMultimap; - import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.VariantGraph.Vertex; import eu.interedition.collatex.Witness; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; + /** * @author Gregor Middell * @author Ronald Haentjens Dekker @@ 
-153,7 +150,7 @@ public VariantGraph.Vertex[][] asArray() { } @Override - public Integer apply(@Nullable VariantGraph.Vertex vertex) { + public Integer apply(VariantGraph.Vertex vertex) { return byVertex.get(vertex); } From 370f4389bf20330d34ae26db0b95051a879a27ea Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 16:48:11 +0100 Subject: [PATCH 20/52] Remove some Google Guava usages --- .../collatex/CollationAlgorithm.java | 1 - .../simple/SimpleVariantGraphSerializer.java | 2 +- .../util/ParallelSegmentationApparatus.java | 4 +- .../collatex/util/VariantGraphRanking.java | 90 +++++++------------ .../interedition/collatex/AbstractTest.java | 58 ++++++------ .../collatex/dekker/AlignmentTest.java | 43 +++++---- .../collatex/dekker/DarwinTest.java | 2 +- .../collatex/dekker/SpencerHoweTest.java | 6 +- .../dekker/TranspositionRenderingTest.java | 24 +++-- .../collatex/lab/VariantGraphLayout.java | 14 ++- .../collatex/output/AlignmentTableTest.java | 35 ++++---- 11 files changed, 124 insertions(+), 155 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java index c4e6949ac..d24d99364 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java @@ -27,7 +27,6 @@ import eu.interedition.collatex.dekker.Match; import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm; import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschScorer; -import eu.interedition.collatex.util.VariantGraphRanking; import eu.interedition.collatex.util.VertexMatch; import java.util.ArrayList; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index f1d69a587..be4b86f34 100644 --- 
a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -280,7 +280,7 @@ VariantGraphRanking ranking() { Set> transposedTuples() { final Set> tuples = Sets.newHashSet(); - final Ordering vertexOrdering = Ordering.from(ranking()).compound((o1, o2) -> Ordering.arbitrary().compare(o1, o2)); + final Ordering vertexOrdering = Ordering.from(ranking().comparator()).compound((o1, o2) -> Ordering.arbitrary().compare(o1, o2)); for (VariantGraph.Transposition transposition : graph.transpositions()) { final SortedSetMultimap verticesByWitness = TreeMultimap.create(Witness.SIGIL_COMPARATOR, vertexOrdering); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java index 020e004f4..2be62ee28 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java @@ -53,8 +53,8 @@ public static void generate(VariantGraphRanking ranking, GeneratorCallback callb callback.start(); final Set allWitnesses = ranking.witnesses(); - for (Iterator>> rowIt = ranking.getByRank().asMap().entrySet().iterator(); rowIt.hasNext(); ) { - final Map.Entry> row = rowIt.next(); + for (Iterator>> rowIt = ranking.getByRank().entrySet().iterator(); rowIt.hasNext(); ) { + final Map.Entry> row = rowIt.next(); final int rank = row.getKey(); final Collection verticesOfRank = row.getValue(); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java index b3c6648ff..ff8f4e839 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java +++ 
b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java @@ -19,18 +19,6 @@ package eu.interedition.collatex.util; -import com.google.common.base.Function; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; -import com.google.common.collect.AbstractIterator; -import com.google.common.collect.Maps; -import com.google.common.collect.Multimaps; -import com.google.common.collect.Ordering; -import com.google.common.collect.RowSortedTable; -import com.google.common.collect.Sets; -import com.google.common.collect.SortedSetMultimap; -import com.google.common.collect.TreeBasedTable; -import com.google.common.collect.TreeMultimap; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.VariantGraph.Vertex; @@ -38,19 +26,26 @@ import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; -import java.util.SortedSet; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.function.Function; +import java.util.stream.Collectors; /** * @author Gregor Middell * @author Ronald Haentjens Dekker */ -public class VariantGraphRanking implements Iterable>, Function, Comparator { +public class VariantGraphRanking implements Iterable>, Function { - private final Map byVertex = Maps.newHashMap(); - private final SortedSetMultimap byRank = TreeMultimap.create(Ordering.natural(), Ordering.arbitrary()); + private final Map byVertex = new HashMap<>(); + private final SortedMap> byRank = new TreeMap<>(); private final VariantGraph graph; private final Set witnesses; @@ -72,7 +67,7 @@ public static VariantGraphRanking of(VariantGraph graph, Set witnesses) } rank++; ranking.byVertex.put(v, rank); - ranking.byRank.put(rank, v); + ranking.byRank.computeIfAbsent(rank, r -> new 
HashSet<>()).add(v); } return ranking; } @@ -88,21 +83,21 @@ public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set< rank++; } ranking.byVertex.put(v, rank); - ranking.byRank.put(rank, v); + ranking.byRank.computeIfAbsent(rank, r -> new HashSet<>()).add(v); } return ranking; } public Set witnesses() { - return Objects.firstNonNull(witnesses, graph.witnesses()); + return Optional.ofNullable(witnesses).orElse(graph.witnesses()); } public Map getByVertex() { return Collections.unmodifiableMap(byVertex); } - public SortedSetMultimap getByRank() { - return Multimaps.unmodifiableSortedSetMultimap(byRank); + public Map> getByRank() { + return Collections.unmodifiableMap(byRank); } public int size() { @@ -111,41 +106,23 @@ public int size() { @Override public Iterator> iterator() { - return new AbstractIterator>() { - private final Iterator it = byRank.keySet().iterator(); - - @Override - protected Set computeNext() { - return (it.hasNext() ? byRank.get(it.next()) : endOfData()); - } - }; + return byRank.values().iterator(); } - public RowSortedTable> asTable() { - final TreeBasedTable> table = TreeBasedTable.create(Ordering.natural(), Witness.SIGIL_COMPARATOR); - for (Map.Entry rank : byVertex.entrySet()) { - final int row = rank.getValue(); - for (Token token : rank.getKey().tokens(witnesses)) { - final Witness column = token.getWitness(); - - Set cell = table.get(row, column); - if (cell == null) { - table.put(row, column, cell = Sets.newHashSet()); - } - cell.add(token); - } - } - return table; + public List>> asTable() { + return byRank.values().stream() + .filter(rank -> rank.stream().flatMap(v -> v.tokens(witnesses).stream()).findFirst().isPresent()) + .map(vertices -> { + final SortedMap> row = new TreeMap<>(Witness.SIGIL_COMPARATOR); + vertices.stream().flatMap(v -> v.tokens(witnesses).stream()).forEach(token -> row.computeIfAbsent(token.getWitness(), w -> new HashSet<>()).add(token)); + return row; + }) + .collect(Collectors.toList()); } 
public VariantGraph.Vertex[][] asArray() { - final Set ranks = byRank.keySet(); - final VariantGraph.Vertex[][] arr = new VariantGraph.Vertex[ranks.size()][]; - for (final Iterator it = ranks.iterator(); it.hasNext(); ) { - final Integer rank = it.next(); - final SortedSet vertices = byRank.get(rank); - arr[rank] = vertices.toArray(new Vertex[vertices.size()]); - } + final VariantGraph.Vertex[][] arr = new VariantGraph.Vertex[byRank.size()][]; + byRank.forEach((rank, vertices) -> arr[rank] = vertices.toArray(new Vertex[vertices.size()])); return arr; } @@ -154,14 +131,7 @@ public Integer apply(VariantGraph.Vertex vertex) { return byVertex.get(vertex); } - @Override - public int compare(VariantGraph.Vertex o1, VariantGraph.Vertex o2) { - final Integer o1Rank = byVertex.get(o1); - final Integer o2Rank = byVertex.get(o2); - - Preconditions.checkState(o1Rank != null, o1); - Preconditions.checkState(o2Rank != null, o2); - - return (o1Rank.intValue() - o2Rank.intValue()); + public Comparator comparator() { + return Comparator.comparingInt(byVertex::get); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 52ec8513d..379136b0b 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -19,14 +19,12 @@ package eu.interedition.collatex; -import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.collect.Ordering; -import com.google.common.collect.RowSortedTable; import com.google.common.collect.Sets; import eu.interedition.collatex.dekker.DekkerAlgorithm; import eu.interedition.collatex.dekker.Match; @@ -41,8 +39,11 @@ import 
java.util.Collections; import java.util.List; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static eu.interedition.collatex.dekker.Match.PHRASE_MATCH_TO_TOKENS; import static org.junit.Assert.assertEquals; @@ -86,7 +87,7 @@ protected VariantGraph collate(String... witnesses) { return collate(createWitnesses(witnesses)); } - protected static RowSortedTable> table(VariantGraph graph) { + protected static List>> table(VariantGraph graph) { return VariantGraphRanking.of(graph).asTable(); } @@ -144,31 +145,32 @@ protected static VariantGraph.Vertex vertexWith(VariantGraph graph, String conte return null; } - protected static String toString(RowSortedTable> table) { - final StringBuilder tableStr = new StringBuilder(); - for (Witness witness : table.columnKeySet()) { - tableStr.append(witness.getSigil()).append(": ").append(toString(table, witness)).append("\n"); - } - return tableStr.toString(); - } - - protected static String toString(RowSortedTable> table, Witness witness) { - final StringBuilder tableRowStr = new StringBuilder("|"); - for (Integer row : table.rowKeySet()) { - final Set tokens = table.get(row, witness); - if (tokens == null) { - tableRowStr.append(" |"); - } else { - final List simpleTokens = Ordering.natural().sortedCopy(Iterables.filter(tokens, SimpleToken.class)); - tableRowStr.append(Joiner.on(" ").join(Iterables.transform(simpleTokens, new Function() { - @Override - public String apply(Token input) { - return ((SimpleToken) input).getNormalized(); - } - }))).append("|"); - } - } - return tableRowStr.toString(); + protected static Stream witnesses(List>> table) { + return table.stream() + .map(SortedMap::keySet) + .flatMap(Set::stream) + .distinct(); + } + + protected static String toString(List>> table) { + return witnesses(table) + .sorted(Witness.SIGIL_COMPARATOR) + .map(witness -> String.format("%s: 
%s\n", witness.getSigil(), toString(table, witness))) + .collect(Collectors.joining()); + } + + protected static String toString(List>> table, Witness witness) { + return String.format("|%s|", table.stream() + .map(r -> r.getOrDefault(witness, Collections.emptySet())) + .map(tokens -> tokens.stream() + .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getNormalized) + .collect(Collectors.joining(" ")) + ) + .map(cell -> cell.isEmpty() ? " " : cell) + .collect(Collectors.joining("|"))); } protected void assertPhraseMatches(String... expectedPhrases) { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java index 1e5ba0022..75d5fd17e 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java @@ -18,21 +18,6 @@ */ package eu.interedition.collatex.dekker; -import static org.junit.Assert.assertEquals; - -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; - -import org.junit.Test; - -import com.google.common.collect.RowSortedTable; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; @@ -41,6 +26,18 @@ import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; + +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import java.io.StringWriter; +import 
java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; + +import static org.junit.Assert.assertEquals; /** * @@ -54,7 +51,7 @@ public class AlignmentTest extends AbstractTest { @Test public void doubleTransposition1() { final SimpleWitness[] w = createWitnesses("the cat is black", "black is the cat"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("|the|cat|is|black| |", toString(t, w[0])); assertEquals("|black| |is|the|cat|", toString(t, w[1])); } @@ -62,7 +59,7 @@ public void doubleTransposition1() { @Test public void doubleTransposition2() { final SimpleWitness[] w = createWitnesses("a b", "b a"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("| |a|b|", toString(t, w[0])); assertEquals("|b|a| |", toString(t, w[1])); } @@ -70,7 +67,7 @@ public void doubleTransposition2() { @Test public void doubleTransposition3() { final SimpleWitness[] w = createWitnesses("a b c", "b a c"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("| |a|b|c|", toString(t, w[0])); assertEquals("|b|a| |c|", toString(t, w[1])); } @@ -81,7 +78,7 @@ public void additionInCombinationWithTransposition() { "the cat is very happy",// "very happy is the cat",// "very delitied and happy is the cat"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("|the|cat| | |is|very|happy|", toString(t, w[0])); assertEquals("|very| | |happy|is|the|cat|", toString(t, w[1])); assertEquals("|very|delitied|and|happy|is|the|cat|", toString(t, w[2])); @@ -92,7 +89,7 @@ public void simpleTransposition() { final SimpleWitness[] w = createWitnesses(// "A black cat in a white basket",// "A white cat in a black basket"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("|a|black|cat|in|a|white|basket|", toString(t, w[0])); 
assertEquals("|a|white|cat|in|a|black|basket|", toString(t, w[1])); } @@ -100,7 +97,7 @@ public void simpleTransposition() { @Test public void transposeInOnePair() { final SimpleWitness[] w = createWitnesses("y", "x y z", "z y"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("| |y| |", toString(t, w[0])); assertEquals("|x|y|z|", toString(t, w[1])); assertEquals("|z|y| |", toString(t, w[2])); @@ -109,7 +106,7 @@ public void transposeInOnePair() { @Test public void transposeInTwoPairs() { final SimpleWitness[] w = createWitnesses("y x", "x y z", "z y"); - final RowSortedTable> t = table(collate(w)); + final List>> t = table(collate(w)); assertEquals("| |y|x|", toString(t, w[0])); assertEquals("|x|y|z|", toString(t, w[1])); assertEquals("|z|y| |", toString(t, w[2])); @@ -144,7 +141,7 @@ public void testPhraseMatchingShouldNotIgnoreAdditions() { @Test public void testOrderIndependenceTroy() throws XMLStreamException { - final List witnesses = new ArrayList(); + final List witnesses = new ArrayList<>(); witnesses.add(new SimpleWitness("w1", "X A Z ")); witnesses.add(new SimpleWitness("w2", "Y B Z ")); witnesses.add(new SimpleWitness("w3", "Y A X ")); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java index 68199e55c..17f0add09 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java @@ -55,6 +55,6 @@ public void incomplete() { "Effects of Habit; Correlation of Growth; Inheritance. Habit also has a decided influence, as in the period of flowering with plants when transported from one climate to another. 
In animals it has a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and I presume that this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parent. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is probably another instance of the effects of use. Not a single domestic animal can be named which has not in some country drooping ears; and the view which has been suggested that the drooping is due to the disuse of the muscles of the ear, from the animals being seldom alarmed by danger, seems probable.", "Habits are inherited and have a decided influence; as in the period of the flowering of plants when transported from one climate to another. In animals they have a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parents. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is probably another instance of the effects of use. Not one of our domestic animals can be named which has not in some country drooping ears; and the view which has been suggested that the drooping is due to the disuse of the muscles of the ear, from the animals being seldom alarmed by danger, seems probable.", "Effects of Habit and of the Use or Disuse of Parts; Correlated Variation; Inheritance. 
Changed habits produce an inherited effect, as in the period of the flowering of plants when transported from one climate to another. With animals the increased use or disuse of parts has had a more marked influence; thus I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild duck; and this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parents. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with these organs in other countries, is probably another instance of the effects of use. Not one of our domestic animals can be named which has not in some country drooping ears; and the view which has been suggested that the drooping is due to the disuse of the muscles of the ear, from the animals being seldom much alarmed, seems probable."); - Assert.assertEquals(6, table(graph).columnKeySet().size()); + Assert.assertEquals(6, witnesses(table(graph)).count()); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java index a9c36d9c9..ddd3810a4 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java @@ -29,7 +29,9 @@ import eu.interedition.collatex.util.VariantGraphRanking; import org.junit.Test; +import java.util.List; import java.util.Set; +import java.util.SortedMap; import static org.junit.Assert.assertEquals; @@ -48,9 +50,9 @@ public class SpencerHoweTest extends AbstractTest { @Test public void alignmentTable() { final SimpleWitness[] w = createWitnesses("a b c d e f", "x y z d e", "a b x y z"); - final RowSortedTable> table = VariantGraphRanking.of(collate(w)).asTable(); + final 
List>> table = VariantGraphRanking.of(collate(w)).asTable(); - assertEquals(3, table.columnKeySet().size()); + assertEquals(3, table.stream().flatMap(r -> r.keySet().stream()).distinct().count()); //NOTE: Currently the AT visualization aligns variation to the left of the table: see the 'C' element assertEquals("|a|b|c| | |d|e|f|", toString(table, w[0])); assertEquals("| | |x|y|z|d|e| |", toString(table, w[1])); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java index 786722ff7..e8459b158 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java @@ -19,18 +19,16 @@ package eu.interedition.collatex.dekker; -import com.google.common.collect.RowSortedTable; - import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; -import eu.interedition.collatex.Token; import eu.interedition.collatex.simple.SimpleWitness; - -import org.junit.Ignore; import org.junit.Test; +import java.util.List; import java.util.Set; +import java.util.SortedMap; import static org.junit.Assert.assertEquals; @@ -43,7 +41,7 @@ public void transposition1() { final SimpleWitness[] w = createWitnesses(// "the white and black cat", "The black cat",// "the black and white cat", "the black and green cat"); - final RowSortedTable> table = table(collate(w)); + final List>> table = table(collate(w)); assertEquals("|the|white|and|black|cat|", toString(table, w[0])); assertEquals("|the| | |black|cat|", toString(table, w[1])); @@ -54,7 +52,7 @@ public void transposition1() { @Test public void transposition2() { final SimpleWitness[] w = createWitnesses("He was agast, so", "He was agast", "So he was agast"); - 
final RowSortedTable> table = table(collate(w)); + final List>> table = table(collate(w)); assertEquals("| |he|was|agast|,|so|", toString(table, w[0])); assertEquals("| |he|was|agast| | |", toString(table, w[1])); @@ -64,7 +62,7 @@ public void transposition2() { @Test public void transposition2Reordered() { final SimpleWitness[] w = createWitnesses("So he was agast", "He was agast", "He was agast, so"); - final RowSortedTable> table = table(collate(w)); + final List>> table = table(collate(w)); assertEquals("|so|he|was|agast| | |", toString(table, w[0])); assertEquals("| |he|was|agast| | |", toString(table, w[1])); @@ -76,7 +74,7 @@ public void testTranspositionLimiter1() { final SimpleWitness a = new SimpleWitness("A","X a b"); final SimpleWitness b = new SimpleWitness("B","a b X"); VariantGraph graph = collate(a,b); - final RowSortedTable> table = table(graph); + final List>> table = table(graph); assertEquals("|x|a|b| |", toString(table, a)); assertEquals("| |a|b|x|", toString(table, b)); } @@ -85,7 +83,7 @@ public void testTranspositionLimiter1() { public void testTranspositionLimiter2() { final SimpleWitness a = new SimpleWitness("A","a b c ."); final SimpleWitness b = new SimpleWitness("B","a b c d e f g h i j k l m n o p q r s t u v w ."); - final RowSortedTable> table = table(collate(a,b)); + final List>> table = table(collate(a, b)); assertEquals("|a|b|c| | | | | | | | | | | | | | | | | | | | |.|", toString(table, a)); assertEquals("|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|.|", toString(table, b)); } @@ -96,7 +94,7 @@ public void testTranspositionLimiter3() { final SimpleWitness b = new SimpleWitness("B","a b c d e f g h i j k l m n o p X"); VariantGraph graph = collate(a,b); assertEquals(0, graph.transpositions().size()); - final RowSortedTable> table = table(graph); + final List>> table = table(graph); assertEquals("|x|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| |", toString(table, a)); assertEquals("| |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|x|", toString(table, 
b)); } @@ -107,7 +105,7 @@ public void testTranspositionLimiter4() { final SimpleWitness b = new SimpleWitness("B","X a b c d e f g h i j k l m n o p"); VariantGraph graph = collate(a,b); assertEquals(0, graph.transpositions().size()); - final RowSortedTable> table = table(graph); + final List>> table = table(graph); assertEquals("| |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|x|", toString(table, a)); assertEquals("|x|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| |", toString(table, b)); } @@ -118,7 +116,7 @@ public void additionInCombinationWithTransposition2() { "the cat is black",// "black is the cat",// "black and white is the cat"); - final RowSortedTable> t = table(collate(w[0], w[1], w[2])); + final List>> t = table(collate(w[0], w[1], w[2])); assertEquals("|the|cat| |is|black| |", toString(t, w[0])); assertEquals("|black| | |is|the|cat|", toString(t, w[1])); assertEquals("|black|and|white|is|the|cat|", toString(t, w[2])); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java index ca77b75e9..23e1d6f39 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java @@ -31,6 +31,8 @@ import java.util.Map; import java.util.Set; import java.util.SortedSet; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; /** * @author Gregor Middell @@ -73,15 +75,11 @@ private VariantGraphLayout(VariantGraph graph) { } private void fillLevels() { - final SortedSetMultimap ranks = VariantGraphRanking.of(graph).getByRank(); - for (Integer rank : ranks.keySet()) { - final List cells = Lists.newLinkedList(); - for (VariantGraph.Vertex vertex : ranks.get(rank)) { - cells.add(new Cell(rank, cells.size(), vertex)); - } - grid.add(cells); + VariantGraphRanking.of(graph).getByRank().forEach((rank, vertices) -> { + final AtomicInteger 
cellNum = new AtomicInteger(); + grid.add(vertices.stream().map(vertex -> new Cell(rank, cellNum.getAndIncrement(), vertex)).collect(Collectors.toList())); maxX = Math.max(maxX, rank); - } + }); } private void solveEdgeCrosses() { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java b/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java index c5e58fedf..aba3a3965 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java @@ -21,7 +21,9 @@ import static org.junit.Assert.assertEquals; +import java.util.List; import java.util.Set; +import java.util.SortedMap; import eu.interedition.collatex.VariantGraph; import org.junit.Ignore; @@ -44,16 +46,16 @@ public void emptyTable() { @Test public void firstWitness() { final SimpleWitness[] w = createWitnesses("the black cat"); - final RowSortedTable> table = table(collate(w)); - assertEquals(1, table.columnKeySet().size()); + final List>> table = table(collate(w)); + assertEquals(1, witnesses(table).count()); assertEquals("|the|black|cat|", toString(table, w[0])); } @Test public void everythingMatches() { final SimpleWitness[] w = createWitnesses("the black cat", "the black cat", "the black cat"); - final RowSortedTable> table = table(collate(w)); - assertEquals(3, table.columnKeySet().size()); + final List>> table = table(collate(w)); + assertEquals(3, witnesses(table).count()); assertEquals("|the|black|cat|", toString(table, w[0])); assertEquals("|the|black|cat|", toString(table, w[1])); assertEquals("|the|black|cat|", toString(table, w[2])); @@ -62,8 +64,8 @@ public void everythingMatches() { @Test public void variant() { final SimpleWitness[] w = createWitnesses("the black cat", "the white cat", "the green cat", "the red cat", "the yellow cat"); - final RowSortedTable> table = table(collate(w)); - assertEquals(5, 
table.columnKeySet().size()); + final List>> table = table(collate(w)); + assertEquals(5, witnesses(table).count()); assertEquals("|the|black|cat|", toString(table, w[0])); assertEquals("|the|white|cat|", toString(table, w[1])); assertEquals("|the|green|cat|", toString(table, w[2])); @@ -73,38 +75,39 @@ public void variant() { @Test public void omission() { - final RowSortedTable> table = table(collate("the black cat", "the cat", "the black cat")); + final List>> table = table(collate("the black cat", "the cat", "the black cat")); assertEquals("A: |the|black|cat|\nB: |the| |cat|\nC: |the|black|cat|\n", toString(table)); } @Test public void addition1() { - final RowSortedTable> table = table(collate("the black cat", "the white and black cat")); + final List>> table = table(collate("the black cat", "the white and black cat")); assertEquals("A: |the| | |black|cat|\nB: |the|white|and|black|cat|\n", toString(table)); } @Test public void addition2() { - final RowSortedTable> table = table(collate("the cat", "before the cat", "the black cat", "the cat walks")); + final List>> table = table(collate("the cat", "before the cat", "the black cat", "the cat walks")); assertEquals("A: | |the| |cat| |\nB: |before|the| |cat| |\nC: | |the|black|cat| |\nD: | |the| |cat|walks|\n", toString(table)); } @Test public void addition3() { - final RowSortedTable> t = table(collate("the cat", "before the cat", "the black cat", "just before midnight the cat walks")); + final List>> t = table(collate("the cat", "before the cat", "the black cat", "just before midnight the cat walks")); assertEquals("A: | | | |the| |cat| |\nB: | |before| |the| |cat| |\nC: | | | |the|black|cat| |\nD: |just|before|midnight|the| |cat|walks|\n", toString(t)); } @Test public void transpositionAndReplacement() { - final RowSortedTable> t = table(collate("the black dog chases a red cat", "a red cat chases the black dog", "a red cat chases the yellow dog")); + final + List>> t = table(collate("the black dog chases a red 
cat", "a red cat chases the black dog", "a red cat chases the yellow dog")); assertEquals("A: |the|black|dog|chases|a|red|cat|\nB: |a|red|cat|chases|the|black|dog|\nC: |a|red|cat|chases|the|yellow|dog|\n", toString(t)); } @Test @Ignore("By default we align to the left; right alignment would be nicer in this specific case") public void variation() { - final RowSortedTable> t = table(collate("the black cat", "the black and white cat", "the black very special cat", "the black not very special cat")); + final List>> t = table(collate("the black cat", "the black and white cat", "the black very special cat", "the black not very special cat")); assertEquals("A: |the|black| | | |cat|\nB: |the|black| |and|white|cat|\nC: |the|black| |very|special|cat|\nD: |the|black|not|very|special|cat|\n", toString(t)); } @@ -112,7 +115,7 @@ public void variation() { public void witnessReorder() { final SimpleWitness[] w = createWitnesses("the black cat", "the black and white cat", "the black not very special cat", "the black very special cat"); VariantGraph variantgraph = collate(w); - final RowSortedTable> table = table(variantgraph); + final List>> table = table(variantgraph); assertEquals("|the|black| | | |cat|", toString(table, w[0])); assertEquals("|the|black|and|white| |cat|", toString(table, w[1])); assertEquals("|the|black|not|very|special|cat|", toString(table, w[2])); @@ -122,8 +125,8 @@ public void witnessReorder() { @Test public void testSimpleSpencerHowe() { final SimpleWitness[] w = createWitnesses("a", "b", "a b"); - final RowSortedTable> table = table(collate(w)); - assertEquals(3, table.columnKeySet().size()); + final List>> table = table(collate(w)); + assertEquals(3, witnesses(table).count()); assertEquals("|a| |", toString(table, w[0])); assertEquals("| |b|", toString(table, w[1])); assertEquals("|a|b|", toString(table, w[2])); @@ -136,7 +139,7 @@ public void stringOutputOneWitness() { @Test public void stringOutputTwoWitnesses() { - final RowSortedTable> table = 
table(collate("the black cat", "the black cat")); + final List>> table = table(collate("the black cat", "the black cat")); assertEquals("A: |the|black|cat|\nB: |the|black|cat|\n", toString(table)); } From 1e3486ce017df04c975335652be3ae4e43175c36 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 17:06:33 +0100 Subject: [PATCH 21/52] Remove some Google Guava usages --- .../collatex/simple/SimpleToken.java | 28 +++---- .../simple/SimpleVariantGraphSerializer.java | 79 ++++++++----------- .../simple/SimpleWitnessTeiBuilder.java | 8 +- 3 files changed, 51 insertions(+), 64 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java index 7d9c760dc..ce6511085 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java @@ -19,14 +19,14 @@ package eu.interedition.collatex.simple; -import com.google.common.collect.Iterables; import eu.interedition.collatex.Token; import eu.interedition.collatex.Witness; import eu.interedition.collatex.util.VertexMatch; -import javax.annotation.Nullable; import java.util.SortedSet; import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; public class SimpleToken implements Token, Comparable { private final SimpleWitness witness; @@ -58,11 +58,12 @@ public String toString() { } public static String toString(Iterable tokens) { - final StringBuilder normalized = new StringBuilder(); - for (SimpleToken token : Iterables.filter(tokens, SimpleToken.class)) { - normalized.append(token.getContent()); - } - return normalized.toString().trim(); + return StreamSupport.stream(tokens.spliterator(), false) + .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) + .map(t -> (SimpleToken) t) + .map(SimpleToken::getContent) + 
.collect(Collectors.joining()) + .trim(); } @Override @@ -70,14 +71,11 @@ public int compareTo(SimpleToken o) { return witness.compare(this, o); } - public static final Function, Integer> TOKEN_MATCH_EVALUATOR = new Function, Integer>() { - @Override - public Integer apply(@Nullable SortedSet input) { - int value = 0; - for (VertexMatch.WithToken match : input) { - value += ((SimpleToken) match.token).getContent().length(); - } - return value; + public static final Function, Integer> TOKEN_MATCH_EVALUATOR = input -> { + int value = 0; + for (VertexMatch.WithToken match : input) { + value += ((SimpleToken) match.token).getContent().length(); } + return value; }; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index be4b86f34..4baaacaf6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -19,14 +19,6 @@ package eu.interedition.collatex.simple; -import com.google.common.base.Function; -import com.google.common.base.Objects; -import com.google.common.base.Throwables; -import com.google.common.collect.Iterables; -import com.google.common.collect.Ordering; -import com.google.common.collect.Sets; -import com.google.common.collect.SortedSetMultimap; -import com.google.common.collect.TreeMultimap; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; @@ -34,22 +26,27 @@ import eu.interedition.collatex.util.ParallelSegmentationApparatus; import eu.interedition.collatex.util.VariantGraphRanking; -import javax.annotation.Nullable; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; import java.io.IOException; import java.io.PrintWriter; import java.io.Writer; 
import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.function.Function; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; /** * @author Gregor Middell @@ -157,8 +154,7 @@ public void toCsv(final Writer out) throws IOException { @Override public void start() { try { - final List witnessList = Ordering.from(Witness.SIGIL_COMPARATOR).immutableSortedCopy(graph.witnesses()); - for (Iterator it = witnessList.iterator(); it.hasNext(); ) { + for (Iterator it = graph.witnesses().stream().sorted(Witness.SIGIL_COMPARATOR).iterator(); it.hasNext(); ) { out.write(escapeCsvField(it.next().getSigil())); if (it.hasNext()) { out.write(","); @@ -166,7 +162,7 @@ public void start() { } out.write("\r\n"); } catch (IOException e) { - throw Throwables.propagate(e); + throw new RuntimeException(e); } } @@ -174,14 +170,14 @@ public void start() { public void segment(SortedMap> contents) { try { for (Iterator witnessIt = contents.keySet().iterator(); witnessIt.hasNext();) { - out.write(escapeCsvField(tokensToString.apply(Objects.firstNonNull(contents.get(witnessIt.next()), Collections.emptySet())))); + out.write(escapeCsvField(tokensToString.apply(contents.getOrDefault(witnessIt.next(), Collections.emptySet())))); if (witnessIt.hasNext()) { out.write(","); } } out.write("\r\n"); } catch (IOException e) { - throw Throwables.propagate(e); + throw new RuntimeException(e); } } @@ -190,8 +186,15 @@ public void end() { } }); } catch (Throwable t) { - Throwables.propagateIfInstanceOf(Throwables.getRootCause(t), IOException.class); - throw Throwables.propagate(t); + for (Throwable cause = t; cause != null; cause = 
cause.getCause()) { + if (cause instanceof IOException) { + throw (IOException) cause; + } + } + if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } + throw new RuntimeException(t); } } @@ -250,15 +253,6 @@ private int numericId(VariantGraph.Vertex vertex) { return id; } - private String id(VariantGraph.Transposition transposition) { - Integer id = transpositionIds.get(transposition); - if (id == null) { - id = transpositionIds.size(); - transpositionIds.put(transposition, id); - } - return ("t" + id); - } - String toDotLabel(VariantGraph.Edge e) { return escapeDotLabel(Witness.TO_SIGILS.apply(e)); } @@ -279,14 +273,14 @@ VariantGraphRanking ranking() { } Set> transposedTuples() { - final Set> tuples = Sets.newHashSet(); - final Ordering vertexOrdering = Ordering.from(ranking().comparator()).compound((o1, o2) -> Ordering.arbitrary().compare(o1, o2)); + final Set> tuples = new HashSet<>(); + final Comparator vertexOrdering = ranking().comparator(); for (VariantGraph.Transposition transposition : graph.transpositions()) { - final SortedSetMultimap verticesByWitness = TreeMultimap.create(Witness.SIGIL_COMPARATOR, vertexOrdering); + final SortedMap> verticesByWitness = new TreeMap<>(Witness.SIGIL_COMPARATOR); for (VariantGraph.Vertex vertex : transposition) { for (Witness witness : vertex.witnesses()) { - verticesByWitness.put(witness, vertex); + verticesByWitness.computeIfAbsent(witness, w -> new TreeSet<>(vertexOrdering)).add(vertex); } } @@ -299,7 +293,7 @@ Set> transposedTuples() { final VariantGraph.Vertex prevVertex = prevIt.next(); final VariantGraph.Vertex nextVertex = nextIt.next(); if (!prevVertex.equals(nextVertex)) { - tuples.add(new Tuple(prevVertex, nextVertex)); + tuples.add(new Tuple<>(prevVertex, nextVertex)); } } } @@ -431,20 +425,17 @@ public void declare(XMLStreamWriter xml) throws XMLStreamException { final Function vertexToString = new Function() { @Override - public String apply(@Nullable VariantGraph.Vertex input) { - final 
Witness witness = Iterables.getFirst(input.witnesses(), null); - return (witness == null ? "" : tokensToString.apply(input.tokens(Collections.singleton(witness)))); + public String apply(VariantGraph.Vertex input) { + return input.witnesses().stream().findFirst() + .map(witness -> tokensToString.apply(input.tokens(Collections.singleton(witness)))) + .orElse(""); } }; - static final Function, String> SIMPLE_TOKEN_TO_STRING = input -> { - final List tokens = Ordering.natural().immutableSortedCopy( - Iterables.filter(input, SimpleToken.class) - ); - final StringBuilder sb = new StringBuilder(); - for (SimpleToken token : tokens) { - sb.append(token.getContent()); - } - return sb.toString(); - }; + static final Function, String> SIMPLE_TOKEN_TO_STRING = input -> StreamSupport.stream(input.spliterator(), false) + .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getContent) + .collect(Collectors.joining()); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java index b1dbe0a37..286c8338d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java @@ -19,16 +19,14 @@ package eu.interedition.collatex.simple; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.StreamSupport; - import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.XMLEvent; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; // we are going to use stax // previous version was build on DOM, which is not particularly well suited to parsing From 
0df762efdd627c8246ed8727e54a5dadb7e04fb2 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 17:11:49 +0100 Subject: [PATCH 22/52] Remove some Google Guava usages --- .../NeedlemanWunschAlgorithm.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java index e5aaab797..93c278abd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java @@ -19,16 +19,17 @@ package eu.interedition.collatex.needlemanwunsch; -import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.util.VariantGraphRanking; +import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.stream.StreamSupport; /** * @author Gregor Middell @@ -40,13 +41,7 @@ public class NeedlemanWunschAlgorithm extends CollationAlgorithm.Base { @Override public float score(VariantGraph.Vertex[] a, Token b) { - for (VariantGraph.Vertex vertex : a) { - final Set tokens = vertex.tokens(); - if (!tokens.isEmpty() && comparator.compare(Iterables.getFirst(tokens, null), b) == 0) { - return 1; - } - } - return -1; + return Arrays.stream(a).map(VariantGraph.Vertex::tokens).flatMap(Set::stream).anyMatch(t -> comparator.compare(t, b) == 0) ? 
1 : -1; } @Override @@ -62,9 +57,9 @@ public NeedlemanWunschAlgorithm(Comparator comparator) { @Override public void collate(VariantGraph against, Iterable witness) { final VariantGraph.Vertex[][] ranks = VariantGraphRanking.of(against).asArray(); - final Token[] tokens = Iterables.toArray(witness, Token.class); + final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); - final Map alignments = Maps.newHashMap(); + final Map alignments = new HashMap<>(); for (Map.Entry alignment : align(ranks, tokens, scorer).entrySet()) { boolean aligned = false; final Token token = alignment.getValue(); @@ -87,7 +82,7 @@ public void collate(VariantGraph against, Iterable witness) { public static Map align(A[] a, B[] b, NeedlemanWunschScorer scorer) { - final Map alignments = Maps.newHashMap(); + final Map alignments = new HashMap<>(); final float[][] matrix = new float[a.length + 1][b.length + 1]; int ac = 0; From a58d94e347928d70f6d4416bc08dd230a9046b7a Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 17:29:25 +0100 Subject: [PATCH 23/52] Remove some Google Guava usages --- .../collatex/medite/MediteAlgorithm.java | 7 +- .../collatex/medite/SuffixTree.java | 75 ++++++++----------- 2 files changed, 34 insertions(+), 48 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java index 9b0b31660..6cdc2b829 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java @@ -19,8 +19,6 @@ package eu.interedition.collatex.medite; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; @@ -33,6 +31,7 @@ import 
java.util.TreeSet; import java.util.function.Function; import java.util.stream.IntStream; +import java.util.stream.StreamSupport; /** * @author Gregor Middell @@ -50,13 +49,13 @@ public MediteAlgorithm(Comparator comparator, Function witness) { final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray(); - final Token[] tokens = Iterables.toArray(witness, Token.class); + final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); final SuffixTree suffixTree = SuffixTree.build(comparator, tokens); final MatchEvaluatorWrapper matchEvaluator = new MatchEvaluatorWrapper(this.matchEvaluator, tokens); final Matches matchCandidates = Matches.between(vertices, suffixTree, matchEvaluator); - final SortedSet> matches = Sets.newTreeSet(VertexMatch.setComparator()); + final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator()); while (true) { final SortedSet> maximalUniqueMatches = matchCandidates.findMaximalUniqueMatches(); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java index 7c560daa4..12f195ea7 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java @@ -19,12 +19,6 @@ package eu.interedition.collatex.medite; -import com.google.common.base.Joiner; -import com.google.common.base.Strings; -import com.google.common.collect.AbstractIterator; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; - import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; @@ -34,6 +28,9 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.IntStream; /** * @author Gregor Middell @@ -45,10 +42,12 @@ class SuffixTree { final T[] 
source; final Node root; + @SafeVarargs static SuffixTree build(Comparator comparator, T... source) { - return new SuffixTree(comparator, source).build(); + return new SuffixTree<>(comparator, source).build(); } + @SafeVarargs private SuffixTree(Comparator comparator, T... source) { this.comparator = comparator; this.sourceComparator = new SentinelAwareComparator(comparator); @@ -61,24 +60,23 @@ public Cursor cursor() { } public Iterable match(final Iterable str) { - return new Iterable() { + return () -> new Iterator() { + + final Iterator it = str.iterator(); + Optional cursor = Optional.ofNullable(it.hasNext() ? cursor().move(it.next()) : null); + @Override - public Iterator iterator() { - return new AbstractIterator() { - - Cursor cursor = cursor(); - final Iterator it = str.iterator(); - - @Override - protected EquivalenceClass computeNext() { - if (it.hasNext()) { - cursor = cursor.move(it.next()); - return (cursor == null ? endOfData() : cursor.matchedClass()); - } - return endOfData(); - } - }; + public boolean hasNext() { + return cursor.isPresent(); + } + + @Override + public EquivalenceClass next() { + final EquivalenceClass next = cursor.get().matchedClass(); + cursor = Optional.ofNullable(it.hasNext() ? 
cursor.get().move(it.next()) : null); + return next; } + }; } @@ -108,13 +106,11 @@ private void compactNodes(Node node) { @Override public String toString() { final StringBuilder sb = new StringBuilder(); - final Deque nodes = new ArrayDeque(Collections.singleton(root)); + final Deque nodes = new ArrayDeque<>(Collections.singleton(root)); while (!nodes.isEmpty()) { final Node node = nodes.remove(); - sb.append(Strings.repeat("\t", node.depth())).append(node).append("\n"); - for (Node child : node.children) { - nodes.addFirst(child); - } + sb.append(IntStream.range(0, node.depth()).mapToObj(i -> "\t").collect(Collectors.joining())).append(node).append("\n"); + node.children.forEach(nodes::addFirst); } return sb.toString(); } @@ -127,11 +123,11 @@ class Node { final LinkedList incomingLabel; Node parent; - List children = new ArrayList(); + List children = new ArrayList<>(); public Node(Node parent, int firstIndex) { this.parent = parent; - this.incomingLabel = Lists.newLinkedList(Collections.singleton(new EquivalenceClass(firstIndex))); + this.incomingLabel = new LinkedList<>(Collections.singleton(new EquivalenceClass(firstIndex))); } public Node() { @@ -175,7 +171,7 @@ private Node addSuffix(Node node, int start) { @Override public String toString() { - return Iterables.toString(incomingLabel == null ? Collections.emptySet() : incomingLabel); + return Optional.ofNullable(incomingLabel).map(label -> label.stream().map(Object::toString).collect(Collectors.joining(", "))).orElse(""); } } @@ -200,7 +196,7 @@ boolean isMember(int index) { } public boolean isMember(T symbol) { - return (members[0] == source.length ? 
false : comparator.compare(symbol, source[members[0]]) == 0); + return (members[0] != source.length && comparator.compare(symbol, source[members[0]]) == 0); } @Override @@ -223,18 +219,9 @@ public int compareTo(EquivalenceClass o) { @Override public String toString() { - return "{" + Joiner.on(", ").join(new AbstractIterator() { - private int mc = 0; - @Override - protected String computeNext() { - if (mc == length) { - return endOfData(); - } - - final int member = members[mc++]; - return "<[" + member + "] " + (member == source.length ? "$" : source[member].toString()) + ">"; - } - }) + "}"; + return String.format("{%s}", Arrays.stream(members, 0, length) + .mapToObj(member -> "<[" + member + "] " + (member == source.length ? "$" : source[member].toString()) + ">") + .collect(Collectors.joining(", "))); } } From c8ea1ba412413643daede58c1a44e60b756b2c0f Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 18:30:31 +0100 Subject: [PATCH 24/52] Remove some Google Guava usages --- .../collatex/dekker/matrix/MatchTable.java | 7 +- .../collatex/matching/Matches.java | 141 +++++++----------- .../collatex/dekker/BeckettTest.java | 10 +- .../collatex/matching/MatchesTest.java | 14 +- .../collatex/matching/NearMatcherTest.java | 8 +- 5 files changed, 77 insertions(+), 103 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java index db77d3bac..2f63a573c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java @@ -19,6 +19,7 @@ package eu.interedition.collatex.dekker.matrix; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -128,12 +129,12 @@ private static MatchTable createEmptyTable(VariantGraphRanking ranking, VariantG // move parameters 
into fields? private void fillTableWithMatches(VariantGraphRanking ranking, VariantGraph graph, Iterable witness, Comparator comparator) { Matches matches = Matches.between(graph.vertices(), witness, comparator); - Set unique = matches.getUnique(); - Set ambiguous = matches.getAmbiguous(); + Set unique = matches.uniqueInWitness; + Set ambiguous = matches.ambiguousInWitness; int rowIndex=0; for (Token t : witness) { if (unique.contains(t) || ambiguous.contains(t)) { - List matchingVertices = matches.getAll().get(t); + List matchingVertices = matches.allMatches.getOrDefault(t, Collections.emptyList()); for (VariantGraph.Vertex vgv : matchingVertices) { set(rowIndex, ranking.apply(vgv) - 1, t, vgv); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java index 3c5d2a356..cf281cd72 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java @@ -19,105 +19,76 @@ package eu.interedition.collatex.matching; -import java.util.Collection; +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; + +import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; import java.util.Set; - -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ImmutableMultiset; -import com.google.common.collect.Iterables; -import com.google.common.collect.ListMultimap; -import com.google.common.collect.Multiset; -import com.google.common.collect.Sets; - -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; public 
class Matches { - private final ListMultimap all; - private final Set unmatched; - private final Set ambiguous; - private final Set unique; + public final Map> allMatches; + public final Set unmatchedInWitness; + public final Set ambiguousInWitness; + public final Set uniqueInWitness; public static Matches between(final Iterable vertices, final Iterable witnessTokens, Comparator comparator) { - final ListMultimap all = ArrayListMultimap.create(); - for (VariantGraph.Vertex vertex : vertices) { - final Set tokens = vertex.tokens(); - if (tokens.isEmpty()) { - continue; - } - for (Token witnessToken : witnessTokens) { - if (comparator.compare(Iterables.getFirst(tokens, null), witnessToken) == 0) { - all.put(witnessToken, vertex); - } - } - } - - // unmatched tokens - Set unmatched = Sets.newLinkedHashSet(); - for (Token witnessToken : witnessTokens) { - if (!all.containsKey(witnessToken)) { - unmatched.add(witnessToken); - } - } - // unsure tokens (have to check: base -> witness, and witness -> base) - Set ambiguous = Sets.newLinkedHashSet(); - for (Token witnessToken : witnessTokens) { - int count = all.keys().count(witnessToken); - if (count > 1) { - ambiguous.add(witnessToken); - } - } - Multiset bag = ImmutableMultiset.copyOf(all.values()); - Set unsureBaseTokens = Sets.newLinkedHashSet(); - for (VariantGraph.Vertex baseToken : vertices) { - int count = bag.count(baseToken); - if (count > 1) { - unsureBaseTokens.add(baseToken); - } - } - Collection> entries = all.entries(); - for (Map.Entry entry : entries) { - if (unsureBaseTokens.contains(entry.getValue())) { - ambiguous.add(entry.getKey()); - } - } + final Map> allMatches = new HashMap<>(); + + StreamSupport.stream(vertices.spliterator(), false).forEach(vertex -> + vertex.tokens().stream().findFirst().ifPresent(baseToken -> + StreamSupport.stream(witnessTokens.spliterator(), false) + .filter(witnessToken -> comparator.compare(baseToken, witnessToken) == 0) + .forEach(matchingToken -> 
allMatches.computeIfAbsent(matchingToken, t -> new ArrayList<>()).add(vertex)))); + + final Set unmatchedInWitness = StreamSupport.stream(witnessTokens.spliterator(), false) + .filter(t -> !allMatches.containsKey(t)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + final Set ambiguousInBase = allMatches.values().stream() + .flatMap(List::stream) + .collect(Collectors.toMap(Function.identity(), v -> 1, (a, b) -> a + b)) + .entrySet() + .stream() + .filter(v -> v.getValue() > 1) + .map(Map.Entry::getKey) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + // (have to check: base -> witness, and witness -> base) + final Set ambiguousInWitness = Stream.concat( + StreamSupport.stream(witnessTokens.spliterator(), false) + .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() > 1), + + allMatches.entrySet().stream() + .filter(match -> match.getValue().stream().anyMatch(ambiguousInBase::contains)) + .map(Map.Entry::getKey) + ).collect(Collectors.toCollection(LinkedHashSet::new)); + // sure tokens // have to check unsure tokens because of (base -> witness && witness -> base) - Set unique = Sets.newLinkedHashSet(); - for (Token witnessToken : witnessTokens) { - if (all.keys().count(witnessToken) == 1 && !ambiguous.contains(witnessToken)) { - unique.add(witnessToken); - } - } - - return new Matches(all, unmatched, ambiguous, unique); - } - - private Matches(ListMultimap all, Set unmatched, Set ambiguous, Set unique) { - this.all = all; - this.unmatched = unmatched; - this.ambiguous = ambiguous; - this.unique = unique; - } - - public ListMultimap getAll() { - return all; - } - - public Set getUnmatched() { - return unmatched; - } + final Set uniqueInWitness = StreamSupport.stream(witnessTokens.spliterator(), false) + .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() == 1 && !ambiguousInWitness.contains(t)) + .collect(Collectors.toCollection(LinkedHashSet::new)); - public Set getAmbiguous() { - return ambiguous; + 
return new Matches(allMatches, unmatchedInWitness, ambiguousInWitness, uniqueInWitness); } - public Set getUnique() { - return unique; + private Matches(Map> allMatches, Set unmatchedInWitness, Set ambiguousInWitness, Set uniqueInWitness) { + this.allMatches = Collections.unmodifiableMap(allMatches); + this.unmatchedInWitness = Collections.unmodifiableSet(unmatchedInWitness); + this.ambiguousInWitness = Collections.unmodifiableSet(ambiguousInWitness); + this.uniqueInWitness = Collections.unmodifiableSet(uniqueInWitness); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java index 65bfbf087..876007f9d 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java @@ -26,6 +26,7 @@ import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.SortedSet; @@ -33,7 +34,6 @@ import org.junit.Test; import com.google.common.collect.Iterables; -import com.google.common.collect.ListMultimap; import com.google.common.collect.Sets; import eu.interedition.collatex.AbstractTest; @@ -73,7 +73,7 @@ public void dirkVincent() { "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); final VariantGraph graph = collate(w[0]); - final ListMultimap matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).getAll(); + final Map> matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).allMatches; assertVertexHasContent(matches.get(w[1].getTokens().get(0)).get(0), "its", w[0]); assertEquals(2, 
matches.get(w[1].getTokens().get(3)).size()); // 2 matches for 'light' @@ -86,7 +86,7 @@ public void dirkVincentWithMatchMatrixLinker() { "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); final VariantGraph graph = collate(w[0]); - final ListMultimap matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).getAll(); + final Map> matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).allMatches; assertVertexHasContent(matches.get(w[1].getTokens().get(0)).get(0), "its", w[0]); assertEquals(2, matches.get(w[1].getTokens().get(3)).size()); // 2 matches for 'light' @@ -140,8 +140,8 @@ public void dirkVincent8() { final VariantGraph graph = collate(w[0], w[1]); final Matches matches = Matches.between(graph.vertices(), w[2].getTokens(), new EqualityTokenComparator()); - final Set unmatchedTokens = matches.getUnmatched(); - final Set unsureTokens = matches.getAmbiguous(); + final Set unmatchedTokens = matches.unmatchedInWitness; + final Set unsureTokens = matches.ambiguousInWitness; final List w2Tokens = w[2].getTokens(); assertTrue(unmatchedTokens.contains(w2Tokens.get(1))); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java b/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java index 2a560b10b..1a8af54de 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java @@ -21,11 +21,11 @@ import static org.junit.Assert.*; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.logging.Level; -import com.google.common.collect.ListMultimap; - import 
eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; @@ -68,21 +68,21 @@ public void test3OverlappingIslands() { VariantGraph vg = collate(sw[0]); final Matches matches = Matches.between(vg.vertices(), sw[1].getTokens(), new EqualityTokenComparator()); assertMatches(matches, 0, 3, 2); - assertEquals(7, matches.getAll().size()); + assertEquals(7, matches.allMatches.values().stream().flatMap(List::stream).count()); } private void assertMatches(final Matches matches, int expected_unmatched, int expected_unique, int expected_ambiguous) { - Set unmatched = matches.getUnmatched(); + Set unmatched = matches.unmatchedInWitness; LOG.log(Level.FINE, "unmatched: {0}", unmatched); - Set unique = matches.getUnique(); + Set unique = matches.uniqueInWitness; LOG.log(Level.FINE, "unique: {0}", unique); - Set ambiguous = matches.getAmbiguous(); + Set ambiguous = matches.ambiguousInWitness; LOG.log(Level.FINE, "ambiguous: {0}", ambiguous); - ListMultimap all = matches.getAll(); + Map> all = matches.allMatches; LOG.log(Level.FINE, "all: {0}", all); assertEquals(expected_unmatched, unmatched.size()); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java b/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java index ddef92dc0..eb06608ab 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java @@ -20,13 +20,15 @@ package eu.interedition.collatex.matching; import com.google.common.collect.Iterables; -import com.google.common.collect.ListMultimap; import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.simple.SimpleWitness; import org.junit.Test; +import java.util.List; +import java.util.Map; + import static 
org.junit.Assert.assertEquals; public class NearMatcherTest extends AbstractTest { @@ -35,9 +37,9 @@ public class NearMatcherTest extends AbstractTest { public void nearTokenMatching() { final SimpleWitness[] w = createWitnesses("near matching yeah", "nar matching"); final VariantGraph graph = collate(w[0]); - final ListMultimap matches = Matches.between(graph.vertices(), w[1].getTokens(), new EditDistanceTokenComparator()).getAll(); + final Map> matches = Matches.between(graph.vertices(), w[1].getTokens(), new EditDistanceTokenComparator()).allMatches; - assertEquals(2, matches.size()); + assertEquals(2, matches.values().stream().flatMap(List::stream).count()); assertEquals(w[0].getTokens().get(0), Iterables.getFirst(Iterables.get(matches.get(w[1].getTokens().get(0)), 0).tokens(), null)); assertEquals(w[0].getTokens().get(1), Iterables.getFirst(Iterables.get(matches.get(w[1].getTokens().get(1)), 0).tokens(), null)); } From 4c32dcfa62d591827b70b49d79c0e60c016c6ca1 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 18:36:14 +0100 Subject: [PATCH 25/52] Remove some Google Guava usages --- .../collatex/CollationAlgorithm.java | 29 ++++++++++--------- .../java/eu/interedition/collatex/Token.java | 8 ----- .../interedition/collatex/AbstractTest.java | 2 +- .../collatex/lab/VariantGraphPanel.java | 2 +- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java index d24d99364..8ce18f7a4 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java @@ -19,11 +19,6 @@ package eu.interedition.collatex; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import 
com.google.common.collect.Sets; import eu.interedition.collatex.dekker.Match; import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm; import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschScorer; @@ -33,6 +28,9 @@ import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -40,6 +38,7 @@ import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.StreamSupport; /** * @author Gregor Middell @@ -75,13 +74,15 @@ public void collate(VariantGraph against, List> witnes } protected void merge(VariantGraph into, Iterable witnessTokens, Map alignments) { - Preconditions.checkArgument(!Iterables.isEmpty(witnessTokens), "Empty witness"); - final Witness witness = Iterables.getFirst(witnessTokens, null).getWitness(); + final Witness witness = StreamSupport.stream(witnessTokens.spliterator(), false) + .findFirst() + .map(Token::getWitness) + .orElseThrow(() -> new IllegalArgumentException("Empty witness")); if (LOG.isLoggable(Level.FINE)) { LOG.log(Level.FINE, "{0} + {1}: Merge comparand into graph", new Object[] { into, witness }); } - witnessTokenVertices = Maps.newHashMap(); + witnessTokenVertices = new HashMap<>(); VariantGraph.Vertex last = into.getStart(); final Set witnessSet = Collections.singleton(witness); for (Token token : witnessTokens) { @@ -107,7 +108,7 @@ protected void mergeTranspositions(VariantGraph into, Iterable transposed = Sets.newHashSet(); + final Set transposed = new HashSet<>(); for (VertexMatch.WithToken match : transposedPhrase) { transposed.add(witnessTokenVertices.get(match.token)); transposed.add(match.vertex); @@ -121,7 +122,7 @@ protected void mergeTranspositions(VariantGraph into, List> transpos if (LOG.isLoggable(Level.FINE)) { LOG.log(Level.FINE, "Transposition: {0}", 
transposedPhrase); } - final Set transposed = Sets.newHashSet(); + final Set transposed = new HashSet<>(); for (Match match : transposedPhrase) { transposed.add(witnessTokenVertices.get(match.token)); transposed.add(match.vertex); @@ -159,7 +160,7 @@ public float gap() { } ).keySet(); - final List> transpositions = new ArrayList>(); + final List> transpositions = new ArrayList<>(); for (SortedSet phraseMatch : matches) { if (!inOrderMatches.contains(phraseMatch)) { transpositions.add(phraseMatch); @@ -167,16 +168,16 @@ public float gap() { } - final Map matchedTokens = Maps.newHashMap(); + final Map matchedTokens = new HashMap<>(); for (SortedSet phraseMatch : matches) { for (VertexMatch.WithTokenIndex tokenMatch : phraseMatch) { matchedTokens.put(tokens[tokenMatch.token], tokenMatch.vertex); } } - final List> transposedTokens = Lists.newLinkedList(); + final List> transposedTokens = new LinkedList<>(); for (SortedSet transposition : transpositions) { - final SortedSet transpositionMatch = new TreeSet(); + final SortedSet transpositionMatch = new TreeSet<>(); for (VertexMatch.WithTokenIndex match : transposition) { matchedTokens.remove(tokens[match.token]); transpositionMatch.add(new VertexMatch.WithToken(match.vertex, match.vertexRank, tokens[match.token])); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Token.java b/collatex-core/src/main/java/eu/interedition/collatex/Token.java index 6954de509..33a67d680 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Token.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Token.java @@ -19,18 +19,10 @@ package eu.interedition.collatex; -import com.google.common.base.Function; /** * The normalized version of the token. 
*/ public interface Token { Witness getWitness(); - - final Function TO_WITNESS = new Function() { - @Override - public Witness apply(Token input) { - return input.getWitness(); - } - }; } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 379136b0b..8231b0c3a 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -103,7 +103,7 @@ protected static SortedSet extractPhrases(SortedSet phrases, Var } protected static String toString(VariantGraph.Vertex vertex, Witness... witnesses) { - final Multimap tokens = Multimaps.index(vertex.tokens(Sets.newHashSet(Arrays.asList(witnesses))), Token.TO_WITNESS); + final Multimap tokens = Multimaps.index(vertex.tokens(Sets.newHashSet(Arrays.asList(witnesses))), Token::getWitness); List tokenContents = Lists.newArrayListWithExpectedSize(tokens.size()); for (Witness witness : Ordering.from(Witness.SIGIL_COMPARATOR).sortedCopy(tokens.keySet())) { for (Token token : Ordering.natural().sortedCopy(Iterables.filter(tokens.get(witness), SimpleToken.class))) { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java index 594d1ba88..29359b0c5 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java @@ -67,7 +67,7 @@ public VariantGraphPanel(VariantGraph vg) { rc.setVertexLabelTransformer(new Transformer() { @Override public String transform(VariantGraph.Vertex variantGraphVertexModel) { - final Multimap tokens = Multimaps.index(variantGraphVertexModel.tokens(), Token.TO_WITNESS); + final Multimap tokens = Multimaps.index(variantGraphVertexModel.tokens(), Token::getWitness); final 
StringBuilder label = new StringBuilder(); for (Witness witness : Ordering.from(Witness.SIGIL_COMPARATOR).sortedCopy(tokens.keySet())) { label.append("[").append(witness.getSigil()).append(": '"); From 17fd6526cb4fe0e9e73f17466b80917720f3a1f2 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 18:50:57 +0100 Subject: [PATCH 26/52] Remove some Google Guava usages --- .../collatex/dekker/DekkerAlgorithm.java | 25 +++--- .../interedition/collatex/dekker/Match.java | 36 +++----- .../collatex/dekker/PhraseMatchDetector.java | 23 +++--- .../dekker/TranspositionDetector.java | 82 +++++++++---------- .../interedition/collatex/dekker/Tuple.java | 12 +-- 5 files changed, 75 insertions(+), 103 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java index e2d87edfe..c807a2d71 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java @@ -18,16 +18,15 @@ */ package eu.interedition.collatex.dekker; +import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.logging.Level; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.Token; @@ -61,8 +60,10 @@ public DekkerAlgorithm(Comparator comparator, TokenLinker tokenLinker) { @Override public void collate(VariantGraph graph, Iterable tokens) { - Preconditions.checkArgument(!Iterables.isEmpty(tokens), "Empty witness"); - final Witness witness = Iterables.getFirst(tokens, 
null).getWitness(); + final Witness witness = StreamSupport.stream(tokens.spliterator(), false) + .findFirst() + .map(Token::getWitness) + .orElseThrow(() -> new IllegalArgumentException("Empty witness")); if (LOG.isLoggable(Level.FINER)) { LOG.log(Level.FINER, "{0} + {1}: {2} vs. {3}", new Object[] { graph, witness, graph.vertices(), tokens }); @@ -85,7 +86,7 @@ public void collate(VariantGraph graph, Iterable tokens) { phraseMatches = phraseMatchDetector.detect(tokenLinks, graph, tokens); if (LOG.isLoggable(Level.FINER)) { for (List phraseMatch : phraseMatches) { - LOG.log(Level.FINER, "{0} + {1}: Phrase match: {2}", new Object[] { graph, witness, Iterables.toString(phraseMatch) }); + LOG.log(Level.FINER, "{0} + {1}: Phrase match: {2}", new Object[] { graph, witness, phraseMatch }); } } @@ -99,14 +100,14 @@ public void collate(VariantGraph graph, Iterable tokens) { if (LOG.isLoggable(Level.FINER)) { for (List transposition : transpositions) { - LOG.log(Level.FINER, "{0} + {1}: Transposition: {2}", new Object[] { graph, witness, Iterables.toString(transposition) }); + LOG.log(Level.FINER, "{0} + {1}: Transposition: {2}", new Object[] { graph, witness, transposition }); } } if (LOG.isLoggable(Level.FINE)) { LOG.log(Level.FINE, "{0} + {1}: Determine aligned tokens by filtering transpositions", new Object[] { graph, witness }); } - alignments = Maps.newHashMap(); + alignments = new HashMap<>(); for (List phrase : phraseMatches) { for (Match match : phrase) { alignments.put(match.token, match.vertex); @@ -127,7 +128,7 @@ public void collate(VariantGraph graph, Iterable tokens) { merge(graph, tokens, alignments); // we filter out small transposed phrases over large distances - List> falseTranspositions = Lists.newArrayList(); + List> falseTranspositions = new ArrayList<>(); VariantGraphRanking ranking = VariantGraphRanking.of(graph); @@ -150,7 +151,7 @@ public void collate(VariantGraph graph, Iterable tokens) { } if (LOG.isLoggable(Level.FINER)) { - 
LOG.log(Level.FINER, "!{0}: {1}", new Object[] {graph, Iterables.toString(graph.vertices())}); + LOG.log(Level.FINER, "!{0}: {1}", new Object[] {graph, StreamSupport.stream(graph.vertices().spliterator(), false).map(Object::toString).collect(Collectors.joining(", ")) }); } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java index e44bc9b6c..3642266ce 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java @@ -19,15 +19,16 @@ package eu.interedition.collatex.dekker; -import com.google.common.base.Function; -import com.google.common.base.Objects; -import com.google.common.base.Predicate; -import com.google.common.collect.Lists; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; /** * @author Gregor Middell @@ -43,7 +44,7 @@ public Match(VariantGraph.Vertex vertex, Token token) { @Override public int hashCode() { - return Objects.hashCode(vertex, token); + return Objects.hash(vertex, token); } @Override @@ -57,11 +58,11 @@ public boolean equals(Object obj) { @Override public String toString() { - return new StringBuilder("{").append(vertex).append("; ").append(token).append("}").toString(); + return "{" + vertex + "; " + token + "}"; } public static List createPhraseMatch(List vertices, List tokens) { - final List phraseMatch = Lists.newArrayListWithExpectedSize(vertices.size()); + final List phraseMatch = new ArrayList<>(vertices.size()); final Iterator vertexIt = vertices.iterator(); final Iterator tokenIt = tokens.iterator(); while (vertexIt.hasNext() && tokenIt.hasNext()) { @@ -72,25 +73,8 @@ public static 
List createPhraseMatch(List vertices, public static Predicate createNoBoundaryMatchPredicate(final VariantGraph graph) { - return new Predicate() { - @Override - public boolean apply(Match input) { - return !input.vertex.equals(graph.getStart()) && !input.vertex.equals(graph.getEnd()); - } - }; + return input -> !input.vertex.equals(graph.getStart()) && !input.vertex.equals(graph.getEnd()); } - public static final Function MATCH_TO_TOKENS = new Function() { - @Override - public Token apply(Match input) { - return input.token; - } - }; - - public static final Function, List> PHRASE_MATCH_TO_TOKENS = new Function, List>() { - @Override - public List apply(List input) { - return Lists.transform(input, MATCH_TO_TOKENS); - } - }; + public static final Function, List> PHRASE_MATCH_TO_TOKENS = input -> input.stream().map(m -> m.token).collect(Collectors.toList()); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java index a92ebfef8..39856ac47 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java @@ -18,16 +18,13 @@ */ package eu.interedition.collatex.dekker; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.Sets; +import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; - -import eu.interedition.collatex.Token; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; /** * @@ -37,9 +34,9 @@ public class PhraseMatchDetector { public List> detect(Map linkedTokens, VariantGraph base, Iterable tokens) { - List> phraseMatches = Lists.newArrayList(); - List basePhrase = Lists.newArrayList(); - List witnessPhrase = 
Lists.newArrayList(); + List> phraseMatches = new ArrayList<>(); + List basePhrase = new ArrayList<>(); + List witnessPhrase = new ArrayList<>(); VariantGraph.Vertex previous = base.getStart(); for (Token token : tokens) { @@ -53,10 +50,10 @@ public List> detect(Map linkedTokens, Va // - previous and base vertex should either be in the same transposition(s) or both aren't in any transpositions // - there should be a directed edge between previous and base vertex // - there may not be a longer path between previous and base vertex - boolean sameTranspositions = Sets.newHashSet(previous.transpositions()).equals(Sets.newHashSet(baseVertex.transpositions())); + boolean sameTranspositions = new HashSet<>(previous.transpositions()).equals(new HashSet<>(baseVertex.transpositions())); boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses()); boolean directedEdge = (base.edgeBetween(previous, baseVertex) != null); - boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (Iterables.size(previous.outgoing()) == 1 || Iterables.size(baseVertex.incoming()) == 1); + boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (previous.outgoing().size() == 1 || baseVertex.incoming().size() == 1); if (!isNear) { addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java index 27f73126b..8fb5f6a57 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java @@ -18,20 +18,20 @@ */ package eu.interedition.collatex.dekker; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.simple.SimpleToken; +import eu.interedition.collatex.util.VariantGraphRanking; + +import 
java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; - -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.util.VariantGraphRanking; - /** * * @author Ronald Haentjens Dekker @@ -43,7 +43,7 @@ public List> detect(final List> phraseMatches, VariantGr // if there are no phrase matches it is not possible // to detect transpositions, return an empty list if (phraseMatches.isEmpty()) { - return Lists.newArrayList(); + return new ArrayList<>(); } /* @@ -54,26 +54,23 @@ public List> detect(final List> phraseMatches, VariantGr */ final VariantGraphRanking ranking = rankTheGraph(phraseMatches, base); - Comparator> comp = new Comparator>() { - @Override - public int compare(List pm1, List pm2) { - int rank1 = ranking.apply(pm1.get(0).vertex); - int rank2 = ranking.apply(pm2.get(0).vertex); - int difference = rank1 - rank2; - if (difference != 0) { - return difference; - } - int index1 = phraseMatches.indexOf(pm1); - int index2 = phraseMatches.indexOf(pm2); - return index1 - index2; + Comparator> comp = (pm1, pm2) -> { + int rank1 = ranking.apply(pm1.get(0).vertex); + int rank2 = ranking.apply(pm2.get(0).vertex); + int difference = rank1 - rank2; + if (difference != 0) { + return difference; } + int index1 = phraseMatches.indexOf(pm1); + int index2 = phraseMatches.indexOf(pm2); + return index1 - index2; }; - List> phraseMatchesGraphOrder = Lists.newArrayList(phraseMatches); + List> phraseMatchesGraphOrder = new ArrayList<>(phraseMatches); Collections.sort(phraseMatchesGraphOrder, comp); // Map 1 - phraseMatchToIndex = Maps.newHashMap(); + phraseMatchToIndex = new HashMap<>(); for (int i = 0; i < 
phraseMatchesGraphOrder.size(); i++) { phraseMatchToIndex.put(phraseMatchesGraphOrder.get(i), i); } @@ -82,8 +79,8 @@ public int compare(List pm1, List pm2) { * We calculate the index for all the phrase matches * First in witness order, then in graph order */ - List phraseMatchesGraphIndex = Lists.newArrayList(); - List phraseMatchesWitnessIndex = Lists.newArrayList(); + List phraseMatchesGraphIndex = new ArrayList<>(); + List phraseMatchesWitnessIndex = new ArrayList<>(); for (int i=0; i < phraseMatches.size(); i++) { phraseMatchesGraphIndex.add(i); @@ -96,15 +93,15 @@ public int compare(List pm1, List pm2) { /* * Initialize result variables */ - List> nonTransposedPhraseMatches = Lists.newArrayList(phraseMatches); - List> transpositions = Lists.newArrayList(); + List> nonTransposedPhraseMatches = new ArrayList<>(phraseMatches); + List> transpositions = new ArrayList<>(); /* * loop here until the maximum distance == 0 */ while (true) { // Map 2 - final Map, Integer> phraseMatchToDistanceMap = Maps.newLinkedHashMap(); + final Map, Integer> phraseMatchToDistanceMap = new LinkedHashMap<>(); for (int i=0; i < nonTransposedPhraseMatches.size(); i++) { Integer graphIndex = phraseMatchesGraphIndex.get(i); Integer witnessIndex = phraseMatchesWitnessIndex.get(i); @@ -113,7 +110,7 @@ public int compare(List pm1, List pm2) { phraseMatchToDistanceMap.put(phraseMatch, distance); } - List distanceList = Lists.newArrayList(phraseMatchToDistanceMap.values()); + List distanceList = new ArrayList<>(phraseMatchToDistanceMap.values()); if (distanceList.isEmpty()||Collections.max(distanceList) == 0) { break; @@ -123,23 +120,20 @@ public int compare(List pm1, List pm2) { // TODO: order by 3) graph rank? // TODO: I have not yet found evidence/a use case that // TODO: indicates that it is needed. 
- Comparator> comp2 = new Comparator>() { - @Override - public int compare(List pm1, List pm2) { - // first order by distance - int distance1 = phraseMatchToDistanceMap.get(pm1); - int distance2 = phraseMatchToDistanceMap.get(pm2); - int difference = distance2 - distance1; - if (difference != 0) { - return difference; - } - // second order by size - // return pm1.size() - pm2.size(); - return determineSize(pm1) - determineSize(pm2); + Comparator> comp2 = (pm1, pm2) -> { + // first order by distance + int distance1 = phraseMatchToDistanceMap.get(pm1); + int distance2 = phraseMatchToDistanceMap.get(pm2); + int difference = distance2 - distance1; + if (difference != 0) { + return difference; } + // second order by size + // return pm1.size() - pm2.size(); + return determineSize(pm1) - determineSize(pm2); }; - List> sortedPhraseMatches = Lists.newArrayList(nonTransposedPhraseMatches); + List> sortedPhraseMatches = new ArrayList<>(nonTransposedPhraseMatches); Collections.sort(sortedPhraseMatches, comp2); List transposedPhrase = sortedPhraseMatches.remove(0); @@ -169,7 +163,7 @@ private void addTransposition(List phraseWitnessRanks, List ph private VariantGraphRanking rankTheGraph(List> phraseMatches, VariantGraph base) { // rank the variant graph - Set matchedVertices = Sets.newHashSet(); + Set matchedVertices = new HashSet<>(); for (List phraseMatch : phraseMatches) { matchedVertices.add(phraseMatch.get(0).vertex); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java index 0bbc8c528..f18d8e775 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java @@ -19,9 +19,7 @@ package eu.interedition.collatex.dekker; -import com.google.common.collect.Sets; - -import java.util.Set; +import java.util.Objects; /** * @author Gregor Middell @@ -31,24 +29,22 @@ public class 
Tuple { public final T left; public final T right; - private final Set set; - public Tuple(T left, T right) { this.left = left; this.right = right; - this.set = Sets.newHashSet(left, right); } @Override public boolean equals(Object obj) { if (obj != null && obj instanceof Tuple) { - return set.equals(((Tuple) obj).set); + final Tuple other = (Tuple) obj; + return (left.equals(other.left) || left.equals(other.right)) && (right.equals(other.right) || right.equals(other.left)); } return super.equals(obj); } @Override public int hashCode() { - return set.hashCode(); + return Objects.hash(left, right); } } From f34bf2979c4e7c2b33dca2f991a633e59f768bcf Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 19:23:06 +0100 Subject: [PATCH 27/52] Remove some Google Guava usages --- .../collatex/CollationAlgorithmFactory.java | 2 +- .../collatex/dekker/DekkerAlgorithm.java | 2 +- .../collatex/dekker/matrix/Archipelago.java | 20 +++--- .../collatex/dekker/matrix/Coordinate.java | 4 +- .../collatex/dekker/matrix/Island.java | 9 +-- .../dekker/matrix/IslandConflictResolver.java | 72 ++++++++----------- .../dekker/matrix/MatchTableLinker.java | 11 ++- .../dekker/matrix/MatchTableSelection.java | 40 +++++------ .../collatex/dekker/matrix/HermansTest.java | 7 +- .../matrix/IslandConflictResolverTest.java | 18 +++-- .../dekker/matrix/MatchTableLinkerTest.java | 20 +++--- .../collatex/lab/MatchMatrixTableModel.java | 3 +- 12 files changed, 92 insertions(+), 116 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java index aba1fee03..dfce79bf0 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java @@ -41,7 +41,7 @@ public static CollationAlgorithm dekker(Comparator comparator) { } public static 
CollationAlgorithm dekkerMatchMatrix(Comparator comparator, int outlierTranspositionsSizeLimit) { - return new DekkerAlgorithm(comparator, new MatchTableLinker(outlierTranspositionsSizeLimit)); + return new DekkerAlgorithm(comparator, new MatchTableLinker()); } public static CollationAlgorithm needlemanWunsch(Comparator comparator) { diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java index c807a2d71..b35b33e36 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java @@ -48,7 +48,7 @@ public class DekkerAlgorithm extends CollationAlgorithm.Base { private boolean mergeTranspositions = false; public DekkerAlgorithm(Comparator comparator) { - this(comparator, new MatchTableLinker(3)); + this(comparator, new MatchTableLinker()); } public DekkerAlgorithm(Comparator comparator, TokenLinker tokenLinker) { diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java index 884f62a11..eca8b8bcc 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java @@ -20,16 +20,14 @@ package eu.interedition.collatex.dekker.matrix; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.logging.Logger; -import com.google.common.base.Objects; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; - /* * @author Meindert Kroese * @author Bram Buitendijk @@ -42,14 +40,14 @@ public class Archipelago { 
private final Set islandvectors; public Archipelago() { - islands = new ArrayList(); - this.islandvectors = Sets.newHashSet(); // row - column, all islands should have direction 1, so this diff should be the same for all coordinates on the island. + islands = new ArrayList<>(); + this.islandvectors = new HashSet<>(); // row - column, all islands should have direction 1, so this diff should be the same for all coordinates on the island. } //copy constructor public Archipelago(Archipelago orig) { - this.islands = Lists.newArrayList(orig.islands); - this.islandvectors = Sets.newHashSet(orig.islandvectors); + this.islands = new ArrayList<>(orig.islands); + this.islandvectors = new HashSet<>(orig.islandvectors); } public Archipelago(Island isl) { @@ -72,7 +70,7 @@ public Island get(int i) { } public boolean containsCoordinate(int row, int column) { - return Objects.equal(getCoordinatesMap().get(row), column); + return Objects.equals(getCoordinatesMap().get(row), column); } public List getIslands() { @@ -113,7 +111,7 @@ public boolean equals(Object object) { } private Map getCoordinatesMap() { - final Map map = Maps.newHashMap(); + final Map map = new HashMap<>(); for (final Island isl : islands) { for (final Coordinate c : isl) { map.put(c.getRow(), c.getColumn()); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java index 4dadbbd2d..d6f416487 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java @@ -19,7 +19,7 @@ package eu.interedition.collatex.dekker.matrix; -import com.google.common.base.Objects; +import java.util.Objects; public class Coordinate implements Comparable { int row; @@ -65,7 +65,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hashCode(row, column); + return 
Objects.hash(row, column); } @Override diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java index 9502ddf12..fac24273b 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java @@ -25,9 +25,6 @@ import java.util.Iterator; import java.util.List; -import com.google.common.base.Objects; -import com.google.common.collect.Lists; - /** * A DirectedIsland is a collections of Coordinates all on the same * diagonal. The direction of this diagonal can be -1, 0, or 1. @@ -41,7 +38,7 @@ public class Island implements Iterable, Comparable { private int direction = 0; - private final List islandCoordinates = Lists.newArrayList(); + private final List islandCoordinates = new ArrayList<>(); public Island() {} @@ -181,7 +178,7 @@ public int value() { } protected boolean removeSameColOrRow(Coordinate c) { - ArrayList remove = new ArrayList(); + ArrayList remove = new ArrayList<>(); for (Coordinate coor : islandCoordinates) { if (coor.sameColumn(c) || coor.sameRow(c)) { remove.add(coor); @@ -201,7 +198,7 @@ public Iterator iterator() { @Override public int hashCode() { - return Objects.hashCode(islandCoordinates); + return islandCoordinates.hashCode(); } @Override diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java index 3291cbc44..a29461e34 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java @@ -19,17 +19,18 @@ package eu.interedition.collatex.dekker.matrix; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; 
+import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; import java.util.logging.Logger; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; - /** * * @author Ronald Haentjens Dekker @@ -42,7 +43,7 @@ public class IslandConflictResolver { private final MatchTableSelection selection; //NOTE: outlierTranspositionLimit is ignored for now - public IslandConflictResolver(MatchTable table, int outlierTranspositionsSizeLimit) { + public IslandConflictResolver(MatchTable table) { selection = new MatchTableSelection(table); } @@ -58,7 +59,7 @@ public MatchTableSelection createNonConflictingVersion() { if (possibleIslands.size() == 1) { selection.addIsland(possibleIslands.get(0)); } else if (possibleIslands.size() > 1) { - Multimap analysis = analyzeConflictsBetweenPossibleIslands(possibleIslands); + Map> analysis = analyzeConflictsBetweenPossibleIslands(possibleIslands); resolveConflictsBySelectingPreferredIslands(selection, analysis); } } @@ -73,18 +74,18 @@ public MatchTableSelection createNonConflictingVersion() { * * Parameters: the size of the islands that you want to analyze */ - public Multimap analyzeConflictsBetweenPossibleIslands(List possibleIslands) { - Multimap conflictMap = ArrayListMultimap.create(); + public Map> analyzeConflictsBetweenPossibleIslands(List possibleIslands) { + Map> conflictMap = new HashMap<>(); Set competingIslands = getCompetingIslands(possibleIslands); for (Island island : competingIslands) { if (selection.doesCandidateLayOnVectorOfCommittedIsland(island)) { - conflictMap.put(IslandCompetition.CompetingIslandAndOnIdealIine, island); + conflictMap.computeIfAbsent(IslandCompetition.CompetingIslandAndOnIdealIine, c -> new ArrayList<>()).add(island); } else { - 
conflictMap.put(IslandCompetition.CompetingIsland, island); + conflictMap.computeIfAbsent(IslandCompetition.CompetingIsland, c -> new ArrayList<>()).add(island); } } for (Island island : getNonCompetingIslands(possibleIslands, competingIslands)) { - conflictMap.put(IslandCompetition.NonCompetingIsland, island); + conflictMap.computeIfAbsent(IslandCompetition.NonCompetingIsland, c -> new ArrayList<>()).add(island); } return conflictMap; } @@ -96,63 +97,50 @@ public Multimap analyzeConflictsBetweenPossibleIsland * we have to move this code out of this method and move it to the caller * class */ - private void resolveConflictsBySelectingPreferredIslands(MatchTableSelection selection, Multimap islandConflictMap) { + private void resolveConflictsBySelectingPreferredIslands(MatchTableSelection selection, Map> islandConflictMap) { // First select competing islands that are on the ideal line - Multimap distanceMap1 = makeDistanceMap(islandConflictMap.get(IslandCompetition.CompetingIslandAndOnIdealIine)); LOG.fine("addBestOfCompeting with competingIslandsOnIdealLine"); - addBestOfCompeting(selection, distanceMap1); - + makeDistanceMap(islandConflictMap.getOrDefault(IslandCompetition.CompetingIslandAndOnIdealIine, Collections.emptyList())) + .values().stream() + .flatMap(List::stream).filter(ci1 -> selection.isIslandPossibleCandidate(ci1)) + .forEach(selection::addIsland); + // Second select other competing islands - Multimap distanceMap2 = makeDistanceMap(islandConflictMap.get(IslandCompetition.CompetingIsland)); LOG.fine("addBestOfCompeting with otherCompetingIslands"); - addBestOfCompeting(selection, distanceMap2); + makeDistanceMap(islandConflictMap.getOrDefault(IslandCompetition.CompetingIsland, Collections.emptyList())) + .values().stream() + .flatMap(List::stream).filter(ci -> selection.isIslandPossibleCandidate(ci)) + .forEach(selection::addIsland); // Third select non competing islands LOG.fine("add non competing islands"); - for (Island i : 
islandConflictMap.get(IslandCompetition.NonCompetingIsland)) { - selection.addIsland(i); - } - } - - private void addBestOfCompeting(MatchTableSelection selection, Multimap distanceMap1) { - for (Double d : shortestToLongestDistances(distanceMap1)) { - for (Island ci : distanceMap1.get(d)) { - if (selection.isIslandPossibleCandidate(ci)) { - selection.addIsland(ci); - } - } - } + islandConflictMap.getOrDefault(IslandCompetition.NonCompetingIsland, Collections.emptyList()) + .forEach(selection::addIsland); } // TODO: This method calculates the distance from the ideal line // TODO: by calculating the ratio x/y. // TODO: but the ideal line may have moved (due to additions/deletions). - private Multimap makeDistanceMap(Collection competingIslands) { - Multimap distanceMap = ArrayListMultimap.create(); + private SortedMap> makeDistanceMap(Collection competingIslands) { + SortedMap> distanceMap = new TreeMap<>(); for (Island isl : competingIslands) { Coordinate leftEnd = isl.getLeftEnd(); double ratio = ((leftEnd.column+1) / (double) (leftEnd.row+1)); double b2 = Math.log(ratio)/Math.log(2); double distanceToIdealLine = Math.abs(b2); - distanceMap.put(distanceToIdealLine, isl); + distanceMap.computeIfAbsent(distanceToIdealLine, d -> new ArrayList<>()).add(isl); } return distanceMap; } - private List shortestToLongestDistances(Multimap distanceMap) { - List distances = Lists.newArrayList(distanceMap.keySet()); - Collections.sort(distances); - return distances; - } - private Set getNonCompetingIslands(List islands, Set competingIslands) { - Set nonCompetingIslands = Sets.newHashSet(islands); + Set nonCompetingIslands = new HashSet<>(islands); nonCompetingIslands.removeAll(competingIslands); return nonCompetingIslands; } private Set getCompetingIslands(List islands) { - Set competingIslands = Sets.newHashSet(); + Set competingIslands = new HashSet<>(); for (int i = 0; i < islands.size(); i++) { Island i1 = islands.get(i); for (int j = 1; j < islands.size() - i; j++) { diff 
--git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java index 7ff961793..f30136399 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java @@ -20,23 +20,20 @@ package eu.interedition.collatex.dekker.matrix; import java.util.Comparator; +import java.util.HashMap; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; -import com.google.common.collect.Maps; - import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.dekker.TokenLinker; public class MatchTableLinker implements TokenLinker { static Logger LOG = Logger.getLogger(MatchTableLinker.class.getName()); - private final int outlierTranspositionsSizeLimit; - public MatchTableLinker(int outlierTranspositionsSizeLimit) { + public MatchTableLinker() { super(); - this.outlierTranspositionsSizeLimit = outlierTranspositionsSizeLimit; } @Override @@ -47,7 +44,7 @@ public Map link(VariantGraph base, Iterable w // create IslandConflictResolver LOG.fine("create island conflict resolver"); - IslandConflictResolver resolver = new IslandConflictResolver(table, outlierTranspositionsSizeLimit); + IslandConflictResolver resolver = new IslandConflictResolver(table); // The IslandConflictResolver createNonConflictingVersion() method // selects the optimal islands @@ -58,7 +55,7 @@ public Map link(VariantGraph base, Iterable w } // Here the result is put in a map - Map map = Maps.newHashMap(); + Map map = new HashMap<>(); for (Island island : preferredIslands.getIslands()) { for (Coordinate c : island) { map.put(table.tokenAt(c.row, c.column), table.vertexAt(c.row, c.column)); diff --git 
a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java index ae46c0630..794fd6a40 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java @@ -1,16 +1,16 @@ package eu.interedition.collatex.dekker.matrix; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; - -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; +import java.util.stream.Collectors; import eu.interedition.collatex.VariantGraph; @@ -21,7 +21,7 @@ // islands may change after commit islands public class MatchTableSelection { Logger LOG = Logger.getLogger(MatchTableSelection.class.getName()); - private final Multimap islandMultimap; + private final Map> islandMultimap; private final Archipelago fixedIslands; //this fields are needed for the locking of table cells private final Set fixedRows; @@ -29,23 +29,23 @@ public class MatchTableSelection { private final MatchTable table; public MatchTableSelection(MatchTable table) { - fixedRows = Sets.newHashSet(); - fixedVertices = Sets.newHashSet(); + fixedRows = new HashSet<>(); + fixedVertices = new HashSet<>(); this.table = table; this.fixedIslands = new Archipelago(); - islandMultimap = ArrayListMultimap.create(); + islandMultimap = new HashMap<>(); for (Island isl : table.getIslands()) { - islandMultimap.put(isl.size(), isl); + islandMultimap.computeIfAbsent(isl.size(), s -> new ArrayList<>()).add(isl); } } // copy constructor public 
MatchTableSelection(MatchTableSelection orig) { // table structure is read only, does not have to be copied - this.islandMultimap = ArrayListMultimap.create(orig.islandMultimap); + this.islandMultimap = orig.islandMultimap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> new ArrayList<>(e.getValue()))); this.fixedIslands = new Archipelago(orig.fixedIslands); - this.fixedRows = Sets.newHashSet(orig.fixedRows); - this.fixedVertices = Sets.newHashSet(orig.fixedVertices); + this.fixedRows = new HashSet<>(orig.fixedRows); + this.fixedVertices = new HashSet<>(orig.fixedVertices); this.table = orig.table; } @@ -80,7 +80,7 @@ public void addIsland(Island isl) { fixedVertices.add(table.vertexAt(coordinate.row, coordinate.column)); } fixedIslands.add(isl); - islandMultimap.remove(isl.size(), isl); + islandMultimap.computeIfPresent(isl.size(), (s, i) -> { i.remove(isl); return (i.isEmpty() ? null : i); }); } public boolean doesCandidateLayOnVectorOfCommittedIsland(Island island) { @@ -109,14 +109,14 @@ public boolean containsCoordinate(int row, int column) { */ //TODO: the original Island object is modified here //TODO: That should not happen, if we want to build a decision tree. - public void removeOrSplitImpossibleIslands(Integer islandSize, Multimap islandMultimap) { - Collection islandsToCheck = Lists.newArrayList(islandMultimap.get(islandSize)); + public void removeOrSplitImpossibleIslands(Integer islandSize, Map> islandMultimap) { + Collection islandsToCheck = new ArrayList<>(islandMultimap.getOrDefault(islandSize, Collections.emptyList())); for (Island island : islandsToCheck) { if (!isIslandPossibleCandidate(island)) { - islandMultimap.remove(islandSize, island); + islandMultimap.computeIfPresent(islandSize, (s, i) -> { i.remove(island); return (i.isEmpty() ? 
null : i); }); removeConflictingEndCoordinates(island); if (island.size() > 0) { - islandMultimap.put(island.size(), island); + islandMultimap.computeIfAbsent(island.size(), s -> new ArrayList<>()).add(island); } } } @@ -150,7 +150,7 @@ private void removeConflictingEndCoordinates(Island island) { } public List getPossibleIslands() { - List possibleIslands = Lists.newArrayList(); + List possibleIslands = new ArrayList<>(); while(possibleIslands.isEmpty()&&!islandMultimap.isEmpty()) { // find the maximum island size and traverse groups in descending order Integer max = Collections.max(islandMultimap.keySet()); @@ -158,7 +158,7 @@ public List getPossibleIslands() { // check the possible islands of a certain size against // the already committed islands. removeOrSplitImpossibleIslands(max, islandMultimap); - possibleIslands = Lists.newArrayList(islandMultimap.get(max)); + possibleIslands = new ArrayList<>(islandMultimap.getOrDefault(max, Collections.emptyList())); } return possibleIslands; } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java index d4bcc644f..2da1f9e6d 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java @@ -25,7 +25,6 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.when; import java.io.StringWriter; import java.util.Arrays; @@ -80,7 +79,7 @@ public void testHermansText2() { VariantGraph vg = collate(sw[0]); MatchTable matchTable = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); // System.out.println(buildMatrix.toHtml()); - IslandConflictResolver archipelago = new IslandConflictResolver(matchTable, 1); + IslandConflictResolver archipelago = new 
IslandConflictResolver(matchTable); // LOG.fine("archipelago: " + archipelago); // LOG.fine("archipelago.size(): " + archipelago.size()); // assertEquals(42, archipelago.size()); @@ -126,7 +125,7 @@ public void testHermansText3() { // e.printStackTrace(); // } // System.out.println(buildMatrix.toHtml()); - IslandConflictResolver archipelago = new IslandConflictResolver(matchTable, 1); + IslandConflictResolver archipelago = new IslandConflictResolver(matchTable); // LOG.fine("archipelago: " + archipelago); // LOG.fine("archipelago.size(): " + archipelago.size()); MatchTableSelection firstVersion = archipelago.createNonConflictingVersion(); @@ -192,7 +191,7 @@ public void testHermansText2aWithMocking() throws XMLStreamException { VariantGraph base = collate(witnesses[0]); MatchTable matchTable = MatchTable.create(base, witnesses[1], new EqualityTokenComparator()); - IslandConflictResolver creator = new IslandConflictResolver(matchTable, 1); + IslandConflictResolver creator = new IslandConflictResolver(matchTable); //Mock Archipelago Archipelago result = mock(Archipelago.class); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java index c92ca54a6..10856f256 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java @@ -1,17 +1,15 @@ package eu.interedition.collatex.dekker.matrix; -import static org.junit.Assert.assertEquals; - -import java.util.List; - -import org.junit.Test; - import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; + +import java.util.List; 
+import java.util.Map; + +import static org.junit.Assert.assertEquals; public class IslandConflictResolverTest extends AbstractTest { @@ -32,8 +30,8 @@ public void testPartlyOverlappingIslands() { possibleIslands.add(island); } } - IslandConflictResolver resolver = new IslandConflictResolver(table, 3); - Multimap competition = resolver.analyzeConflictsBetweenPossibleIslands(possibleIslands); + IslandConflictResolver resolver = new IslandConflictResolver(table); + Map> competition = resolver.analyzeConflictsBetweenPossibleIslands(possibleIslands); assertEquals(3, competition.get(IslandCompetition.CompetingIsland).size()); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java index 0a43b1c4c..1ec086eb6 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java @@ -57,7 +57,7 @@ public void additionInCombinationWithTransposition2() { "black is the cat",// "black and white is the cat"); final VariantGraph graph = collate(w[0], w[1]); - MatchTableLinker linker = new MatchTableLinker(3); + MatchTableLinker linker = new MatchTableLinker(); Map link = linker.link(graph, w[2], new EqualityTokenComparator()); Set tokens = link.keySet(); Map tokensAsString = Maps.newHashMap(); @@ -71,7 +71,7 @@ public void additionInCombinationWithTransposition2() { public void testUsecase1() { final SimpleWitness[] w = createWitnesses("The black cat", "The black and white cat"); final VariantGraph graph = collate(w[0]); - MatchTableLinker linker = new MatchTableLinker(3); + MatchTableLinker linker = new MatchTableLinker(); Map link = linker.link(graph, w[1], new EqualityTokenComparator()); assertEquals(3, link.size()); } @@ -83,7 +83,7 @@ public void testGapsEverythingEqual() { // Optimal alignment 
has no gaps final SimpleWitness[] w = createWitnesses("The red cat and the black cat", "The red cat and the black cat"); final VariantGraph graph = collate(w[0]); - MatchTableLinker linker = new MatchTableLinker(3); + MatchTableLinker linker = new MatchTableLinker(); Map link = linker.link(graph, w[1], new EqualityTokenComparator()); PhraseMatchDetector detector = new PhraseMatchDetector(); List> phraseMatches = detector.detect(link, graph, w[1]); @@ -97,7 +97,7 @@ public void testGapsOmission() { // Note: there are two paths here that contain 1 gap final SimpleWitness[] w = createWitnesses("The red cat and the black cat", "the black cat"); final VariantGraph graph = collate(w[0]); - MatchTableLinker linker = new MatchTableLinker(3); + MatchTableLinker linker = new MatchTableLinker(); Map link = linker.link(graph, w[1], new EqualityTokenComparator()); PhraseMatchDetector detector = new PhraseMatchDetector(); List> phraseMatches = detector.detect(link, graph, w[1]); @@ -114,7 +114,7 @@ public void testHermansText2c() throws XMLStreamException { VariantGraph graph = collate(witnesses[0], witnesses[1]); - MatchTableLinker linker = new MatchTableLinker(1); + MatchTableLinker linker = new MatchTableLinker(); Map linkedTokens = linker.link(graph, witnesses[2], new EqualityTokenComparator()); Set tokens = linkedTokens.keySet(); @@ -139,7 +139,7 @@ public void testHermansText2c() throws XMLStreamException { public void test1() { SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); VariantGraph vg = collate(sw[0]); - MatchTableLinker linker = new MatchTableLinker(1); + MatchTableLinker linker = new MatchTableLinker(); Map linkedTokens = linker.link(vg, sw[1], new EqualityTokenComparator()); Set tokens = linkedTokens.keySet(); @@ -162,7 +162,7 @@ public void testOverDeAtlantischeOceaan() { String textDMD1 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        \nOp sommige dekken van de stomer lagen mensen in de zon, op andere dekken werd getennist, op nog andere liepen de passagiers heen en weer en praatten. Wie over de reling hing en recht naar beneden keek, kon vaststellen dat het schip vorderde; of draaide alleen de aarde er onderdoor?

        \nOp de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon.

        "; SimpleWitness[] sw = createWitnesses(textD9, textDMD1); VariantGraph vg = collate(sw[0]); - Map linkedTokens = new MatchTableLinker(outlierTranspositionsSizeLimit).link(vg, sw[1], new StrictEqualityTokenComparator()); + Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); Set tokens = linkedTokens.keySet(); Set tokensAsString = Sets.newLinkedHashSet(); @@ -188,7 +188,7 @@ public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { String textDmd1 = "Natuurlijk, alles is betrekkelijk"; SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1); VariantGraph vg = collate(sw[0], sw[1]); - Map linkedTokens = new MatchTableLinker(outlierTranspositionsSizeLimit).link(vg, sw[2], new StrictEqualityTokenComparator()); + Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); Set tokens = linkedTokens.keySet(); Set tokensAsString = Sets.newLinkedHashSet(); @@ -214,7 +214,7 @@ public void testSuscepto() throws XMLStreamException { String c = "Et sortem mortis tribus diebus sompno suscepto et tunc ab inferis regressus ad lucem veniet."; SimpleWitness[] sw = createWitnesses(a, b, c); VariantGraph vg = collate(sw[0], sw[1]); - Map linkedTokens = new MatchTableLinker(outlierTranspositionsSizeLimit).link(vg, sw[2], new StrictEqualityTokenComparator()); + Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); Set tokens = linkedTokens.keySet(); Set tokensAsString = Sets.newLinkedHashSet(); @@ -243,7 +243,7 @@ public void testOutlierTranspositionLimitAndPunctuation() { assertEquals(4, tokensA.size()); VariantGraph vg = collate(sw[0]); - Map linkedTokens = new MatchTableLinker(outlierTranspositionsSizeLimit).link(vg, sw[1], new StrictEqualityTokenComparator()); + Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); // assert linked tokens; helper method Set tokens = linkedTokens.keySet(); 
diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java index d81c7c2b5..d2704341e 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java @@ -25,7 +25,6 @@ import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.dekker.matrix.Archipelago; import eu.interedition.collatex.dekker.matrix.IslandConflictResolver; import eu.interedition.collatex.dekker.matrix.MatchTable; import eu.interedition.collatex.dekker.matrix.MatchTableSelection; @@ -90,7 +89,7 @@ public MatchMatrixTableModel(MatchTable matchTable, VariantGraph vg, Iterable Date: Sat, 7 Feb 2015 19:42:10 +0100 Subject: [PATCH 28/52] Remove some Google Guava usages --- .../collatex/dekker/matrix/MatchTable.java | 75 ++++++++++--------- .../dekker/matrix/MatchTableTest.java | 2 +- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java index 2f63a573c..05a052e74 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java @@ -19,27 +19,26 @@ package eu.interedition.collatex.dekker.matrix; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import com.google.common.collect.ContiguousSet; -import com.google.common.collect.DiscreteDomain; -import com.google.common.collect.HashBasedTable; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import 
com.google.common.collect.Range; -import com.google.common.collect.Sets; - import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.matching.Matches; import eu.interedition.collatex.util.VariantGraphRanking; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + /* @author: Ronald Haentjens Dekker * * This class represents a table of the matches. @@ -52,9 +51,9 @@ * MatchTableSelection class. */ public class MatchTable { - private final HashBasedTable table; - private final Iterable witness; - private final List ranks; + private final MatchTableCell[][] table; + private final Token[] witness; + private final int[] ranks; // assumes default token comparator public static MatchTable create(VariantGraph graph, Iterable witness) { @@ -71,59 +70,62 @@ public static MatchTable create(VariantGraph graph, Iterable witness, Com return table; } + private Optional cell(int rowIndex, int columnIndex) { + return Optional.ofNullable(table[rowIndex][columnIndex]); + } + public VariantGraph.Vertex vertexAt(int rowIndex, int columnIndex) { - MatchTableCell cell = table.get(rowIndex, columnIndex); - return cell==null ? null : cell.vertex; + return cell(rowIndex, columnIndex).map(c -> c.vertex).orElse(null); } public Token tokenAt(int rowIndex, int columnIndex) { - MatchTableCell cell = table.get(rowIndex, columnIndex); - return cell==null ? null : cell.token; + return cell(rowIndex, columnIndex).map(c -> c.token).orElse(null); } // Warning: this method reiterates the witness! 
// This method is only meant for the user interface and serialization classes! // Use the tokenAt method in all other cases. public List rowList() { - return Lists.newArrayList(witness); + return Collections.unmodifiableList(Arrays.asList(witness)); } public List columnList() { - return ranks; + return Arrays.stream(ranks).boxed().collect(Collectors.toList()); } // Since the coordinates in allMatches are ordered from upper left to lower right, // we don't need to check the lower right neighbor. public Set getIslands() { - Map coordinateMapper = Maps.newHashMap(); + Map coordinateMapper = new HashMap<>(); List allMatches = allMatches(); for (Coordinate c : allMatches) { // LOG.debug("coordinate {}", c); addToIslands(coordinateMapper, c); } - Set smallestIslandsCoordinates = Sets.newHashSet(allMatches); + Set smallestIslandsCoordinates = new HashSet<>(allMatches); smallestIslandsCoordinates.removeAll(coordinateMapper.keySet()); for (Coordinate coordinate : smallestIslandsCoordinates) { Island island = new Island(); island.add(coordinate); coordinateMapper.put(coordinate, island); } - return Sets.newHashSet(coordinateMapper.values()); + return new HashSet<>(coordinateMapper.values()); } - private MatchTable(Iterable tokens, List ranks) { - this.table = HashBasedTable.create(); + private MatchTable(Token[] tokens, int[] ranks) { + this.table = new MatchTableCell[tokens.length][ranks.length]; this.witness = tokens; this.ranks = ranks; } private static MatchTable createEmptyTable(VariantGraphRanking ranking, VariantGraph graph, Iterable witness) { // -2 === ignore the start and the end vertex - Range ranksRange = Range.closed(0, Math.max(0, ranking.apply(graph.getEnd()) - 2)); - ImmutableList ranksSet = ContiguousSet.create(ranksRange, DiscreteDomain.integers()).asList(); - return new MatchTable(witness, ranksSet); + return new MatchTable( + StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new), + IntStream.range(0, Math.max(0, 
ranking.apply(graph.getEnd()) - 1)).toArray() + ); } // move parameters into fields? @@ -145,8 +147,7 @@ private void fillTableWithMatches(VariantGraphRanking ranking, VariantGraph grap private void set(int rowIndex, int columnIndex, Token token, VariantGraph.Vertex vertex) { // LOG.debug("putting: {}<->{}<->{}", new Object[] { token, columnIndex, variantGraphVertex }); - MatchTableCell cell = new MatchTableCell(token, vertex); - table.put(rowIndex, columnIndex, cell); + table[rowIndex][columnIndex] = new MatchTableCell(token, vertex); } private void addToIslands(Map coordinateMapper, Coordinate c) { @@ -155,7 +156,9 @@ private void addToIslands(Map coordinateMapper, Coordinate c VariantGraph.Vertex neighbor = null; try { neighbor = vertexAt(c.row + diff, c.column + diff); - } catch (IndexOutOfBoundsException e) {} + } catch (IndexOutOfBoundsException e) { + // ignored + } if (neighbor != null) { Island island = coordinateMapper.get(neighborCoordinate); if (island == null) { @@ -177,7 +180,7 @@ private void addToIslands(Map coordinateMapper, Coordinate c // TODO: might be simpler to work from the valueSet // TODO: try remove the call to rowList / columnList List allMatches() { - List pairs = Lists.newArrayList(); + List pairs = new ArrayList<>(); int rows = rowList().size(); int cols = columnList().size(); for (int i = 0; i < rows; i++) { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java index 8cc1006dc..dff0abeff 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java @@ -64,7 +64,7 @@ public void testTableCreationEmptyGraph() { final VariantGraph graph = new VariantGraph(); SimpleWitness[] witnesses = createWitnesses("a b"); MatchTable table = MatchTable.create(graph, witnesses[0]); - 
assertEquals(1, table.columnList().size()); + assertEquals(0, table.columnList().size()); } @Test From 4d775397e485a931fe01885276d3205664a13f0e Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 21:26:30 +0100 Subject: [PATCH 29/52] Remove some Google Guava usages --- .../interedition/collatex/AbstractTest.java | 37 +++---- .../collatex/ScriptEngineTest.java | 16 +-- .../{neo4j => }/VariantGraphTest.java | 36 +++--- .../collatex/lab/CollateXLaboratory.java | 9 +- .../collatex/lab/MatchMatrixTableModel.java | 14 +-- .../collatex/lab/VariantGraphLayout.java | 18 ++- .../collatex/lab/VariantGraphPanel.java | 104 +++++------------- .../collatex/lab/WitnessPanel.java | 20 ++-- .../collatex/matching/MatchesTest.java | 13 +-- .../collatex/matching/NearMatcherTest.java | 5 +- .../collatex/medite/SuffixTreeTest.java | 8 +- .../collatex/output/AlignmentTableTest.java | 19 ++-- 12 files changed, 112 insertions(+), 187 deletions(-) rename collatex-core/src/test/java/eu/interedition/collatex/{neo4j => }/VariantGraphTest.java (88%) diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 8231b0c3a..1e9a663ee 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -19,13 +19,6 @@ package eu.interedition.collatex; -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; -import com.google.common.collect.Ordering; -import com.google.common.collect.Sets; import eu.interedition.collatex.dekker.DekkerAlgorithm; import eu.interedition.collatex.dekker.Match; import eu.interedition.collatex.matching.EqualityTokenComparator; @@ -37,7 +30,10 @@ import java.util.Arrays; import java.util.Collections; +import 
java.util.Comparator; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -91,10 +87,6 @@ protected static List>> table(VariantGraph graph) return VariantGraphRanking.of(graph).asTable(); } - protected static SortedSet extractPhrases(VariantGraph graph, Witness witness) { - return extractPhrases(Sets. newTreeSet(), graph, witness); - } - protected static SortedSet extractPhrases(SortedSet phrases, VariantGraph graph, Witness witness) { for (VariantGraph.Vertex v : graph.vertices(Collections.singleton(witness))) { phrases.add(toString(v, witness)); @@ -103,18 +95,21 @@ protected static SortedSet extractPhrases(SortedSet phrases, Var } protected static String toString(VariantGraph.Vertex vertex, Witness... witnesses) { - final Multimap tokens = Multimaps.index(vertex.tokens(Sets.newHashSet(Arrays.asList(witnesses))), Token::getWitness); - List tokenContents = Lists.newArrayListWithExpectedSize(tokens.size()); - for (Witness witness : Ordering.from(Witness.SIGIL_COMPARATOR).sortedCopy(tokens.keySet())) { - for (Token token : Ordering.natural().sortedCopy(Iterables.filter(tokens.get(witness), SimpleToken.class))) { - tokenContents.add(((SimpleToken) token).getNormalized()); - } - } - return Joiner.on(' ').join(tokenContents); + return vertex.tokens(new HashSet<>(Arrays.asList(witnesses))).stream() + .collect(Collectors.groupingBy(Token::getWitness)).entrySet().stream() + .sorted(Comparator.comparing(e -> e.getKey().getSigil())) + .map(Map.Entry::getValue) + .flatMap(tokens -> tokens.stream() + .filter(t -> t instanceof SimpleToken) + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getNormalized) + ) + .collect(Collectors.joining(" ")); } protected static void assertHasWitnesses(VariantGraph.Edge edge, Witness... 
witnesses) { - assertEquals(Sets.newHashSet(Arrays.asList(witnesses)), edge.witnesses()); + assertEquals(new HashSet<>(Arrays.asList(witnesses)), edge.witnesses()); } protected static VariantGraph.Edge edgeBetween(VariantGraph.Vertex start, VariantGraph.Vertex end) { @@ -124,7 +119,7 @@ protected static VariantGraph.Edge edgeBetween(VariantGraph.Vertex start, Varian } protected static void assertVertexEquals(String expected, VariantGraph.Vertex vertex) { - assertEquals(expected, ((SimpleToken) Iterables.getFirst(vertex.tokens(), null)).getNormalized()); + assertEquals(expected, vertex.tokens().stream().findFirst().map(t -> (SimpleToken) t).map(SimpleToken::getNormalized).get()); } protected static void assertTokenEquals(String expected, Token token) { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java b/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java index a0ec342aa..d121ac212 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java @@ -19,9 +19,6 @@ package eu.interedition.collatex; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; import org.junit.Test; import javax.script.Compilable; @@ -30,26 +27,29 @@ import javax.script.ScriptEngineFactory; import javax.script.ScriptEngineManager; import javax.script.ScriptException; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * @author Gregor Middell */ -public class ScriptEngineTest { +public class ScriptEngineTest extends AbstractTest { @Test public void functions() throws ScriptException, NoSuchMethodException { final ScriptEngineManager scriptEngineManager = new ScriptEngineManager(); for (ScriptEngineFactory scriptEngineFactory : scriptEngineManager.getEngineFactories()) { - System.out.println(Joiner.on("; 
").join( + LOG.fine(() -> Stream.of( scriptEngineFactory.getEngineName(), scriptEngineFactory.getEngineVersion(), scriptEngineFactory.getLanguageName(), scriptEngineFactory.getLanguageVersion(), - Iterables.toString(scriptEngineFactory.getExtensions()) - )); + scriptEngineFactory.getExtensions().toString() + ).collect(Collectors.joining("; "))); } - final Compilable compiler = (Compilable) Preconditions.checkNotNull(scriptEngineManager.getEngineByExtension("js")); + final Compilable compiler = (Compilable) Objects.requireNonNull(scriptEngineManager.getEngineByExtension("js")); final CompiledScript script = compiler.compile("function compare(a, b) { return a == b }\nfunction cost(a) { return 1; }"); script.eval(); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java similarity index 88% rename from collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java rename to collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java index 38758589a..76b46aab3 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/neo4j/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java @@ -17,14 +17,8 @@ * along with CollateX. If not, see . 
*/ -package eu.interedition.collatex.neo4j; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import eu.interedition.collatex.AbstractTest; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; +package eu.interedition.collatex; + import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; import org.junit.Assert; @@ -33,9 +27,12 @@ import java.io.StringWriter; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.logging.Level; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import static org.junit.Assert.assertEquals; @@ -45,8 +42,8 @@ public class VariantGraphTest extends AbstractTest { public void emptyGraph() { final VariantGraph graph = collate(createWitnesses()); assertEquals(0, graph.witnesses().size()); - assertEquals(2, Iterables.size(graph.vertices())); - assertEquals(1, Iterables.size(graph.edges())); + assertEquals(2, StreamSupport.stream(graph.vertices().spliterator(), false).count()); + assertEquals(1, StreamSupport.stream(graph.edges().spliterator(), false).count()); } @Test @@ -67,7 +64,7 @@ public void reconnectingVerticesYieldsSameEdge() { public void getTokens() { final SimpleWitness[] w = createWitnesses("a b c d"); final VariantGraph graph = collate(w); - final List vertices = Lists.newArrayList(graph.vertices(Sets.newHashSet(Arrays. 
asList(w)))); + final List vertices = StreamSupport.stream(graph.vertices(new HashSet<>(Arrays.asList(w))).spliterator(), false).collect(Collectors.toList()); assertEquals(6, vertices.size()); assertEquals(graph.getStart(), vertices.get(0)); assertVertexEquals("a", vertices.get(1)); @@ -82,8 +79,8 @@ public void oneWitness() { final SimpleWitness[] w = createWitnesses("only one witness"); final VariantGraph graph = collate(w); - assertEquals(5, Iterables.size(graph.vertices())); - assertEquals(4, Iterables.size(graph.edges())); + assertEquals(5, StreamSupport.stream(graph.vertices().spliterator(), false).count()); + assertEquals(4, StreamSupport.stream(graph.edges().spliterator(), false).count()); final VariantGraph.Vertex firstVertex = vertexWith(graph, "only", w[0]); final VariantGraph.Vertex secondVertex = vertexWith(graph, "one", w[0]); @@ -100,7 +97,7 @@ public void getPathForWitness() { final SimpleWitness[] w = createWitnesses("a b c d e f ", "x y z d e", "a b x y z"); final VariantGraph graph = collate(w); final Set witnessSet = Collections. 
singleton(w[0]); - final List path = Lists.newArrayList(graph.vertices(witnessSet)); + final List path = StreamSupport.stream(graph.vertices(witnessSet).spliterator(), false).collect(Collectors.toList()); assertEquals(8, path.size()); assertEquals(graph.getStart(), path.get(0)); @@ -116,8 +113,7 @@ public void getPathForWitness() { @Test public void transpositions1() { final VariantGraph graph = collate("the nice black and white cat", "the friendly white and black cat"); - assertEquals(12, Iterables.size(graph.edges())); - assertEquals(12, Iterables.size(graph.edges())); + assertEquals(12, StreamSupport.stream(graph.edges().spliterator(), false).count()); } @Test @@ -131,8 +127,8 @@ public void transpositions2() { edge = edgeBetween(vertexWith(graph, "red", w[1]), vertexWith(graph, "cat", w[1])); assertHasWitnesses(edge, w[1], w[2]); - assertEquals(17, Iterables.size(graph.vertices())); // start and end vertices included - assertEquals(20, Iterables.size(graph.edges())); + assertEquals(17, StreamSupport.stream(graph.vertices().spliterator(), false).count()); // start and end vertices included + assertEquals(20, StreamSupport.stream(graph.edges().spliterator(), false).count()); } @Test @@ -140,8 +136,8 @@ public void joinTwoIdenticalWitnesses() { final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); - assertEquals(3, Iterables.size(graph.vertices())); - assertEquals(2, Iterables.size(graph.edges())); + assertEquals(3, StreamSupport.stream(graph.vertices().spliterator(), false).count()); + assertEquals(2, StreamSupport.stream(graph.edges().spliterator(), false).count()); final VariantGraph.Vertex joinedVertex = vertexWith(graph, "the black cat", w[0]); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java index 43ca40d8c..3553cad2c 100644 --- 
a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.lab; -import com.google.common.collect.Iterables; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.VariantGraph; @@ -74,7 +73,7 @@ public class CollateXLaboratory extends JFrame { public CollateXLaboratory() { super("CollateX Laboratory"); - this.algorithm = new JComboBox(new String[] { "Dekker", "Needleman-Wunsch", "Greedy String Tiling", "MEDITE" }); + this.algorithm = new JComboBox<>(new String[] { "Dekker", "Needleman-Wunsch", "Greedy String Tiling", "MEDITE" }); this.algorithm.setEditable(false); this.algorithm.setFocusable(false); this.algorithm.setMaximumSize(new Dimension(200, this.algorithm.getMaximumSize().height)); @@ -158,7 +157,7 @@ public void actionPerformed(ActionEvent e) { final List w = witnessPanel.getWitnesses(); if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Collating {0}", Iterables.toString(w)); + LOG.log(Level.FINE, "Collating {0}", w.toString()); } @@ -182,7 +181,7 @@ public void actionPerformed(ActionEvent e) { variantGraphPanel.setVariantGraph(variantGraph); if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Collated {0}", Iterables.toString(w)); + LOG.log(Level.FINE, "Collated {0}", w.toString()); } tabbedPane.setSelectedIndex(0); @@ -219,7 +218,7 @@ public void actionPerformed(ActionEvent e) { if (LOG.isLoggable(Level.FINE)) { LOG.log(Level.FINE, "Creating MatchTable for: {0}", lastWitness.getSigil()); } - matchMatrixTable.setModel(new MatchMatrixTableModel(MatchTable.create(vg, lastWitness, comparator), vg, lastWitness, outlierTranspositionsSizeLimit)); + matchMatrixTable.setModel(new MatchMatrixTableModel(MatchTable.create(vg, lastWitness, comparator))); final TableColumnModel columnModel = 
matchMatrixTable.getColumnModel(); columnModel.getColumn(0).setCellRenderer(matchMatrixTable.getTableHeader().getDefaultRenderer()); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java index d2704341e..53fa13095 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java @@ -19,10 +19,6 @@ package eu.interedition.collatex.lab; -import java.util.List; - -import javax.swing.table.AbstractTableModel; - import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.dekker.matrix.IslandConflictResolver; @@ -30,6 +26,9 @@ import eu.interedition.collatex.dekker.matrix.MatchTableSelection; import eu.interedition.collatex.simple.SimpleToken; +import javax.swing.table.AbstractTableModel; +import java.util.List; + /** * @author Gregor Middell * @author Bram Buitendijk @@ -41,10 +40,8 @@ public class MatchMatrixTableModel extends AbstractTableModel { private final String[] rowNames; private final String[] columnNames; private final MatchTableCell[][] data; - private final int outlierTranspositionsSizeLimit; - public MatchMatrixTableModel(MatchTable matchTable, VariantGraph vg, Iterable witness, int outlierTranspositionsSizeLimit) { - this.outlierTranspositionsSizeLimit = outlierTranspositionsSizeLimit; + public MatchMatrixTableModel(MatchTable matchTable) { List rowList = matchTable.rowList(); List columnList = matchTable.columnList(); @@ -91,8 +88,7 @@ private MatchTableSelection preferred(MatchTable matchTable) { // prepare IslandConflictResolver resolver = new IslandConflictResolver(matchTable); // find preferred islands - MatchTableSelection preferred = resolver.createNonConflictingVersion(); - return preferred; + return resolver.createNonConflictingVersion(); } 
@Override diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java index 23e1d6f39..304faa4fe 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java @@ -19,18 +19,15 @@ package eu.interedition.collatex.lab; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.SortedSetMultimap; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.util.VariantGraphRanking; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.SortedSet; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; @@ -40,7 +37,7 @@ public class VariantGraphLayout { private final VariantGraph graph; - private final List> grid = Lists.newLinkedList(); + private final List> grid = new LinkedList<>(); /** * represents the size of the grid in horizontal grid elements @@ -108,7 +105,7 @@ private int solveEdgeCrosses(boolean down, int level) { // Get the current level final List cells = grid.get(level); // remember the old sort - final List levelSortBefore = Lists.newArrayList(cells); + final List levelSortBefore = new ArrayList<>(cells); // new sort Collections.sort(cells); @@ -121,7 +118,8 @@ private int solveEdgeCrosses(boolean down, int level) { } // Collections Sort sorts the highest value to the first value - for (Cell cell : Lists.reverse(cells)) { + for (int cc = cells.size() - 1; cc >= 0 ; cc--) { + final Cell cell = cells.get(cc); final VariantGraph.Vertex vertex = cell.vertex; for (VariantGraph.Edge edge : (down ? 
vertex.outgoing() : vertex.incoming())) { @@ -251,7 +249,7 @@ private boolean move(boolean toRight, List currentLevel, int currentIndexI } private List neighborsOf(VariantGraph.Vertex vertex) { - final List neighbors = Lists.newLinkedList(); + final List neighbors = new LinkedList<>(); for (VariantGraph.Edge outgoing : vertex.outgoing()) { neighbors.add(outgoing.to()); } @@ -261,7 +259,7 @@ private List neighborsOf(VariantGraph.Vertex vertex) { return neighbors; } - private final Map vertexToCell = Maps.newHashMap(); + private final Map vertexToCell = new HashMap<>(); class Cell implements Comparable { /** diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java index 29359b0c5..f791d15e9 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java @@ -19,34 +19,22 @@ package eu.interedition.collatex.lab; -import java.awt.Color; -import java.awt.Paint; -import java.awt.Stroke; -import java.util.Iterator; -import java.util.Map; - -import com.google.common.base.Objects; -import com.google.common.collect.Maps; import edu.uci.ics.jung.algorithms.layout.StaticLayout; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.util.VariantGraphRanking; -import org.apache.commons.collections15.Transformer; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.commons.lang.WordUtils; - -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; -import com.google.common.collect.Ordering; - import edu.uci.ics.jung.visualization.RenderContext; import edu.uci.ics.jung.visualization.VisualizationViewer; import 
edu.uci.ics.jung.visualization.control.DefaultModalGraphMouse; import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; import eu.interedition.collatex.simple.SimpleToken; +import eu.interedition.collatex.util.VariantGraphRanking; + +import java.awt.Color; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; /** * @author Gregor Middell @@ -64,62 +52,22 @@ public VariantGraphPanel(VariantGraph vg) { setGraphMouse(new DefaultModalGraphMouse()); final RenderContext rc = getRenderContext(); - rc.setVertexLabelTransformer(new Transformer() { - @Override - public String transform(VariantGraph.Vertex variantGraphVertexModel) { - final Multimap tokens = Multimaps.index(variantGraphVertexModel.tokens(), Token::getWitness); - final StringBuilder label = new StringBuilder(); - for (Witness witness : Ordering.from(Witness.SIGIL_COMPARATOR).sortedCopy(tokens.keySet())) { - label.append("[").append(witness.getSigil()).append(": '"); - for (Iterator tokenIt = Ordering.natural().sortedCopy(Iterables.filter(tokens.get(witness), SimpleToken.class)).iterator(); tokenIt.hasNext(); ) { - label.append(tokenIt.next().getContent()); - if (tokenIt.hasNext()) { - label.append(" "); - } - } - label.append("']\n"); - } - String trim = label.append("(").append(Objects.firstNonNull(ranking.apply(variantGraphVertexModel), 0)).append(")").toString().trim(); - String wrappedLabel = WordUtils.wrap(trim, 30, "\n", false); - String htmllabel = StringEscapeUtils.escapeHtml(wrappedLabel).replaceAll("\n", "
        "); - return "" + htmllabel + ""; - } - }); - rc.setEdgeLabelTransformer(new Transformer() { - @Override - public String transform(VariantGraph.Edge variantGraphEdgeModel) { - return Joiner.on(", ").join(Iterables.transform(variantGraphEdgeModel.witnesses(), new Function() { - - @Override - public String apply(Witness input) { - return input.getSigil(); - } - })); - } - }); - rc.setVertexFillPaintTransformer(new Transformer() { - @Override - public Paint transform(VariantGraph.Vertex v) { - final VariantGraph.Transposition transposition = Iterables.getFirst(v.transpositions(), null); - - return (v.tokens().isEmpty() ? Color.BLACK : (transposition == null - ? Color.WHITE - : transpositionColors.get(transposition) - )); - } - }); - rc.setEdgeStrokeTransformer(new Transformer() { - @Override - public Stroke transform(VariantGraph.Edge variantGraphEdgeModel) { - return variantGraphEdgeModel.witnesses().isEmpty() ? CollateXLaboratory.DASHED_STROKE : CollateXLaboratory.SOLID_STROKE; - } - }); - rc.setEdgeDrawPaintTransformer(new Transformer() { - @Override - public Paint transform(VariantGraph.Edge jungVariantGraphEdge) { - return Color.GRAY; - } - }); + rc.setVertexLabelTransformer(variantGraphVertexModel -> String.format("%s (%d)", + variantGraphVertexModel.tokens().stream() + .collect(Collectors.groupingBy(Token::getWitness)).entrySet().stream() + .sorted(Comparator.comparing(e -> e.getKey().getSigil())) + .map(e -> String.format("[%s: %s]", e.getKey().getSigil(), e.getValue().stream() + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getContent) + .collect(Collectors.joining(" ")))) + .collect(Collectors.joining("\n")), + Optional.ofNullable(ranking.apply(variantGraphVertexModel)).orElse(0)) + ); + rc.setEdgeLabelTransformer(variantGraphEdgeModel -> variantGraphEdgeModel.witnesses().stream().map(Witness::getSigil).collect(Collectors.joining(", "))); + rc.setVertexFillPaintTransformer(v -> v.tokens().isEmpty() ? 
Color.BLACK : v.transpositions().stream().findFirst().map(transpositionColors::get).orElse(Color.WHITE)); + rc.setEdgeStrokeTransformer(variantGraphEdgeModel -> variantGraphEdgeModel.witnesses().isEmpty() ? CollateXLaboratory.DASHED_STROKE : CollateXLaboratory.SOLID_STROKE); + rc.setEdgeDrawPaintTransformer(jungVariantGraphEdge -> Color.GRAY); setVariantGraph(vg); } @@ -128,7 +76,7 @@ public void setVariantGraph(VariantGraph variantGraph) { this.variantGraph = variantGraph; this.ranking = VariantGraphRanking.of(variantGraph); - this.transpositionColors = Maps.newHashMap(); + this.transpositionColors = new HashMap<>(); int tc = 0; for (VariantGraph.Transposition transposition : variantGraph.transpositions()) { this.transpositionColors.put(transposition, KELLY_MAX_CONTRAST_COLORS[tc++ % KELLY_MAX_CONTRAST_COLORS.length]); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java index de9f776f7..fd491e984 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java @@ -19,8 +19,6 @@ package eu.interedition.collatex.lab; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; import eu.interedition.collatex.simple.SimpleWitness; import javax.swing.BorderFactory; @@ -39,7 +37,7 @@ import java.awt.event.FocusEvent; import java.awt.event.KeyAdapter; import java.awt.event.KeyEvent; -import java.util.Iterator; +import java.util.ArrayList; import java.util.List; /** @@ -48,7 +46,7 @@ public class WitnessPanel extends JPanel { private static final char[] SIGLA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); - private List witnesses = Lists.newArrayListWithCapacity(SIGLA.length); + private List witnesses = new ArrayList<>(SIGLA.length); public WitnessPanel() { super(); @@ -61,7 +59,7 @@ public WitnessPanel() { public void newWitness() { 
WitnessTextArea newWitness = null; for (WitnessTextArea witness : witnesses) { - if (Strings.isNullOrEmpty(witness.getTextContent())) { + if (witness.getTextContent().isEmpty()) { newWitness = witness; break; } @@ -75,10 +73,10 @@ public void newWitness() { } public List getWitnesses() { - List witnesses = Lists.newArrayListWithCapacity(this.witnesses.size()); + List witnesses = new ArrayList<>(this.witnesses.size()); for (WitnessTextArea textArea : this.witnesses) { final String textContent = textArea.getTextContent(); - if (!Strings.isNullOrEmpty(textContent)) { + if (!textContent.isEmpty()) { witnesses.add(new SimpleWitness(textArea.getSigil(), textContent)); } } @@ -86,11 +84,11 @@ public List getWitnesses() { } public void removeEmptyWitnesses() { - for (Iterator textAreaIt = Lists.reverse(witnesses).iterator(); textAreaIt.hasNext() && witnesses.size() > 2; ) { - final WitnessTextArea textArea = textAreaIt.next(); - if (Strings.isNullOrEmpty(textArea.getTextContent())) { + for (int wc = witnesses.size() - 1; wc >= 0 && witnesses.size() > 2; wc--) { + final WitnessTextArea textArea = witnesses.get(wc); + if (textArea.getTextContent().isEmpty()) { remove(SwingUtilities.getAncestorOfClass(JScrollPane.class, textArea)); - textAreaIt.remove(); + witnesses.remove(wc); } } revalidate(); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java b/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java index 1a8af54de..fdc49bab7 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java @@ -19,19 +19,18 @@ package eu.interedition.collatex.matching; -import static org.junit.Assert.*; +import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; 
import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; -import eu.interedition.collatex.AbstractTest; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; - -import eu.interedition.collatex.simple.SimpleWitness; -import org.junit.Test; +import static org.junit.Assert.assertEquals; public class MatchesTest extends AbstractTest { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java b/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java index eb06608ab..f6840fdfd 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.matching; -import com.google.common.collect.Iterables; import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; @@ -40,7 +39,7 @@ public void nearTokenMatching() { final Map> matches = Matches.between(graph.vertices(), w[1].getTokens(), new EditDistanceTokenComparator()).allMatches; assertEquals(2, matches.values().stream().flatMap(List::stream).count()); - assertEquals(w[0].getTokens().get(0), Iterables.getFirst(Iterables.get(matches.get(w[1].getTokens().get(0)), 0).tokens(), null)); - assertEquals(w[0].getTokens().get(1), Iterables.getFirst(Iterables.get(matches.get(w[1].getTokens().get(1)), 0).tokens(), null)); + assertEquals(w[0].getTokens().get(0), matches.get(w[1].getTokens().get(0)).get(0).tokens().stream().findFirst().get()); + assertEquals(w[0].getTokens().get(1), matches.get(w[1].getTokens().get(1)).get(0).tokens().stream().findFirst().get()); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java b/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java index ead48e3a4..c129b7d2d 
100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java @@ -19,12 +19,13 @@ package eu.interedition.collatex.medite; -import com.google.common.collect.Iterables; import eu.interedition.collatex.AbstractTest; import org.junit.Test; import java.util.Arrays; import java.util.Comparator; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; /** * @author Gregor Middell @@ -40,9 +41,8 @@ public int compare(String o1, String o2) { } }, "S", "P", "O", "a", "s", "p", "o"); - LOG.fine(st.toString()); - LOG.fine(Iterables.toString(st.match(Arrays.asList("s", "p", "o", "a")))); - + LOG.fine(() -> st.toString()); + LOG.fine(() -> StreamSupport.stream(st.match(Arrays.asList("s", "p", "o", "a")).spliterator(), false).map(Object::toString).collect(Collectors.joining(", "))); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java b/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java index aba3a3965..3d6dea954 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java @@ -19,22 +19,19 @@ package eu.interedition.collatex.output; -import static org.junit.Assert.assertEquals; - -import java.util.List; -import java.util.Set; -import java.util.SortedMap; - +import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.Witness; +import eu.interedition.collatex.simple.SimpleWitness; import org.junit.Ignore; import org.junit.Test; -import com.google.common.collect.RowSortedTable; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; -import eu.interedition.collatex.AbstractTest; -import eu.interedition.collatex.Token; 
-import eu.interedition.collatex.Witness; -import eu.interedition.collatex.simple.SimpleWitness; +import static org.junit.Assert.assertEquals; public class AlignmentTableTest extends AbstractTest { From 5af12b282000b1636af7caa8d70d32a59a356147 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 21:45:27 +0100 Subject: [PATCH 30/52] Remove some Google Guava usages --- .../interedition/collatex/AbstractTest.java | 13 ++++ .../collatex/dekker/BeckettTest.java | 36 +++++------ .../collatex/dekker/DarwinTest.java | 5 +- .../collatex/dekker/SpencerHoweTest.java | 8 +-- .../dekker/TranspositionGraphTest.java | 21 +++---- .../dekker/VariantGraphRankerTest.java | 16 +++-- .../collatex/dekker/VariantGraphTest.java | 29 +++------ .../collatex/dekker/matrix/HermansTest.java | 63 +++++++++---------- .../matrix/IslandConflictResolverTest.java | 4 +- .../collatex/dekker/matrix/IslandTest.java | 13 ++-- .../dekker/matrix/MatchTableLinkerTest.java | 63 +++++++++---------- .../dekker/matrix/MatchTableTest.java | 26 ++++---- 12 files changed, 134 insertions(+), 163 deletions(-) diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 1e9a663ee..49fe5929b 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -40,6 +40,7 @@ import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import static eu.interedition.collatex.dekker.Match.PHRASE_MATCH_TO_TOKENS; import static org.junit.Assert.assertEquals; @@ -108,6 +109,18 @@ protected static String toString(VariantGraph.Vertex vertex, Witness... 
witnesse .collect(Collectors.joining(" ")); } + protected static void assertGraphVertices(VariantGraph graph, int vertices) { + assertEquals(vertices, StreamSupport.stream(graph.vertices().spliterator(), false).count()); + } + + protected static void assertGraphEdges(VariantGraph graph, int edges) { + assertEquals(edges, StreamSupport.stream(graph.edges().spliterator(), false).count()); + } + protected static void assetGraphSize(VariantGraph graph, int vertices, int edges) { + assertGraphVertices(graph, vertices); + assertGraphEdges(graph, edges); + } + protected static void assertHasWitnesses(VariantGraph.Edge edge, Witness... witnesses) { assertEquals(new HashSet<>(Arrays.asList(witnesses)), edge.witnesses()); } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java index 876007f9d..2558dd8d1 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java @@ -19,23 +19,6 @@ package eu.interedition.collatex.dekker; -import static eu.interedition.collatex.dekker.Match.PHRASE_MATCH_TO_TOKENS; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; - -import org.junit.Assert; -import org.junit.Test; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; @@ -45,6 +28,19 @@ import eu.interedition.collatex.matching.Matches; import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Assert; +import org.junit.Test; + 
+import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import static eu.interedition.collatex.dekker.Match.PHRASE_MATCH_TO_TOKENS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class BeckettTest extends AbstractTest { @@ -126,9 +122,9 @@ public void dirkVincent6() { public void testDirkVincent7() { final SimpleWitness[] w = createWitnesses(// "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.", "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); - VariantGraph graph = collate(w); + collate(w); assertPhraseMatches("Its soft","light", "any light he could remember from the days", "nights when day followed", "night", "vice versa."); - assertTrue(Iterables.isEmpty(((DekkerAlgorithm) collationAlgorithm).getTranspositions())); + assertTrue(((DekkerAlgorithm) collationAlgorithm).getTranspositions().isEmpty()); } @Test @@ -228,7 +224,7 @@ public void sentence42Transposition() { } private static void assertGraphContains(VariantGraph graph, String... 
expected) { - SortedSet contents = Sets.newTreeSet(); + SortedSet contents = new TreeSet<>(); for (Witness witness : graph.witnesses()) { extractPhrases(contents, graph, witness); } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java index 17f0add09..178cfd3ab 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java @@ -19,7 +19,6 @@ package eu.interedition.collatex.dekker; -import com.google.common.collect.Iterables; import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.VariantGraph; import org.junit.Assert; @@ -42,8 +41,8 @@ public void cyclicJoin() { "It has been disputed at what period of life the causes of variability, whatever they may be, generally act; whether during the early or late period of development of the embryo, or at the instant of conception. Geoffroy St. Hilaire's experiments show that unnatural treatment of the embryo causes monstrosities; and monstrosities cannot be separated by any clear line of distinction from mere variations. But I am strongly inclined to suspect that the most frequent cause of variability may be attributed to the male and female reproductive elements having been affected prior to the act of conception. Several reasons make me believe in this; but the chief one is the remarkable effect which confinement or cultivation has on the functions of the reproductive system; this system appearing to be far more susceptible than any other part of the organisation, to the action of any change in the conditions of life. Nothing is more easy than to tame an animal, and few things more difficult than to get it to breed freely under confinement, even in the many cases when the male and female unite. 
How many animals there are which will not breed, though living long under not very close confinement in their native country! This is generally attributed to vitiated instincts; but how many cultivated plants display the utmost vigour, and yet rarely or never seed! In some few such cases it has been found out that very trifling changes, such as a little more or less water at some particular period of growth, will determine whether or not the plant sets a seed. I cannot here enter on the copious details which I have collected on this curious subject; but to show how singular the laws are which determine the reproduction of animals under confinement, I may just mention that carnivorous animals, even from the tropics, breed in this country pretty freely under confinement, with the exception of the plantigrades or bear family; whereas, carnivorous birds, with the rarest exceptions, hardly ever lay fertile eggs. Many exotic plants have pollen utterly worthless, in the same exact condition as in the most sterile hybrids. 
When, on the one hand, we see domesticated animals and plants, though often weak and sickly, yet breeding quite freely under confinement; and when, on the other hand, we see individuals, though taken young from a state of nature, perfectly tamed, long-lived, and healthy (of which I could give numerous instances), yet having their reproductive system so seriously affected by unperceived causes as to fail in acting, we need not be surprised at this system, when it does act under confinement, acting not quite regularly, and producing offspring not perfectly like their parents or variable.",// "With respect to what I have called the indirect action of changed conditions, namely, through the reproductive system being affected, we may infer that variability is thus induced, partly from the fact of this system being extremely sensitive to any change in the conditions, and partly from the similarity, as Kölreuter and others have remarked, between the variability which follows from the crossing of distinct species, and that which may be observed with all plants and animals when reared under new or unnatural conditions. Many facts clearly show how eminently susceptible the reproductive system is to very slight changes in the surrounding conditions. Nothing is more easy than to tame an animal, and few things more difficult than to get it to breed freely under confinement, even when the male and female unite. How many animals there are which will not breed, though kept in an almost free state in their native country! This is generally, but erroneously, attributed to vitiated instincts. Many cultivated plants display the utmost vigour, and yet rarely or never seed! In some few cases it has been discovered that a very trifling change, such as a little more or less water at some particular period of growth, will determine whether or not a plant will produce seeds. 
I cannot here give the details which I have collected and elsewhere published on this curious subject; but to show how singular the laws are which determine the reproduction of animals under confinement, I may mention that carnivorous animals, even from the tropics, breed in this country pretty freely under confinement, with the exception of the plantigrades or bear family, which seldom produce young; whereas carnivorous birds, with the rarest exceptions, hardly ever lay fertile eggs. Many exotic plants have pollen utterly worthless, in the same condition as in the most sterile hybrids. When, on the one hand, we see domesticated animals and plants, though often weak and sickly, yet breeding freely under confinement; and when, on the other hand, we see individuals, though taken young from a state of nature, perfectly tamed, long-lived, and healthy (of which I could give numerous instances), yet having their reproductive system so seriously affected by unperceived causes as to fail to act, we need not be surprised at this system, when it does act under confinement, acting irregularly, and producing offspring somewhat unlike their parents. 
I may add, that as some organisms breed freely under the most unnatural conditions (for instance, rabbits and ferrets kept in hutches), showing that their reproductive organs are not affected; so will some animals and plants withstand domestication or cultivation, and vary very slightly — perhaps hardly more than in a state of nature."); - Iterables.size(graph.vertices()); // does implicit cycle detection - Iterables.size(VariantGraph.JOIN.apply(graph).vertices()); // does implicit cycle detection + graph.vertices().forEach(v -> {}); // does implicit cycle detection + VariantGraph.JOIN.apply(graph).vertices().forEach(v -> {}); // does implicit cycle detection } @Test diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java index ddd3810a4..98c79dd29 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java @@ -19,12 +19,10 @@ package eu.interedition.collatex.dekker; -import com.google.common.collect.Iterables; -import com.google.common.collect.RowSortedTable; import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.Witness; -import eu.interedition.collatex.Token; import eu.interedition.collatex.simple.SimpleWitness; import eu.interedition.collatex.util.VariantGraphRanking; import org.junit.Test; @@ -63,8 +61,8 @@ public void alignmentTable() { public void graph() { final SimpleWitness[] w = createWitnesses("a", "b", "a b"); final VariantGraph graph = collate(w); - assertEquals(4, Iterables.size(graph.vertices())); - assertEquals(5, Iterables.size(graph.edges())); + + assetGraphSize(graph, 4, 5); final VariantGraph.Vertex startVertex = graph.getStart(); final VariantGraph.Vertex aVertex = vertexWith(graph, "a", w[0]); diff 
--git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java index c117408a9..f92bde8c9 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java @@ -1,22 +1,19 @@ package eu.interedition.collatex.dekker; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.Set; - -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - -import com.google.common.collect.Sets; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.VariantGraph.Transposition; import eu.interedition.collatex.VariantGraph.Vertex; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class TranspositionGraphTest extends AbstractTest { @@ -79,7 +76,7 @@ public void testGreekThreeWitnesses() { Set transpositions = graph.transpositions(); assertEquals(1, transpositions.size()); Transposition transposition = transpositions.iterator().next(); - Set transposedVertices = Sets.newHashSet(); + Set transposedVertices = new HashSet<>(); for (Vertex transposedVertex : transposition) { transposedVertices.add(transposedVertex.toString()); } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java index 7b5b1ba93..1d7a80027 100644 --- 
a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java @@ -19,18 +19,16 @@ package eu.interedition.collatex.dekker; -import static org.junit.Assert.assertEquals; - -import java.util.List; -import java.util.Map; - +import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.util.VariantGraphRanking; import org.junit.Test; -import com.google.common.collect.Lists; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; -import eu.interedition.collatex.AbstractTest; +import static org.junit.Assert.assertEquals; public class VariantGraphRankerTest extends AbstractTest { @@ -38,7 +36,7 @@ public class VariantGraphRankerTest extends AbstractTest { public void ranking() { final VariantGraph graph = collate("The black cat", "The black and white cat", "The black and green cat"); final VariantGraphRanking ranking = VariantGraphRanking.of(graph); - final List vertices = Lists.newArrayList(graph.vertices()); + final List vertices = StreamSupport.stream(graph.vertices().spliterator(), false).collect(Collectors.toList()); assertVertexEquals("the", vertices.get(1)); assertEquals(1, (long) ranking.apply(vertices.get(1))); @@ -60,7 +58,7 @@ public void ranking() { public void agastTranspositionHandling() { final VariantGraph graph = collate("He was agast, so", "He was agast", "So he was agast"); final VariantGraphRanking ranking = VariantGraphRanking.of(graph); - final List vertices = Lists.newArrayList(graph.vertices()); + final List vertices = StreamSupport.stream(graph.vertices().spliterator(), false).collect(Collectors.toList()); assertVertexEquals("so", vertices.get(1)); assertEquals(1,(long) ranking.apply(vertices.get(1))); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java 
b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java index b68ce9609..dec853d7f 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java @@ -19,22 +19,13 @@ package eu.interedition.collatex.dekker; -import static org.junit.Assert.assertEquals; - -import java.util.List; - -import eu.interedition.collatex.VariantGraph; -import org.junit.Assert; -import org.junit.Test; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; - import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; /** - * @todo Add test with an addition or omission in between! + * todo Add test with an addition or omission in between! */ public class VariantGraphTest extends AbstractTest { @@ -43,8 +34,7 @@ public void twoWitnesses() { final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); final VariantGraph graph = collate(w); - assertEquals(5, Iterables.size(graph.vertices())); - assertEquals(4, Iterables.size(graph.edges())); + assetGraphSize(graph, 5, 4); final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); @@ -61,8 +51,7 @@ public void addition1() { final SimpleWitness[] w = createWitnesses("the black cat", "the white and black cat"); final VariantGraph graph = collate(w); - assertEquals(7, Lists.newArrayList(graph.vertices()).size()); - assertEquals(7, Iterables.size(graph.edges())); + assetGraphSize(graph, 7, 7); final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); @@ -84,9 +73,7 @@ public void variant() { final SimpleWitness[] w = createWitnesses("the black cat", "the white cat", "the green 
cat", "the red cat", "the yellow cat"); final VariantGraph graph = collate(w); - final List vertices = Lists.newArrayList(graph.vertices()); - assertEquals(9, vertices.size()); - assertEquals(12, Iterables.size(graph.edges())); + assetGraphSize(graph, 9, 12); final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); @@ -115,7 +102,7 @@ public void doubleTransposition2() { final SimpleWitness[] w = createWitnesses("a b", "b a"); final VariantGraph graph = collate(w); - assertEquals(5, Iterables.size(graph.vertices())); + assertGraphVertices(graph, 5); assertHasWitnesses(edgeBetween(vertexWith(graph, "b", w[1]), vertexWith(graph, "a", w[1])), w[1]); assertHasWitnesses(edgeBetween(vertexWith(graph, "a", w[0]), vertexWith(graph, "b", w[0])), w[0]); @@ -126,7 +113,7 @@ public void mirroredTranspositionsWithMatchInBetween() { final SimpleWitness[] w = createWitnesses("the black and white cat", "the white and black cat"); final VariantGraph graph = collate(w); - Assert.assertEquals(9, Iterables.size(graph.vertices())); + assertGraphVertices(graph, 9); // FIXME: find out, how to test this without stable topological order /* diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java index 2da1f9e6d..ef6721d76 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java @@ -19,30 +19,6 @@ package eu.interedition.collatex.dekker.matrix; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; - -import java.io.StringWriter; 
-import java.util.Arrays; -import java.util.Set; -import java.util.logging.Level; - -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; - -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; @@ -53,6 +29,26 @@ import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Level; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; public class HermansTest extends AbstractTest { @@ -245,22 +241,22 @@ public void test4JoinedTranspositions2witnesses() throws XMLStreamException { VariantGraph vg = collate(sw); Set transpositions0 = vg.transpositions(); for (VariantGraph.Transposition t : transpositions0) { - LOG.log(Level.FINE, "transposition {0}", showTransposition(t)); + LOG.log(Level.FINE, "transposition {0}", t.toString()); } Iterable vertices = vg.vertices(); for (VariantGraph.Vertex v : vertices) { - LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new 
Object[] { v, Iterables.toString(v.transpositions()) }); + LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[] { v, v.transpositions().toString() }); } vg = VariantGraph.JOIN.apply(vg); LOG.fine(toString(table(vg))); Set transpositions = vg.transpositions(); LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); for (VariantGraph.Transposition t : transpositions) { - LOG.log(Level.FINE, "transposition {0}", showTransposition(t)); + LOG.log(Level.FINE, "transposition {0}", t.toString()); // all joined vertices should be size 3 for (VariantGraph.Vertex vertex : t) { - assertEquals(showTransposition(t), 3, vertex.tokens().size()); + assertEquals(t.toString(), 3, vertex.tokens().size()); } } assertEquals(3, transpositions.size()); @@ -278,7 +274,7 @@ public void test4JoinedTranspositions3witnesses() throws XMLStreamException { Iterable vertices = vg.vertices(); for (VariantGraph.Vertex v : vertices) { - LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[]{v, Iterables.toString(v.transpositions())}); + LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[]{v, v.transpositions()}); } final StringWriter writer = new StringWriter(); @@ -370,17 +366,17 @@ public void testNoLoops() throws XMLStreamException { assertEquals(1, transpositions.size()); VariantGraph.Transposition t = transpositions.iterator().next(); for (VariantGraph.Vertex vertex : t) { - for (SimpleToken token : Iterables.filter(vertex.tokens(), SimpleToken.class)) { + for (SimpleToken token : vertex.tokens().stream().map(tk -> (SimpleToken)tk).toArray(SimpleToken[]::new)) { assertEquals(token.toString(), token.getNormalized(), "c"); } } - final Set witnessesInTransposition = Sets.newHashSet(); + final Set witnessesInTransposition = new HashSet<>(); for (VariantGraph.Vertex vertex : t) { for (Token token : vertex.tokens()) { witnessesInTransposition.add(token.getWitness()); } } - assertEquals(Sets.newHashSet(Arrays.asList(sw)), witnessesInTransposition); + 
assertEquals(new HashSet<>(Arrays.asList(sw)), witnessesInTransposition); } // @Test @@ -403,7 +399,4 @@ private String generateTEI(VariantGraph vg) throws XMLStreamException, FactoryCo return writer.toString(); } - private String showTransposition(VariantGraph.Transposition t) { - return Iterables.toString(t); - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java index 10856f256..13f596edb 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java @@ -1,11 +1,11 @@ package eu.interedition.collatex.dekker.matrix; -import com.google.common.collect.Lists; import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.simple.SimpleWitness; import org.junit.Test; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -24,7 +24,7 @@ public void testPartlyOverlappingIslands() { VariantGraph graph = collate(w[0]); // create table from the graph and the second witness MatchTable table = MatchTable.create(graph, w[1]); - List possibleIslands = Lists.newArrayList(); + List possibleIslands = new ArrayList<>(); for (Island island : table.getIslands()) { if (island.size()==2) { possibleIslands.add(island); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java index e33fe0098..97f80fd7a 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java @@ -19,16 +19,15 @@ package eu.interedition.collatex.dekker.matrix; -import static 
org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import eu.interedition.collatex.VariantGraph; -import org.junit.Test; - import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; public class IslandTest extends AbstractTest { diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java index 1ec086eb6..4155142e2 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java @@ -19,25 +19,6 @@ package eu.interedition.collatex.dekker.matrix; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.logging.Level; - -import javax.xml.stream.XMLStreamException; - -import org.junit.Test; - -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; @@ -47,6 +28,21 @@ import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.matching.StrictEqualityTokenComparator; import eu.interedition.collatex.simple.SimpleWitness; +import 
org.junit.Test; + +import javax.xml.stream.XMLStreamException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class MatchTableLinkerTest extends AbstractTest { @@ -60,7 +56,7 @@ public void additionInCombinationWithTransposition2() { MatchTableLinker linker = new MatchTableLinker(); Map link = linker.link(graph, w[2], new EqualityTokenComparator()); Set tokens = link.keySet(); - Map tokensAsString = Maps.newHashMap(); + Map tokensAsString = new HashMap<>(); for (Token token : tokens) { tokensAsString.put(token.toString(), link.get(token).toString()); } @@ -118,11 +114,11 @@ public void testHermansText2c() throws XMLStreamException { Map linkedTokens = linker.link(graph, witnesses[2], new EqualityTokenComparator()); Set tokens = linkedTokens.keySet(); - Set tokensAsString = Sets.newLinkedHashSet(); + Set tokensAsString = new LinkedHashSet<>(); for (Token token : tokens) { tokensAsString.add(token.toString()); } - LOG.fine(Iterables.toString(tokensAsString)); + LOG.fine(tokensAsString::toString); assertTrue(tokensAsString.contains("C:0:'over'")); assertTrue(tokensAsString.contains("C:1:'de'")); assertTrue(tokensAsString.contains("C:2:'atlantische'")); @@ -143,7 +139,7 @@ public void test1() { Map linkedTokens = linker.link(vg, sw[1], new EqualityTokenComparator()); Set tokens = linkedTokens.keySet(); - Set tokensAsString = Sets.newLinkedHashSet(); + Set tokensAsString = new LinkedHashSet<>(); for (Token token : tokens) { tokensAsString.add(token.toString()); } @@ -165,13 +161,13 @@ public void testOverDeAtlantischeOceaan() { Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); Set tokens = linkedTokens.keySet(); - Set 
tokensAsString = Sets.newLinkedHashSet(); + Set tokensAsString = new LinkedHashSet<>(); for (Token token : tokens) { tokensAsString.add(token.toString()); } - List l = Lists.newArrayList(tokensAsString); + List l = new ArrayList<>(tokensAsString); Collections.sort(l); - LOG.log(Level.FINE, Joiner.on('\n').join(l)); + LOG.log(Level.FINE, () -> l.stream().collect(Collectors.joining("\n"))); assertTrue(tokensAsString.contains("B:87:'onder'")); assertTrue(tokensAsString.contains("B:0:'over'")); assertTrue(tokensAsString.contains("B:1:'de'")); @@ -182,7 +178,6 @@ public void testOverDeAtlantischeOceaan() { @Test public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { - int outlierTranspositionsSizeLimit = 1; String textD1 = "natuurlijk is alles betrekkelijk"; String textD9 = "Natuurlijk, alles mag relatief zijn"; String textDmd1 = "Natuurlijk, alles is betrekkelijk"; @@ -191,11 +186,11 @@ public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); Set tokens = linkedTokens.keySet(); - Set tokensAsString = Sets.newLinkedHashSet(); + Set tokensAsString = new LinkedHashSet<>(); for (Token token : tokens) { tokensAsString.add(token.toString()); } - List l = Lists.newArrayList(tokensAsString); + List l = new ArrayList<>(tokensAsString); Collections.sort(l); LOG.log(Level.FINE, "tokensAsString={0}", l); // assertTrue(tokensAsString.contains("B:75:'onder'")); @@ -208,7 +203,6 @@ public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { @Test public void testSuscepto() throws XMLStreamException { - int outlierTranspositionsSizeLimit = 1; String a = "Et sumpno suscepto tribus diebus morte morietur et deinde ab inferis regressus ad lucem veniet."; String b = "Et mortem sortis finiet post tridui somnum et morte morietur tribus diebus somno suscepto et tunc ab inferis regressus ad lucem veniet."; String c = "Et sortem mortis tribus diebus 
sompno suscepto et tunc ab inferis regressus ad lucem veniet."; @@ -217,11 +211,11 @@ public void testSuscepto() throws XMLStreamException { Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); Set tokens = linkedTokens.keySet(); - Set tokensAsString = Sets.newLinkedHashSet(); + Set tokensAsString = new LinkedHashSet<>(); for (Token token : tokens) { tokensAsString.add(token.toString()); } - List l = Lists.newArrayList(tokensAsString); + List l = new ArrayList<>(tokensAsString); Collections.sort(l); LOG.log(Level.FINE, "tokensAsString={0}", l); assertTrue(tokensAsString.contains("C:6:'suscepto'")); @@ -229,7 +223,6 @@ public void testSuscepto() throws XMLStreamException { @Test public void testOutlierTranspositionLimitAndPunctuation() { - int outlierTranspositionsSizeLimit = 200; String w1 = "a b c ."; String w2 = "a b c Natuurlijk, alles mag relatief zijn."; SimpleWitness[] sw = createWitnesses(w1, w2); @@ -247,11 +240,11 @@ public void testOutlierTranspositionLimitAndPunctuation() { // assert linked tokens; helper method Set tokens = linkedTokens.keySet(); - Set tokensAsString = Sets.newLinkedHashSet(); + Set tokensAsString = new LinkedHashSet<>(); for (Token token : tokens) { tokensAsString.add(token.toString()); } - List l = Lists.newArrayList(tokensAsString); + List l = new ArrayList<>(tokensAsString); Collections.sort(l); assertTrue(l.contains("B:0:'a'")); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java index dff0abeff..d936f49d0 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java @@ -19,22 +19,20 @@ package eu.interedition.collatex.dekker.matrix; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; 
-import static org.junit.Assert.assertTrue; - -import java.util.Collections; -import java.util.List; - -import org.junit.Test; - -import com.google.common.collect.Lists; - import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.Token; import eu.interedition.collatex.VariantGraph; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; public class MatchTableTest extends AbstractTest { @@ -173,7 +171,7 @@ public void testIslandDetectionAbcabCab() { SimpleWitness[] witnesses = createWitnesses("a b c a b", "c a b"); VariantGraph graph = collate(witnesses[0]); MatchTable table = MatchTable.create(graph, witnesses[1]); - List islands = Lists.newArrayList(table.getIslands()); + List islands = new ArrayList<>(table.getIslands()); assertEquals(2, islands.size()); Collections.sort(islands); Island island = islands.get(1); @@ -185,7 +183,7 @@ public void testIslandDetectionXabcabXcab() { SimpleWitness[] witnesses = createWitnesses("x a b c a b", "x c a b"); VariantGraph graph = collate(witnesses[0]); MatchTable table = MatchTable.create(graph, witnesses[1]); - List islands = Lists.newArrayList(table.getIslands()); + List islands = new ArrayList<>(table.getIslands()); assertEquals(3, islands.size()); Collections.sort(islands); Island island = islands.get(0); @@ -197,7 +195,7 @@ public void testIslandDetectionPartlyOverlappingIslandsUsecase() { SimpleWitness[] w = createWitnesses("The cat and the dog", "the dog and the cat"); VariantGraph graph = collate(w[0]); MatchTable table = MatchTable.create(graph, w[1], new EqualityTokenComparator()); - List islands = Lists.newArrayList(table.getIslands()); + List islands = new ArrayList<>(table.getIslands()); 
Collections.sort(islands); assertEquals(4, islands.size()); assertVectorEquals(0, 0, 1, islands.get(0)); From 8bc24814b70b10795d110115c1e129201545fa44 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 7 Feb 2015 22:39:42 +0100 Subject: [PATCH 31/52] Remove some Google Guava usages --- collatex-core/pom.xml | 9 ----- .../collatex/suffixarray/SuffixArrays.java | 4 +- .../collatex/dekker/matrix/HermansTest.java | 40 ------------------- collatex-tools/pom.xml | 13 +++++- pom.xml | 11 ----- 5 files changed, 13 insertions(+), 64 deletions(-) diff --git a/collatex-core/pom.xml b/collatex-core/pom.xml index a1598213e..b6d2a3102 100644 --- a/collatex-core/pom.xml +++ b/collatex-core/pom.xml @@ -20,15 +20,6 @@ jung-visualization test - - org.mockito - mockito-all - - - commons-lang - commons-lang - test - diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java index 7e5188e2f..c7788a2ec 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java @@ -4,8 +4,6 @@ import java.util.Comparator; import java.util.List; -import com.google.common.collect.Lists; - /* * TODO: ultimately, this class should be "intelligent" enough to pick the best * algorithm, depending on the distribution and properties of the input (alphabet size, @@ -177,7 +175,7 @@ private static ISuffixArrayBuilder defaultAlgorithm() public static List toString(CharSequence input, int [] suffixes) { final String full = input.toString(); - final ArrayList result = Lists.newArrayList(); + final ArrayList result = new ArrayList<>(); for (int i = 0; i < input.length(); i++) { result.add(full.subSequence(suffixes[i], full.length())); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java 
b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java index ef6721d76..3078d5484 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java @@ -46,9 +46,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; public class HermansTest extends AbstractTest { @@ -178,43 +175,6 @@ public void testHermansText2c() throws XMLStreamException { testWitnessCollation(witnesses); } - @Ignore - @Test - public void testHermansText2aWithMocking() throws XMLStreamException { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn. Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; - String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; - SimpleWitness[] witnesses = createWitnesses(textD1, textD9); - - VariantGraph base = collate(witnesses[0]); - MatchTable matchTable = MatchTable.create(base, witnesses[1], new EqualityTokenComparator()); - IslandConflictResolver creator = new IslandConflictResolver(matchTable); - - //Mock Archipelago - Archipelago result = mock(Archipelago.class); - Island i1 = new Island(new Coordinate(3, 3), new Coordinate(5, 5)); - Island i2 = new Island(new Coordinate(3, 20), new Coordinate(5, 22)); - Island i3 = new Island(new Coordinate(20, 3), new Coordinate(22, 5)); - Island i4 = new Island(new Coordinate(20, 20), new Coordinate(22, 22)); - Island i5 = new Island(new Coordinate(69, 66), new Coordinate(71, 68)); - Island i6 = new Island(new Coordinate(25, 25), new Coordinate(26, 26)); - Island i7 = new Island(new Coordinate(73, 70), new Coordinate(74, 71)); - Island i8 = new Island(new Coordinate(0, 17), new Coordinate(0, 17)); - Island i9 = new Island(new Coordinate(17, 17), new Coordinate(17, 17)); - - //TODO: push mock into createNonConflictingVersion - creator.createNonConflictingVersion(); - verify(result).add(new Island(new Coordinate(40, 39), new Coordinate(58, 57))); - verify(result).add(new Island(new Coordinate(8, 8), new Coordinate(15, 15))); - verify(result).add(new Island(new Coordinate(30, 31), new Coordinate(36, 37))); - verify(result).add(new Island(new Coordinate(62, 59), new Coordinate(67, 64))); - verify(result).add(new Island(new Coordinate(77, 74), new Coordinate(80, 77))); - verify(result).add(i5); - verify(result).add(i1); - verify(result).add(i4); - 
verify(result).add(i6); - verify(result).add(i7); - verifyNoMoreInteractions(result); - } private void testWitnessCollation(SimpleWitness[] sw) throws XMLStreamException, FactoryConfigurationError { VariantGraph vg = collate(sw); diff --git a/collatex-tools/pom.xml b/collatex-tools/pom.xml index a00f35ea4..6d1bf6506 100644 --- a/collatex-tools/pom.xml +++ b/collatex-tools/pom.xml @@ -11,7 +11,18 @@ CollateX Tools CollateX Tool Suite including a command line interface and a HTTP service - + + com.google.code.findbugs + jsr305 + 2.0.2 + provided + + + com.google.guava + guava + 15.0 + + org.glassfish javax.json 1.0.4 diff --git a/pom.xml b/pom.xml index 9769987f8..c45b0e963 100644 --- a/pom.xml +++ b/pom.xml @@ -64,17 +64,6 @@ - - com.google.code.findbugs - jsr305 - 2.0.2 - provided - - - com.google.guava - guava - 15.0 - junit junit From 24fecfb49817c93acc5276403b1ddf19387a46b1 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sun, 8 Feb 2015 11:21:48 +0100 Subject: [PATCH 32/52] Remove some JUNG usages --- collatex-core/pom.xml | 5 - .../interedition/collatex/VariantGraph.java | 99 +++--- .../collatex/lab/CollateXLaboratory.java | 8 +- .../collatex/lab/VariantGraphLayout.java | 329 ------------------ .../lab/VariantGraphLayoutAdapter.java | 89 ----- .../collatex/lab/VariantGraphPanel.java | 112 ------ pom.xml | 13 - 7 files changed, 49 insertions(+), 606 deletions(-) delete mode 100644 collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java delete mode 100644 collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java delete mode 100644 collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java diff --git a/collatex-core/pom.xml b/collatex-core/pom.xml index b6d2a3102..02770f43e 100644 --- a/collatex-core/pom.xml +++ b/collatex-core/pom.xml @@ -15,11 +15,6 @@ net.sf.jung jung-graph-impl - - net.sf.jung - jung-visualization - test - diff --git 
a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index 390346ddd..b553c2a6e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -296,62 +296,59 @@ public String toString() { } } - public static final Function JOIN = new Function() { - @Override - public VariantGraph apply(VariantGraph graph) { - final Set processed = new HashSet<>(); - - final Vertex end = graph.getEnd(); - final Deque queue = new ArrayDeque<>(); - for (VariantGraph.Edge startingEdges : graph.getStart().outgoing()) { - queue.push(startingEdges.to()); - } - - while (!queue.isEmpty()) { - final Vertex vertex = queue.pop(); - final Set transpositions = new HashSet<>(vertex.transpositions()); - final List outgoingEdges = new ArrayList<>(vertex.outgoing()); - if (outgoingEdges.size() == 1) { - final Edge joinCandidateEdge = outgoingEdges.get(0); - final Vertex joinCandidateVertex = joinCandidateEdge.to(); - final Set joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); - - boolean canJoin = !end.equals(joinCandidateVertex) && // - joinCandidateVertex.incoming().size() == 1 && // - transpositions.equals(joinCandidateTranspositions); - if (canJoin) { - vertex.add(joinCandidateVertex.tokens()); - for (Transposition t : new HashSet<>(joinCandidateVertex.transpositions())) { - final Set transposed = new HashSet<>(t.vertices); - transposed.remove(joinCandidateVertex); - transposed.add(vertex); - t.delete(); - graph.transpose(transposed); - } - for (Edge e : new ArrayList<>(joinCandidateVertex.outgoing())) { - final Vertex to = e.to(); - final Set witnesses = e.witnesses(); - e.delete(); - graph.connect(vertex, to, witnesses); - } - joinCandidateEdge.delete(); - joinCandidateVertex.delete(); - queue.push(vertex); - continue; + public static final Function JOIN = graph -> { + 
final Set processed = new HashSet<>(); + + final Vertex end1 = graph.getEnd(); + final Deque queue = new ArrayDeque<>(); + for (Edge startingEdges : graph.getStart().outgoing()) { + queue.push(startingEdges.to()); + } + + while (!queue.isEmpty()) { + final Vertex vertex = queue.pop(); + final Set transpositions = new HashSet<>(vertex.transpositions()); + final List outgoingEdges = new ArrayList<>(vertex.outgoing()); + if (outgoingEdges.size() == 1) { + final Edge joinCandidateEdge = outgoingEdges.get(0); + final Vertex joinCandidateVertex = joinCandidateEdge.to(); + final Set joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); + + boolean canJoin = !end1.equals(joinCandidateVertex) && // + joinCandidateVertex.incoming().size() == 1 && // + transpositions.equals(joinCandidateTranspositions); + if (canJoin) { + vertex.add(joinCandidateVertex.tokens()); + for (Transposition t : new HashSet<>(joinCandidateVertex.transpositions())) { + final Set transposed = new HashSet<>(t.vertices); + transposed.remove(joinCandidateVertex); + transposed.add(vertex); + t.delete(); + graph.transpose(transposed); } - } - - processed.add(vertex); - for (Edge e : outgoingEdges) { - final Vertex next = e.to(); - // FIXME: Why do we run out of memory in some cases here, if this is not checked? - if (!processed.contains(next)) { - queue.push(next); + for (Edge e : new ArrayList<>(joinCandidateVertex.outgoing())) { + final Vertex to = e.to(); + final Set witnesses = e.witnesses(); + e.delete(); + graph.connect(vertex, to, witnesses); } + joinCandidateEdge.delete(); + joinCandidateVertex.delete(); + queue.push(vertex); + continue; } } - return graph; + processed.add(vertex); + for (Edge e : outgoingEdges) { + final Vertex next = e.to(); + // FIXME: Why do we run out of memory in some cases here, if this is not checked? 
+ if (!processed.contains(next)) { + queue.push(next); + } + } } + + return graph; }; } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java index 3553cad2c..51b5c5292 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java @@ -63,8 +63,6 @@ public class CollateXLaboratory extends JFrame { private final WitnessPanel witnessPanel = new WitnessPanel(); - private final VariantGraphPanel variantGraphPanel; - private final JTable matchMatrixTable = new JTable(); private final JComboBox algorithm; @@ -79,7 +77,6 @@ public CollateXLaboratory() { this.algorithm.setMaximumSize(new Dimension(200, this.algorithm.getMaximumSize().height)); this.tabbedPane = new JTabbedPane(); - this.tabbedPane.addTab("Variant Graph", variantGraphPanel = new VariantGraphPanel(new VariantGraph())); this.tabbedPane.addTab("Match Table", new JScrollPane(matchMatrixTable)); matchMatrixTable.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); matchMatrixTable.setShowGrid(true); @@ -179,12 +176,9 @@ public void actionPerformed(ActionEvent e) { VariantGraph.JOIN.apply(variantGraph); - variantGraphPanel.setVariantGraph(variantGraph); if (LOG.isLoggable(Level.FINE)) { LOG.log(Level.FINE, "Collated {0}", w.toString()); } - - tabbedPane.setSelectedIndex(0); } } @@ -226,7 +220,7 @@ public void actionPerformed(ActionEvent e) { columnModel.getColumn(col).setCellRenderer(MATCH_MATRIX_CELL_RENDERER); } - tabbedPane.setSelectedIndex(1); + tabbedPane.setSelectedIndex(0); } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java deleted file mode 100644 index 304faa4fe..000000000 --- 
a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayout.java +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.lab; - -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.util.VariantGraphRanking; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; - -/** - * @author Gregor Middell - */ -public class VariantGraphLayout { - - private final VariantGraph graph; - private final List> grid = new LinkedList<>(); - - /** - * represents the size of the grid in horizontal grid elements - */ - private int maxX = Integer.MIN_VALUE; - - /** - * Implementation. - *

        - * First of all, the Algorithm searches the roots from the - * Graph. Starting from this roots the Algorithm creates - * levels and stores them in the member levels. - * The Member levels contains LinkedList Objects and the LinkedList per level - * contains Cell Wrapper Objects. After that the Algorithm - * tries to solve the edge crosses from level to level and - * goes top down and bottom up. After minimization of the - * edge crosses the algorithm moves each node to its - * bary center. - */ - public static List> of(VariantGraph graph) { - final VariantGraphLayout layout = new VariantGraphLayout(graph); - - layout.fillLevels(); - layout.solveEdgeCrosses(); - layout.moveToBarycenter(); - - return layout.grid; - } - - private VariantGraphLayout(VariantGraph graph) { - this.graph = graph; - } - - private void fillLevels() { - VariantGraphRanking.of(graph).getByRank().forEach((rank, vertices) -> { - final AtomicInteger cellNum = new AtomicInteger(); - grid.add(vertices.stream().map(vertex -> new Cell(rank, cellNum.getAndIncrement(), vertex)).collect(Collectors.toList())); - maxX = Math.max(maxX, rank); - }); - } - - private void solveEdgeCrosses() { - int movementsCurrentLoop = -1; - - while (movementsCurrentLoop != 0) { - // reset the movements per loop count - movementsCurrentLoop = 0; - - // top down - for (int i = 0; i < grid.size() - 1; i++) { - movementsCurrentLoop += solveEdgeCrosses(true, i); - } - - // bottom up - for (int i = grid.size() - 1; i >= 1; i--) { - movementsCurrentLoop += solveEdgeCrosses(false, i); - } - } - } - - /** - * @return movements - */ - private int solveEdgeCrosses(boolean down, int level) { - // Get the current level - final List cells = grid.get(level); - // remember the old sort - final List levelSortBefore = new ArrayList<>(cells); - // new sort - Collections.sort(cells); - - // test for movements - int movements = 0; - for (int j = 0; j < levelSortBefore.size(); j++) { - if (levelSortBefore.get(j).avgWeight() != 
cells.get(j).avgWeight()) { - movements++; - } - } - - // Collections Sort sorts the highest value to the first value - for (int cc = cells.size() - 1; cc >= 0 ; cc--) { - final Cell cell = cells.get(cc); - final VariantGraph.Vertex vertex = cell.vertex; - - for (VariantGraph.Edge edge : (down ? vertex.outgoing() : vertex.incoming())) { - final Cell neighborCell = vertexToCell.get((down ? edge.to() : edge.from())); - - // do it only if the edge is a forward edge to a deeper level - if (down && neighborCell.y > level) { - neighborCell.addWeight(cell.avgWeight()); - } - if (!down && neighborCell.y < level) { - neighborCell.addWeight(cell.avgWeight()); - } - } - } - return movements; - } - - private void moveToBarycenter() { - for (VariantGraph.Vertex vertex : graph.vertices()) { - final Cell cell = vertexToCell.get(vertex); - for (VariantGraph.Vertex neighbor : neighborsOf(vertex)) { - if (cell.y != vertexToCell.get(neighbor).y) { - cell.priority++; - } - } - } - - for (List level : grid) { - int pos = 1; - for (Cell cell : level) { - // calculate the initial Grid Positions 1, 2, 3, .... 
per Level - cell.x = pos++; - } - } - - int movementsCurrentLoop = -1; - - while (movementsCurrentLoop != 0) { - // reset movements - movementsCurrentLoop = 0; - - // top down - for (int i = 1; i < grid.size(); i++) { - movementsCurrentLoop += moveToBarycenter(i); - } - // bottom up - for (int i = grid.size() - 1; i >= 0; i--) { - movementsCurrentLoop += moveToBarycenter(i); - } - } - } - - - private int moveToBarycenter(int level) { - // Counter for the movements - int movements = 0; - - // Get the current level - final List cells = grid.get(level); - - for (int currentIndexInTheLevel = 0; currentIndexInTheLevel < cells.size(); currentIndexInTheLevel++) { - final Cell cell = cells.get(currentIndexInTheLevel); - - float gridPositionsSum = 0; - float countNodes = 0; - - for (VariantGraph.Vertex neighbor : neighborsOf(cell.vertex)) { - Cell neighborCell = vertexToCell.get(neighbor); - if (neighborCell.y != level) { - gridPositionsSum += neighborCell.x; - countNodes++; - } - } - - if (countNodes > 0) { - float tmp = (gridPositionsSum / countNodes); - int newGridPosition = Math.round(tmp); - boolean toRight = (newGridPosition > cell.x); - - boolean moved = true; - - while (newGridPosition != cell.x && moved) { - moved = move(toRight, cells, currentIndexInTheLevel, cell.priority); - if (moved) { - movements++; - } - } - } - } - return movements; - } - - private boolean move(boolean toRight, List currentLevel, int currentIndexInTheLevel, int currentPriority) { - final Cell cell = currentLevel.get(currentIndexInTheLevel); - - boolean moved; - int neighborIndexInTheLevel = currentIndexInTheLevel + (toRight ? 1 : -1); - int newX = cell.x + (toRight ? 
1 : -1); - - if (0 > newX || newX >= maxX) { - return false; - } - - // if the node is the first or the last we can move - if (toRight && currentIndexInTheLevel == currentLevel.size() - 1 || !toRight && currentIndexInTheLevel == 0) { - moved = true; - } else { - // else get the neighbor and ask his gridposition - // if he has the requested new grid position - // check the priority - final Cell neighborCell = currentLevel.get(neighborIndexInTheLevel); - if (neighborCell.x == newX) { - if (neighborCell.priority >= currentPriority) { - return false; - } else { - moved = move(toRight, currentLevel, neighborIndexInTheLevel, currentPriority); - } - } else { - moved = true; - } - } - - if (moved) { - cell.x = newX; - } - return moved; - } - - private List neighborsOf(VariantGraph.Vertex vertex) { - final List neighbors = new LinkedList<>(); - for (VariantGraph.Edge outgoing : vertex.outgoing()) { - neighbors.add(outgoing.to()); - } - for (VariantGraph.Edge incoming : vertex.incoming()) { - neighbors.add(incoming.from()); - } - return neighbors; - } - - private final Map vertexToCell = new HashMap<>(); - - class Cell implements Comparable { - /** - * sum value for edge Crosses - */ - double totalWeight = 0; - - /** - * counter for additions to the edgeCrossesIndicator - */ - int additions = 0; - - /** - * current position in the grid - */ - int x = 0; - - /** - * the vertical level where the cell wrapper is inserted - */ - int y; - - /** - * priority for movements to the barycenter - */ - int priority = 0; - - /** - * reference to the wrapped cell - */ - VariantGraph.Vertex vertex = null; - - - Cell(int y, double weight, VariantGraph.Vertex vertex) { - this.y = y; - this.vertex = vertex; - vertexToCell.put(vertex, this); - addWeight(weight); - } - - /** - * Returns the average value for the edge crosses indicator for the cell - */ - double avgWeight() { - return (totalWeight / additions); - } - - /** - * Adds a value to the edge crosses indicator for the cell - */ - void 
addWeight(double weight) { - this.totalWeight += weight; - this.additions++; - } - - public int compareTo(Cell other) { - final double result = (other.avgWeight() - this.avgWeight()); - if (result < 0) { - return -1; - } else if (result > 0) { - return 1; - } else { - return 0; - } - } - } -} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java deleted file mode 100644 index 15c058a3b..000000000 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphLayoutAdapter.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.lab; - -import edu.uci.ics.jung.algorithms.layout.AbstractLayout; -import eu.interedition.collatex.VariantGraph; - -import java.util.List; - -/** - * Arranges the nodes with the Sugiyama Layout Algorithm. - *

        - * Link to the algorithm - *

        - * Originally, source was posted to the Jung2 forum, for Jung 1.x. Not sure where the original - * code came from, but ti didn;t work for Jung2, but it was not that complicated, so I pounded it - * into shape for Jung2, complete with generics and such. Lays out either top-down to left-right. - *

        - * Seems to work. Paramterize with spacing and orientation. - *

        - * - * @author C. Schanck (chris at schanck dot net) - */ -public class VariantGraphLayoutAdapter extends AbstractLayout { - - public static enum Orientation { - TOP, LEFT - } - - private final Orientation orientation; - private final int horzSpacing; - private final int vertSpacing; - - private boolean executed = false; - - public VariantGraphLayoutAdapter(VariantGraph g, Orientation orientation, int horzSpacing, int vertSpacing) { - super(g); - this.orientation = orientation; - this.horzSpacing = horzSpacing; - this.vertSpacing = vertSpacing; - } - - public void initialize() { - if (!executed) { - for (List level : VariantGraphLayout.of((VariantGraph) getGraph())) { - for (VariantGraphLayout.Cell cell : level) { - - if (orientation.equals(Orientation.TOP)) { - double xCoordinate = 10.0 + (cell.x * horzSpacing); - double yCoordinate = 10.0 + (cell.y * vertSpacing); - setLocation(cell.vertex, xCoordinate, yCoordinate); - } else { - double yCoordinate = 10.0 + (cell.x * vertSpacing); - double xCoordinate = 10.0 + (cell.y * horzSpacing); - setLocation(cell.vertex, xCoordinate, yCoordinate); - } - } - } - executed = true; - } - - } - - public void reset() { - executed = false; - } - - public String toString() { - return "Jung Sugiyama"; - } - -} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java deleted file mode 100644 index f791d15e9..000000000 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/VariantGraphPanel.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.lab; - -import edu.uci.ics.jung.algorithms.layout.StaticLayout; -import edu.uci.ics.jung.visualization.RenderContext; -import edu.uci.ics.jung.visualization.VisualizationViewer; -import edu.uci.ics.jung.visualization.control.DefaultModalGraphMouse; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.util.VariantGraphRanking; - -import java.awt.Color; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; - -/** - * @author Gregor Middell - */ -public class VariantGraphPanel extends VisualizationViewer { - - private VariantGraph variantGraph; - private VariantGraphRanking ranking; - private Map transpositionColors; - - public VariantGraphPanel(VariantGraph vg) { - super(new StaticLayout<>(new VariantGraph())); - - setBackground(Color.WHITE); - setGraphMouse(new DefaultModalGraphMouse()); - - final RenderContext rc = getRenderContext(); - rc.setVertexLabelTransformer(variantGraphVertexModel -> String.format("%s (%d)", - variantGraphVertexModel.tokens().stream() - .collect(Collectors.groupingBy(Token::getWitness)).entrySet().stream() - .sorted(Comparator.comparing(e -> e.getKey().getSigil())) - .map(e -> String.format("[%s: %s]", e.getKey().getSigil(), e.getValue().stream() - .map(t -> (SimpleToken) t) - .sorted() - .map(SimpleToken::getContent) - .collect(Collectors.joining(" ")))) - .collect(Collectors.joining("\n")), - 
Optional.ofNullable(ranking.apply(variantGraphVertexModel)).orElse(0)) - ); - rc.setEdgeLabelTransformer(variantGraphEdgeModel -> variantGraphEdgeModel.witnesses().stream().map(Witness::getSigil).collect(Collectors.joining(", "))); - rc.setVertexFillPaintTransformer(v -> v.tokens().isEmpty() ? Color.BLACK : v.transpositions().stream().findFirst().map(transpositionColors::get).orElse(Color.WHITE)); - rc.setEdgeStrokeTransformer(variantGraphEdgeModel -> variantGraphEdgeModel.witnesses().isEmpty() ? CollateXLaboratory.DASHED_STROKE : CollateXLaboratory.SOLID_STROKE); - rc.setEdgeDrawPaintTransformer(jungVariantGraphEdge -> Color.GRAY); - - setVariantGraph(vg); - } - - public void setVariantGraph(VariantGraph variantGraph) { - this.variantGraph = variantGraph; - this.ranking = VariantGraphRanking.of(variantGraph); - - this.transpositionColors = new HashMap<>(); - int tc = 0; - for (VariantGraph.Transposition transposition : variantGraph.transpositions()) { - this.transpositionColors.put(transposition, KELLY_MAX_CONTRAST_COLORS[tc++ % KELLY_MAX_CONTRAST_COLORS.length]); - } - setGraphLayout(new VariantGraphLayoutAdapter(variantGraph, VariantGraphLayoutAdapter.Orientation.LEFT, 300, 150)); - revalidate(); - } - - private static Color[] KELLY_MAX_CONTRAST_COLORS = new Color[]{ - new Color(0xFFFFB300), //Vivid Yellow - new Color(0xFF803E75), //Strong Purple - new Color(0xFFFF6800), //Vivid Orange - new Color(0xFFA6BDD7), //Very Light Blue - new Color(0xFFC10020), //Vivid Red - new Color(0xFFCEA262), //Grayish Yellow - new Color(0xFF817066), //Medium Gray - - //The following will not be good for people with defective color vision - new Color(0xFF007D34), //Vivid Green - new Color(0xFFF6768E), //Strong Purplish Pink - new Color(0xFF00538A), //Strong Blue - new Color(0xFFFF7A5C), //Strong Yellowish Pink - new Color(0xFF53377A), //Strong Violet - new Color(0xFFFF8E00), //Vivid Orange Yellow - new Color(0xFFB32851), //Strong Purplish Red - new Color(0xFFF4C800), //Vivid 
Greenish Yellow - new Color(0xFF7F180D), //Strong Reddish Brown - new Color(0xFF93AA00), //Vivid Yellowish Green - new Color(0xFF593315), //Deep Yellowish Brown - new Color(0xFFF13A13), //Vivid Reddish Orange - new Color(0xFF232C16), //Dark Olive Green - }; -} diff --git a/pom.xml b/pom.xml index c45b0e963..17c8d81e5 100644 --- a/pom.xml +++ b/pom.xml @@ -86,12 +86,6 @@ 2.3.17 - - commons-lang - commons-lang - 2.6 - - net.sf.jung jung-graph-impl @@ -102,13 +96,6 @@ jung-visualization ${jung.version} - - - org.mockito - mockito-all - 1.9.0-rc1 - test - From b40b1b97b1a3cf581167aa55a78112ae02322f12 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sun, 8 Feb 2015 12:01:45 +0100 Subject: [PATCH 33/52] Remove support for empty witnesses in VG --- .../eu/interedition/collatex/VariantGraph.java | 6 ++---- .../collatex/simple/SimpleCollation.java | 6 +----- .../eu/interedition/collatex/VariantGraphTest.java | 14 +++++--------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index b553c2a6e..c6acc559d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -113,10 +113,6 @@ public Edge connect(Vertex from, Vertex to, Set witnesses) { return edge; } - public Edge register(Witness witness) { - return connect(start, end, Collections.singleton(witness)); - } - public Transposition transpose(Set vertices) { if (vertices.isEmpty()) { throw new IllegalArgumentException(); @@ -198,6 +194,8 @@ public String toString() { public static class Vertex { private final VariantGraph graph; private final Set tokens; + private final Map> outgoing = new HashMap<>(); + private final Map> incoming = new HashMap<>(); public Vertex(VariantGraph graph, Set tokens) { this.graph = graph; diff --git 
a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java index fcfdd6b12..6feff87fb 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java @@ -50,11 +50,7 @@ public boolean isJoined() { public VariantGraph collate(VariantGraph graph) { for (SimpleWitness witness : witnesses) { - if (witness.getTokens().isEmpty()) { - graph.register(witness); - } else { - algorithm.collate(graph, witness); - } + algorithm.collate(graph, witness); } if (joined) { VariantGraph.JOIN.apply(graph); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java index 76b46aab3..d9a3dd0f4 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java @@ -42,8 +42,7 @@ public class VariantGraphTest extends AbstractTest { public void emptyGraph() { final VariantGraph graph = collate(createWitnesses()); assertEquals(0, graph.witnesses().size()); - assertEquals(2, StreamSupport.stream(graph.vertices().spliterator(), false).count()); - assertEquals(1, StreamSupport.stream(graph.edges().spliterator(), false).count()); + assetGraphSize(graph, 2, 1); } @Test @@ -79,8 +78,7 @@ public void oneWitness() { final SimpleWitness[] w = createWitnesses("only one witness"); final VariantGraph graph = collate(w); - assertEquals(5, StreamSupport.stream(graph.vertices().spliterator(), false).count()); - assertEquals(4, StreamSupport.stream(graph.edges().spliterator(), false).count()); + assetGraphSize(graph, 5, 4); final VariantGraph.Vertex firstVertex = vertexWith(graph, "only", w[0]); final VariantGraph.Vertex secondVertex = vertexWith(graph, "one", w[0]); @@ -113,7 +111,7 
@@ public void getPathForWitness() { @Test public void transpositions1() { final VariantGraph graph = collate("the nice black and white cat", "the friendly white and black cat"); - assertEquals(12, StreamSupport.stream(graph.edges().spliterator(), false).count()); + assertGraphEdges(graph, 12); } @Test @@ -127,8 +125,7 @@ public void transpositions2() { edge = edgeBetween(vertexWith(graph, "red", w[1]), vertexWith(graph, "cat", w[1])); assertHasWitnesses(edge, w[1], w[2]); - assertEquals(17, StreamSupport.stream(graph.vertices().spliterator(), false).count()); // start and end vertices included - assertEquals(20, StreamSupport.stream(graph.edges().spliterator(), false).count()); + assetGraphSize(graph, 17, 20); } @Test @@ -136,8 +133,7 @@ public void joinTwoIdenticalWitnesses() { final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); - assertEquals(3, StreamSupport.stream(graph.vertices().spliterator(), false).count()); - assertEquals(2, StreamSupport.stream(graph.edges().spliterator(), false).count()); + assetGraphSize(graph, 3, 2); final VariantGraph.Vertex joinedVertex = vertexWith(graph, "the black cat", w[0]); From 8924562f5b594a4ef2e2c7e2f950c4e514048384 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sun, 8 Feb 2015 12:44:11 +0100 Subject: [PATCH 34/52] Variant Graph: push filtered traversal of graph to caller --- .../interedition/collatex/VariantGraph.java | 52 +++---------------- .../collatex/dekker/PhraseMatchDetector.java | 2 +- .../dekker/TranspositionDetector.java | 2 +- .../simple/SimpleVariantGraphSerializer.java | 34 ++++++------ .../collatex/util/VariantGraphRanking.java | 29 ++++------- .../collatex/util/VariantGraphTraversal.java | 34 +++--------- .../interedition/collatex/AbstractTest.java | 19 ++++--- .../collatex/VariantGraphTest.java | 6 +-- 8 files changed, 60 insertions(+), 118 deletions(-) diff --git 
a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index c6acc559d..f4029e61e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -24,7 +24,6 @@ import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Deque; @@ -65,19 +64,7 @@ public Set transpositions() { } public Iterable vertices() { - return vertices(null); - } - - public Iterable vertices(Set witnesses) { - return VariantGraphTraversal.of(this, witnesses); - } - - public Iterable edges() { - return edges(null); - } - - public Iterable edges(Set witnesses) { - return VariantGraphTraversal.of(this, witnesses).edges(); + return VariantGraphTraversal.of(this); } public Vertex add(Token token) { @@ -92,7 +79,7 @@ public Edge connect(Vertex from, Vertex to, Set witnesses) { } if (from.equals(start)) { - final Edge startEndEdge = edgeBetween(start, end); + final Edge startEndEdge = findEdge(start, end); if (startEndEdge != null) { if (to.equals(end)) { witnesses = new HashSet<>(witnesses); @@ -125,10 +112,6 @@ public Transposition transpose(Set vertices) { return new VariantGraph.Transposition(this, vertices); } - public Edge edgeBetween(Vertex a, Vertex b) { - return findEdge(a, b); - } - public Set witnesses() { Set witnesses = new HashSet<>(); for (Edge edge : start.outgoing()) { @@ -203,19 +186,11 @@ public Vertex(VariantGraph graph, Set tokens) { } public Collection incoming() { - return incoming(null); - } - - public Collection incoming(final Set witnesses) { - return paths(graph.getInEdges(this), witnesses); + return graph.getInEdges(this); } public Collection outgoing() { - return outgoing(null); - } - - public Collection outgoing(Set witnesses) { - return paths(graph.getOutEdges(this), witnesses); + 
return graph.getOutEdges(this); } public Collection transpositions() { @@ -223,19 +198,11 @@ public Collection transpositions() { } public Set tokens() { - return tokens(null); - } - - public Set tokens(final Set witnesses) { - return Collections.unmodifiableSet(witnesses == null ? tokens :tokens.stream().filter(t -> witnesses.contains(t.getWitness())).collect(Collectors.toSet())); + return tokens; } public Set witnesses() { - final Set witnesses = new HashSet<>(); - for (VariantGraph.Edge edge : incoming()) { - witnesses.addAll(edge.witnesses()); - } - return witnesses; + return incoming().stream().map(Edge::witnesses).flatMap(Set::stream).collect(Collectors.toSet()); } public void add(Iterable tokens) { @@ -253,13 +220,6 @@ public void delete() { public String toString() { return tokens.toString(); } - - protected static Collection paths(final Collection edges, final Set witnesses) { - if (witnesses == null) { - return edges; - } - return Arrays.asList(edges.stream().filter(edge -> edge.witnesses().stream().anyMatch(witnesses::contains)).toArray(Edge[]::new)); - } } /** diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java index 39856ac47..df109ffaf 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java @@ -52,7 +52,7 @@ public List> detect(Map linkedTokens, Va // - there may not be a longer path between previous and base vertex boolean sameTranspositions = new HashSet<>(previous.transpositions()).equals(new HashSet<>(baseVertex.transpositions())); boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses()); - boolean directedEdge = (base.edgeBetween(previous, baseVertex) != null); + boolean directedEdge = previous.outgoing().stream().filter(e -> 
baseVertex.equals(e.to())).findFirst().isPresent(); boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (previous.outgoing().size() == 1 || baseVertex.incoming().size() == 1); if (!isNear) { addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java index 8fb5f6a57..8906614b2 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java @@ -167,7 +167,7 @@ private VariantGraphRanking rankTheGraph(List> phraseMatches, Varian for (List phraseMatch : phraseMatches) { matchedVertices.add(phraseMatch.get(0).vertex); } - final VariantGraphRanking ranking = VariantGraphRanking.ofOnlyCertainVertices(base, null, matchedVertices); + final VariantGraphRanking ranking = VariantGraphRanking.ofOnlyCertainVertices(base, matchedVertices); return ranking; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index 4baaacaf6..6de210975 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.io.PrintWriter; import java.io.Writer; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -65,7 +66,6 @@ public class SimpleVariantGraphSerializer { private final VariantGraph graph; private final Function, String> tokensToString; private final Map vertexIds = new HashMap<>(); - private final Map transpositionIds = new HashMap<>(); 
private VariantGraphRanking ranking; public SimpleVariantGraphSerializer(VariantGraph graph) { @@ -217,10 +217,12 @@ public void toDot(Writer writer) { out.println(";"); } - for (VariantGraph.Edge e : graph.edges()) { - out.print(indent + id(e.from()) + connector + id(e.to())); - out.print(" [label = \"" + toDotLabel(e) + "\"]"); - out.println(";"); + for (VariantGraph.Vertex v : graph.vertices()) { + for (VariantGraph.Edge e : v.outgoing()) { + out.print(indent + id(e.from()) + connector + id(e.to())); + out.print(" [label = \"" + toDotLabel(e) + "\"]"); + out.println(";"); + } } for (Tuple transposedTuple : transposedTuples()) { @@ -333,15 +335,17 @@ public void toGraphML(XMLStreamWriter xml) throws XMLStreamException { } int edgeNumber = 0; - for (VariantGraph.Edge edge : graph.edges()) { - xml.writeStartElement(GRAPHML_NS, EDGE_TAG); - xml.writeAttribute(ID_ATT, "e" + edgeNumber); - xml.writeAttribute(SOURCE_ATT, "n" + numericId(edge.from())); - xml.writeAttribute(TARGET_ATT, "n" + numericId(edge.to())); - GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); - GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_PATH, xml); - GraphMLProperty.EDGE_WITNESSES.write(Witness.TO_SIGILS.apply(edge), xml); - xml.writeEndElement(); + for (VariantGraph.Vertex v : graph.vertices()) { + for (VariantGraph.Edge edge : v.outgoing()) { + xml.writeStartElement(GRAPHML_NS, EDGE_TAG); + xml.writeAttribute(ID_ATT, "e" + edgeNumber); + xml.writeAttribute(SOURCE_ATT, "n" + numericId(edge.from())); + xml.writeAttribute(TARGET_ATT, "n" + numericId(edge.to())); + GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); + GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_PATH, xml); + GraphMLProperty.EDGE_WITNESSES.write(Witness.TO_SIGILS.apply(edge), xml); + xml.writeEndElement(); + } } for (Tuple transposedTuple : transposedTuples()) { @@ -427,7 +431,7 @@ public void declare(XMLStreamWriter xml) throws XMLStreamException { @Override public String 
apply(VariantGraph.Vertex input) { return input.witnesses().stream().findFirst() - .map(witness -> tokensToString.apply(input.tokens(Collections.singleton(witness)))) + .map(witness -> tokensToString.apply(Arrays.asList(input.tokens().stream().filter(t -> witness.equals(t.getWitness())).toArray(Token[]::new)))) .orElse(""); } }; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java index ff8f4e839..77191b762 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java @@ -31,7 +31,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -47,22 +46,16 @@ public class VariantGraphRanking implements Iterable>, private final Map byVertex = new HashMap<>(); private final SortedMap> byRank = new TreeMap<>(); private final VariantGraph graph; - private final Set witnesses; - VariantGraphRanking(VariantGraph graph, Set witnesses) { + VariantGraphRanking(VariantGraph graph) { this.graph = graph; - this.witnesses = witnesses; } public static VariantGraphRanking of(VariantGraph graph) { - return of(graph, null); - } - - public static VariantGraphRanking of(VariantGraph graph, Set witnesses) { - final VariantGraphRanking ranking = new VariantGraphRanking(graph, witnesses); - for (VariantGraph.Vertex v : graph.vertices(witnesses)) { + final VariantGraphRanking ranking = new VariantGraphRanking(graph); + for (VariantGraph.Vertex v : graph.vertices()) { int rank = -1; - for (VariantGraph.Edge e : v.incoming(witnesses)) { + for (VariantGraph.Edge e : v.incoming()) { rank = Math.max(rank, ranking.byVertex.get(e.from())); } rank++; @@ -72,11 +65,11 @@ public static VariantGraphRanking of(VariantGraph graph, Set 
witnesses) return ranking; } - public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set witnesses, Set vertices) { - final VariantGraphRanking ranking = new VariantGraphRanking(graph, witnesses); - for (VariantGraph.Vertex v : graph.vertices(witnesses)) { + public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set vertices) { + final VariantGraphRanking ranking = new VariantGraphRanking(graph); + for (VariantGraph.Vertex v : graph.vertices()) { int rank = -1; - for (VariantGraph.Edge e : v.incoming(witnesses)) { + for (VariantGraph.Edge e : v.incoming()) { rank = Math.max(rank, ranking.byVertex.get(e.from())); } if (vertices.contains(v)) { @@ -89,7 +82,7 @@ public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set< } public Set witnesses() { - return Optional.ofNullable(witnesses).orElse(graph.witnesses()); + return graph.witnesses(); } public Map getByVertex() { @@ -111,10 +104,10 @@ public Iterator> iterator() { public List>> asTable() { return byRank.values().stream() - .filter(rank -> rank.stream().flatMap(v -> v.tokens(witnesses).stream()).findFirst().isPresent()) + .filter(rank -> rank.stream().anyMatch(v -> !v.tokens().isEmpty())) .map(vertices -> { final SortedMap> row = new TreeMap<>(Witness.SIGIL_COMPARATOR); - vertices.stream().flatMap(v -> v.tokens(witnesses).stream()).forEach(token -> row.computeIfAbsent(token.getWitness(), w -> new HashSet<>()).add(token)); + vertices.stream().flatMap(v -> v.tokens().stream()).forEach(token -> row.computeIfAbsent(token.getWitness(), w -> new HashSet<>()).add(token)); return row; }) .collect(Collectors.toList()); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java index d9cc289fd..fe82bffcd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java +++ 
b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java @@ -54,7 +54,7 @@ public static VariantGraphTraversal of(VariantGraph graph) { public Iterator iterator() { return new Iterator() { - private final Map encountered = new HashMap<>(); + private final Map encountered = new HashMap<>(); private final Queue queue = new ArrayDeque<>(); private Optional next = Optional.of(graph.getStart()); @@ -66,11 +66,14 @@ public boolean hasNext() { @Override public VariantGraph.Vertex next() { final VariantGraph.Vertex next = this.next.get(); - for (VariantGraph.Edge edge : next.outgoing(witnesses)) { + for (VariantGraph.Edge edge : next.outgoing()) { + if (witnesses != null && !edge.witnesses().stream().anyMatch(witnesses::contains)) { + continue; + } final VariantGraph.Vertex end = edge.to(); - final int endEncountered = Optional.ofNullable(encountered.get(end)).orElse(0); - final int endIncoming = end.incoming(witnesses).size(); + final long endEncountered = Optional.ofNullable(encountered.get(end)).orElse(0L); + final long endIncoming = end.incoming().stream().filter(e -> witnesses == null || e.witnesses().stream().anyMatch(witnesses::contains)).count(); if (endIncoming == endEncountered) { throw new IllegalStateException(String.format("Encountered cycle traversing %s to %s", edge, end)); @@ -85,27 +88,4 @@ public VariantGraph.Vertex next() { } }; } - - public Iterable edges() { - return () -> new Iterator() { - - private final Iterator vertexIt = VariantGraphTraversal.this.iterator(); - private final Queue queue = new ArrayDeque<>(); - - @Override - public boolean hasNext() { - if (queue.isEmpty()) { - if (vertexIt.hasNext()) { - vertexIt.next().outgoing(witnesses).forEach(queue::add); - } - } - return !queue.isEmpty(); - } - - @Override - public VariantGraph.Edge next() { - return queue.remove(); - } - }; - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java 
b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 49fe5929b..826c0fe97 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -25,15 +25,18 @@ import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleWitness; import eu.interedition.collatex.util.VariantGraphRanking; +import eu.interedition.collatex.util.VariantGraphTraversal; import org.junit.Assert; import org.junit.Before; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -89,14 +92,16 @@ protected static List>> table(VariantGraph graph) } protected static SortedSet extractPhrases(SortedSet phrases, VariantGraph graph, Witness witness) { - for (VariantGraph.Vertex v : graph.vertices(Collections.singleton(witness))) { + for (VariantGraph.Vertex v : VariantGraphTraversal.of(graph, Collections.singleton(witness))) { phrases.add(toString(v, witness)); } return phrases; } protected static String toString(VariantGraph.Vertex vertex, Witness... 
witnesses) { - return vertex.tokens(new HashSet<>(Arrays.asList(witnesses))).stream() + final Set witnessSet = new HashSet<>(Arrays.asList(witnesses)); + return vertex.tokens().stream() + .filter(t -> witnessSet.contains(t.getWitness())) .collect(Collectors.groupingBy(Token::getWitness)).entrySet().stream() .sorted(Comparator.comparing(e -> e.getKey().getSigil())) .map(Map.Entry::getValue) @@ -114,7 +119,7 @@ protected static void assertGraphVertices(VariantGraph graph, int vertices) { } protected static void assertGraphEdges(VariantGraph graph, int edges) { - assertEquals(edges, StreamSupport.stream(graph.edges().spliterator(), false).count()); + assertEquals(edges, StreamSupport.stream(graph.vertices().spliterator(), false).map(VariantGraph.Vertex::outgoing).flatMap(Collection::stream).count()); } protected static void assetGraphSize(VariantGraph graph, int vertices, int edges) { assertGraphVertices(graph, vertices); @@ -126,9 +131,9 @@ protected static void assertHasWitnesses(VariantGraph.Edge edge, Witness... 
witn } protected static VariantGraph.Edge edgeBetween(VariantGraph.Vertex start, VariantGraph.Vertex end) { - final VariantGraph.Edge edge = start.graph().edgeBetween(start, end); - Assert.assertNotNull(String.format("No edge between %s and %s", start, end), edge); - return edge; + final Optional edge = start.outgoing().stream().filter(e -> end.equals(e.to())).findFirst(); + Assert.assertTrue(String.format("No edge between %s and %s", start, end), edge.isPresent()); + return edge.get(); } protected static void assertVertexEquals(String expected, VariantGraph.Vertex vertex) { @@ -144,7 +149,7 @@ protected static void assertVertexHasContent(VariantGraph.Vertex vertex, String } protected static VariantGraph.Vertex vertexWith(VariantGraph graph, String content, Witness in) { - for (VariantGraph.Vertex v : graph.vertices(Collections.singleton(in))) { + for (VariantGraph.Vertex v : VariantGraphTraversal.of(graph, Collections.singleton(in))) { if (content.equals(toString(v, in))) { return v; } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java index d9a3dd0f4..bcd23725a 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java @@ -21,6 +21,7 @@ import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; +import eu.interedition.collatex.util.VariantGraphTraversal; import org.junit.Assert; import org.junit.Test; @@ -63,7 +64,7 @@ public void reconnectingVerticesYieldsSameEdge() { public void getTokens() { final SimpleWitness[] w = createWitnesses("a b c d"); final VariantGraph graph = collate(w); - final List vertices = StreamSupport.stream(graph.vertices(new HashSet<>(Arrays.asList(w))).spliterator(), false).collect(Collectors.toList()); + final List vertices = 
StreamSupport.stream(VariantGraphTraversal.of(graph).spliterator(), false).collect(Collectors.toList()); assertEquals(6, vertices.size()); assertEquals(graph.getStart(), vertices.get(0)); assertVertexEquals("a", vertices.get(1)); @@ -94,8 +95,7 @@ public void oneWitness() { public void getPathForWitness() { final SimpleWitness[] w = createWitnesses("a b c d e f ", "x y z d e", "a b x y z"); final VariantGraph graph = collate(w); - final Set witnessSet = Collections. singleton(w[0]); - final List path = StreamSupport.stream(graph.vertices(witnessSet).spliterator(), false).collect(Collectors.toList()); + final List path = StreamSupport.stream(VariantGraphTraversal.of(graph, Collections.singleton(w[0])).spliterator(), false).collect(Collectors.toList()); assertEquals(8, path.size()); assertEquals(graph.getStart(), path.get(0)); From 6bc6237fc4c29df5d47c64e6138f97e846f3605b Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Tue, 10 Feb 2015 20:56:11 +0100 Subject: [PATCH 35/52] Remove some JUNG usages --- .../interedition/collatex/VariantGraph.java | 221 +++++------------- .../eu/interedition/collatex/Witness.java | 2 - .../collatex/dekker/PhraseMatchDetector.java | 2 +- .../simple/SimpleVariantGraphSerializer.java | 20 +- .../util/ParallelSegmentationApparatus.java | 2 +- .../collatex/util/VariantGraphRanking.java | 8 +- .../collatex/util/VariantGraphTraversal.java | 8 +- .../interedition/collatex/AbstractTest.java | 11 +- .../collatex/VariantGraphTest.java | 24 +- .../dekker/TranspositionGraphTest.java | 9 +- .../collatex/dekker/matrix/HermansTest.java | 20 +- 11 files changed, 98 insertions(+), 229 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index f4029e61e..94f5b88da 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -19,19 
+19,16 @@ package eu.interedition.collatex; -import edu.uci.ics.jung.graph.DirectedSparseGraph; import eu.interedition.collatex.util.VariantGraphTraversal; import java.util.ArrayDeque; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; -import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -39,16 +36,18 @@ /** * @author Gregor Middell */ -public class VariantGraph extends DirectedSparseGraph { +public class VariantGraph { final VariantGraph.Vertex start; final VariantGraph.Vertex end; - final Map> transpositionIndex = new HashMap<>(); + final Map>> transpositionIndex = new HashMap<>(); public VariantGraph() { super(); - addVertex(this.start = new VariantGraph.Vertex(this, Collections.emptySet())); - addVertex(this.end = new VariantGraph.Vertex(this, Collections.emptySet())); - connect(this.start, this.end, Collections.emptySet()); + this.start = new VariantGraph.Vertex(this); + this.end = new VariantGraph.Vertex(this); + + this.start.outgoing.put(this.end, Collections.emptySet()); + this.end.incoming.put(this.start, Collections.emptySet()); } public Vertex getStart() { @@ -59,7 +58,7 @@ public Vertex getEnd() { return end; } - public Set transpositions() { + public Set> transpositions() { return transpositionIndex.values().stream().flatMap(Set::stream).collect(Collectors.toSet()); } @@ -68,56 +67,44 @@ public Iterable vertices() { } public Vertex add(Token token) { - final VariantGraph.Vertex vertex = new VariantGraph.Vertex(this, Collections.singleton(token)); - addVertex(vertex); + final VariantGraph.Vertex vertex = new VariantGraph.Vertex(this); + vertex.tokens.add(token); return vertex; } - public Edge connect(Vertex from, Vertex to, Set witnesses) { + public void connect(Vertex from, Vertex to, Set 
witnesses) { if (from.equals(to)) { throw new IllegalArgumentException(); } - if (from.equals(start)) { - final Edge startEndEdge = findEdge(start, end); - if (startEndEdge != null) { - if (to.equals(end)) { - witnesses = new HashSet<>(witnesses); - witnesses.addAll(startEndEdge.witnesses()); - } - startEndEdge.delete(); - } - } - - for (Edge e : from.outgoing()) { - if (to.equals(e.to())) { - return e.add(witnesses); - } - } + witnesses = new HashSet<>(witnesses); + Optional.ofNullable(from.outgoing.remove(to)).ifPresent(witnesses::addAll); + + from.outgoing.put(to, witnesses); + to.incoming.put(from, witnesses); - final VariantGraph.Edge edge = new VariantGraph.Edge(this, witnesses); - addEdge(edge, from, to); - return edge; + start.outgoing.remove(end); + end.incoming.remove(start); } - public Transposition transpose(Set vertices) { + public Set transpose(Set vertices) { if (vertices.isEmpty()) { throw new IllegalArgumentException(); } - for (Transposition transposition : vertices.iterator().next().transpositions()) { - if (transposition.vertices.equals(vertices)) { + for (Set transposition : vertices.iterator().next().transpositions()) { + if (transposition.equals(vertices)) { return transposition; } } - return new VariantGraph.Transposition(this, vertices); + final Set t = new HashSet<>(vertices); + for (VariantGraph.Vertex vertex : t) { + transpositionIndex.computeIfAbsent(vertex, v -> new HashSet<>()).add(t); + } + return t; } public Set witnesses() { - Set witnesses = new HashSet<>(); - for (Edge edge : start.outgoing()) { - witnesses.addAll(edge.witnesses()); - } - return witnesses; + return start.outgoing().values().stream().flatMap(Collection::stream).collect(Collectors.toSet()); } @Override @@ -126,74 +113,28 @@ public String toString() { } - /** - * @author Gregor Middell - */ - public static class Edge { - - final VariantGraph graph; - final Set witnesses; - - public Edge(VariantGraph graph, Set witnesses) { - this.graph = graph; - this.witnesses = 
new HashSet<>(witnesses); - } - - public VariantGraph.Edge add(Set witnesses) { - this.witnesses.addAll(witnesses); - return this; - } - - public Set witnesses() { - return Collections.unmodifiableSet(witnesses); - } - - public VariantGraph graph() { - return graph; - } - - public VariantGraph.Vertex from() { - return graph.getEndpoints(this).getFirst(); - } - - public VariantGraph.Vertex to() { - return graph.getEndpoints(this).getSecond(); - } - - public void delete() { - graph.removeEdge(this); - } - - @Override - public String toString() { - return witnesses.toString(); - } - - } - /** * @author Gregor Middell */ public static class Vertex { private final VariantGraph graph; - private final Set tokens; + private final Set tokens = new HashSet<>(); private final Map> outgoing = new HashMap<>(); private final Map> incoming = new HashMap<>(); - public Vertex(VariantGraph graph, Set tokens) { + public Vertex(VariantGraph graph) { this.graph = graph; - this.tokens = new HashSet<>(tokens); } - public Collection incoming() { - return graph.getInEdges(this); + public Map> incoming() { + return incoming; } - public Collection outgoing() { - return graph.getOutEdges(this); + public Map> outgoing() { + return outgoing; } - public Collection transpositions() { + public Set> transpositions() { return graph.transpositionIndex.getOrDefault(this, Collections.emptySet()); } @@ -202,7 +143,7 @@ public Set tokens() { } public Set witnesses() { - return incoming().stream().map(Edge::witnesses).flatMap(Set::stream).collect(Collectors.toSet()); + return incoming().values().stream().flatMap(Set::stream).collect(Collectors.toSet()); } public void add(Iterable tokens) { @@ -213,98 +154,50 @@ public VariantGraph graph() { return graph; } - public void delete() { - graph.removeVertex(this); - } - public String toString() { return tokens.toString(); } } - /** - * @author Gregor Middell - */ - public static class Transposition implements Iterable { - private final VariantGraph graph; - 
private final Set vertices; - - public Transposition(VariantGraph graph, Set vertices) { - this.graph = graph; - this.vertices = new HashSet<>(vertices); - for (VariantGraph.Vertex vertex : this.vertices) { - graph.transpositionIndex.computeIfAbsent(vertex, v -> new HashSet<>()).add(this); - } - } - - public void delete() { - for (VariantGraph.Vertex vertex : this.vertices) { - graph.transpositionIndex.getOrDefault(vertex, Collections.emptySet()).remove(this); - } - } - - @Override - public Iterator iterator() { - return vertices.iterator(); - } - - @Override - public String toString() { - return vertices.toString(); - } - } - public static final Function JOIN = graph -> { final Set processed = new HashSet<>(); - - final Vertex end1 = graph.getEnd(); - final Deque queue = new ArrayDeque<>(); - for (Edge startingEdges : graph.getStart().outgoing()) { - queue.push(startingEdges.to()); - } + final Deque queue = new ArrayDeque<>(graph.start.outgoing.keySet()); while (!queue.isEmpty()) { final Vertex vertex = queue.pop(); - final Set transpositions = new HashSet<>(vertex.transpositions()); - final List outgoingEdges = new ArrayList<>(vertex.outgoing()); - if (outgoingEdges.size() == 1) { - final Edge joinCandidateEdge = outgoingEdges.get(0); - final Vertex joinCandidateVertex = joinCandidateEdge.to(); - final Set joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); - - boolean canJoin = !end1.equals(joinCandidateVertex) && // - joinCandidateVertex.incoming().size() == 1 && // + final Set> transpositions = new HashSet<>(vertex.transpositions()); + if (vertex.outgoing.size() == 1) { + final Vertex joinCandidateVertex = vertex.outgoing.keySet().iterator().next(); + final Set> joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); + + boolean canJoin = !graph.end.equals(joinCandidateVertex) && // + joinCandidateVertex.incoming.size() == 1 && // transpositions.equals(joinCandidateTranspositions); if (canJoin) { 
vertex.add(joinCandidateVertex.tokens()); - for (Transposition t : new HashSet<>(joinCandidateVertex.transpositions())) { - final Set transposed = new HashSet<>(t.vertices); + for (Set t : new HashSet<>(joinCandidateVertex.transpositions())) { + final Set transposed = new HashSet<>(t); transposed.remove(joinCandidateVertex); transposed.add(vertex); - t.delete(); + for (Vertex tv : t) { + graph.transpositionIndex.getOrDefault(tv, Collections.emptySet()).remove(t); + } graph.transpose(transposed); } - for (Edge e : new ArrayList<>(joinCandidateVertex.outgoing())) { - final Vertex to = e.to(); - final Set witnesses = e.witnesses(); - e.delete(); - graph.connect(vertex, to, witnesses); - } - joinCandidateEdge.delete(); - joinCandidateVertex.delete(); + + vertex.outgoing.clear(); + vertex.outgoing.putAll(joinCandidateVertex.outgoing); + + vertex.outgoing.keySet().forEach(v -> v.incoming.put(vertex, v.incoming.remove(joinCandidateVertex))); + queue.push(vertex); continue; } } + // FIXME: Why do we run out of memory in some cases here, if this is not checked? processed.add(vertex); - for (Edge e : outgoingEdges) { - final Vertex next = e.to(); - // FIXME: Why do we run out of memory in some cases here, if this is not checked? 
- if (!processed.contains(next)) { - queue.push(next); - } - } + vertex.outgoing.keySet().stream().filter(v -> !processed.contains(v)).forEach(queue::push); } return graph; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java index 5f95a3cab..c0d307846 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java @@ -34,6 +34,4 @@ public interface Witness { String getSigil(); final Comparator SIGIL_COMPARATOR = Comparator.comparing(Witness::getSigil); - - final Function TO_SIGILS = input -> input.witnesses().stream().sorted(SIGIL_COMPARATOR).map(Object::toString).collect(Collectors.joining(", ")); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java index df109ffaf..5ea3adb37 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java @@ -52,7 +52,7 @@ public List> detect(Map linkedTokens, Va // - there may not be a longer path between previous and base vertex boolean sameTranspositions = new HashSet<>(previous.transpositions()).equals(new HashSet<>(baseVertex.transpositions())); boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses()); - boolean directedEdge = previous.outgoing().stream().filter(e -> baseVertex.equals(e.to())).findFirst().isPresent(); + boolean directedEdge = previous.outgoing().containsKey(baseVertex); boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (previous.outgoing().size() == 1 || baseVertex.incoming().size() == 1); if (!isNear) { addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); diff --git 
a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index 6de210975..0d8b3e4ff 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -218,9 +218,9 @@ public void toDot(Writer writer) { } for (VariantGraph.Vertex v : graph.vertices()) { - for (VariantGraph.Edge e : v.outgoing()) { - out.print(indent + id(e.from()) + connector + id(e.to())); - out.print(" [label = \"" + toDotLabel(e) + "\"]"); + for (Map.Entry> e : v.outgoing().entrySet()) { + out.print(indent + id(v) + connector + id(e.getKey())); + out.print(" [label = \"" + toDotLabel(e.getValue()) + "\"]"); out.println(";"); } } @@ -255,8 +255,8 @@ private int numericId(VariantGraph.Vertex vertex) { return id; } - String toDotLabel(VariantGraph.Edge e) { - return escapeDotLabel(Witness.TO_SIGILS.apply(e)); + String toDotLabel(Set e) { + return escapeDotLabel(e.stream().map(Witness::getSigil).distinct().sorted().collect(Collectors.joining(", "))); } String toDotLabel(VariantGraph.Vertex v) { @@ -278,7 +278,7 @@ Set> transposedTuples() { final Set> tuples = new HashSet<>(); final Comparator vertexOrdering = ranking().comparator(); - for (VariantGraph.Transposition transposition : graph.transpositions()) { + for (Set transposition : graph.transpositions()) { final SortedMap> verticesByWitness = new TreeMap<>(Witness.SIGIL_COMPARATOR); for (VariantGraph.Vertex vertex : transposition) { for (Witness witness : vertex.witnesses()) { @@ -336,14 +336,14 @@ public void toGraphML(XMLStreamWriter xml) throws XMLStreamException { int edgeNumber = 0; for (VariantGraph.Vertex v : graph.vertices()) { - for (VariantGraph.Edge edge : v.outgoing()) { + for (Map.Entry> edge : v.outgoing().entrySet()) { xml.writeStartElement(GRAPHML_NS, 
EDGE_TAG); xml.writeAttribute(ID_ATT, "e" + edgeNumber); - xml.writeAttribute(SOURCE_ATT, "n" + numericId(edge.from())); - xml.writeAttribute(TARGET_ATT, "n" + numericId(edge.to())); + xml.writeAttribute(SOURCE_ATT, "n" + numericId(v)); + xml.writeAttribute(TARGET_ATT, "n" + numericId(edge.getKey())); GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_PATH, xml); - GraphMLProperty.EDGE_WITNESSES.write(Witness.TO_SIGILS.apply(edge), xml); + GraphMLProperty.EDGE_WITNESSES.write(edge.getValue().stream().map(Witness::getSigil).distinct().sorted().collect(Collectors.joining(", ")), xml); xml.writeEndElement(); } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java index 2be62ee28..62a7492f6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java @@ -68,7 +68,7 @@ public static void generate(VariantGraphRanking ranking, GeneratorCallback callb final SortedMap> verticesByTranspositionRank = new TreeMap<>(); for (VariantGraph.Vertex v : verticesOfRank) { int transpositionRank = 0; - for (VariantGraph.Transposition transposition : v.transpositions()) { + for (Set transposition : v.transpositions()) { for (VariantGraph.Vertex tv : transposition) { transpositionRank += (ranking.apply(tv).intValue() - rank); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java index 77191b762..75425870f 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java @@ -55,8 +55,8 @@ public 
static VariantGraphRanking of(VariantGraph graph) { final VariantGraphRanking ranking = new VariantGraphRanking(graph); for (VariantGraph.Vertex v : graph.vertices()) { int rank = -1; - for (VariantGraph.Edge e : v.incoming()) { - rank = Math.max(rank, ranking.byVertex.get(e.from())); + for (VariantGraph.Vertex incoming : v.incoming().keySet()) { + rank = Math.max(rank, ranking.byVertex.get(incoming)); } rank++; ranking.byVertex.put(v, rank); @@ -69,8 +69,8 @@ public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set< final VariantGraphRanking ranking = new VariantGraphRanking(graph); for (VariantGraph.Vertex v : graph.vertices()) { int rank = -1; - for (VariantGraph.Edge e : v.incoming()) { - rank = Math.max(rank, ranking.byVertex.get(e.from())); + for (VariantGraph.Vertex incoming : v.incoming().keySet()) { + rank = Math.max(rank, ranking.byVertex.get(incoming)); } if (vertices.contains(v)) { rank++; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java index fe82bffcd..e6051605d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java @@ -66,14 +66,14 @@ public boolean hasNext() { @Override public VariantGraph.Vertex next() { final VariantGraph.Vertex next = this.next.get(); - for (VariantGraph.Edge edge : next.outgoing()) { - if (witnesses != null && !edge.witnesses().stream().anyMatch(witnesses::contains)) { + for (Map.Entry> edge : next.outgoing().entrySet()) { + if (witnesses != null && !edge.getValue().stream().anyMatch(witnesses::contains)) { continue; } - final VariantGraph.Vertex end = edge.to(); + final VariantGraph.Vertex end = edge.getKey(); final long endEncountered = Optional.ofNullable(encountered.get(end)).orElse(0L); - final long endIncoming = 
end.incoming().stream().filter(e -> witnesses == null || e.witnesses().stream().anyMatch(witnesses::contains)).count(); + final long endIncoming = end.incoming().entrySet().stream().filter(e -> witnesses == null || e.getValue().stream().anyMatch(witnesses::contains)).count(); if (endIncoming == endEncountered) { throw new IllegalStateException(String.format("Encountered cycle traversing %s to %s", edge, end)); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 826c0fe97..3500f93ff 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -30,7 +30,6 @@ import org.junit.Before; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; @@ -119,19 +118,19 @@ protected static void assertGraphVertices(VariantGraph graph, int vertices) { } protected static void assertGraphEdges(VariantGraph graph, int edges) { - assertEquals(edges, StreamSupport.stream(graph.vertices().spliterator(), false).map(VariantGraph.Vertex::outgoing).flatMap(Collection::stream).count()); + assertEquals(edges, StreamSupport.stream(graph.vertices().spliterator(), false).map(VariantGraph.Vertex::outgoing).map(Map::keySet).flatMap(Set::stream).count()); } protected static void assetGraphSize(VariantGraph graph, int vertices, int edges) { assertGraphVertices(graph, vertices); assertGraphEdges(graph, edges); } - protected static void assertHasWitnesses(VariantGraph.Edge edge, Witness... witnesses) { - assertEquals(new HashSet<>(Arrays.asList(witnesses)), edge.witnesses()); + protected static void assertHasWitnesses(Set edge, Witness... 
witnesses) { + assertEquals(new HashSet<>(Arrays.asList(witnesses)), edge); } - protected static VariantGraph.Edge edgeBetween(VariantGraph.Vertex start, VariantGraph.Vertex end) { - final Optional edge = start.outgoing().stream().filter(e -> end.equals(e.to())).findFirst(); + protected static Set edgeBetween(VariantGraph.Vertex start, VariantGraph.Vertex end) { + final Optional> edge = Optional.ofNullable(start.outgoing().get(end)); Assert.assertTrue(String.format("No edge between %s and %s", start, end), edge.isPresent()); return edge.get(); } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java index bcd23725a..b1b736a26 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java @@ -22,15 +22,11 @@ import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; import eu.interedition.collatex.simple.SimpleWitness; import eu.interedition.collatex.util.VariantGraphTraversal; -import org.junit.Assert; import org.junit.Test; import java.io.StringWriter; -import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.List; -import java.util.Set; import java.util.logging.Level; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -46,20 +42,6 @@ public void emptyGraph() { assetGraphSize(graph, 2, 1); } - @Test - public void reconnectingVerticesYieldsSameEdge() { - final SimpleWitness witness = createWitnesses("hello world")[0]; - final VariantGraph graph = new VariantGraph(); - final VariantGraph.Vertex helloVertex = graph.add(witness.getTokens().get(0)); - final VariantGraph.Vertex worldVertex = graph.add(witness.getTokens().get(1)); - final VariantGraph.Edge edge = graph.connect(helloVertex, worldVertex, Collections. 
singleton(witness)); - - Assert.assertEquals(1, edge.witnesses().size()); - - Assert.assertEquals(edge, graph.connect(helloVertex, worldVertex, Collections. singleton(witness))); - Assert.assertEquals(1, edge.witnesses().size()); - } - @Test public void getTokens() { final SimpleWitness[] w = createWitnesses("a b c d"); @@ -120,10 +102,8 @@ public void transpositions2() { final VariantGraph graph = collate(w); // There should be two vertices for cat in the graph - VariantGraph.Edge edge = edgeBetween(vertexWith(graph, "red", w[0]), vertexWith(graph, "cat", w[0])); - assertHasWitnesses(edge, w[0]); - edge = edgeBetween(vertexWith(graph, "red", w[1]), vertexWith(graph, "cat", w[1])); - assertHasWitnesses(edge, w[1], w[2]); + assertHasWitnesses(edgeBetween(vertexWith(graph, "red", w[0]), vertexWith(graph, "cat", w[0])), w[0]); + assertHasWitnesses(edgeBetween(vertexWith(graph, "red", w[1]), vertexWith(graph, "cat", w[1])), w[1], w[2]); assetGraphSize(graph, 17, 20); } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java index f92bde8c9..b0193573c 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java @@ -2,7 +2,6 @@ import eu.interedition.collatex.AbstractTest; import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.VariantGraph.Transposition; import eu.interedition.collatex.VariantGraph.Vertex; import eu.interedition.collatex.matching.EqualityTokenComparator; import eu.interedition.collatex.simple.SimpleWitness; @@ -29,7 +28,7 @@ public void transpositions() { final VariantGraph graph = collate(w[0], w[1]); assertEquals(2, graph.transpositions().size()); collate(graph, w[2]); - final Set transposed = graph.transpositions(); + final Set> transposed = 
graph.transpositions(); assertEquals(2, transposed.size()); } @@ -64,7 +63,7 @@ public void testGreekTwoWitnesses() { "και αποκριθεισ ειπεν αυτω ου βλεπεισ ταυτασ μεγαλασ οικοδομασ αμην λεγω σοι ο(υ μη α)φεθη ωδε λιθοσ επι λιθω (οσ ου) μη καταλυθη", // "και αποκριθεισ ο ι̅σ̅ ειπεν αυτω βλεπεισ Ταυτασ τασ μεγαλασ οικοδομασ λεγω υμιν ου μη αφεθη λιθοσ επι λιθου οσ ου μη καταλυθη"); VariantGraph graph = collate(w[0], w[1]); - Set transpositions = graph.transpositions(); + Set> transpositions = graph.transpositions(); assertTrue(transpositions.isEmpty()); } @@ -73,9 +72,9 @@ public void testGreekTwoWitnesses() { public void testGreekThreeWitnesses() { SimpleWitness[] w = createWitnesses("και αποκριθεισ ειπεν αυτω ου βλεπεισ ταυτασ μεγαλασ οικοδομασ αμην λεγω σοι ο(υ μη α)φεθη ωδε λιθοσ επι λιθω (οσ ου) μη καταλυθη", "και αποκριθεισ ο ι̅σ̅ ειπεν αυτω βλεπεισ Ταυτασ τασ μεγαλασ οικοδομασ λεγω υμιν ου μη αφεθη λιθοσ επι λιθου οσ ου μη καταλυθη", "και ο ι̅σ̅ αποκριθεισ ειπεν αυτω βλεπεισ ταυτασ τασ μεγαλασ οικοδομασ ου μη αφεθη λιθοσ επι λιθον οσ ου μη καταλυθη"); VariantGraph graph = collate(w[0], w[1], w[2]); - Set transpositions = graph.transpositions(); + Set> transpositions = graph.transpositions(); assertEquals(1, transpositions.size()); - Transposition transposition = transpositions.iterator().next(); + Set transposition = transpositions.iterator().next(); Set transposedVertices = new HashSet<>(); for (Vertex transposedVertex : transposition) { transposedVertices.add(transposedVertex.toString()); diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java index 3078d5484..4fba32f25 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java @@ -199,8 +199,8 @@ public void test4JoinedTranspositions2witnesses() throws 
XMLStreamException { SimpleWitness[] sw = createWitnesses(a, b); // testWitnessCollation(sw); VariantGraph vg = collate(sw); - Set transpositions0 = vg.transpositions(); - for (VariantGraph.Transposition t : transpositions0) { + Set> transpositions0 = vg.transpositions(); + for (Set t : transpositions0) { LOG.log(Level.FINE, "transposition {0}", t.toString()); } @@ -210,9 +210,9 @@ public void test4JoinedTranspositions2witnesses() throws XMLStreamException { } vg = VariantGraph.JOIN.apply(vg); LOG.fine(toString(table(vg))); - Set transpositions = vg.transpositions(); + Set> transpositions = vg.transpositions(); LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); - for (VariantGraph.Transposition t : transpositions) { + for (Set t : transpositions) { LOG.log(Level.FINE, "transposition {0}", t.toString()); // all joined vertices should be size 3 for (VariantGraph.Vertex vertex : t) { @@ -242,9 +242,9 @@ public void test4JoinedTranspositions3witnesses() throws XMLStreamException { LOG.fine(writer.toString()); vg = VariantGraph.JOIN.apply(vg); - Set transpositions = vg.transpositions(); + Set> transpositions = vg.transpositions(); LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); - for (VariantGraph.Transposition t : transpositions) { + for (Set t : transpositions) { String showTransposition = t.toString(); LOG.log(Level.FINE, "transposition {0}", showTransposition); boolean transpositionOfA = showTransposition.contains("a"); @@ -278,9 +278,9 @@ public void testHermansText3aJoinedTranspositions2() throws XMLStreamException { testWitnessCollation(sw); VariantGraph vg = VariantGraph.JOIN.apply(collate(sw)); - Set transpositions = vg.transpositions(); + Set> transpositions = vg.transpositions(); assertEquals(5, transpositions.size()); - VariantGraph.Transposition transposition = transpositions.iterator().next(); + Set transposition = transpositions.iterator().next(); // assertEquals("genaamd de", transposition.from().toString()); } @@ -322,9 
+322,9 @@ public void testNoLoops() throws XMLStreamException { String w3 = "e c b d"; SimpleWitness[] sw = createWitnesses(w1, w2, w3); VariantGraph vg = collate(sw); - Set transpositions = vg.transpositions(); + Set> transpositions = vg.transpositions(); assertEquals(1, transpositions.size()); - VariantGraph.Transposition t = transpositions.iterator().next(); + Set t = transpositions.iterator().next(); for (VariantGraph.Vertex vertex : t) { for (SimpleToken token : vertex.tokens().stream().map(tk -> (SimpleToken)tk).toArray(SimpleToken[]::new)) { assertEquals(token.toString(), token.getNormalized(), "c"); From 16507a0d8628376bb5631a3e6cda3418bb850af8 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Tue, 10 Feb 2015 21:40:45 +0100 Subject: [PATCH 36/52] Reduce POM --- collatex-core/pom.xml | 32 ------------------- collatex-tools/pom.xml | 39 +++++++++++------------ pom.xml | 72 +++++++++++++----------------------------- 3 files changed, 40 insertions(+), 103 deletions(-) diff --git a/collatex-core/pom.xml b/collatex-core/pom.xml index 02770f43e..ebfc993c5 100644 --- a/collatex-core/pom.xml +++ b/collatex-core/pom.xml @@ -10,36 +10,4 @@ 1.6-SNAPSHOT CollateX Core A Java library for collating textual sources, for example, to produce an apparatus. 
- - - net.sf.jung - jung-graph-impl - - - - - - maven-javadoc-plugin - 2.9.1 - - - - API - eu.interedition.collatex - - - Collation Algorithms - - eu.interedition.collatex.dekker*:eu.interedition.collatex.medite:eu.interedition.collatex.needlemanwunsch - - - - Variant Graph Implementations - eu.interedition.collatex.jung*:eu.interedition.collatex.neo4j* - - - - - - diff --git a/collatex-tools/pom.xml b/collatex-tools/pom.xml index 6d1bf6506..467c6a97e 100644 --- a/collatex-tools/pom.xml +++ b/collatex-tools/pom.xml @@ -11,6 +11,10 @@ CollateX Tools CollateX Tool Suite including a command line interface and a HTTP service + + eu.interedition + collatex-core + com.google.code.findbugs jsr305 @@ -23,27 +27,20 @@ 15.0 - org.glassfish - javax.json - 1.0.4 - - - eu.interedition - collatex-core - - - commons-cli - commons-cli - 1.2 - - - net.sf.jung - jung-graph-impl - - - org.glassfish.grizzly - grizzly-http-server - + org.glassfish + javax.json + 1.0.4 + + + org.glassfish.grizzly + grizzly-http-server + 2.3.17 + + + commons-cli + commons-cli + 1.2 + diff --git a/pom.xml b/pom.xml index 17c8d81e5..b72167e6f 100644 --- a/pom.xml +++ b/pom.xml @@ -59,56 +59,28 @@ - - 2.0.1 - + + 1.8 + 1.8 + - - - junit - junit - 4.10 - test - - + + + junit + junit + 4.10 + test + + - - - - eu.interedition - collatex-core - ${project.version} - - - - org.glassfish.grizzly - grizzly-http-server - 2.3.17 - - - - net.sf.jung - jung-graph-impl - ${jung.version} - - - net.sf.jung - jung-visualization - ${jung.version} - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.8 - 1.8 - - - - + + + + eu.interedition + collatex-core + ${project.version} + + + + From 8d6340427f634d2f0af1f9764a070e85ea1175af Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Tue, 10 Feb 2015 21:56:45 +0100 Subject: [PATCH 37/52] Remove some Google Guava usages --- collatex-tools/pom.xml | 11 ----- .../interedition/collatex/tools/CollateX.java | 31 +++++-------- .../collatex/tools/PluginScript.java 
| 23 ++++------ .../collatex/tools/TextWitness.java | 46 ------------------- .../collatex/tools/URLWitness.java | 27 ++++++----- 5 files changed, 36 insertions(+), 102 deletions(-) delete mode 100644 collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java diff --git a/collatex-tools/pom.xml b/collatex-tools/pom.xml index 467c6a97e..6e93b309a 100644 --- a/collatex-tools/pom.xml +++ b/collatex-tools/pom.xml @@ -15,17 +15,6 @@ eu.interedition collatex-core - - com.google.code.findbugs - jsr305 - 2.0.2 - provided - - - com.google.guava - guava - 15.0 - org.glassfish javax.json diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java index e952b3880..e7cb61fbe 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java @@ -19,9 +19,6 @@ package eu.interedition.collatex.tools; -import com.google.common.io.Closeables; -import com.google.common.io.Closer; -import com.google.common.io.Files; import eu.interedition.collatex.CollationAlgorithm; import eu.interedition.collatex.CollationAlgorithmFactory; import eu.interedition.collatex.Token; @@ -55,7 +52,6 @@ import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; -import java.io.Closeable; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -66,6 +62,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.Charset; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -78,7 +75,7 @@ /** * @author Gregor Middell */ -public class CollateX implements Closeable { +public class CollateX implements AutoCloseable { Charset inputCharset; boolean xmlMode; @@ -145,7 +142,7 @@ CollateX configure(CommandLine 
commandLine) throws XPathExpressionException, Par if (!"-".equals(output)) { try { this.outFile = new File(output); - this.out = new PrintWriter(Files.newWriter(this.outFile, outputCharset)); + this.out = new PrintWriter(Files.newBufferedWriter(this.outFile.toPath(), outputCharset)); } catch (FileNotFoundException e) { throw new ParseException("Output file '" + outFile + "' not found"); } @@ -212,7 +209,7 @@ void write() throws IOException { try { xml.close(); } catch (XMLStreamException e) { - throw new IOException(e); + // ignored } } } @@ -322,8 +319,9 @@ public static void main(String... args) { engine.error("Script error", e); } finally { try { - Closeables.close(engine, false); + engine.close(); } catch (IOException ignored) { + // ignored } } } @@ -355,20 +353,15 @@ public static void main(String... args) { @Override public void close() throws IOException { - final Closer closer = Closer.create(); try { - if (out != null) { - closer.register(out).flush(); - } - if (log != null) { - closer.register(log).flush(); + for (PrintWriter writer : new PrintWriter[] { out, log }) { + writer.close(); } } finally { - closer.close(); - } - if (errorOccurred && (outFile != null) && outFile.isFile()) { - //noinspection ResultOfMethodCallIgnored - outFile.delete(); + if (errorOccurred && (outFile != null) && outFile.isFile()) { + //noinspection ResultOfMethodCallIgnored + outFile.delete(); + } } } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java index e1525cf1d..a1a5044df 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java @@ -19,27 +19,25 @@ package eu.interedition.collatex.tools; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; import 
eu.interedition.collatex.Token; import eu.interedition.collatex.simple.SimpleToken; -import javax.annotation.Nullable; import javax.script.Compilable; import javax.script.CompiledScript; import javax.script.Invocable; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import javax.script.ScriptException; +import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.URL; import java.nio.charset.Charset; import java.util.Comparator; +import java.util.LinkedList; import java.util.List; +import java.util.Objects; import java.util.function.Function; import java.util.stream.Stream; @@ -60,16 +58,13 @@ public class PluginScript { final boolean comparator; public static PluginScript read(URL source) throws ScriptException, IOException { - InputStream sourceStream = null; - try { - return read(source.toString(), new InputStreamReader(sourceStream = source.openStream(), SCRIPT_CHARSET)); - } finally { - Closeables.close(sourceStream, false); + try (Reader sourceReader = new BufferedReader(new InputStreamReader(source.openStream(), SCRIPT_CHARSET))) { + return read(source.toString(), sourceReader); } } public static PluginScript read(String filename, Reader source) throws ScriptException, IOException { - final ScriptEngine scriptEngine = Preconditions.checkNotNull(new ScriptEngineManager().getEngineByExtension("js")); + final ScriptEngine scriptEngine = Objects.requireNonNull(new ScriptEngineManager().getEngineByExtension("js")); scriptEngine.put(ScriptEngine.FILENAME, filename); final CompiledScript script = ((Compilable) scriptEngine).compile(source); @@ -88,14 +83,14 @@ public static PluginScript read(String filename, Reader source) throws ScriptExc Function> tokenizer() { return (tokenizer ? 
new Function>() { @Override - public Stream apply(@Nullable String input) { + public Stream apply(String input) { final Object result = invoke(TOKENIZER_FUNCTION, input); if (!(result instanceof Iterable)) { throw new PluginScriptExecutionException("Wrong result type of " + TOKENIZER_FUNCTION + "(); expected an iterable type, found " + result.getClass()); } - final List tokens = Lists.newLinkedList(); + final List tokens = new LinkedList<>(); for (Object token : (Iterable) result) { if (token == null) { throw new PluginScriptExecutionException(TOKENIZER_FUNCTION + "() returned null token"); @@ -116,7 +111,7 @@ public Stream apply(@Nullable String input) { Function normalizer() { return (normalizer ? new Function() { @Override - public String apply(@Nullable String input) { + public String apply(String input) { final Object result = invoke(NORMALIZER_FUNCTION, input); if (!(result instanceof String)) { throw new PluginScriptExecutionException("Wrong result type of " + diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java deleted file mode 100644 index 1f3d36153..000000000 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/TextWitness.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.tools; - -import eu.interedition.collatex.Witness; - -/** - * @author Gregor Middell - */ -public class TextWitness implements Witness { - - public final String sigil; - public final String content; - - public TextWitness(String sigil, String content) { - this.sigil = sigil; - this.content = content; - } - - @Override - public String getSigil() { - return sigil; - } - - @Override - public String toString() { - return sigil; - } -} diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java index 85374c37f..5a8f17bf2 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java @@ -19,9 +19,6 @@ package eu.interedition.collatex.tools; -import com.google.common.collect.Lists; -import com.google.common.io.CharStreams; -import com.google.common.io.Closeables; import eu.interedition.collatex.Token; import eu.interedition.collatex.simple.SimpleToken; import eu.interedition.collatex.simple.SimpleWitness; @@ -36,11 +33,14 @@ import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; +import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.StringWriter; import java.net.URL; import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.List; import java.util.function.Function; import java.util.stream.Collectors; @@ -64,16 +64,14 @@ public URLWitness read( Charset charset, XPathExpression tokenXPath) throws IOException, XPathExpressionException, SAXException { - InputStream stream = null; - try { - stream = url.openStream(); + try (InputStream 
stream = url.openStream()) { if (tokenXPath != null) { final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); final Document document = documentBuilder.parse(stream); document.normalizeDocument(); final NodeList tokenNodes = (NodeList) tokenXPath.evaluate(document, XPathConstants.NODESET); - final List tokens = Lists.newArrayListWithExpectedSize(tokenNodes.getLength()); + final List tokens = new ArrayList<>(tokenNodes.getLength()); for (int nc = 0; nc < tokenNodes.getLength(); nc++) { final Node tokenNode = tokenNodes.item(nc); final String tokenText = tokenNode.getTextContent(); @@ -81,15 +79,20 @@ public URLWitness read( } setTokens(tokens); } else { - setTokens(tokenizer.apply(CharStreams.toString(new InputStreamReader(stream, charset))) - .map(tokenText -> new SimpleToken(this, tokenText, normalizer.apply(tokenText))) - .collect(Collectors.toList()) + + final BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charset)); + final StringWriter writer = new StringWriter(); + final char[] buf = new char[1024]; + while (reader.read(buf) != -1) { + writer.write(buf); + } + setTokens(tokenizer.apply(writer.toString()) + .map(tokenText -> new SimpleToken(this, tokenText, normalizer.apply(tokenText))) + .collect(Collectors.toList()) ); } } catch (ParserConfigurationException e) { throw new SAXException(e); - } finally { - Closeables.close(stream, false); } return this; } From cf8b959d5a0008d6fa63f352e38b8497fad0ea2f Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Wed, 11 Feb 2015 01:18:29 +0100 Subject: [PATCH 38/52] Collation Algorithm: refactor merging of alignments/transpositions --- .../collatex/CollationAlgorithm.java | 80 ++++++++----------- 1 file changed, 34 insertions(+), 46 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java index 8ce18f7a4..f3f94529c 100644 --- 
a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java @@ -24,13 +24,11 @@ import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschScorer; import eu.interedition.collatex.util.VertexMatch; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -38,6 +36,7 @@ import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; import java.util.stream.StreamSupport; /** @@ -132,61 +131,50 @@ protected void mergeTranspositions(VariantGraph into, List> transpos } protected void merge(VariantGraph graph, VariantGraph.Vertex[][] vertices, Token[] tokens, SortedSet> matches) { - final SortedSet[] matchesVertexOrder = (SortedSet[]) matches.toArray(new SortedSet[matches.size()]); + @SuppressWarnings("unchecked") + final SortedSet[] matchesVertexOrder = matches.toArray(new SortedSet[matches.size()]); final SortedSet[] matchesTokenOrder = Arrays.copyOf(matchesVertexOrder, matchesVertexOrder.length); - Arrays.sort(matchesTokenOrder, new Comparator>() { - @Override - public int compare(SortedSet o1, SortedSet o2) { - return (o1.first().token - o2.first().token); - } - }); + Arrays.sort(matchesTokenOrder, Comparator.comparing(m -> m.first().token)); - final int mergedLength = Math.max(tokens.length, vertices.length); - final Set> inOrderMatches = NeedlemanWunschAlgorithm.align( + final Set> alignedMatches = NeedlemanWunschAlgorithm.align( matchesVertexOrder, matchesTokenOrder, - new NeedlemanWunschScorer, SortedSet>() { - - @Override - public float score(SortedSet a, SortedSet b) { - return (a.equals(b) ? 
1 : -mergedLength); - } - - @Override - public float gap() { - return -(1 / (mergedLength * 1.0f)); - } - } + new MatchPhraseAlignmentScorer(Math.max(tokens.length, vertices.length)) ).keySet(); - final List> transpositions = new ArrayList<>(); - for (SortedSet phraseMatch : matches) { - if (!inOrderMatches.contains(phraseMatch)) { - transpositions.add(phraseMatch); - } - } + final Map alignments = matches.stream() + .filter(alignedMatches::contains) + .flatMap(Set::stream) + .collect(Collectors.toMap(m -> tokens[m.token], m -> m.vertex)); + final List> transpositions = matches.stream() + .filter(m -> !alignedMatches.contains(m)) + .map(t -> t.stream().map(m -> new VertexMatch.WithToken(m.vertex, m.vertexRank, tokens[m.token])).collect(Collectors.toCollection(TreeSet::new))) + .collect(Collectors.toList()); - final Map matchedTokens = new HashMap<>(); - for (SortedSet phraseMatch : matches) { - for (VertexMatch.WithTokenIndex tokenMatch : phraseMatch) { - matchedTokens.put(tokens[tokenMatch.token], tokenMatch.vertex); - } - } + merge(graph, Arrays.asList(tokens), alignments); + mergeTranspositions(graph, transpositions); + } + } + + static class MatchPhraseAlignmentScorer implements NeedlemanWunschScorer, SortedSet> { - final List> transposedTokens = new LinkedList<>(); - for (SortedSet transposition : transpositions) { - final SortedSet transpositionMatch = new TreeSet<>(); - for (VertexMatch.WithTokenIndex match : transposition) { - matchedTokens.remove(tokens[match.token]); - transpositionMatch.add(new VertexMatch.WithToken(match.vertex, match.vertexRank, tokens[match.token])); - } - transposedTokens.add(transpositionMatch); - } + private final int maxWitnessLength; + + public MatchPhraseAlignmentScorer(int maxWitnessLength) { + this.maxWitnessLength = maxWitnessLength; + } - merge(graph, Arrays.asList(tokens), matchedTokens); - mergeTranspositions(graph, transposedTokens); + @Override + public float score(SortedSet a, SortedSet b) { + return (a.equals(b) ? 
1 : -maxWitnessLength); + } + + @Override + public float gap() { + return -(1 / (maxWitnessLength * 1.0f)); } + } } From b95193903fb5586961a23a596b9eec93a2dfaa0a Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 14 Feb 2015 16:28:47 +0100 Subject: [PATCH 39/52] Unified code formatting; made editor configuration explicit --- .editorconfig | 11 + collatex-core/pom.xml | 3 +- .../collatex/CollationAlgorithm.java | 360 +- .../collatex/CollationAlgorithmFactory.java | 116 +- .../java/eu/interedition/collatex/Token.java | 4 +- .../interedition/collatex/VariantGraph.java | 280 +- .../eu/interedition/collatex/Witness.java | 9 +- .../collatex/dekker/DekkerAlgorithm.java | 292 +- .../interedition/collatex/dekker/Match.java | 66 +- .../collatex/dekker/PhraseMatchDetector.java | 153 +- .../collatex/dekker/TokenLinker.java | 64 +- .../dekker/TranspositionDetector.java | 381 +- .../interedition/collatex/dekker/Tuple.java | 34 +- .../collatex/dekker/matrix/Archipelago.java | 286 +- .../collatex/dekker/matrix/Coordinate.java | 162 +- .../collatex/dekker/matrix/Island.java | 465 +-- .../dekker/matrix/IslandCompetition.java | 2 +- .../dekker/matrix/IslandConflictResolver.java | 309 +- .../collatex/dekker/matrix/MatchTable.java | 275 +- .../dekker/matrix/MatchTableLinker.java | 68 +- .../dekker/matrix/MatchTableSelection.java | 258 +- .../dekker/matrix/MatchTableSerializer.java | 204 +- .../collatex/matching/EditDistance.java | 4 +- .../matching/EditDistanceTokenComparator.java | 32 +- .../matching/EqualityTokenComparator.java | 12 +- .../collatex/matching/Matches.java | 58 +- .../StrictEqualityTokenComparator.java | 16 +- .../medite/AlignmentDecisionGraph.java | 200 +- .../interedition/collatex/medite/Matches.java | 296 +- .../collatex/medite/MediteAlgorithm.java | 94 +- .../collatex/medite/SuffixTree.java | 384 +- .../NeedlemanWunschAlgorithm.java | 158 +- .../NeedlemanWunschScorer.java | 4 +- .../collatex/simple/SimpleCollation.java | 58 +- 
.../simple/SimplePatternTokenizer.java | 36 +- .../collatex/simple/SimpleToken.java | 72 +- .../simple/SimpleTokenNormalizers.java | 60 +- .../simple/SimpleVariantGraphSerializer.java | 684 ++-- .../collatex/simple/SimpleWitness.java | 124 +- .../simple/SimpleWitnessTeiBuilder.java | 134 +- .../collatex/suffixarray/Algorithm.java | 67 +- .../collatex/suffixarray/BPR.java | 676 ++-- .../suffixarray/CharSequenceAdapter.java | 23 +- .../collatex/suffixarray/DeepShallow.java | 3582 ++++++++--------- .../suffixarray/DensePositiveDecorator.java | 27 +- .../suffixarray/DensePositiveMapper.java | 46 +- .../collatex/suffixarray/DivSufSort.java | 1522 +++---- .../ExtraTrailingCellsDecorator.java | 18 +- .../suffixarray/GenericArrayAdapter.java | 76 +- .../suffixarray/ISuffixArrayBuilder.java | 30 +- .../collatex/suffixarray/ISymbolMapper.java | 8 +- .../collatex/suffixarray/MinMax.java | 14 +- .../collatex/suffixarray/QSufSort.java | 194 +- .../collatex/suffixarray/SAIS.java | 847 ++-- .../collatex/suffixarray/Skew.java | 92 +- .../collatex/suffixarray/SuffixArrays.java | 71 +- .../collatex/suffixarray/SuffixData.java | 18 +- .../collatex/suffixarray/Tools.java | 53 +- .../collatex/suffixarray/Traversals.java | 87 +- .../collatex/suffixtree/ActivePoint.java | 386 +- .../collatex/suffixtree/Cursor.java | 180 +- .../collatex/suffixtree/Edge.java | 391 +- .../collatex/suffixtree/Node.java | 303 +- .../collatex/suffixtree/Sequence.java | 168 +- .../collatex/suffixtree/SequenceTerminal.java | 55 +- .../collatex/suffixtree/Suffix.java | 273 +- .../collatex/suffixtree/SuffixTree.java | 320 +- .../collatex/suffixtree/Utils.java | 167 +- .../util/GreedyStringTilingAlgorithm.java | 240 +- .../util/ParallelSegmentationApparatus.java | 100 +- .../collatex/util/VariantGraphRanking.java | 148 +- .../collatex/util/VariantGraphTraversal.java | 88 +- .../collatex/util/VertexMatch.java | 120 +- .../interedition/collatex/AbstractTest.java | 223 +- .../collatex/ScriptEngineTest.java | 38 +- 
.../collatex/VariantGraphTest.java | 324 +- .../collatex/dekker/AlignmentTest.java | 317 +- .../collatex/dekker/BeckettTest.java | 466 +-- .../collatex/dekker/DarwinTest.java | 34 +- .../collatex/dekker/SpencerHoweTest.java | 52 +- .../dekker/TranspositionGraphTest.java | 122 +- .../dekker/TranspositionRenderingTest.java | 166 +- .../dekker/VariantGraphRankerTest.java | 68 +- .../collatex/dekker/VariantGraphTest.java | 214 +- .../collatex/dekker/matrix/HermansTest.java | 582 +-- .../matrix/IslandConflictResolverTest.java | 42 +- .../collatex/dekker/matrix/IslandTest.java | 298 +- .../dekker/matrix/MatchTableLinkerTest.java | 368 +- .../dekker/matrix/MatchTableTest.java | 336 +- .../collatex/lab/CollateXLaboratory.java | 352 +- .../collatex/lab/MatchMatrixCellStatus.java | 48 +- .../collatex/lab/MatchMatrixTableModel.java | 126 +- .../collatex/lab/MatchTableCell.java | 26 +- .../collatex/lab/WitnessPanel.java | 216 +- .../collatex/matching/MatchesTest.java | 110 +- .../collatex/matching/NearMatcherTest.java | 20 +- .../collatex/medite/MediteTest.java | 18 +- .../collatex/medite/SuffixTreeTest.java | 22 +- .../needlemanwunsch/NeedlemanWunschTest.java | 10 +- .../collatex/output/AlignmentTableTest.java | 218 +- .../simple/SimpleWitnessTeiBuilderTest.java | 46 +- .../collatex/simple/SimpleWitnessTest.java | 22 +- collatex-tools/pom.xml | 47 +- .../interedition/collatex/tools/CollateX.java | 526 +-- .../collatex/tools/CollatorService.java | 330 +- .../collatex/tools/JsonProcessor.java | 395 +- .../collatex/tools/NodeToken.java | 10 +- .../collatex/tools/PluginScript.java | 244 +- .../collatex/tools/URLWitness.java | 76 +- pom.xml | 47 +- 110 files changed, 11253 insertions(+), 11924 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..5b247c005 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +# http://editorconfig.org/ + +root = true + +[*] +charset = utf-8 +end_of_line = lf 
+trim_trailing_whitespace = true +insert_final_newline = false +indent_style = space +indent_size = 4 diff --git a/collatex-core/pom.xml b/collatex-core/pom.xml index ebfc993c5..9f7145071 100644 --- a/collatex-core/pom.xml +++ b/collatex-core/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 eu.interedition diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java index f3f94529c..ba974fc6a 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java @@ -1,180 +1,180 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex; - -import eu.interedition.collatex.dekker.Match; -import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm; -import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschScorer; -import eu.interedition.collatex.util.VertexMatch; - -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -/** - * @author Gregor Middell - */ -public interface CollationAlgorithm { - - void collate(VariantGraph against, Iterable witness); - - void collate(VariantGraph against, Iterable... witnesses); - - void collate(VariantGraph against, List> witnesses); - - abstract class Base implements CollationAlgorithm { - protected final Logger LOG = Logger.getLogger(getClass().getName()); - protected Map witnessTokenVertices; - - @Override - public void collate(VariantGraph against, Iterable... 
witnesses) { - collate(against, Arrays.asList(witnesses)); - } - - @Override - public void collate(VariantGraph against, List> witnesses) { - for (Iterable witness : witnesses) { - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "heap space: {0}/{1}", new Object[] { - Runtime.getRuntime().totalMemory(), - Runtime.getRuntime().maxMemory() - }); - } - collate(against, witness); - } - } - - protected void merge(VariantGraph into, Iterable witnessTokens, Map alignments) { - final Witness witness = StreamSupport.stream(witnessTokens.spliterator(), false) - .findFirst() - .map(Token::getWitness) - .orElseThrow(() -> new IllegalArgumentException("Empty witness")); - - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "{0} + {1}: Merge comparand into graph", new Object[] { into, witness }); - } - witnessTokenVertices = new HashMap<>(); - VariantGraph.Vertex last = into.getStart(); - final Set witnessSet = Collections.singleton(witness); - for (Token token : witnessTokens) { - VariantGraph.Vertex matchingVertex = alignments.get(token); - if (matchingVertex == null) { - matchingVertex = into.add(token); - } else { - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Match: {0} to {1}", new Object[] { matchingVertex, token }); - } - matchingVertex.add(Collections.singleton(token)); - } - witnessTokenVertices.put(token, matchingVertex); - - into.connect(last, matchingVertex, witnessSet); - last = matchingVertex; - } - into.connect(last, into.getEnd(), witnessSet); - } - - protected void mergeTranspositions(VariantGraph into, Iterable> transpositions) { - for (SortedSet transposedPhrase : transpositions) { - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Transposition: {0}", transposedPhrase); - } - final Set transposed = new HashSet<>(); - for (VertexMatch.WithToken match : transposedPhrase) { - transposed.add(witnessTokenVertices.get(match.token)); - transposed.add(match.vertex); - } - into.transpose(transposed); - } - } - - protected void 
mergeTranspositions(VariantGraph into, List> transpositions) { - for (List transposedPhrase : transpositions) { - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Transposition: {0}", transposedPhrase); - } - final Set transposed = new HashSet<>(); - for (Match match : transposedPhrase) { - transposed.add(witnessTokenVertices.get(match.token)); - transposed.add(match.vertex); - } - into.transpose(transposed); - } - } - - protected void merge(VariantGraph graph, VariantGraph.Vertex[][] vertices, Token[] tokens, SortedSet> matches) { - @SuppressWarnings("unchecked") - final SortedSet[] matchesVertexOrder = matches.toArray(new SortedSet[matches.size()]); - final SortedSet[] matchesTokenOrder = Arrays.copyOf(matchesVertexOrder, matchesVertexOrder.length); - - Arrays.sort(matchesTokenOrder, Comparator.comparing(m -> m.first().token)); - - final Set> alignedMatches = NeedlemanWunschAlgorithm.align( - matchesVertexOrder, - matchesTokenOrder, - new MatchPhraseAlignmentScorer(Math.max(tokens.length, vertices.length)) - ).keySet(); - - final Map alignments = matches.stream() - .filter(alignedMatches::contains) - .flatMap(Set::stream) - .collect(Collectors.toMap(m -> tokens[m.token], m -> m.vertex)); - - final List> transpositions = matches.stream() - .filter(m -> !alignedMatches.contains(m)) - .map(t -> t.stream().map(m -> new VertexMatch.WithToken(m.vertex, m.vertexRank, tokens[m.token])).collect(Collectors.toCollection(TreeSet::new))) - .collect(Collectors.toList()); - - merge(graph, Arrays.asList(tokens), alignments); - mergeTranspositions(graph, transpositions); - } - } - - static class MatchPhraseAlignmentScorer implements NeedlemanWunschScorer, SortedSet> { - - private final int maxWitnessLength; - - public MatchPhraseAlignmentScorer(int maxWitnessLength) { - this.maxWitnessLength = maxWitnessLength; - } - - @Override - public float score(SortedSet a, SortedSet b) { - return (a.equals(b) ? 
1 : -maxWitnessLength); - } - - @Override - public float gap() { - return -(1 / (maxWitnessLength * 1.0f)); - } - - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex; + +import eu.interedition.collatex.dekker.Match; +import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm; +import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschScorer; +import eu.interedition.collatex.util.VertexMatch; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +/** + * @author Gregor Middell + */ +public interface CollationAlgorithm { + + void collate(VariantGraph against, Iterable witness); + + void collate(VariantGraph against, Iterable... 
witnesses); + + void collate(VariantGraph against, List> witnesses); + + abstract class Base implements CollationAlgorithm { + protected final Logger LOG = Logger.getLogger(getClass().getName()); + protected Map witnessTokenVertices; + + @Override + public void collate(VariantGraph against, Iterable... witnesses) { + collate(against, Arrays.asList(witnesses)); + } + + @Override + public void collate(VariantGraph against, List> witnesses) { + for (Iterable witness : witnesses) { + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "heap space: {0}/{1}", new Object[]{ + Runtime.getRuntime().totalMemory(), + Runtime.getRuntime().maxMemory() + }); + } + collate(against, witness); + } + } + + protected void merge(VariantGraph into, Iterable witnessTokens, Map alignments) { + final Witness witness = StreamSupport.stream(witnessTokens.spliterator(), false) + .findFirst() + .map(Token::getWitness) + .orElseThrow(() -> new IllegalArgumentException("Empty witness")); + + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "{0} + {1}: Merge comparand into graph", new Object[]{into, witness}); + } + witnessTokenVertices = new HashMap<>(); + VariantGraph.Vertex last = into.getStart(); + final Set witnessSet = Collections.singleton(witness); + for (Token token : witnessTokens) { + VariantGraph.Vertex matchingVertex = alignments.get(token); + if (matchingVertex == null) { + matchingVertex = into.add(token); + } else { + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Match: {0} to {1}", new Object[]{matchingVertex, token}); + } + matchingVertex.add(Collections.singleton(token)); + } + witnessTokenVertices.put(token, matchingVertex); + + into.connect(last, matchingVertex, witnessSet); + last = matchingVertex; + } + into.connect(last, into.getEnd(), witnessSet); + } + + protected void mergeTranspositions(VariantGraph into, Iterable> transpositions) { + for (SortedSet transposedPhrase : transpositions) { + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, 
"Transposition: {0}", transposedPhrase); + } + final Set transposed = new HashSet<>(); + for (VertexMatch.WithToken match : transposedPhrase) { + transposed.add(witnessTokenVertices.get(match.token)); + transposed.add(match.vertex); + } + into.transpose(transposed); + } + } + + protected void mergeTranspositions(VariantGraph into, List> transpositions) { + for (List transposedPhrase : transpositions) { + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Transposition: {0}", transposedPhrase); + } + final Set transposed = new HashSet<>(); + for (Match match : transposedPhrase) { + transposed.add(witnessTokenVertices.get(match.token)); + transposed.add(match.vertex); + } + into.transpose(transposed); + } + } + + protected void merge(VariantGraph graph, VariantGraph.Vertex[][] vertices, Token[] tokens, SortedSet> matches) { + @SuppressWarnings("unchecked") + final SortedSet[] matchesVertexOrder = matches.toArray(new SortedSet[matches.size()]); + final SortedSet[] matchesTokenOrder = Arrays.copyOf(matchesVertexOrder, matchesVertexOrder.length); + + Arrays.sort(matchesTokenOrder, Comparator.comparing(m -> m.first().token)); + + final Set> alignedMatches = NeedlemanWunschAlgorithm.align( + matchesVertexOrder, + matchesTokenOrder, + new MatchPhraseAlignmentScorer(Math.max(tokens.length, vertices.length)) + ).keySet(); + + final Map alignments = matches.stream() + .filter(alignedMatches::contains) + .flatMap(Set::stream) + .collect(Collectors.toMap(m -> tokens[m.token], m -> m.vertex)); + + final List> transpositions = matches.stream() + .filter(m -> !alignedMatches.contains(m)) + .map(t -> t.stream().map(m -> new VertexMatch.WithToken(m.vertex, m.vertexRank, tokens[m.token])).collect(Collectors.toCollection(TreeSet::new))) + .collect(Collectors.toList()); + + merge(graph, Arrays.asList(tokens), alignments); + mergeTranspositions(graph, transpositions); + } + } + + static class MatchPhraseAlignmentScorer implements NeedlemanWunschScorer, SortedSet> { + + private 
final int maxWitnessLength; + + public MatchPhraseAlignmentScorer(int maxWitnessLength) { + this.maxWitnessLength = maxWitnessLength; + } + + @Override + public float score(SortedSet a, SortedSet b) { + return (a.equals(b) ? 1 : -maxWitnessLength); + } + + @Override + public float gap() { + return -(1 / (maxWitnessLength * 1.0f)); + } + + } +} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java index dfce79bf0..7e0a7df1b 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java @@ -1,58 +1,58 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex; - -import eu.interedition.collatex.dekker.DekkerAlgorithm; -import eu.interedition.collatex.dekker.matrix.MatchTableLinker; -import eu.interedition.collatex.util.GreedyStringTilingAlgorithm; -import eu.interedition.collatex.util.VertexMatch; -import eu.interedition.collatex.medite.MediteAlgorithm; -import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm; - -import java.util.Comparator; -import java.util.SortedSet; -import java.util.function.Function; - -/** - * @author Gregor Middell - * @author Ronald Haentjens Dekker - */ -public class CollationAlgorithmFactory { - - public static CollationAlgorithm dekker(Comparator comparator) { - return dekkerMatchMatrix(comparator, 3); - } - - public static CollationAlgorithm dekkerMatchMatrix(Comparator comparator, int outlierTranspositionsSizeLimit) { - return new DekkerAlgorithm(comparator, new MatchTableLinker()); - } - - public static CollationAlgorithm needlemanWunsch(Comparator comparator) { - return new NeedlemanWunschAlgorithm(comparator); - } - - public static CollationAlgorithm greedyStringTiling(Comparator comparator, int minimumTileLength) { - return new GreedyStringTilingAlgorithm(comparator, minimumTileLength); - } - - public static CollationAlgorithm medite(Comparator comparator, Function, Integer> matchEvaluator) { - return new MediteAlgorithm(comparator, matchEvaluator); - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex; + +import eu.interedition.collatex.dekker.DekkerAlgorithm; +import eu.interedition.collatex.dekker.matrix.MatchTableLinker; +import eu.interedition.collatex.medite.MediteAlgorithm; +import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm; +import eu.interedition.collatex.util.GreedyStringTilingAlgorithm; +import eu.interedition.collatex.util.VertexMatch; + +import java.util.Comparator; +import java.util.SortedSet; +import java.util.function.Function; + +/** + * @author Gregor Middell + * @author Ronald Haentjens Dekker + */ +public class CollationAlgorithmFactory { + + public static CollationAlgorithm dekker(Comparator comparator) { + return dekkerMatchMatrix(comparator, 3); + } + + public static CollationAlgorithm dekkerMatchMatrix(Comparator comparator, int outlierTranspositionsSizeLimit) { + return new DekkerAlgorithm(comparator, new MatchTableLinker()); + } + + public static CollationAlgorithm needlemanWunsch(Comparator comparator) { + return new NeedlemanWunschAlgorithm(comparator); + } + + public static CollationAlgorithm greedyStringTiling(Comparator comparator, int minimumTileLength) { + return new GreedyStringTilingAlgorithm(comparator, minimumTileLength); + } + + public static CollationAlgorithm medite(Comparator comparator, Function, Integer> matchEvaluator) { + return new MediteAlgorithm(comparator, matchEvaluator); + } +} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Token.java b/collatex-core/src/main/java/eu/interedition/collatex/Token.java index 33a67d680..ac0da98ba 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Token.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Token.java @@ -21,8 +21,8 @@ /** - * The normalized version of the token. 
+ * The normalized version of the token. */ public interface Token { - Witness getWitness(); + Witness getWitness(); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index 94f5b88da..735e121de 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -37,169 +37,169 @@ * @author Gregor Middell */ public class VariantGraph { - final VariantGraph.Vertex start; - final VariantGraph.Vertex end; - final Map>> transpositionIndex = new HashMap<>(); - - public VariantGraph() { - super(); - this.start = new VariantGraph.Vertex(this); - this.end = new VariantGraph.Vertex(this); - - this.start.outgoing.put(this.end, Collections.emptySet()); - this.end.incoming.put(this.start, Collections.emptySet()); - } - - public Vertex getStart() { - return start; - } - - public Vertex getEnd() { - return end; - } - - public Set> transpositions() { - return transpositionIndex.values().stream().flatMap(Set::stream).collect(Collectors.toSet()); - } - - public Iterable vertices() { - return VariantGraphTraversal.of(this); - } - - public Vertex add(Token token) { - final VariantGraph.Vertex vertex = new VariantGraph.Vertex(this); - vertex.tokens.add(token); - return vertex; - } - - public void connect(Vertex from, Vertex to, Set witnesses) { - if (from.equals(to)) { - throw new IllegalArgumentException(); - } - - witnesses = new HashSet<>(witnesses); - Optional.ofNullable(from.outgoing.remove(to)).ifPresent(witnesses::addAll); - - from.outgoing.put(to, witnesses); - to.incoming.put(from, witnesses); + final VariantGraph.Vertex start; + final VariantGraph.Vertex end; + final Map>> transpositionIndex = new HashMap<>(); - start.outgoing.remove(end); - end.incoming.remove(start); - } + public VariantGraph() { + super(); + this.start = new VariantGraph.Vertex(this); + this.end = new 
VariantGraph.Vertex(this); - public Set transpose(Set vertices) { - if (vertices.isEmpty()) { - throw new IllegalArgumentException(); - } - for (Set transposition : vertices.iterator().next().transpositions()) { - if (transposition.equals(vertices)) { - return transposition; - } - } - final Set t = new HashSet<>(vertices); - for (VariantGraph.Vertex vertex : t) { - transpositionIndex.computeIfAbsent(vertex, v -> new HashSet<>()).add(t); - } - return t; - } - - public Set witnesses() { - return start.outgoing().values().stream().flatMap(Collection::stream).collect(Collectors.toSet()); - } - - @Override - public String toString() { - return witnesses().toString(); - } - - - /** - * @author Gregor Middell - */ - public static class Vertex { - private final VariantGraph graph; - private final Set tokens = new HashSet<>(); - private final Map> outgoing = new HashMap<>(); - private final Map> incoming = new HashMap<>(); - - public Vertex(VariantGraph graph) { - this.graph = graph; + this.start.outgoing.put(this.end, Collections.emptySet()); + this.end.incoming.put(this.start, Collections.emptySet()); } - public Map> incoming() { - return incoming; + public Vertex getStart() { + return start; } - public Map> outgoing() { - return outgoing; + public Vertex getEnd() { + return end; } public Set> transpositions() { - return graph.transpositionIndex.getOrDefault(this, Collections.emptySet()); + return transpositionIndex.values().stream().flatMap(Set::stream).collect(Collectors.toSet()); } - public Set tokens() { - return tokens; + public Iterable vertices() { + return VariantGraphTraversal.of(this); } - public Set witnesses() { - return incoming().values().stream().flatMap(Set::stream).collect(Collectors.toSet()); + public Vertex add(Token token) { + final VariantGraph.Vertex vertex = new VariantGraph.Vertex(this); + vertex.tokens.add(token); + return vertex; + } + + public void connect(Vertex from, Vertex to, Set witnesses) { + if (from.equals(to)) { + throw new 
IllegalArgumentException(); + } + + witnesses = new HashSet<>(witnesses); + Optional.ofNullable(from.outgoing.remove(to)).ifPresent(witnesses::addAll); + + from.outgoing.put(to, witnesses); + to.incoming.put(from, witnesses); + + start.outgoing.remove(end); + end.incoming.remove(start); } - public void add(Iterable tokens) { - tokens.forEach(this.tokens::add); + public Set transpose(Set vertices) { + if (vertices.isEmpty()) { + throw new IllegalArgumentException(); + } + for (Set transposition : vertices.iterator().next().transpositions()) { + if (transposition.equals(vertices)) { + return transposition; + } + } + final Set t = new HashSet<>(vertices); + for (VariantGraph.Vertex vertex : t) { + transpositionIndex.computeIfAbsent(vertex, v -> new HashSet<>()).add(t); + } + return t; } - public VariantGraph graph() { - return graph; + public Set witnesses() { + return start.outgoing().values().stream().flatMap(Collection::stream).collect(Collectors.toSet()); } + @Override public String toString() { - return tokens.toString(); + return witnesses().toString(); } - } - - public static final Function JOIN = graph -> { - final Set processed = new HashSet<>(); - final Deque queue = new ArrayDeque<>(graph.start.outgoing.keySet()); - - while (!queue.isEmpty()) { - final Vertex vertex = queue.pop(); - final Set> transpositions = new HashSet<>(vertex.transpositions()); - if (vertex.outgoing.size() == 1) { - final Vertex joinCandidateVertex = vertex.outgoing.keySet().iterator().next(); - final Set> joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); - - boolean canJoin = !graph.end.equals(joinCandidateVertex) && // - joinCandidateVertex.incoming.size() == 1 && // - transpositions.equals(joinCandidateTranspositions); - if (canJoin) { - vertex.add(joinCandidateVertex.tokens()); - for (Set t : new HashSet<>(joinCandidateVertex.transpositions())) { - final Set transposed = new HashSet<>(t); - transposed.remove(joinCandidateVertex); - 
transposed.add(vertex); - for (Vertex tv : t) { - graph.transpositionIndex.getOrDefault(tv, Collections.emptySet()).remove(t); - } - graph.transpose(transposed); - } - - vertex.outgoing.clear(); - vertex.outgoing.putAll(joinCandidateVertex.outgoing); - - vertex.outgoing.keySet().forEach(v -> v.incoming.put(vertex, v.incoming.remove(joinCandidateVertex))); - - queue.push(vertex); - continue; + + + /** + * @author Gregor Middell + */ + public static class Vertex { + private final VariantGraph graph; + private final Set tokens = new HashSet<>(); + private final Map> outgoing = new HashMap<>(); + private final Map> incoming = new HashMap<>(); + + public Vertex(VariantGraph graph) { + this.graph = graph; + } + + public Map> incoming() { + return incoming; + } + + public Map> outgoing() { + return outgoing; + } + + public Set> transpositions() { + return graph.transpositionIndex.getOrDefault(this, Collections.emptySet()); + } + + public Set tokens() { + return tokens; + } + + public Set witnesses() { + return incoming().values().stream().flatMap(Set::stream).collect(Collectors.toSet()); + } + + public void add(Iterable tokens) { + tokens.forEach(this.tokens::add); + } + + public VariantGraph graph() { + return graph; } - } - // FIXME: Why do we run out of memory in some cases here, if this is not checked? 
- processed.add(vertex); - vertex.outgoing.keySet().stream().filter(v -> !processed.contains(v)).forEach(queue::push); + public String toString() { + return tokens.toString(); + } } - return graph; - }; + public static final Function JOIN = graph -> { + final Set processed = new HashSet<>(); + final Deque queue = new ArrayDeque<>(graph.start.outgoing.keySet()); + + while (!queue.isEmpty()) { + final Vertex vertex = queue.pop(); + final Set> transpositions = new HashSet<>(vertex.transpositions()); + if (vertex.outgoing.size() == 1) { + final Vertex joinCandidateVertex = vertex.outgoing.keySet().iterator().next(); + final Set> joinCandidateTranspositions = new HashSet<>(joinCandidateVertex.transpositions()); + + boolean canJoin = !graph.end.equals(joinCandidateVertex) && // + joinCandidateVertex.incoming.size() == 1 && // + transpositions.equals(joinCandidateTranspositions); + if (canJoin) { + vertex.add(joinCandidateVertex.tokens()); + for (Set t : new HashSet<>(joinCandidateVertex.transpositions())) { + final Set transposed = new HashSet<>(t); + transposed.remove(joinCandidateVertex); + transposed.add(vertex); + for (Vertex tv : t) { + graph.transpositionIndex.getOrDefault(tv, Collections.emptySet()).remove(t); + } + graph.transpose(transposed); + } + + vertex.outgoing.clear(); + vertex.outgoing.putAll(joinCandidateVertex.outgoing); + + vertex.outgoing.keySet().forEach(v -> v.incoming.put(vertex, v.incoming.remove(joinCandidateVertex))); + + queue.push(vertex); + continue; + } + } + + // FIXME: Why do we run out of memory in some cases here, if this is not checked? 
+ processed.add(vertex); + vertex.outgoing.keySet().stream().filter(v -> !processed.contains(v)).forEach(queue::push); + } + + return graph; + }; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java index c0d307846..9de6b4c5c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java @@ -20,18 +20,15 @@ package eu.interedition.collatex; import java.util.Comparator; -import java.util.function.Function; -import java.util.stream.Collectors; /** * IWitness - * + *

        * Representation of a single textual witness - * */ public interface Witness { - String getSigil(); + String getSigil(); - final Comparator SIGIL_COMPARATOR = Comparator.comparing(Witness::getSigil); + final Comparator SIGIL_COMPARATOR = Comparator.comparing(Witness::getSigil); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java index b35b33e36..b40a43163 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java @@ -18,6 +18,13 @@ */ package eu.interedition.collatex.dekker; +import eu.interedition.collatex.CollationAlgorithm; +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.Witness; +import eu.interedition.collatex.dekker.matrix.MatchTableLinker; +import eu.interedition.collatex.util.VariantGraphRanking; + import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -28,159 +35,152 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import eu.interedition.collatex.CollationAlgorithm; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.dekker.matrix.MatchTableLinker; -import eu.interedition.collatex.util.VariantGraphRanking; - public class DekkerAlgorithm extends CollationAlgorithm.Base { - private final Comparator comparator; - private final TokenLinker tokenLinker; - private final PhraseMatchDetector phraseMatchDetector; - private final TranspositionDetector transpositionDetector; - private Map tokenLinks; - private List> phraseMatches; - private List> transpositions; - private Map alignments; - private boolean mergeTranspositions = false; - - public DekkerAlgorithm(Comparator 
comparator) { - this(comparator, new MatchTableLinker()); - } - - public DekkerAlgorithm(Comparator comparator, TokenLinker tokenLinker) { - this.comparator = comparator; - this.tokenLinker = tokenLinker; - this.phraseMatchDetector = new PhraseMatchDetector(); - this.transpositionDetector = new TranspositionDetector(); - } - - @Override - public void collate(VariantGraph graph, Iterable tokens) { - final Witness witness = StreamSupport.stream(tokens.spliterator(), false) - .findFirst() - .map(Token::getWitness) - .orElseThrow(() -> new IllegalArgumentException("Empty witness")); - - if (LOG.isLoggable(Level.FINER)) { - LOG.log(Level.FINER, "{0} + {1}: {2} vs. {3}", new Object[] { graph, witness, graph.vertices(), tokens }); - } - - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "{0} + {1}: Match and link tokens", new Object[] { graph, witness }); - } - tokenLinks = tokenLinker.link(graph, tokens, comparator); - - if (LOG.isLoggable(Level.FINER)) { - for (Map.Entry tokenLink : tokenLinks.entrySet()) { - LOG.log(Level.FINER, "{0} + {1}: Token match: {2} = {3}", new Object[] { graph, witness, tokenLink.getValue(), tokenLink.getKey() }); - } - } - - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "{0} + {1}: Detect phrase matches", new Object[] { graph, witness }); - } - phraseMatches = phraseMatchDetector.detect(tokenLinks, graph, tokens); - if (LOG.isLoggable(Level.FINER)) { - for (List phraseMatch : phraseMatches) { - LOG.log(Level.FINER, "{0} + {1}: Phrase match: {2}", new Object[] { graph, witness, phraseMatch }); - } - } - - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "{0} + {1}: Detect transpositions", new Object[] { graph, witness }); - } - transpositions = transpositionDetector.detect(phraseMatches, graph); - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "transpositions:{0}", transpositions); - } - - if (LOG.isLoggable(Level.FINER)) { - for (List transposition : transpositions) { - LOG.log(Level.FINER, "{0} + {1}: 
Transposition: {2}", new Object[] { graph, witness, transposition }); - } - } + private final Comparator comparator; + private final TokenLinker tokenLinker; + private final PhraseMatchDetector phraseMatchDetector; + private final TranspositionDetector transpositionDetector; + private Map tokenLinks; + private List> phraseMatches; + private List> transpositions; + private Map alignments; + private boolean mergeTranspositions = false; - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "{0} + {1}: Determine aligned tokens by filtering transpositions", new Object[] { graph, witness }); - } - alignments = new HashMap<>(); - for (List phrase : phraseMatches) { - for (Match match : phrase) { - alignments.put(match.token, match.vertex); - } - } - - for (List transposedPhrase : transpositions) { - for (Match match : transposedPhrase) { - alignments.remove(match.token); - } - } - if (LOG.isLoggable(Level.FINER)) { - for (Map.Entry alignment : alignments.entrySet()) { - LOG.log(Level.FINER, "{0} + {1}: Alignment: {2} = {3}", new Object[] { graph, witness, alignment.getValue(), alignment.getKey() }); - } + public DekkerAlgorithm(Comparator comparator) { + this(comparator, new MatchTableLinker()); } - merge(graph, tokens, alignments); - - // we filter out small transposed phrases over large distances - List> falseTranspositions = new ArrayList<>(); - - VariantGraphRanking ranking = VariantGraphRanking.of(graph); - - for (List transposedPhrase : transpositions) { - Match match = transposedPhrase.get(0); - VariantGraph.Vertex v1 = witnessTokenVertices.get(match.token); - VariantGraph.Vertex v2 = match.vertex; - int distance = Math.abs(ranking.apply(v1)-ranking.apply(v2))-1; - if (distance > transposedPhrase.size()*3) { - falseTranspositions.add(transposedPhrase); - } + public DekkerAlgorithm(Comparator comparator, TokenLinker tokenLinker) { + this.comparator = comparator; + this.tokenLinker = tokenLinker; + this.phraseMatchDetector = new PhraseMatchDetector(); + 
this.transpositionDetector = new TranspositionDetector(); } - for (List transposition : falseTranspositions) { - transpositions.remove(transposition); - } + @Override + public void collate(VariantGraph graph, Iterable tokens) { + final Witness witness = StreamSupport.stream(tokens.spliterator(), false) + .findFirst() + .map(Token::getWitness) + .orElseThrow(() -> new IllegalArgumentException("Empty witness")); - if (mergeTranspositions) { - mergeTranspositions(graph, transpositions); - } - - if (LOG.isLoggable(Level.FINER)) { - LOG.log(Level.FINER, "!{0}: {1}", new Object[] {graph, StreamSupport.stream(graph.vertices().spliterator(), false).map(Object::toString).collect(Collectors.joining(", ")) }); + if (LOG.isLoggable(Level.FINER)) { + LOG.log(Level.FINER, "{0} + {1}: {2} vs. {3}", new Object[]{graph, witness, graph.vertices(), tokens}); + } + + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "{0} + {1}: Match and link tokens", new Object[]{graph, witness}); + } + tokenLinks = tokenLinker.link(graph, tokens, comparator); + + if (LOG.isLoggable(Level.FINER)) { + for (Map.Entry tokenLink : tokenLinks.entrySet()) { + LOG.log(Level.FINER, "{0} + {1}: Token match: {2} = {3}", new Object[]{graph, witness, tokenLink.getValue(), tokenLink.getKey()}); + } + } + + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "{0} + {1}: Detect phrase matches", new Object[]{graph, witness}); + } + phraseMatches = phraseMatchDetector.detect(tokenLinks, graph, tokens); + if (LOG.isLoggable(Level.FINER)) { + for (List phraseMatch : phraseMatches) { + LOG.log(Level.FINER, "{0} + {1}: Phrase match: {2}", new Object[]{graph, witness, phraseMatch}); + } + } + + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "{0} + {1}: Detect transpositions", new Object[]{graph, witness}); + } + transpositions = transpositionDetector.detect(phraseMatches, graph); + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "transpositions:{0}", transpositions); + } + + if 
(LOG.isLoggable(Level.FINER)) { + for (List transposition : transpositions) { + LOG.log(Level.FINER, "{0} + {1}: Transposition: {2}", new Object[]{graph, witness, transposition}); + } + } + + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "{0} + {1}: Determine aligned tokens by filtering transpositions", new Object[]{graph, witness}); + } + alignments = new HashMap<>(); + for (List phrase : phraseMatches) { + for (Match match : phrase) { + alignments.put(match.token, match.vertex); + } + } + + for (List transposedPhrase : transpositions) { + for (Match match : transposedPhrase) { + alignments.remove(match.token); + } + } + if (LOG.isLoggable(Level.FINER)) { + for (Map.Entry alignment : alignments.entrySet()) { + LOG.log(Level.FINER, "{0} + {1}: Alignment: {2} = {3}", new Object[]{graph, witness, alignment.getValue(), alignment.getKey()}); + } + } + + merge(graph, tokens, alignments); + + // we filter out small transposed phrases over large distances + List> falseTranspositions = new ArrayList<>(); + + VariantGraphRanking ranking = VariantGraphRanking.of(graph); + + for (List transposedPhrase : transpositions) { + Match match = transposedPhrase.get(0); + VariantGraph.Vertex v1 = witnessTokenVertices.get(match.token); + VariantGraph.Vertex v2 = match.vertex; + int distance = Math.abs(ranking.apply(v1) - ranking.apply(v2)) - 1; + if (distance > transposedPhrase.size() * 3) { + falseTranspositions.add(transposedPhrase); + } + } + + for (List transposition : falseTranspositions) { + transpositions.remove(transposition); + } + + if (mergeTranspositions) { + mergeTranspositions(graph, transpositions); + } + + if (LOG.isLoggable(Level.FINER)) { + LOG.log(Level.FINER, "!{0}: {1}", new Object[]{graph, StreamSupport.stream(graph.vertices().spliterator(), false).map(Object::toString).collect(Collectors.joining(", "))}); + } + } + + public Map getTokenLinks() { + return tokenLinks; + } + + public List> getPhraseMatches() { + return 
Collections.unmodifiableList(phraseMatches); + } + + public List> getTranspositions() { + return Collections.unmodifiableList(transpositions); + } + + public Map getAlignments() { + return Collections.unmodifiableMap(alignments); + } + + /* + * This check disables transposition rendering in the variant + * graph when the variant graph contains more then two witnesses. + * Transposition detection is done in a progressive manner + * (witness by witness). When viewing the resulting graph + * containing the variation for all witnesses + * the detected transpositions can look strange, since segments + * may have split into smaller or larger parts. + */ + public void setMergeTranspositions(boolean b) { + this.mergeTranspositions = b; } - } - - public Map getTokenLinks() { - return tokenLinks; - } - - public List> getPhraseMatches() { - return Collections.unmodifiableList(phraseMatches); - } - - public List> getTranspositions() { - return Collections.unmodifiableList(transpositions); - } - - public Map getAlignments() { - return Collections.unmodifiableMap(alignments); - } - - /* - * This check disables transposition rendering in the variant - * graph when the variant graph contains more then two witnesses. - * Transposition detection is done in a progressive manner - * (witness by witness). When viewing the resulting graph - * containing the variation for all witnesses - * the detected transpositions can look strange, since segments - * may have split into smaller or larger parts. 
- */ - public void setMergeTranspositions(boolean b) { - this.mergeTranspositions = b; - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java index 3642266ce..dd4c0cd2d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java @@ -34,47 +34,47 @@ * @author Gregor Middell */ public class Match { - public final VariantGraph.Vertex vertex; - public final Token token; + public final VariantGraph.Vertex vertex; + public final Token token; - public Match(VariantGraph.Vertex vertex, Token token) { - this.vertex = vertex; - this.token = token; - } + public Match(VariantGraph.Vertex vertex, Token token) { + this.vertex = vertex; + this.token = token; + } - @Override - public int hashCode() { - return Objects.hash(vertex, token); - } + @Override + public int hashCode() { + return Objects.hash(vertex, token); + } - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Match) { - Match other = (Match) obj; - return vertex.equals(other.vertex) && token.equals(other.token); + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof Match) { + Match other = (Match) obj; + return vertex.equals(other.vertex) && token.equals(other.token); + } + return super.equals(obj); } - return super.equals(obj); - } - @Override - public String toString() { - return "{" + vertex + "; " + token + "}"; - } + @Override + public String toString() { + return "{" + vertex + "; " + token + "}"; + } - public static List createPhraseMatch(List vertices, List tokens) { - final List phraseMatch = new ArrayList<>(vertices.size()); - final Iterator vertexIt = vertices.iterator(); - final Iterator tokenIt = tokens.iterator(); - while (vertexIt.hasNext() && tokenIt.hasNext()) { - phraseMatch.add(new Match(vertexIt.next(), tokenIt.next())); + public 
static List createPhraseMatch(List vertices, List tokens) { + final List phraseMatch = new ArrayList<>(vertices.size()); + final Iterator vertexIt = vertices.iterator(); + final Iterator tokenIt = tokens.iterator(); + while (vertexIt.hasNext() && tokenIt.hasNext()) { + phraseMatch.add(new Match(vertexIt.next(), tokenIt.next())); + } + return phraseMatch; } - return phraseMatch; - } - public static Predicate createNoBoundaryMatchPredicate(final VariantGraph graph) { - return input -> !input.vertex.equals(graph.getStart()) && !input.vertex.equals(graph.getEnd()); - } + public static Predicate createNoBoundaryMatchPredicate(final VariantGraph graph) { + return input -> !input.vertex.equals(graph.getStart()) && !input.vertex.equals(graph.getEnd()); + } - public static final Function, List> PHRASE_MATCH_TO_TOKENS = input -> input.stream().map(m -> m.token).collect(Collectors.toList()); + public static final Function, List> PHRASE_MATCH_TO_TOKENS = input -> input.stream().map(m -> m.token).collect(Collectors.toList()); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java index 5ea3adb37..e64530349 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java @@ -1,77 +1,76 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ -package eu.interedition.collatex.dekker; - -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; - -/** - * - * @author Ronald Haentjens Dekker - * @author Bram Buitendijk - */ -public class PhraseMatchDetector { - - public List> detect(Map linkedTokens, VariantGraph base, Iterable tokens) { - List> phraseMatches = new ArrayList<>(); - List basePhrase = new ArrayList<>(); - List witnessPhrase = new ArrayList<>(); - VariantGraph.Vertex previous = base.getStart(); - - for (Token token : tokens) { - if (!linkedTokens.containsKey(token)) { - addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); - continue; - } - VariantGraph.Vertex baseVertex = linkedTokens.get(token); - // requirements: - // - previous and base vertex should have the same witnesses - // - previous and base vertex should either be in the same transposition(s) or both aren't in any transpositions - // - there should be a directed edge between previous and base vertex - // - there may not be a longer path between previous and base vertex - boolean sameTranspositions = new HashSet<>(previous.transpositions()).equals(new HashSet<>(baseVertex.transpositions())); - boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses()); - boolean directedEdge = previous.outgoing().containsKey(baseVertex); - boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (previous.outgoing().size() == 1 || baseVertex.incoming().size() == 1); - if (!isNear) { - addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); - } - basePhrase.add(baseVertex); - witnessPhrase.add(token); - previous = baseVertex; - } - if (!basePhrase.isEmpty()) { - 
phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase)); - } - return phraseMatches; - } - - private void addNewPhraseMatchAndClearBuffer(List> phraseMatches, List basePhrase, List witnessPhrase) { - if (!basePhrase.isEmpty()) { - phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase)); - basePhrase.clear(); - witnessPhrase.clear(); - } - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . 
+ */ +package eu.interedition.collatex.dekker; + +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +/** + * @author Ronald Haentjens Dekker + * @author Bram Buitendijk + */ +public class PhraseMatchDetector { + + public List> detect(Map linkedTokens, VariantGraph base, Iterable tokens) { + List> phraseMatches = new ArrayList<>(); + List basePhrase = new ArrayList<>(); + List witnessPhrase = new ArrayList<>(); + VariantGraph.Vertex previous = base.getStart(); + + for (Token token : tokens) { + if (!linkedTokens.containsKey(token)) { + addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); + continue; + } + VariantGraph.Vertex baseVertex = linkedTokens.get(token); + // requirements: + // - previous and base vertex should have the same witnesses + // - previous and base vertex should either be in the same transposition(s) or both aren't in any transpositions + // - there should be a directed edge between previous and base vertex + // - there may not be a longer path between previous and base vertex + boolean sameTranspositions = new HashSet<>(previous.transpositions()).equals(new HashSet<>(baseVertex.transpositions())); + boolean sameWitnesses = previous.witnesses().equals(baseVertex.witnesses()); + boolean directedEdge = previous.outgoing().containsKey(baseVertex); + boolean isNear = sameTranspositions && sameWitnesses && directedEdge && (previous.outgoing().size() == 1 || baseVertex.incoming().size() == 1); + if (!isNear) { + addNewPhraseMatchAndClearBuffer(phraseMatches, basePhrase, witnessPhrase); + } + basePhrase.add(baseVertex); + witnessPhrase.add(token); + previous = baseVertex; + } + if (!basePhrase.isEmpty()) { + phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase)); + } + return phraseMatches; + } + + private void addNewPhraseMatchAndClearBuffer(List> phraseMatches, List 
basePhrase, List witnessPhrase) { + if (!basePhrase.isEmpty()) { + phraseMatches.add(Match.createPhraseMatch(basePhrase, witnessPhrase)); + basePhrase.clear(); + witnessPhrase.clear(); + } + } +} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java index 9de49acea..7c26f836b 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java @@ -1,33 +1,33 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.dekker; - -import java.util.Comparator; -import java.util.Map; - -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; - - -public interface TokenLinker { - - Map link(VariantGraph base, Iterable witness, Comparator comparator); - +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex.dekker; + +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; + +import java.util.Comparator; +import java.util.Map; + + +public interface TokenLinker { + + Map link(VariantGraph base, Iterable witness, Comparator comparator); + } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java index 8906614b2..6371ec027 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java @@ -1,191 +1,190 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ -package eu.interedition.collatex.dekker; - -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.util.VariantGraphRanking; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * - * @author Ronald Haentjens Dekker - */ -public class TranspositionDetector { - private Map, Integer> phraseMatchToIndex; - - public List> detect(final List> phraseMatches, VariantGraph base) { - // if there are no phrase matches it is not possible - // to detect transpositions, return an empty list - if (phraseMatches.isEmpty()) { - return new ArrayList<>(); - } - - /* - * We order the phrase matches in the topological order - * of the graph (called rank). When the rank is equal - * for two phrase matches, the witness order is used - * to differentiate. 
- */ - final VariantGraphRanking ranking = rankTheGraph(phraseMatches, base); - - Comparator> comp = (pm1, pm2) -> { - int rank1 = ranking.apply(pm1.get(0).vertex); - int rank2 = ranking.apply(pm2.get(0).vertex); - int difference = rank1 - rank2; - if (difference != 0) { - return difference; - } - int index1 = phraseMatches.indexOf(pm1); - int index2 = phraseMatches.indexOf(pm2); - return index1 - index2; - }; - - List> phraseMatchesGraphOrder = new ArrayList<>(phraseMatches); - Collections.sort(phraseMatchesGraphOrder, comp); - - // Map 1 - phraseMatchToIndex = new HashMap<>(); - for (int i = 0; i < phraseMatchesGraphOrder.size(); i++) { - phraseMatchToIndex.put(phraseMatchesGraphOrder.get(i), i); - } - - /* - * We calculate the index for all the phrase matches - * First in witness order, then in graph order - */ - List phraseMatchesGraphIndex = new ArrayList<>(); - List phraseMatchesWitnessIndex = new ArrayList<>(); - - for (int i=0; i < phraseMatches.size(); i++) { - phraseMatchesGraphIndex.add(i); - } - - for (List phraseMatch : phraseMatches) { - phraseMatchesWitnessIndex.add(phraseMatchToIndex.get(phraseMatch)); - } - - /* - * Initialize result variables - */ - List> nonTransposedPhraseMatches = new ArrayList<>(phraseMatches); - List> transpositions = new ArrayList<>(); - - /* - * loop here until the maximum distance == 0 - */ - while (true) { - // Map 2 - final Map, Integer> phraseMatchToDistanceMap = new LinkedHashMap<>(); - for (int i=0; i < nonTransposedPhraseMatches.size(); i++) { - Integer graphIndex = phraseMatchesGraphIndex.get(i); - Integer witnessIndex = phraseMatchesWitnessIndex.get(i); - Integer distance = Math.abs(graphIndex - witnessIndex); - List phraseMatch = nonTransposedPhraseMatches.get(i); - phraseMatchToDistanceMap.put(phraseMatch, distance); - } - - List distanceList = new ArrayList<>(phraseMatchToDistanceMap.values()); - - if (distanceList.isEmpty()||Collections.max(distanceList) == 0) { - break; - } - - // sort phrase matches on 
distance, size - // TODO: order by 3) graph rank? - // TODO: I have not yet found evidence/a use case that - // TODO: indicates that it is needed. - Comparator> comp2 = (pm1, pm2) -> { - // first order by distance - int distance1 = phraseMatchToDistanceMap.get(pm1); - int distance2 = phraseMatchToDistanceMap.get(pm2); - int difference = distance2 - distance1; - if (difference != 0) { - return difference; - } - // second order by size - // return pm1.size() - pm2.size(); - return determineSize(pm1) - determineSize(pm2); - }; - - List> sortedPhraseMatches = new ArrayList<>(nonTransposedPhraseMatches); - Collections.sort(sortedPhraseMatches, comp2); - - List transposedPhrase = sortedPhraseMatches.remove(0); - - Integer transposedIndex = phraseMatchToIndex.get(transposedPhrase); - Integer graphIndex = phraseMatchesGraphIndex.indexOf(transposedIndex); - Integer transposedWithIndex = phraseMatchesWitnessIndex.get(graphIndex); - List linkedTransposedPhrase = phraseMatchesGraphOrder.get(transposedWithIndex); - - addTransposition(phraseMatchesWitnessIndex, phraseMatchesGraphIndex, nonTransposedPhraseMatches, transpositions, transposedPhrase); - - Integer distance = phraseMatchToDistanceMap.get(transposedPhrase); - if (distance == phraseMatchToDistanceMap.get(linkedTransposedPhrase) && distance > 1) { - addTransposition(phraseMatchesWitnessIndex, phraseMatchesGraphIndex, nonTransposedPhraseMatches, transpositions, linkedTransposedPhrase); - } - } - return transpositions; - } - - private void addTransposition(List phraseWitnessRanks, List phraseGraphRanks, List> nonTransposedPhraseMatches, List> transpositions, List transposedPhrase) { - Integer indexToRemove = phraseMatchToIndex.get(transposedPhrase); - nonTransposedPhraseMatches.remove(transposedPhrase); - transpositions.add(transposedPhrase); - phraseGraphRanks.remove(indexToRemove); - phraseWitnessRanks.remove(indexToRemove); - } - - private VariantGraphRanking rankTheGraph(List> phraseMatches, VariantGraph base) { - // 
rank the variant graph - Set matchedVertices = new HashSet<>(); - for (List phraseMatch : phraseMatches) { - matchedVertices.add(phraseMatch.get(0).vertex); - } - final VariantGraphRanking ranking = VariantGraphRanking.ofOnlyCertainVertices(base, matchedVertices); - return ranking; - } - - /* - * in case of an a, b / b, a transposition we have to determine whether a or b - * stays put. the phrase with the most character stays still if the tokens are - * not simple tokens the phrase with the most tokens stays put - */ - private int determineSize(List t) { - Match firstMatch = t.get(0); - if (!(firstMatch.token instanceof SimpleToken)) { - return t.size(); - } - int charLength = 0; - for (Match m : t) { - SimpleToken token = (SimpleToken) m.token; - charLength += token.getNormalized().length(); - } - return charLength; - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . 
+ */ +package eu.interedition.collatex.dekker; + +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.simple.SimpleToken; +import eu.interedition.collatex.util.VariantGraphRanking; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @author Ronald Haentjens Dekker + */ +public class TranspositionDetector { + private Map, Integer> phraseMatchToIndex; + + public List> detect(final List> phraseMatches, VariantGraph base) { + // if there are no phrase matches it is not possible + // to detect transpositions, return an empty list + if (phraseMatches.isEmpty()) { + return new ArrayList<>(); + } + + /* + * We order the phrase matches in the topological order + * of the graph (called rank). When the rank is equal + * for two phrase matches, the witness order is used + * to differentiate. 
+ */ + final VariantGraphRanking ranking = rankTheGraph(phraseMatches, base); + + Comparator> comp = (pm1, pm2) -> { + int rank1 = ranking.apply(pm1.get(0).vertex); + int rank2 = ranking.apply(pm2.get(0).vertex); + int difference = rank1 - rank2; + if (difference != 0) { + return difference; + } + int index1 = phraseMatches.indexOf(pm1); + int index2 = phraseMatches.indexOf(pm2); + return index1 - index2; + }; + + List> phraseMatchesGraphOrder = new ArrayList<>(phraseMatches); + Collections.sort(phraseMatchesGraphOrder, comp); + + // Map 1 + phraseMatchToIndex = new HashMap<>(); + for (int i = 0; i < phraseMatchesGraphOrder.size(); i++) { + phraseMatchToIndex.put(phraseMatchesGraphOrder.get(i), i); + } + + /* + * We calculate the index for all the phrase matches + * First in witness order, then in graph order + */ + List phraseMatchesGraphIndex = new ArrayList<>(); + List phraseMatchesWitnessIndex = new ArrayList<>(); + + for (int i = 0; i < phraseMatches.size(); i++) { + phraseMatchesGraphIndex.add(i); + } + + for (List phraseMatch : phraseMatches) { + phraseMatchesWitnessIndex.add(phraseMatchToIndex.get(phraseMatch)); + } + + /* + * Initialize result variables + */ + List> nonTransposedPhraseMatches = new ArrayList<>(phraseMatches); + List> transpositions = new ArrayList<>(); + + /* + * loop here until the maximum distance == 0 + */ + while (true) { + // Map 2 + final Map, Integer> phraseMatchToDistanceMap = new LinkedHashMap<>(); + for (int i = 0; i < nonTransposedPhraseMatches.size(); i++) { + Integer graphIndex = phraseMatchesGraphIndex.get(i); + Integer witnessIndex = phraseMatchesWitnessIndex.get(i); + Integer distance = Math.abs(graphIndex - witnessIndex); + List phraseMatch = nonTransposedPhraseMatches.get(i); + phraseMatchToDistanceMap.put(phraseMatch, distance); + } + + List distanceList = new ArrayList<>(phraseMatchToDistanceMap.values()); + + if (distanceList.isEmpty() || Collections.max(distanceList) == 0) { + break; + } + + // sort phrase matches on 
distance, size + // TODO: order by 3) graph rank? + // TODO: I have not yet found evidence/a use case that + // TODO: indicates that it is needed. + Comparator> comp2 = (pm1, pm2) -> { + // first order by distance + int distance1 = phraseMatchToDistanceMap.get(pm1); + int distance2 = phraseMatchToDistanceMap.get(pm2); + int difference = distance2 - distance1; + if (difference != 0) { + return difference; + } + // second order by size + // return pm1.size() - pm2.size(); + return determineSize(pm1) - determineSize(pm2); + }; + + List> sortedPhraseMatches = new ArrayList<>(nonTransposedPhraseMatches); + Collections.sort(sortedPhraseMatches, comp2); + + List transposedPhrase = sortedPhraseMatches.remove(0); + + Integer transposedIndex = phraseMatchToIndex.get(transposedPhrase); + Integer graphIndex = phraseMatchesGraphIndex.indexOf(transposedIndex); + Integer transposedWithIndex = phraseMatchesWitnessIndex.get(graphIndex); + List linkedTransposedPhrase = phraseMatchesGraphOrder.get(transposedWithIndex); + + addTransposition(phraseMatchesWitnessIndex, phraseMatchesGraphIndex, nonTransposedPhraseMatches, transpositions, transposedPhrase); + + Integer distance = phraseMatchToDistanceMap.get(transposedPhrase); + if (distance == phraseMatchToDistanceMap.get(linkedTransposedPhrase) && distance > 1) { + addTransposition(phraseMatchesWitnessIndex, phraseMatchesGraphIndex, nonTransposedPhraseMatches, transpositions, linkedTransposedPhrase); + } + } + return transpositions; + } + + private void addTransposition(List phraseWitnessRanks, List phraseGraphRanks, List> nonTransposedPhraseMatches, List> transpositions, List transposedPhrase) { + Integer indexToRemove = phraseMatchToIndex.get(transposedPhrase); + nonTransposedPhraseMatches.remove(transposedPhrase); + transpositions.add(transposedPhrase); + phraseGraphRanks.remove(indexToRemove); + phraseWitnessRanks.remove(indexToRemove); + } + + private VariantGraphRanking rankTheGraph(List> phraseMatches, VariantGraph base) { + // 
rank the variant graph + Set matchedVertices = new HashSet<>(); + for (List phraseMatch : phraseMatches) { + matchedVertices.add(phraseMatch.get(0).vertex); + } + final VariantGraphRanking ranking = VariantGraphRanking.ofOnlyCertainVertices(base, matchedVertices); + return ranking; + } + + /* + * in case of an a, b / b, a transposition we have to determine whether a or b + * stays put. the phrase with the most character stays still if the tokens are + * not simple tokens the phrase with the most tokens stays put + */ + private int determineSize(List t) { + Match firstMatch = t.get(0); + if (!(firstMatch.token instanceof SimpleToken)) { + return t.size(); + } + int charLength = 0; + for (Match m : t) { + SimpleToken token = (SimpleToken) m.token; + charLength += token.getNormalized().length(); + } + return charLength; + } +} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java index f18d8e775..9b3762c20 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java @@ -26,25 +26,25 @@ */ public class Tuple { - public final T left; - public final T right; + public final T left; + public final T right; - public Tuple(T left, T right) { - this.left = left; - this.right = right; - } + public Tuple(T left, T right) { + this.left = left; + this.right = right; + } - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Tuple) { - final Tuple other = (Tuple) obj; - return (left.equals(other.left) || left.equals(other.right)) && (right.equals(other.right) || right.equals(other.left)); + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof Tuple) { + final Tuple other = (Tuple) obj; + return (left.equals(other.left) || left.equals(other.right)) && (right.equals(other.right) || right.equals(other.left)); + } + return 
super.equals(obj); } - return super.equals(obj); - } - @Override - public int hashCode() { - return Objects.hash(left, right); - } + @Override + public int hashCode() { + return Objects.hash(left, right); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java index eca8b8bcc..6d6b1a87f 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java @@ -34,147 +34,147 @@ * @author Ronald Haentjens Dekker */ public class Archipelago { - Logger LOG = Logger.getLogger(Archipelago.class.getName()); - - private final List islands; - private final Set islandvectors; - - public Archipelago() { - islands = new ArrayList<>(); - this.islandvectors = new HashSet<>(); // row - column, all islands should have direction 1, so this diff should be the same for all coordinates on the island. 
- } - - //copy constructor - public Archipelago(Archipelago orig) { - this.islands = new ArrayList<>(orig.islands); - this.islandvectors = new HashSet<>(orig.islandvectors); - } - - public Archipelago(Island isl) { - this(); - islands.add(isl); - } - - public void add(Island island) { - islands.add(island); - Coordinate leftEnd = island.getLeftEnd(); - islandvectors.add(leftEnd.row - leftEnd.column); - } - - public int size() { - return islands.size(); - } - - public Island get(int i) { - return islands.get(i); - } - - public boolean containsCoordinate(int row, int column) { - return Objects.equals(getCoordinatesMap().get(row), column); - } - - public List getIslands() { - return islands; - } - - protected void remove(int i) { - islands.remove(i); - } - - @Override - public String toString() { - String result = ""; - for (Island island : getIslands()) { - if (result.isEmpty()) - result = "[ " + island; - else - result += ", " + island; - } - result += " ]"; - return result; - } - - @Override - public int hashCode() { - return Objects.hashCode(islands); - } - - @Override - public boolean equals(Object object) { - if (object == null) return false; - if (object.getClass() != this.getClass()) return false; - if (((Archipelago) object).size() != this.size()) return false; - for (int i = 0; i < size(); i++) { - if (!((Archipelago) object).get(i).equals(get(i))) return false; - } - return true; - } - - private Map getCoordinatesMap() { - final Map map = new HashMap<>(); - for (final Island isl : islands) { - for (final Coordinate c : isl) { - map.put(c.getRow(), c.getColumn()); - } - } - return map; - } - - private double distance(Island isl1, Island isl2) { - double result = 0.0; - int isl1_L_x = isl1.getLeftEnd().column; - int isl1_L_y = isl1.getLeftEnd().row; - int isl1_R_x = isl1.getRightEnd().column; - int isl1_R_y = isl1.getRightEnd().row; - int isl2_L_x = isl2.getLeftEnd().column; - int isl2_L_y = isl2.getLeftEnd().row; - int isl2_R_x = isl2.getRightEnd().column; - 
int isl2_R_y = isl2.getRightEnd().row; - result = distance(isl1_L_x, isl1_L_y, isl2_L_x, isl2_L_y); - double d = distance(isl1_L_x, isl1_L_y, isl2_R_x, isl2_R_y); - if (d < result) result = d; - d = distance(isl1_R_x, isl1_R_y, isl2_L_x, isl2_L_y); - if (d < result) result = d; - d = distance(isl1_R_x, isl1_R_y, isl2_R_x, isl2_R_y); - if (d < result) result = d; - return result; - } - - private double distance(int a_x, int a_y, int b_x, int b_y) { - double result = 0.0; - result = Math.sqrt((a_x - b_x) * (a_x - b_x) + (a_y - b_y) * (a_y - b_y)); - return result; - } - - public Set getIslandVectors() { - return islandvectors; - } - - public double smallestDistance(Island isl) { - double minimum = 10000; - for (Island fixedIsland : getIslands()) { - minimum = Math.min(minimum, distance(isl, fixedIsland)); - } - return minimum; - } - - public double smallestDistanceToIdealLine(Island isl) { - double minimum = 10000; - Island closestIsland = null; - for (Island fixedIsland : getIslands()) { - double prev = minimum; - minimum = Math.min(minimum, distance(isl, fixedIsland)); - if (prev > minimum) { - closestIsland = fixedIsland; - } - } - if (closestIsland == null) { - return minimum; - } - Coordinate leftEnd = isl.getLeftEnd(); - int islandVector = leftEnd.row - leftEnd.column; - Coordinate leftEnd0 = closestIsland.getLeftEnd(); - int closestIslandVector = leftEnd0.row - leftEnd0.column; - return Math.abs(islandVector - closestIslandVector); - } + Logger LOG = Logger.getLogger(Archipelago.class.getName()); + + private final List islands; + private final Set islandvectors; + + public Archipelago() { + islands = new ArrayList<>(); + this.islandvectors = new HashSet<>(); // row - column, all islands should have direction 1, so this diff should be the same for all coordinates on the island. 
+ } + + //copy constructor + public Archipelago(Archipelago orig) { + this.islands = new ArrayList<>(orig.islands); + this.islandvectors = new HashSet<>(orig.islandvectors); + } + + public Archipelago(Island isl) { + this(); + islands.add(isl); + } + + public void add(Island island) { + islands.add(island); + Coordinate leftEnd = island.getLeftEnd(); + islandvectors.add(leftEnd.row - leftEnd.column); + } + + public int size() { + return islands.size(); + } + + public Island get(int i) { + return islands.get(i); + } + + public boolean containsCoordinate(int row, int column) { + return Objects.equals(getCoordinatesMap().get(row), column); + } + + public List getIslands() { + return islands; + } + + protected void remove(int i) { + islands.remove(i); + } + + @Override + public String toString() { + String result = ""; + for (Island island : getIslands()) { + if (result.isEmpty()) + result = "[ " + island; + else + result += ", " + island; + } + result += " ]"; + return result; + } + + @Override + public int hashCode() { + return Objects.hashCode(islands); + } + + @Override + public boolean equals(Object object) { + if (object == null) return false; + if (object.getClass() != this.getClass()) return false; + if (((Archipelago) object).size() != this.size()) return false; + for (int i = 0; i < size(); i++) { + if (!((Archipelago) object).get(i).equals(get(i))) return false; + } + return true; + } + + private Map getCoordinatesMap() { + final Map map = new HashMap<>(); + for (final Island isl : islands) { + for (final Coordinate c : isl) { + map.put(c.getRow(), c.getColumn()); + } + } + return map; + } + + private double distance(Island isl1, Island isl2) { + double result = 0.0; + int isl1_L_x = isl1.getLeftEnd().column; + int isl1_L_y = isl1.getLeftEnd().row; + int isl1_R_x = isl1.getRightEnd().column; + int isl1_R_y = isl1.getRightEnd().row; + int isl2_L_x = isl2.getLeftEnd().column; + int isl2_L_y = isl2.getLeftEnd().row; + int isl2_R_x = isl2.getRightEnd().column; + 
int isl2_R_y = isl2.getRightEnd().row; + result = distance(isl1_L_x, isl1_L_y, isl2_L_x, isl2_L_y); + double d = distance(isl1_L_x, isl1_L_y, isl2_R_x, isl2_R_y); + if (d < result) result = d; + d = distance(isl1_R_x, isl1_R_y, isl2_L_x, isl2_L_y); + if (d < result) result = d; + d = distance(isl1_R_x, isl1_R_y, isl2_R_x, isl2_R_y); + if (d < result) result = d; + return result; + } + + private double distance(int a_x, int a_y, int b_x, int b_y) { + double result = 0.0; + result = Math.sqrt((a_x - b_x) * (a_x - b_x) + (a_y - b_y) * (a_y - b_y)); + return result; + } + + public Set getIslandVectors() { + return islandvectors; + } + + public double smallestDistance(Island isl) { + double minimum = 10000; + for (Island fixedIsland : getIslands()) { + minimum = Math.min(minimum, distance(isl, fixedIsland)); + } + return minimum; + } + + public double smallestDistanceToIdealLine(Island isl) { + double minimum = 10000; + Island closestIsland = null; + for (Island fixedIsland : getIslands()) { + double prev = minimum; + minimum = Math.min(minimum, distance(isl, fixedIsland)); + if (prev > minimum) { + closestIsland = fixedIsland; + } + } + if (closestIsland == null) { + return minimum; + } + Coordinate leftEnd = isl.getLeftEnd(); + int islandVector = leftEnd.row - leftEnd.column; + Coordinate leftEnd0 = closestIsland.getLeftEnd(); + int closestIslandVector = leftEnd0.row - leftEnd0.column; + return Math.abs(islandVector - closestIslandVector); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java index d6f416487..be3779104 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java @@ -1,81 +1,81 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. 
- * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.dekker.matrix; - -import java.util.Objects; - -public class Coordinate implements Comparable { - int row; - int column; - - public Coordinate(int row, int column) { - this.column = column; - this.row = row; - } - - Coordinate(Coordinate other) { - this(other.row, other.column); - } - - public int getRow() { - return row; - } - - public int getColumn() { - return column; - } - - public boolean sameColumn(Coordinate c) { - return c.column == column; - } - - public boolean sameRow(Coordinate c) { - return c.row == row; - } - - public boolean bordersOn(Coordinate c) { - return (Math.abs(this.row - c.getRow()) == 1) && (Math.abs(this.column - c.getColumn()) == 1); - } - - @Override - public boolean equals(Object o) { - if (o != null & o instanceof Coordinate) { - final Coordinate c = (Coordinate) o; - return (this.row == c.getRow() && this.column == c.getColumn()); - } - return super.equals(o); - } - - @Override - public int hashCode() { - return Objects.hash(row, column); - } - - @Override - public int compareTo(Coordinate o) { - final int result = column - o.column; - return (result == 0 ? row - o.row : result); - } - - @Override - public String toString() { - return "(" + row + "," + column + ")"; - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. 
+ * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex.dekker.matrix; + +import java.util.Objects; + +public class Coordinate implements Comparable { + int row; + int column; + + public Coordinate(int row, int column) { + this.column = column; + this.row = row; + } + + Coordinate(Coordinate other) { + this(other.row, other.column); + } + + public int getRow() { + return row; + } + + public int getColumn() { + return column; + } + + public boolean sameColumn(Coordinate c) { + return c.column == column; + } + + public boolean sameRow(Coordinate c) { + return c.row == row; + } + + public boolean bordersOn(Coordinate c) { + return (Math.abs(this.row - c.getRow()) == 1) && (Math.abs(this.column - c.getColumn()) == 1); + } + + @Override + public boolean equals(Object o) { + if (o != null & o instanceof Coordinate) { + final Coordinate c = (Coordinate) o; + return (this.row == c.getRow() && this.column == c.getColumn()); + } + return super.equals(o); + } + + @Override + public int hashCode() { + return Objects.hash(row, column); + } + + @Override + public int compareTo(Coordinate o) { + final int result = column - o.column; + return (result == 0 ? 
row - o.row : result); + } + + @Override + public String toString() { + return "(" + row + "," + column + ")"; + } +} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java index fac24273b..94cc8b0ea 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java @@ -1,232 +1,233 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.dekker.matrix; - -import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * A DirectedIsland is a collections of Coordinates all on the same - * diagonal. The direction of this diagonal can be -1, 0, or 1. - * The zero is for a DirectedIsland of only one Coordinate. - * Directions 1 and -1 examples - * Coordinates (0,0) (1,1) have Direction 1 - * Coordinates (1,1) (2,1) have Direction -1 - * I.e. 
if the row-coordinate gets larger and the col-coordinate also, the - * direction is 1 (positive) else it is -1 (negative) - */ -public class Island implements Iterable, Comparable { - - private int direction = 0; - private final List islandCoordinates = new ArrayList<>(); - - public Island() {} - - public Island(Island other) { - for (Coordinate c : other.islandCoordinates) { - add(new Coordinate(c)); - } - } - - public Island(Coordinate first, Coordinate last) { - add(first); - Coordinate newCoordinate = first; - while (!newCoordinate.equals(last)) { - newCoordinate = new Coordinate(newCoordinate.getRow() + 1, newCoordinate.getColumn() + 1); - // LOG.debug("{}", newCoordinate); - add(newCoordinate); - } - } - - public boolean add(Coordinate coordinate) { - boolean result = false; - if (islandCoordinates.isEmpty()) { - result = islandCoordinates.add(coordinate); - } else if (!contains(coordinate) && neighbour(coordinate)) { - if (direction == 0) { - Coordinate existing = islandCoordinates.get(0); - direction = (existing.row - coordinate.row) / (existing.column - coordinate.column); - result = islandCoordinates.add(coordinate); - } else { - Coordinate existing = islandCoordinates.get(0); - if (existing.column != coordinate.column) { - int new_direction = (existing.row - coordinate.row) / (existing.column - coordinate.column); - if (new_direction == direction) result = islandCoordinates.add(coordinate); - } - } - } - return result; - } - - public int direction() { - return direction; - } - - public Island removePoints(Island di) { - Island result = new Island(this); - for (Coordinate c : di) { - result.removeSameColOrRow(c); - } - return result; - } - - public void removeCoordinate(Coordinate c) { - islandCoordinates.remove(c); - } - - public Coordinate getCoorOnRow(int row) { - for (Coordinate coor : islandCoordinates) { - if (coor.getRow() == row) return coor; - } - return null; - } - - public Coordinate getCoorOnCol(int col) { - for (Coordinate coor : 
islandCoordinates) { - if (coor.getColumn() == col) return coor; - } - return null; - } - - public void merge(Island di) { - for (Coordinate c : di) { - add(c); - } - } - - /** - * Two islands are competitors if there is a horizontal or - * vertical line which goes through both islands - */ - public boolean isCompetitor(Island isl) { - for (Coordinate c : isl) { - for (Coordinate d : islandCoordinates) { - if (c.sameColumn(d) || c.sameRow(d)) return true; - } - } - return false; - } - - public boolean contains(Coordinate c) { - return islandCoordinates.contains(c); - } - - public boolean neighbour(Coordinate c) { - if (contains(c)) return false; - for (Coordinate islC : islandCoordinates) { - if (c.bordersOn(islC)) { - return true; - } - } - return false; - } - - public Coordinate getLeftEnd() { - Coordinate coor = islandCoordinates.get(0); - for (Coordinate c : islandCoordinates) { - if (c.column < coor.column) coor = c; - } - return coor; - } - - public Coordinate getRightEnd() { - Coordinate coor = islandCoordinates.get(0); - for (Coordinate c : islandCoordinates) { - if (c.column > coor.column) coor = c; - } - return coor; - } - - public boolean overlap(Island isl) { - for (Coordinate c : isl) { - if (contains(c) || neighbour(c)) return true; - } - return false; - } - - public int size() { - return islandCoordinates.size(); - } - - public void clear() { - islandCoordinates.clear(); - } - - public int value() { - final int size = size(); - return (size < 2 ? 
size : direction + size * size); - } - - protected boolean removeSameColOrRow(Coordinate c) { - ArrayList remove = new ArrayList<>(); - for (Coordinate coor : islandCoordinates) { - if (coor.sameColumn(c) || coor.sameRow(c)) { - remove.add(coor); - } - } - if (remove.isEmpty()) return false; - for (Coordinate coor : remove) { - islandCoordinates.remove(coor); - } - return true; - } - - @Override - public Iterator iterator() { - return Collections.unmodifiableList(islandCoordinates).iterator(); - } - - @Override - public int hashCode() { - return islandCoordinates.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (obj == null) return false; - - if (!obj.getClass().equals(Island.class)) return false; - - Island isl = (Island) obj; - if (isl.size() != size()) return false; - - boolean result = true; - for (Coordinate c : isl) { - result &= this.contains(c); - } - return result; - } - - @Override - public String toString() { - if (islandCoordinates.isEmpty()) { - throw new RuntimeException("Unexpected situation: island coordinates empty!"); - } - return MessageFormat.format("Island ({0}-{1}) size: {2} direction: {3}", islandCoordinates.get(0), islandCoordinates.get(islandCoordinates.size() - 1), size(), direction()); - } - - @Override - public int compareTo(Island i) { - return this.getLeftEnd().compareTo(i.getLeftEnd()); - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex.dekker.matrix; + +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * A DirectedIsland is a collections of Coordinates all on the same + * diagonal. The direction of this diagonal can be -1, 0, or 1. + * The zero is for a DirectedIsland of only one Coordinate. + * Directions 1 and -1 examples + * Coordinates (0,0) (1,1) have Direction 1 + * Coordinates (1,1) (2,1) have Direction -1 + * I.e. if the row-coordinate gets larger and the col-coordinate also, the + * direction is 1 (positive) else it is -1 (negative) + */ +public class Island implements Iterable, Comparable { + + private int direction = 0; + private final List islandCoordinates = new ArrayList<>(); + + public Island() { + } + + public Island(Island other) { + for (Coordinate c : other.islandCoordinates) { + add(new Coordinate(c)); + } + } + + public Island(Coordinate first, Coordinate last) { + add(first); + Coordinate newCoordinate = first; + while (!newCoordinate.equals(last)) { + newCoordinate = new Coordinate(newCoordinate.getRow() + 1, newCoordinate.getColumn() + 1); + // LOG.debug("{}", newCoordinate); + add(newCoordinate); + } + } + + public boolean add(Coordinate coordinate) { + boolean result = false; + if (islandCoordinates.isEmpty()) { + result = islandCoordinates.add(coordinate); + } else if (!contains(coordinate) && neighbour(coordinate)) { + if (direction == 0) { + Coordinate existing = islandCoordinates.get(0); + direction = (existing.row - coordinate.row) / (existing.column - coordinate.column); + result = islandCoordinates.add(coordinate); + } else { + Coordinate existing = islandCoordinates.get(0); + if (existing.column != coordinate.column) { + int new_direction = (existing.row - coordinate.row) / (existing.column - 
coordinate.column); + if (new_direction == direction) result = islandCoordinates.add(coordinate); + } + } + } + return result; + } + + public int direction() { + return direction; + } + + public Island removePoints(Island di) { + Island result = new Island(this); + for (Coordinate c : di) { + result.removeSameColOrRow(c); + } + return result; + } + + public void removeCoordinate(Coordinate c) { + islandCoordinates.remove(c); + } + + public Coordinate getCoorOnRow(int row) { + for (Coordinate coor : islandCoordinates) { + if (coor.getRow() == row) return coor; + } + return null; + } + + public Coordinate getCoorOnCol(int col) { + for (Coordinate coor : islandCoordinates) { + if (coor.getColumn() == col) return coor; + } + return null; + } + + public void merge(Island di) { + for (Coordinate c : di) { + add(c); + } + } + + /** + * Two islands are competitors if there is a horizontal or + * vertical line which goes through both islands + */ + public boolean isCompetitor(Island isl) { + for (Coordinate c : isl) { + for (Coordinate d : islandCoordinates) { + if (c.sameColumn(d) || c.sameRow(d)) return true; + } + } + return false; + } + + public boolean contains(Coordinate c) { + return islandCoordinates.contains(c); + } + + public boolean neighbour(Coordinate c) { + if (contains(c)) return false; + for (Coordinate islC : islandCoordinates) { + if (c.bordersOn(islC)) { + return true; + } + } + return false; + } + + public Coordinate getLeftEnd() { + Coordinate coor = islandCoordinates.get(0); + for (Coordinate c : islandCoordinates) { + if (c.column < coor.column) coor = c; + } + return coor; + } + + public Coordinate getRightEnd() { + Coordinate coor = islandCoordinates.get(0); + for (Coordinate c : islandCoordinates) { + if (c.column > coor.column) coor = c; + } + return coor; + } + + public boolean overlap(Island isl) { + for (Coordinate c : isl) { + if (contains(c) || neighbour(c)) return true; + } + return false; + } + + public int size() { + return 
islandCoordinates.size(); + } + + public void clear() { + islandCoordinates.clear(); + } + + public int value() { + final int size = size(); + return (size < 2 ? size : direction + size * size); + } + + protected boolean removeSameColOrRow(Coordinate c) { + ArrayList remove = new ArrayList<>(); + for (Coordinate coor : islandCoordinates) { + if (coor.sameColumn(c) || coor.sameRow(c)) { + remove.add(coor); + } + } + if (remove.isEmpty()) return false; + for (Coordinate coor : remove) { + islandCoordinates.remove(coor); + } + return true; + } + + @Override + public Iterator iterator() { + return Collections.unmodifiableList(islandCoordinates).iterator(); + } + + @Override + public int hashCode() { + return islandCoordinates.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) return false; + + if (!obj.getClass().equals(Island.class)) return false; + + Island isl = (Island) obj; + if (isl.size() != size()) return false; + + boolean result = true; + for (Coordinate c : isl) { + result &= this.contains(c); + } + return result; + } + + @Override + public String toString() { + if (islandCoordinates.isEmpty()) { + throw new RuntimeException("Unexpected situation: island coordinates empty!"); + } + return MessageFormat.format("Island ({0}-{1}) size: {2} direction: {3}", islandCoordinates.get(0), islandCoordinates.get(islandCoordinates.size() - 1), size(), direction()); + } + + @Override + public int compareTo(Island i) { + return this.getLeftEnd().compareTo(i.getLeftEnd()); + } +} diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java index bbe71b33d..805742618 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java @@ -1,6 +1,6 @@ package 
eu.interedition.collatex.dekker.matrix; public enum IslandCompetition { - CompetingIslandAndOnIdealIine, CompetingIsland, NonCompetingIsland + CompetingIslandAndOnIdealIine, CompetingIsland, NonCompetingIsland } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java index a29461e34..cd7e31edd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java @@ -1,156 +1,155 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.dekker.matrix; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.logging.Logger; - -/** - * - * @author Ronald Haentjens Dekker - * @author Bram Buitendijk - * @author Meindert Kroese - */ -public class IslandConflictResolver { - Logger LOG = Logger.getLogger(IslandConflictResolver.class.getName()); - // fixed islands contains all the islands that are selected for the final alignment - private final MatchTableSelection selection; - - //NOTE: outlierTranspositionLimit is ignored for now - public IslandConflictResolver(MatchTable table) { - selection = new MatchTableSelection(table); - } - - /* - * Create a non-conflicting version by simply taken all the islands that do - * not conflict with each other, largest first. - */ - public MatchTableSelection createNonConflictingVersion() { - List possibleIslands; - do { - possibleIslands = selection.getPossibleIslands(); - // check the possible islands of a certain size against each other. - if (possibleIslands.size() == 1) { - selection.addIsland(possibleIslands.get(0)); - } else if (possibleIslands.size() > 1) { - Map> analysis = analyzeConflictsBetweenPossibleIslands(possibleIslands); - resolveConflictsBySelectingPreferredIslands(selection, analysis); - } - } - while (!possibleIslands.isEmpty()); - return selection; - } - - /* - * This method analyzes the relationship between all the islands of the same - * size that have yet to be selected. They can compete with one another - * (choosing one locks out the other), some of them can be on the ideal line. 
- * - * Parameters: the size of the islands that you want to analyze - */ - public Map> analyzeConflictsBetweenPossibleIslands(List possibleIslands) { - Map> conflictMap = new HashMap<>(); - Set competingIslands = getCompetingIslands(possibleIslands); - for (Island island : competingIslands) { - if (selection.doesCandidateLayOnVectorOfCommittedIsland(island)) { - conflictMap.computeIfAbsent(IslandCompetition.CompetingIslandAndOnIdealIine, c -> new ArrayList<>()).add(island); - } else { - conflictMap.computeIfAbsent(IslandCompetition.CompetingIsland, c -> new ArrayList<>()).add(island); - } - } - for (Island island : getNonCompetingIslands(possibleIslands, competingIslands)) { - conflictMap.computeIfAbsent(IslandCompetition.NonCompetingIsland, c -> new ArrayList<>()).add(island); - } - return conflictMap; - } - - /* - * The preferred Islands are directly added to the result Archipelago - * If we want to - * re-factor this into a pull construction rather then a push construction - * we have to move this code out of this method and move it to the caller - * class - */ - private void resolveConflictsBySelectingPreferredIslands(MatchTableSelection selection, Map> islandConflictMap) { - // First select competing islands that are on the ideal line - LOG.fine("addBestOfCompeting with competingIslandsOnIdealLine"); - makeDistanceMap(islandConflictMap.getOrDefault(IslandCompetition.CompetingIslandAndOnIdealIine, Collections.emptyList())) - .values().stream() - .flatMap(List::stream).filter(ci1 -> selection.isIslandPossibleCandidate(ci1)) - .forEach(selection::addIsland); - - // Second select other competing islands - LOG.fine("addBestOfCompeting with otherCompetingIslands"); - makeDistanceMap(islandConflictMap.getOrDefault(IslandCompetition.CompetingIsland, Collections.emptyList())) - .values().stream() - .flatMap(List::stream).filter(ci -> selection.isIslandPossibleCandidate(ci)) - .forEach(selection::addIsland); - - // Third select non competing islands - LOG.fine("add non 
competing islands"); - islandConflictMap.getOrDefault(IslandCompetition.NonCompetingIsland, Collections.emptyList()) - .forEach(selection::addIsland); - } - - // TODO: This method calculates the distance from the ideal line - // TODO: by calculating the ratio x/y. - // TODO: but the ideal line may have moved (due to additions/deletions). - private SortedMap> makeDistanceMap(Collection competingIslands) { - SortedMap> distanceMap = new TreeMap<>(); - for (Island isl : competingIslands) { - Coordinate leftEnd = isl.getLeftEnd(); - double ratio = ((leftEnd.column+1) / (double) (leftEnd.row+1)); - double b2 = Math.log(ratio)/Math.log(2); - double distanceToIdealLine = Math.abs(b2); - distanceMap.computeIfAbsent(distanceToIdealLine, d -> new ArrayList<>()).add(isl); - } - return distanceMap; - } - - private Set getNonCompetingIslands(List islands, Set competingIslands) { - Set nonCompetingIslands = new HashSet<>(islands); - nonCompetingIslands.removeAll(competingIslands); - return nonCompetingIslands; - } - - private Set getCompetingIslands(List islands) { - Set competingIslands = new HashSet<>(); - for (int i = 0; i < islands.size(); i++) { - Island i1 = islands.get(i); - for (int j = 1; j < islands.size() - i; j++) { - Island i2 = islands.get(i + j); - if (i1.isCompetitor(i2)) { - competingIslands.add(i1); - competingIslands.add(i2); - } - } - } - return competingIslands; - } +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex.dekker.matrix; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.logging.Logger; + +/** + * @author Ronald Haentjens Dekker + * @author Bram Buitendijk + * @author Meindert Kroese + */ +public class IslandConflictResolver { + Logger LOG = Logger.getLogger(IslandConflictResolver.class.getName()); + // fixed islands contains all the islands that are selected for the final alignment + private final MatchTableSelection selection; + + //NOTE: outlierTranspositionLimit is ignored for now + public IslandConflictResolver(MatchTable table) { + selection = new MatchTableSelection(table); + } + + /* + * Create a non-conflicting version by simply taken all the islands that do + * not conflict with each other, largest first. + */ + public MatchTableSelection createNonConflictingVersion() { + List possibleIslands; + do { + possibleIslands = selection.getPossibleIslands(); + // check the possible islands of a certain size against each other. + if (possibleIslands.size() == 1) { + selection.addIsland(possibleIslands.get(0)); + } else if (possibleIslands.size() > 1) { + Map> analysis = analyzeConflictsBetweenPossibleIslands(possibleIslands); + resolveConflictsBySelectingPreferredIslands(selection, analysis); + } + } + while (!possibleIslands.isEmpty()); + return selection; + } + + /* + * This method analyzes the relationship between all the islands of the same + * size that have yet to be selected. They can compete with one another + * (choosing one locks out the other), some of them can be on the ideal line. 
+ * + * Parameters: the size of the islands that you want to analyze + */ + public Map> analyzeConflictsBetweenPossibleIslands(List possibleIslands) { + Map> conflictMap = new HashMap<>(); + Set competingIslands = getCompetingIslands(possibleIslands); + for (Island island : competingIslands) { + if (selection.doesCandidateLayOnVectorOfCommittedIsland(island)) { + conflictMap.computeIfAbsent(IslandCompetition.CompetingIslandAndOnIdealIine, c -> new ArrayList<>()).add(island); + } else { + conflictMap.computeIfAbsent(IslandCompetition.CompetingIsland, c -> new ArrayList<>()).add(island); + } + } + for (Island island : getNonCompetingIslands(possibleIslands, competingIslands)) { + conflictMap.computeIfAbsent(IslandCompetition.NonCompetingIsland, c -> new ArrayList<>()).add(island); + } + return conflictMap; + } + + /* + * The preferred Islands are directly added to the result Archipelago + * If we want to + * re-factor this into a pull construction rather then a push construction + * we have to move this code out of this method and move it to the caller + * class + */ + private void resolveConflictsBySelectingPreferredIslands(MatchTableSelection selection, Map> islandConflictMap) { + // First select competing islands that are on the ideal line + LOG.fine("addBestOfCompeting with competingIslandsOnIdealLine"); + makeDistanceMap(islandConflictMap.getOrDefault(IslandCompetition.CompetingIslandAndOnIdealIine, Collections.emptyList())) + .values().stream() + .flatMap(List::stream).filter(ci1 -> selection.isIslandPossibleCandidate(ci1)) + .forEach(selection::addIsland); + + // Second select other competing islands + LOG.fine("addBestOfCompeting with otherCompetingIslands"); + makeDistanceMap(islandConflictMap.getOrDefault(IslandCompetition.CompetingIsland, Collections.emptyList())) + .values().stream() + .flatMap(List::stream).filter(ci -> selection.isIslandPossibleCandidate(ci)) + .forEach(selection::addIsland); + + // Third select non competing islands + LOG.fine("add non 
competing islands"); + islandConflictMap.getOrDefault(IslandCompetition.NonCompetingIsland, Collections.emptyList()) + .forEach(selection::addIsland); + } + + // TODO: This method calculates the distance from the ideal line + // TODO: by calculating the ratio x/y. + // TODO: but the ideal line may have moved (due to additions/deletions). + private SortedMap> makeDistanceMap(Collection competingIslands) { + SortedMap> distanceMap = new TreeMap<>(); + for (Island isl : competingIslands) { + Coordinate leftEnd = isl.getLeftEnd(); + double ratio = ((leftEnd.column + 1) / (double) (leftEnd.row + 1)); + double b2 = Math.log(ratio) / Math.log(2); + double distanceToIdealLine = Math.abs(b2); + distanceMap.computeIfAbsent(distanceToIdealLine, d -> new ArrayList<>()).add(isl); + } + return distanceMap; + } + + private Set getNonCompetingIslands(List islands, Set competingIslands) { + Set nonCompetingIslands = new HashSet<>(islands); + nonCompetingIslands.removeAll(competingIslands); + return nonCompetingIslands; + } + + private Set getCompetingIslands(List islands) { + Set competingIslands = new HashSet<>(); + for (int i = 0; i < islands.size(); i++) { + Island i1 = islands.get(i); + for (int j = 1; j < islands.size() - i; j++) { + Island i2 = islands.get(i + j); + if (i1.isCompetitor(i2)) { + competingIslands.add(i1); + competingIslands.add(i2); + } + } + } + return competingIslands; + } } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java index 05a052e74..d674b4fcc 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java @@ -47,157 +47,156 @@ * However the API of this class looks very much like an array based one * since you can use tokenAt(row, column) or vertexAt(row, column). 
* This class is read only. -* Selections of vectors from the table can be made using the +* Selections of vectors from the table can be made using the * MatchTableSelection class. */ public class MatchTable { - private final MatchTableCell[][] table; - private final Token[] witness; - private final int[] ranks; - - // assumes default token comparator - public static MatchTable create(VariantGraph graph, Iterable witness) { - Comparator comparator = new EqualityTokenComparator(); - return MatchTable.create(graph, witness, comparator); - } - - public static MatchTable create(VariantGraph graph, Iterable witness, Comparator comparator) { - final VariantGraphRanking ranking = VariantGraphRanking.of(graph); - // step 1: build the MatchTable - MatchTable table = createEmptyTable(ranking, graph, witness); - // step 2: do the matching and fill the table - table.fillTableWithMatches(ranking, graph, witness, comparator); - return table; - } - - private Optional cell(int rowIndex, int columnIndex) { - return Optional.ofNullable(table[rowIndex][columnIndex]); - } - - public VariantGraph.Vertex vertexAt(int rowIndex, int columnIndex) { - return cell(rowIndex, columnIndex).map(c -> c.vertex).orElse(null); - } - - public Token tokenAt(int rowIndex, int columnIndex) { - return cell(rowIndex, columnIndex).map(c -> c.token).orElse(null); - } - - // Warning: this method reiterates the witness! - // This method is only meant for the user interface and serialization classes! - // Use the tokenAt method in all other cases. - public List rowList() { - return Collections.unmodifiableList(Arrays.asList(witness)); - } - - public List columnList() { - return Arrays.stream(ranks).boxed().collect(Collectors.toList()); - } - - // Since the coordinates in allMatches are ordered from upper left to lower right, - // we don't need to check the lower right neighbor. 
- public Set getIslands() { - Map coordinateMapper = new HashMap<>(); - List allMatches = allMatches(); - for (Coordinate c : allMatches) { - // LOG.debug("coordinate {}", c); - addToIslands(coordinateMapper, c); + private final MatchTableCell[][] table; + private final Token[] witness; + private final int[] ranks; + + // assumes default token comparator + public static MatchTable create(VariantGraph graph, Iterable witness) { + Comparator comparator = new EqualityTokenComparator(); + return MatchTable.create(graph, witness, comparator); } - Set smallestIslandsCoordinates = new HashSet<>(allMatches); - smallestIslandsCoordinates.removeAll(coordinateMapper.keySet()); - for (Coordinate coordinate : smallestIslandsCoordinates) { - Island island = new Island(); - island.add(coordinate); - coordinateMapper.put(coordinate, island); + + public static MatchTable create(VariantGraph graph, Iterable witness, Comparator comparator) { + final VariantGraphRanking ranking = VariantGraphRanking.of(graph); + // step 1: build the MatchTable + MatchTable table = createEmptyTable(ranking, graph, witness); + // step 2: do the matching and fill the table + table.fillTableWithMatches(ranking, graph, witness, comparator); + return table; + } + + private Optional cell(int rowIndex, int columnIndex) { + return Optional.ofNullable(table[rowIndex][columnIndex]); + } + + public VariantGraph.Vertex vertexAt(int rowIndex, int columnIndex) { + return cell(rowIndex, columnIndex).map(c -> c.vertex).orElse(null); + } + + public Token tokenAt(int rowIndex, int columnIndex) { + return cell(rowIndex, columnIndex).map(c -> c.token).orElse(null); } - return new HashSet<>(coordinateMapper.values()); - } + // Warning: this method reiterates the witness! + // This method is only meant for the user interface and serialization classes! + // Use the tokenAt method in all other cases. 
+ public List rowList() { + return Collections.unmodifiableList(Arrays.asList(witness)); + } + + public List columnList() { + return Arrays.stream(ranks).boxed().collect(Collectors.toList()); + } + + // Since the coordinates in allMatches are ordered from upper left to lower right, + // we don't need to check the lower right neighbor. + public Set getIslands() { + Map coordinateMapper = new HashMap<>(); + List allMatches = allMatches(); + for (Coordinate c : allMatches) { + // LOG.debug("coordinate {}", c); + addToIslands(coordinateMapper, c); + } + Set smallestIslandsCoordinates = new HashSet<>(allMatches); + smallestIslandsCoordinates.removeAll(coordinateMapper.keySet()); + for (Coordinate coordinate : smallestIslandsCoordinates) { + Island island = new Island(); + island.add(coordinate); + coordinateMapper.put(coordinate, island); + } + return new HashSet<>(coordinateMapper.values()); + } - - private MatchTable(Token[] tokens, int[] ranks) { - this.table = new MatchTableCell[tokens.length][ranks.length]; - this.witness = tokens; - this.ranks = ranks; - } - private static MatchTable createEmptyTable(VariantGraphRanking ranking, VariantGraph graph, Iterable witness) { - // -2 === ignore the start and the end vertex - return new MatchTable( - StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new), + private MatchTable(Token[] tokens, int[] ranks) { + this.table = new MatchTableCell[tokens.length][ranks.length]; + this.witness = tokens; + this.ranks = ranks; + } + + private static MatchTable createEmptyTable(VariantGraphRanking ranking, VariantGraph graph, Iterable witness) { + // -2 === ignore the start and the end vertex + return new MatchTable( + StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new), IntStream.range(0, Math.max(0, ranking.apply(graph.getEnd()) - 1)).toArray() - ); - } - - // move parameters into fields? 
- private void fillTableWithMatches(VariantGraphRanking ranking, VariantGraph graph, Iterable witness, Comparator comparator) { - Matches matches = Matches.between(graph.vertices(), witness, comparator); - Set unique = matches.uniqueInWitness; - Set ambiguous = matches.ambiguousInWitness; - int rowIndex=0; - for (Token t : witness) { - if (unique.contains(t) || ambiguous.contains(t)) { - List matchingVertices = matches.allMatches.getOrDefault(t, Collections.emptyList()); - for (VariantGraph.Vertex vgv : matchingVertices) { - set(rowIndex, ranking.apply(vgv) - 1, t, vgv); + ); + } + + // move parameters into fields? + private void fillTableWithMatches(VariantGraphRanking ranking, VariantGraph graph, Iterable witness, Comparator comparator) { + Matches matches = Matches.between(graph.vertices(), witness, comparator); + Set unique = matches.uniqueInWitness; + Set ambiguous = matches.ambiguousInWitness; + int rowIndex = 0; + for (Token t : witness) { + if (unique.contains(t) || ambiguous.contains(t)) { + List matchingVertices = matches.allMatches.getOrDefault(t, Collections.emptyList()); + for (VariantGraph.Vertex vgv : matchingVertices) { + set(rowIndex, ranking.apply(vgv) - 1, t, vgv); + } + } + rowIndex++; } - } - rowIndex++; } - } - - private void set(int rowIndex, int columnIndex, Token token, VariantGraph.Vertex vertex) { - // LOG.debug("putting: {}<->{}<->{}", new Object[] { token, columnIndex, variantGraphVertex }); - table[rowIndex][columnIndex] = new MatchTableCell(token, vertex); - } - - private void addToIslands(Map coordinateMapper, Coordinate c) { - int diff = -1; - Coordinate neighborCoordinate = new Coordinate(c.row + diff, c.column + diff); - VariantGraph.Vertex neighbor = null; - try { - neighbor = vertexAt(c.row + diff, c.column + diff); - } catch (IndexOutOfBoundsException e) { - // ignored + + private void set(int rowIndex, int columnIndex, Token token, VariantGraph.Vertex vertex) { + // LOG.debug("putting: {}<->{}<->{}", new Object[] { token, 
columnIndex, variantGraphVertex }); + table[rowIndex][columnIndex] = new MatchTableCell(token, vertex); } - if (neighbor != null) { - Island island = coordinateMapper.get(neighborCoordinate); - if (island == null) { - // LOG.debug("new island"); - Island island0 = new Island(); - island0.add(neighborCoordinate); - island0.add(c); - coordinateMapper.put(neighborCoordinate, island0); - coordinateMapper.put(c, island0); - } else { - // LOG.debug("add to existing island"); - island.add(c); - coordinateMapper.put(c, island); - } + + private void addToIslands(Map coordinateMapper, Coordinate c) { + int diff = -1; + Coordinate neighborCoordinate = new Coordinate(c.row + diff, c.column + diff); + VariantGraph.Vertex neighbor = null; + try { + neighbor = vertexAt(c.row + diff, c.column + diff); + } catch (IndexOutOfBoundsException e) { + // ignored + } + if (neighbor != null) { + Island island = coordinateMapper.get(neighborCoordinate); + if (island == null) { + // LOG.debug("new island"); + Island island0 = new Island(); + island0.add(neighborCoordinate); + island0.add(c); + coordinateMapper.put(neighborCoordinate, island0); + coordinateMapper.put(c, island0); + } else { + // LOG.debug("add to existing island"); + island.add(c); + coordinateMapper.put(c, island); + } + } } - } - - // Note: code taken from MatchMatrix class - // TODO: might be simpler to work from the valueSet - // TODO: try remove the call to rowList / columnList - List allMatches() { - List pairs = new ArrayList<>(); - int rows = rowList().size(); - int cols = columnList().size(); - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - if (vertexAt(i, j) != null) pairs.add(new Coordinate(i, j)); - } + + // Note: code taken from MatchMatrix class + // TODO: might be simpler to work from the valueSet + // TODO: try remove the call to rowList / columnList + List allMatches() { + List pairs = new ArrayList<>(); + int rows = rowList().size(); + int cols = columnList().size(); + for (int i = 0; 
i < rows; i++) { + for (int j = 0; j < cols; j++) { + if (vertexAt(i, j) != null) pairs.add(new Coordinate(i, j)); + } + } + return pairs; } - return pairs; - } - - private class MatchTableCell { - public final Token token; - public final VariantGraph.Vertex vertex; - - public MatchTableCell(Token token, VariantGraph.Vertex vertex) { - this.token = token; - this.vertex = vertex; + + private class MatchTableCell { + public final Token token; + public final VariantGraph.Vertex vertex; + + public MatchTableCell(Token token, VariantGraph.Vertex vertex) { + this.token = token; + this.vertex = vertex; + } } - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java index f30136399..c99a0079e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java @@ -19,48 +19,48 @@ package eu.interedition.collatex.dekker.matrix; +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.dekker.TokenLinker; + import java.util.Comparator; import java.util.HashMap; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.dekker.TokenLinker; - public class MatchTableLinker implements TokenLinker { - static Logger LOG = Logger.getLogger(MatchTableLinker.class.getName()); + static Logger LOG = Logger.getLogger(MatchTableLinker.class.getName()); + + public MatchTableLinker() { + super(); + } + + @Override + public Map link(VariantGraph base, Iterable witness, Comparator comparator) { + // create MatchTable and fill it with matches + LOG.fine("create MatchTable and fill it with matches"); + MatchTable table = 
MatchTable.create(base, witness, comparator); - public MatchTableLinker() { - super(); - } + // create IslandConflictResolver + LOG.fine("create island conflict resolver"); + IslandConflictResolver resolver = new IslandConflictResolver(table); - @Override - public Map link(VariantGraph base, Iterable witness, Comparator comparator) { - // create MatchTable and fill it with matches - LOG.fine("create MatchTable and fill it with matches"); - MatchTable table = MatchTable.create(base, witness, comparator); + // The IslandConflictResolver createNonConflictingVersion() method + // selects the optimal islands + LOG.fine("select the optimal islands"); + MatchTableSelection preferredIslands = resolver.createNonConflictingVersion(); + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Number of preferred Islands: {0}", preferredIslands.size()); + } - // create IslandConflictResolver - LOG.fine("create island conflict resolver"); - IslandConflictResolver resolver = new IslandConflictResolver(table); - - // The IslandConflictResolver createNonConflictingVersion() method - // selects the optimal islands - LOG.fine("select the optimal islands"); - MatchTableSelection preferredIslands = resolver.createNonConflictingVersion(); - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Number of preferred Islands: {0}", preferredIslands.size()); - } - - // Here the result is put in a map - Map map = new HashMap<>(); - for (Island island : preferredIslands.getIslands()) { - for (Coordinate c : island) { - map.put(table.tokenAt(c.row, c.column), table.vertexAt(c.row, c.column)); - } - } - return map; - } + // Here the result is put in a map + Map map = new HashMap<>(); + for (Island island : preferredIslands.getIslands()) { + for (Coordinate c : island) { + map.put(table.tokenAt(c.row, c.column), table.vertexAt(c.row, c.column)); + } + } + return map; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java 
b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java index 794fd6a40..1f6101be8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java @@ -1,5 +1,7 @@ package eu.interedition.collatex.dekker.matrix; +import eu.interedition.collatex.VariantGraph; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -12,154 +14,158 @@ import java.util.logging.Logger; import java.util.stream.Collectors; -import eu.interedition.collatex.VariantGraph; - // @author: Ronald Haentjens Dekker // Unselected islands reside in the islandMultimap. // Selected islands reside in the fixedIsland Archipelago. -// Group the islands together by size; +// Group the islands together by size; // islands may change after commit islands public class MatchTableSelection { - Logger LOG = Logger.getLogger(MatchTableSelection.class.getName()); - private final Map> islandMultimap; - private final Archipelago fixedIslands; - //this fields are needed for the locking of table cells - private final Set fixedRows; - private final Set fixedVertices; - private final MatchTable table; + Logger LOG = Logger.getLogger(MatchTableSelection.class.getName()); + private final Map> islandMultimap; + private final Archipelago fixedIslands; + //this fields are needed for the locking of table cells + private final Set fixedRows; + private final Set fixedVertices; + private final MatchTable table; - public MatchTableSelection(MatchTable table) { - fixedRows = new HashSet<>(); - fixedVertices = new HashSet<>(); - this.table = table; - this.fixedIslands = new Archipelago(); - islandMultimap = new HashMap<>(); - for (Island isl : table.getIslands()) { - islandMultimap.computeIfAbsent(isl.size(), s -> new ArrayList<>()).add(isl); + public MatchTableSelection(MatchTable table) { + fixedRows = new HashSet<>(); + 
fixedVertices = new HashSet<>(); + this.table = table; + this.fixedIslands = new Archipelago(); + islandMultimap = new HashMap<>(); + for (Island isl : table.getIslands()) { + islandMultimap.computeIfAbsent(isl.size(), s -> new ArrayList<>()).add(isl); + } } - } - - // copy constructor - public MatchTableSelection(MatchTableSelection orig) { - // table structure is read only, does not have to be copied - this.islandMultimap = orig.islandMultimap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> new ArrayList<>(e.getValue()))); - this.fixedIslands = new Archipelago(orig.fixedIslands); - this.fixedRows = new HashSet<>(orig.fixedRows); - this.fixedVertices = new HashSet<>(orig.fixedVertices); - this.table = orig.table; - } - /* - * Return whether a coordinate overlaps with an already committed coordinate - */ - public boolean doesCoordinateOverlapWithCommittedCoordinate(Coordinate coordinate) { - return fixedRows.contains(coordinate.row) || // - fixedVertices.contains(table.vertexAt(coordinate.row, coordinate.column)); - } + // copy constructor + public MatchTableSelection(MatchTableSelection orig) { + // table structure is read only, does not have to be copied + this.islandMultimap = orig.islandMultimap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> new ArrayList<>(e.getValue()))); + this.fixedIslands = new Archipelago(orig.fixedIslands); + this.fixedRows = new HashSet<>(orig.fixedRows); + this.fixedVertices = new HashSet<>(orig.fixedVertices); + this.table = orig.table; + } + + /* + * Return whether a coordinate overlaps with an already committed coordinate + */ + public boolean doesCoordinateOverlapWithCommittedCoordinate(Coordinate coordinate) { + return fixedRows.contains(coordinate.row) || // + fixedVertices.contains(table.vertexAt(coordinate.row, coordinate.column)); + } - /* - * Return whether an island overlaps with an already committed island - */ - public boolean isIslandPossibleCandidate(Island island) { - for 
(Coordinate coordinate : island) { - if (doesCoordinateOverlapWithCommittedCoordinate(coordinate)) return false; + /* + * Return whether an island overlaps with an already committed island + */ + public boolean isIslandPossibleCandidate(Island island) { + for (Coordinate coordinate : island) { + if (doesCoordinateOverlapWithCommittedCoordinate(coordinate)) return false; + } + return true; } - return true; - } - - /* - * Commit an island in the match table - * Island will be part of the final alignment - */ - public void addIsland(Island isl) { - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "adding island: '{0}'", isl); + + /* + * Commit an island in the match table + * Island will be part of the final alignment + */ + public void addIsland(Island isl) { + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "adding island: '{0}'", isl); + } + for (Coordinate coordinate : isl) { + fixedRows.add(coordinate.row); + fixedVertices.add(table.vertexAt(coordinate.row, coordinate.column)); + } + fixedIslands.add(isl); + islandMultimap.computeIfPresent(isl.size(), (s, i) -> { + i.remove(isl); + return (i.isEmpty() ? null : i); + }); } - for (Coordinate coordinate : isl) { - fixedRows.add(coordinate.row); - fixedVertices.add(table.vertexAt(coordinate.row, coordinate.column)); + + public boolean doesCandidateLayOnVectorOfCommittedIsland(Island island) { + Coordinate leftEnd = island.getLeftEnd(); + return fixedIslands.getIslandVectors().contains(leftEnd.row - leftEnd.column); } - fixedIslands.add(isl); - islandMultimap.computeIfPresent(isl.size(), (s, i) -> { i.remove(isl); return (i.isEmpty() ? 
null : i); }); - } - - public boolean doesCandidateLayOnVectorOfCommittedIsland(Island island) { - Coordinate leftEnd = island.getLeftEnd(); - return fixedIslands.getIslandVectors().contains(leftEnd.row - leftEnd.column); - } - public int size() { - return fixedIslands.size(); - } + public int size() { + return fixedIslands.size(); + } - public List getIslands() { - return fixedIslands.getIslands(); - } + public List getIslands() { + return fixedIslands.getIslands(); + } - public boolean containsCoordinate(int row, int column) { - return fixedIslands.containsCoordinate(row, column); - } + public boolean containsCoordinate(int row, int column) { + return fixedIslands.containsCoordinate(row, column); + } - /* - * For all the possible islands of a certain size this method checks whether - * they conflict with one of the previously committed islands. If so, the - * possible island is removed from the multimap. Or in case of overlap, split - * into a smaller island and then put in back into the map Note that this - * method changes the possible islands multimap. - */ - //TODO: the original Island object is modified here - //TODO: That should not happen, if we want to build a decision tree. - public void removeOrSplitImpossibleIslands(Integer islandSize, Map> islandMultimap) { - Collection islandsToCheck = new ArrayList<>(islandMultimap.getOrDefault(islandSize, Collections.emptyList())); - for (Island island : islandsToCheck) { - if (!isIslandPossibleCandidate(island)) { - islandMultimap.computeIfPresent(islandSize, (s, i) -> { i.remove(island); return (i.isEmpty() ? null : i); }); - removeConflictingEndCoordinates(island); - if (island.size() > 0) { - islandMultimap.computeIfAbsent(island.size(), s -> new ArrayList<>()).add(island); + /* + * For all the possible islands of a certain size this method checks whether + * they conflict with one of the previously committed islands. If so, the + * possible island is removed from the multimap. 
Or in case of overlap, split + * into a smaller island and then put in back into the map Note that this + * method changes the possible islands multimap. + */ + //TODO: the original Island object is modified here + //TODO: That should not happen, if we want to build a decision tree. + public void removeOrSplitImpossibleIslands(Integer islandSize, Map> islandMultimap) { + Collection islandsToCheck = new ArrayList<>(islandMultimap.getOrDefault(islandSize, Collections.emptyList())); + for (Island island : islandsToCheck) { + if (!isIslandPossibleCandidate(island)) { + islandMultimap.computeIfPresent(islandSize, (s, i) -> { + i.remove(island); + return (i.isEmpty() ? null : i); + }); + removeConflictingEndCoordinates(island); + if (island.size() > 0) { + islandMultimap.computeIfAbsent(island.size(), s -> new ArrayList<>()).add(island); + } + } } - } } - } - private void removeConflictingEndCoordinates(Island island) { - boolean goOn = true; - while (goOn) { - Coordinate leftEnd = island.getLeftEnd(); - if (doesCoordinateOverlapWithCommittedCoordinate(leftEnd)) { - island.removeCoordinate(leftEnd); - if (island.size() == 0) { - return; + private void removeConflictingEndCoordinates(Island island) { + boolean goOn = true; + while (goOn) { + Coordinate leftEnd = island.getLeftEnd(); + if (doesCoordinateOverlapWithCommittedCoordinate(leftEnd)) { + island.removeCoordinate(leftEnd); + if (island.size() == 0) { + return; + } + } else { + goOn = false; + } } - } else { - goOn = false; - } - } - goOn = true; - while (goOn) { - Coordinate rightEnd = island.getRightEnd(); - if (doesCoordinateOverlapWithCommittedCoordinate(rightEnd)) { - island.removeCoordinate(rightEnd); - if (island.size() == 0) { - return; + goOn = true; + while (goOn) { + Coordinate rightEnd = island.getRightEnd(); + if (doesCoordinateOverlapWithCommittedCoordinate(rightEnd)) { + island.removeCoordinate(rightEnd); + if (island.size() == 0) { + return; + } + } else { + goOn = false; + } } - } else { - goOn = 
false; - } } - } - public List getPossibleIslands() { - List possibleIslands = new ArrayList<>(); - while(possibleIslands.isEmpty()&&!islandMultimap.isEmpty()) { - // find the maximum island size and traverse groups in descending order - Integer max = Collections.max(islandMultimap.keySet()); - LOG.fine("Checking islands of size: "+max); - // check the possible islands of a certain size against - // the already committed islands. - removeOrSplitImpossibleIslands(max, islandMultimap); - possibleIslands = new ArrayList<>(islandMultimap.getOrDefault(max, Collections.emptyList())); + public List getPossibleIslands() { + List possibleIslands = new ArrayList<>(); + while (possibleIslands.isEmpty() && !islandMultimap.isEmpty()) { + // find the maximum island size and traverse groups in descending order + Integer max = Collections.max(islandMultimap.keySet()); + LOG.fine("Checking islands of size: " + max); + // check the possible islands of a certain size against + // the already committed islands. 
+ removeOrSplitImpossibleIslands(max, islandMultimap); + possibleIslands = new ArrayList<>(islandMultimap.getOrDefault(max, Collections.emptyList())); + } + return possibleIslands; } - return possibleIslands; - } } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java index 254071c21..b05c4ea04 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java @@ -19,128 +19,126 @@ package eu.interedition.collatex.dekker.matrix; -import java.util.ArrayList; - import eu.interedition.collatex.Token; +import java.util.ArrayList; + /** - * * @author Meindert Kroese * @author Ronald Haentjens Dekker - * */ //TODO: The methods in this class are extracted from the old MatchMatrix class //TODO: check correctness public class MatchTableSerializer { - //TODO: rename - private final MatchTable sparseMatrix; - - public MatchTableSerializer(MatchTable table) { - this.sparseMatrix = table; - } - - public String toHtml() { - StringBuilder result = new StringBuilder("\n\n"); - ArrayList colLabels = columnLabels(); - for (String cLabel : colLabels) { - result.append(""); - } - result.append("\n"); - int colNum = sparseMatrix.columnList().size(); - ArrayList rLabels = rowLabels(); - int row = 0; - for (String label : rLabels) { - result.append(""); - for (int col = 0; col < colNum; col++) - if (sparseMatrix.vertexAt(row, col)!=null) - result.append(""); - else - result.append(""); - result.append("\n"); - row++; - } - result.append("
        ").append(cLabel).append("
        ").append(label).append("M
        "); - return result.toString(); - } + //TODO: rename + private final MatchTable sparseMatrix; - // arch = preferred matches - public String toHtml(Archipelago arch) { - int mat[] = new int[rowNum()]; - for (Island isl : arch.getIslands()) { - for (Coordinate c : isl) { - mat[c.row] = c.column; - } - } - StringBuilder result = new StringBuilder("\n\n"); - ArrayList colLabels = columnLabels(); - for (String cLabel : colLabels) { - result.append(""); + public MatchTableSerializer(MatchTable table) { + this.sparseMatrix = table; } - result.append("\n"); - ArrayList rLabels = rowLabels(); - int row = 0; - for (String label : rLabels) { - result.append(""); - if (mat[row] > 0) { - result.append("").append(""); - } - result.append("\n"); - row++; + + public String toHtml() { + StringBuilder result = new StringBuilder("
        ").append(cLabel).append("
        ").append(label).append("M
        \n\n"); + ArrayList colLabels = columnLabels(); + for (String cLabel : colLabels) { + result.append(""); + } + result.append("\n"); + int colNum = sparseMatrix.columnList().size(); + ArrayList rLabels = rowLabels(); + int row = 0; + for (String label : rLabels) { + result.append(""); + for (int col = 0; col < colNum; col++) + if (sparseMatrix.vertexAt(row, col) != null) + result.append(""); + else + result.append(""); + result.append("\n"); + row++; + } + result.append("
        ").append(cLabel).append("
        ").append(label).append("M
        "); + return result.toString(); } - result.append(""); - return result.toString(); - } - @Override - public String toString() { - StringBuilder result = new StringBuilder(); - ArrayList colLabels = columnLabels(); - for (String cLabel : colLabels) { - result.append(" ").append(cLabel); + // arch = preferred matches + public String toHtml(Archipelago arch) { + int mat[] = new int[rowNum()]; + for (Island isl : arch.getIslands()) { + for (Coordinate c : isl) { + mat[c.row] = c.column; + } + } + StringBuilder result = new StringBuilder("\n\n"); + ArrayList colLabels = columnLabels(); + for (String cLabel : colLabels) { + result.append(""); + } + result.append("\n"); + ArrayList rLabels = rowLabels(); + int row = 0; + for (String label : rLabels) { + result.append(""); + if (mat[row] > 0) { + result.append("").append(""); + } + result.append("\n"); + row++; + } + result.append("
        ").append(cLabel).append("
        ").append(label).append("M
        "); + return result.toString(); } - result.append("\n"); - int colNum = sparseMatrix.columnList().size(); - ArrayList rLabels = rowLabels(); - int row = 0; - for (String label : rLabels) { - result.append(label); - for (int col = 0; col < colNum; col++) - result.append(" ").append(sparseMatrix.vertexAt(row++, col)!=null); - result.append("\n"); + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + ArrayList colLabels = columnLabels(); + for (String cLabel : colLabels) { + result.append(" ").append(cLabel); + } + result.append("\n"); + int colNum = sparseMatrix.columnList().size(); + ArrayList rLabels = rowLabels(); + int row = 0; + for (String label : rLabels) { + result.append(label); + for (int col = 0; col < colNum; col++) + result.append(" ").append(sparseMatrix.vertexAt(row++, col) != null); + result.append("\n"); + } + return result.toString(); } - return result.toString(); - } - public ArrayList rowLabels() { - ArrayList labels = new ArrayList(); - for (Token vgv : sparseMatrix.rowList()) { - String token = vgv.toString(); - int pos = token.indexOf(":'"); - if (pos > -1) { - labels.add(token.substring(pos + 2, token.length() - 2)); - } + public ArrayList rowLabels() { + ArrayList labels = new ArrayList(); + for (Token vgv : sparseMatrix.rowList()) { + String token = vgv.toString(); + int pos = token.indexOf(":'"); + if (pos > -1) { + labels.add(token.substring(pos + 2, token.length() - 2)); + } + } + return labels; } - return labels; - } - public ArrayList columnLabels() { - ArrayList labels = new ArrayList(); - for (Integer t : sparseMatrix.columnList()) { - String token = t.toString(); - int pos = token.indexOf(":'"); - if (pos > -1) { - // LOG.debug("token={{}}, pos={}", token, pos); - labels.add(token.substring(pos + 2, token.length() - 1)); - } + public ArrayList columnLabels() { + ArrayList labels = new ArrayList(); + for (Integer t : sparseMatrix.columnList()) { + String token = t.toString(); + int pos = 
token.indexOf(":'"); + if (pos > -1) { + // LOG.debug("token={{}}, pos={}", token, pos); + labels.add(token.substring(pos + 2, token.length() - 1)); + } + } + return labels; } - return labels; - } - public int rowNum() { - return rowLabels().size(); - } + public int rowNum() { + return rowLabels().size(); + } - public int colNum() { - return columnLabels().size(); - } + public int colNum() { + return columnLabels().size(); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java index f0bc92e2f..56cbafad5 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java @@ -40,7 +40,7 @@ public static int compute(String str1, String str2) { } int[][][] cache = new int[30][][]; - int matrix[][]; + int matrix[][]; if (str2Length >= cache.length) { matrix = form(str1Length, str2Length); } else if (cache[str2Length] != null) { @@ -48,7 +48,7 @@ public static int compute(String str1, String str2) { } else { matrix = cache[str2Length] = form(str1Length, str2Length); } - + for (int i = 1; i <= str1Length; i++) { final char str1Char = str1Chars[i - 1]; for (int j = 1; j <= str2Length; j++) { diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java index 00168f192..cd9ef3ba9 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java @@ -26,20 +26,20 @@ public class EditDistanceTokenComparator implements Comparator { - private final int threshold; - - public EditDistanceTokenComparator() { - this(1); - } - - public EditDistanceTokenComparator(int 
threshold) { - this.threshold = threshold; - } - - @Override - public int compare(Token base, Token witness) { - final String baseContent = ((SimpleToken) base).getNormalized(); - final String witnessContent = ((SimpleToken) witness).getNormalized(); - return (EditDistance.compute(baseContent, witnessContent) <= threshold) ? 0 : -1; - } + private final int threshold; + + public EditDistanceTokenComparator() { + this(1); + } + + public EditDistanceTokenComparator(int threshold) { + this.threshold = threshold; + } + + @Override + public int compare(Token base, Token witness) { + final String baseContent = ((SimpleToken) base).getNormalized(); + final String witnessContent = ((SimpleToken) witness).getNormalized(); + return (EditDistance.compute(baseContent, witnessContent) <= threshold) ? 0 : -1; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java index 077638d33..7ae42e492 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java @@ -26,11 +26,11 @@ public class EqualityTokenComparator implements Comparator { - @Override - public int compare(Token base, Token witness) { - final String baseContent = ((SimpleToken) base).getNormalized(); - final String witnessContent = ((SimpleToken) witness).getNormalized(); - return baseContent.compareTo(witnessContent); - } + @Override + public int compare(Token base, Token witness) { + final String baseContent = ((SimpleToken) base).getNormalized(); + final String witnessContent = ((SimpleToken) witness).getNormalized(); + return baseContent.compareTo(witnessContent); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java index 
cf281cd72..05b9b6684 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java @@ -37,26 +37,26 @@ public class Matches { - public final Map> allMatches; - public final Set unmatchedInWitness; - public final Set ambiguousInWitness; - public final Set uniqueInWitness; + public final Map> allMatches; + public final Set unmatchedInWitness; + public final Set ambiguousInWitness; + public final Set uniqueInWitness; - public static Matches between(final Iterable vertices, final Iterable witnessTokens, Comparator comparator) { + public static Matches between(final Iterable vertices, final Iterable witnessTokens, Comparator comparator) { - final Map> allMatches = new HashMap<>(); + final Map> allMatches = new HashMap<>(); - StreamSupport.stream(vertices.spliterator(), false).forEach(vertex -> + StreamSupport.stream(vertices.spliterator(), false).forEach(vertex -> vertex.tokens().stream().findFirst().ifPresent(baseToken -> - StreamSupport.stream(witnessTokens.spliterator(), false) - .filter(witnessToken -> comparator.compare(baseToken, witnessToken) == 0) - .forEach(matchingToken -> allMatches.computeIfAbsent(matchingToken, t -> new ArrayList<>()).add(vertex)))); + StreamSupport.stream(witnessTokens.spliterator(), false) + .filter(witnessToken -> comparator.compare(baseToken, witnessToken) == 0) + .forEach(matchingToken -> allMatches.computeIfAbsent(matchingToken, t -> new ArrayList<>()).add(vertex)))); - final Set unmatchedInWitness = StreamSupport.stream(witnessTokens.spliterator(), false) + final Set unmatchedInWitness = StreamSupport.stream(witnessTokens.spliterator(), false) .filter(t -> !allMatches.containsKey(t)) .collect(Collectors.toCollection(LinkedHashSet::new)); - final Set ambiguousInBase = allMatches.values().stream() + final Set ambiguousInBase = allMatches.values().stream() .flatMap(List::stream) .collect(Collectors.toMap(Function.identity(), v -> 
1, (a, b) -> a + b)) .entrySet() @@ -65,30 +65,30 @@ public static Matches between(final Iterable vertices, fina .map(Map.Entry::getKey) .collect(Collectors.toCollection(LinkedHashSet::new)); - // (have to check: base -> witness, and witness -> base) - final Set ambiguousInWitness = Stream.concat( + // (have to check: base -> witness, and witness -> base) + final Set ambiguousInWitness = Stream.concat( StreamSupport.stream(witnessTokens.spliterator(), false) - .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() > 1), + .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() > 1), allMatches.entrySet().stream() - .filter(match -> match.getValue().stream().anyMatch(ambiguousInBase::contains)) - .map(Map.Entry::getKey) - ).collect(Collectors.toCollection(LinkedHashSet::new)); + .filter(match -> match.getValue().stream().anyMatch(ambiguousInBase::contains)) + .map(Map.Entry::getKey) + ).collect(Collectors.toCollection(LinkedHashSet::new)); - // sure tokens - // have to check unsure tokens because of (base -> witness && witness -> base) - final Set uniqueInWitness = StreamSupport.stream(witnessTokens.spliterator(), false) + // sure tokens + // have to check unsure tokens because of (base -> witness && witness -> base) + final Set uniqueInWitness = StreamSupport.stream(witnessTokens.spliterator(), false) .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() == 1 && !ambiguousInWitness.contains(t)) .collect(Collectors.toCollection(LinkedHashSet::new)); - return new Matches(allMatches, unmatchedInWitness, ambiguousInWitness, uniqueInWitness); - } + return new Matches(allMatches, unmatchedInWitness, ambiguousInWitness, uniqueInWitness); + } - private Matches(Map> allMatches, Set unmatchedInWitness, Set ambiguousInWitness, Set uniqueInWitness) { - this.allMatches = Collections.unmodifiableMap(allMatches); - this.unmatchedInWitness = Collections.unmodifiableSet(unmatchedInWitness); - this.ambiguousInWitness = 
Collections.unmodifiableSet(ambiguousInWitness); - this.uniqueInWitness = Collections.unmodifiableSet(uniqueInWitness); - } + private Matches(Map> allMatches, Set unmatchedInWitness, Set ambiguousInWitness, Set uniqueInWitness) { + this.allMatches = Collections.unmodifiableMap(allMatches); + this.unmatchedInWitness = Collections.unmodifiableSet(unmatchedInWitness); + this.ambiguousInWitness = Collections.unmodifiableSet(ambiguousInWitness); + this.uniqueInWitness = Collections.unmodifiableSet(uniqueInWitness); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java index 88b4dae01..686452acb 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java @@ -19,18 +19,18 @@ package eu.interedition.collatex.matching; -import java.util.Comparator; - import eu.interedition.collatex.Token; import eu.interedition.collatex.simple.SimpleToken; +import java.util.Comparator; + public class StrictEqualityTokenComparator implements Comparator { - @Override - public int compare(Token base, Token witness) { - final String baseContent = ((SimpleToken) base).getContent(); - final String witnessContent = ((SimpleToken) witness).getContent(); - return baseContent.compareTo(witnessContent); - } + @Override + public int compare(Token base, Token witness) { + final String baseContent = ((SimpleToken) base).getContent(); + final String witnessContent = ((SimpleToken) witness).getContent(); + return baseContent.compareTo(witnessContent); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java index 96d815b4d..a115a9fa1 100644 --- 
a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java @@ -37,116 +37,116 @@ */ public class AlignmentDecisionGraph { - private final List> matches; - private final Function, Integer> matchEvaluator; - private final PriorityQueue bestPaths; - private final Map minCosts; - - AlignmentDecisionGraph(List> matches, Function, Integer> matchEvaluator) { - this.matches = matches; - this.matchEvaluator = matchEvaluator; - this.bestPaths = new PriorityQueue<>(matches.size(), Comparator.comparingInt(n -> n.cost)); - this.minCosts = new HashMap<>(); - } - - static SortedSet> filter(SortedSet> matches, Function, Integer> matchEvaluator) { - final SortedSet> alignments = new TreeSet<>(VertexMatch.setComparator()); - - final List> matchList = new ArrayList<>(matches); - Node optimal = new AlignmentDecisionGraph(matchList, matchEvaluator).findBestPath(); - while (optimal.matchIndex >= 0) { - if (optimal.aligned) { - alignments.add(matchList.get(optimal.matchIndex)); - } - optimal = optimal.previous; + private final List> matches; + private final Function, Integer> matchEvaluator; + private final PriorityQueue bestPaths; + private final Map minCosts; + + AlignmentDecisionGraph(List> matches, Function, Integer> matchEvaluator) { + this.matches = matches; + this.matchEvaluator = matchEvaluator; + this.bestPaths = new PriorityQueue<>(matches.size(), Comparator.comparingInt(n -> n.cost)); + this.minCosts = new HashMap<>(); } - return alignments; - } - - private Node findBestPath() { - bestPaths.add(new Node(-1, false)); - while (!bestPaths.isEmpty()) { - final Node current = bestPaths.remove(); - if (current.matchIndex == matches.size() - 1) { - return current; - } - for (Node successor : current.successors()) { - final int tentativeCost = cost(current) + cost(successor); - if (bestPaths.contains(successor) && tentativeCost >= minCosts.get(successor)) { - 
continue; - } - minCosts.put(successor, tentativeCost); - successor.cost = tentativeCost + heuristicCost(successor); - successor.previous = current; - bestPaths.remove(successor); - bestPaths.add(successor); - } - } - throw new IllegalStateException("No optimal alignment found"); - } - - private int heuristicCost(Node path) { - final SortedSet evaluated = matches.get(path.matchIndex); - final VertexMatch.WithTokenIndex lastMatch = evaluated.last(); - - int cost = 0; - for (SortedSet following : matches.subList(path.matchIndex + 1, matches.size())) { - final VertexMatch.WithTokenIndex followingFirstMatch = following.first(); - if (lastMatch.vertexRank < followingFirstMatch.vertexRank && lastMatch.token < followingFirstMatch.token) { - // we still can align this following match as the matched components are to the right of this path's last match - continue; - } - // we cannot align this following match, so add it to the cost - cost += value(following); + static SortedSet> filter(SortedSet> matches, Function, Integer> matchEvaluator) { + final SortedSet> alignments = new TreeSet<>(VertexMatch.setComparator()); + + final List> matchList = new ArrayList<>(matches); + Node optimal = new AlignmentDecisionGraph(matchList, matchEvaluator).findBestPath(); + while (optimal.matchIndex >= 0) { + if (optimal.aligned) { + alignments.add(matchList.get(optimal.matchIndex)); + } + optimal = optimal.previous; + } + return alignments; } - return cost; - } - - private int cost(Node current) { - int cost = 0; - while (current != null && current.matchIndex >= 0) { - if (!current.aligned) { - cost += value(matches.get(current.matchIndex)); - } - current = current.previous; + + private Node findBestPath() { + bestPaths.add(new Node(-1, false)); + while (!bestPaths.isEmpty()) { + final Node current = bestPaths.remove(); + if (current.matchIndex == matches.size() - 1) { + return current; + } + for (Node successor : current.successors()) { + final int tentativeCost = cost(current) + 
cost(successor); + if (bestPaths.contains(successor) && tentativeCost >= minCosts.get(successor)) { + continue; + } + minCosts.put(successor, tentativeCost); + + successor.cost = tentativeCost + heuristicCost(successor); + successor.previous = current; + bestPaths.remove(successor); + bestPaths.add(successor); + } + } + throw new IllegalStateException("No optimal alignment found"); } - return cost; - } - - private int value(SortedSet match) { - return matchEvaluator.apply(match); - } - - static class Node { - final int matchIndex; - final boolean aligned; - Node previous; - int cost; - - Node(int matchIndex, boolean aligned) { - this.matchIndex = matchIndex; - this.aligned = aligned; + + private int heuristicCost(Node path) { + final SortedSet evaluated = matches.get(path.matchIndex); + final VertexMatch.WithTokenIndex lastMatch = evaluated.last(); + + int cost = 0; + for (SortedSet following : matches.subList(path.matchIndex + 1, matches.size())) { + final VertexMatch.WithTokenIndex followingFirstMatch = following.first(); + if (lastMatch.vertexRank < followingFirstMatch.vertexRank && lastMatch.token < followingFirstMatch.token) { + // we still can align this following match as the matched components are to the right of this path's last match + continue; + } + // we cannot align this following match, so add it to the cost + cost += value(following); + } + return cost; } - Node[] successors() { - final int nextIndex = matchIndex + 1; - return new Node[] { new Node(nextIndex, true), new Node(nextIndex, false) }; + private int cost(Node current) { + int cost = 0; + while (current != null && current.matchIndex >= 0) { + if (!current.aligned) { + cost += value(matches.get(current.matchIndex)); + } + current = current.previous; + } + return cost; } - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Node) { - final Node other = (Node) obj; - return (matchIndex == other.matchIndex) && (aligned == other.aligned); - } - return 
super.equals(obj); + private int value(SortedSet match) { + return matchEvaluator.apply(match); } - @Override - public int hashCode() { - return Objects.hash(matchIndex, aligned); + static class Node { + final int matchIndex; + final boolean aligned; + Node previous; + int cost; + + Node(int matchIndex, boolean aligned) { + this.matchIndex = matchIndex; + this.aligned = aligned; + } + + Node[] successors() { + final int nextIndex = matchIndex + 1; + return new Node[]{new Node(nextIndex, true), new Node(nextIndex, false)}; + } + + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof Node) { + final Node other = (Node) obj; + return (matchIndex == other.matchIndex) && (aligned == other.aligned); + } + return super.equals(obj); + } + + @Override + public int hashCode() { + return Objects.hash(matchIndex, aligned); + } } - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java index 69bb91267..17b1fa479 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java @@ -43,175 +43,175 @@ */ public class Matches extends ArrayList> { - public Matches(int initialCapacity) { - super(initialCapacity); - } - - public static Matches between(VariantGraph.Vertex[][] vertices, SuffixTree suffixTree, Function, Integer> matchEvaluator) { - - final Map> matchThreads = new HashMap<>(); - for (int rank = 0; rank < vertices.length; rank++) { - for (VariantGraph.Vertex vertex : vertices[rank]) { - final MatchThreadElement matchThreadElement = new MatchThreadElement(suffixTree).advance(vertex, rank); - if (matchThreadElement != null) { - matchThreads.computeIfAbsent(rank, r -> new LinkedList<>()).add(matchThreadElement); - } - } - for (MatchThreadElement matchThreadElement : matchThreads.getOrDefault(rank - 1, Collections.emptyList())) { - for 
(VariantGraph.Vertex vertex : vertices[rank]) { - final MatchThreadElement advanced = matchThreadElement.advance(vertex, rank); - if (advanced != null) { - matchThreads.computeIfAbsent(rank, r -> new LinkedList<>()).add(advanced); - } - } - } + public Matches(int initialCapacity) { + super(initialCapacity); } - final Matches matches = new Matches(matchThreads.size()); - matchThreads.values().stream().flatMap(List::stream).forEach(matchThreadElement -> { - final List> threadPhrases = new ArrayList<>(); - boolean firstElement = true; - for (MatchThreadElement threadElement : matchThreadElement.thread()) { - final SuffixTree.EquivalenceClass equivalenceClass = threadElement.cursor.matchedClass(); - for (int mc = 0; mc < equivalenceClass.length; mc++) { - final int tokenCandidate = equivalenceClass.members[mc]; - if (firstElement) { - final SortedSet phrase = new TreeSet<>(); - phrase.add(new VertexMatch.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate)); - threadPhrases.add(phrase); - } else { - for (SortedSet phrase : threadPhrases) { - if ((phrase.last().token + 1) == tokenCandidate) { - phrase.add(new VertexMatch.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate)); - } + public static Matches between(VariantGraph.Vertex[][] vertices, SuffixTree suffixTree, Function, Integer> matchEvaluator) { + + final Map> matchThreads = new HashMap<>(); + for (int rank = 0; rank < vertices.length; rank++) { + for (VariantGraph.Vertex vertex : vertices[rank]) { + final MatchThreadElement matchThreadElement = new MatchThreadElement(suffixTree).advance(vertex, rank); + if (matchThreadElement != null) { + matchThreads.computeIfAbsent(rank, r -> new LinkedList<>()).add(matchThreadElement); + } + } + for (MatchThreadElement matchThreadElement : matchThreads.getOrDefault(rank - 1, Collections.emptyList())) { + for (VariantGraph.Vertex vertex : vertices[rank]) { + final MatchThreadElement advanced = 
matchThreadElement.advance(vertex, rank); + if (advanced != null) { + matchThreads.computeIfAbsent(rank, r -> new LinkedList<>()).add(advanced); + } + } } - } - } - firstElement = false; - } - matches.addAll(threadPhrases); - }); - Collections.sort(matches, maximalUniqueMatchOrdering(matchEvaluator)); - - return matches; - } - - private static Comparator> maximalUniqueMatchOrdering(final Function, Integer> matchEvaluator) { - return new Comparator>() { - @Override - public int compare(SortedSet o1, SortedSet o2) { - // 1. reverse ordering by match value - int result = matchEvaluator.apply(o2) - matchEvaluator.apply(o1); - if (result != 0) { - return result; } - final VertexMatch.WithTokenIndex firstMatch1 = o1.first(); - final VertexMatch.WithTokenIndex firstMatch2 = o2.first(); + final Matches matches = new Matches(matchThreads.size()); + matchThreads.values().stream().flatMap(List::stream).forEach(matchThreadElement -> { + final List> threadPhrases = new ArrayList<>(); + boolean firstElement = true; + for (MatchThreadElement threadElement : matchThreadElement.thread()) { + final SuffixTree.EquivalenceClass equivalenceClass = threadElement.cursor.matchedClass(); + for (int mc = 0; mc < equivalenceClass.length; mc++) { + final int tokenCandidate = equivalenceClass.members[mc]; + if (firstElement) { + final SortedSet phrase = new TreeSet<>(); + phrase.add(new VertexMatch.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate)); + threadPhrases.add(phrase); + } else { + for (SortedSet phrase : threadPhrases) { + if ((phrase.last().token + 1) == tokenCandidate) { + phrase.add(new VertexMatch.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate)); + } + } + } + } + firstElement = false; + } + matches.addAll(threadPhrases); + }); + Collections.sort(matches, maximalUniqueMatchOrdering(matchEvaluator)); - // 2. 
ordering by match distance - result = (Math.abs(firstMatch1.token - firstMatch1.vertexRank) - Math.abs(firstMatch2.token - firstMatch2.vertexRank)); - if (result != 0) { - return result; - } + return matches; + } + private static Comparator> maximalUniqueMatchOrdering(final Function, Integer> matchEvaluator) { + return new Comparator>() { + @Override + public int compare(SortedSet o1, SortedSet o2) { + // 1. reverse ordering by match value + int result = matchEvaluator.apply(o2) - matchEvaluator.apply(o1); + if (result != 0) { + return result; + } - // 3. ordering by first vertex ranking - result = firstMatch1.vertexRank - firstMatch2.vertexRank; - if (result != 0) { - return result; - } + final VertexMatch.WithTokenIndex firstMatch1 = o1.first(); + final VertexMatch.WithTokenIndex firstMatch2 = o2.first(); - // 3. ordering by first token index - return firstMatch1.token - firstMatch2.token; + // 2. ordering by match distance + result = (Math.abs(firstMatch1.token - firstMatch1.vertexRank) - Math.abs(firstMatch2.token - firstMatch2.vertexRank)); + if (result != 0) { + return result; + } - } - }; - } - public SortedSet> findMaximalUniqueMatches() { - final List> allMatches = new ArrayList<>(this); - final SortedSet> maximalUniqueMatches = new TreeSet<>(VertexMatch.setComparator()); + // 3. 
ordering by first vertex ranking + result = firstMatch1.vertexRank - firstMatch2.vertexRank; + if (result != 0) { + return result; + } - while (true) { - SortedSet nextMum = null; - SortedSet candidate = null; - for (SortedSet successor : allMatches) { - if (candidate == null) { - continue; - } - if (candidate.size() > successor.size() || candidate.first().token == successor.first().token) { - nextMum = candidate; - break; - } - candidate = successor; - } - if (nextMum == null) { - nextMum = allMatches.stream().findFirst().orElse(null); - } - if (nextMum == null) { - break; - } - if (!maximalUniqueMatches.add(nextMum)) { - throw new IllegalStateException("Duplicate MUM"); - } - - final BitSet rankFilter = new BitSet(); - final BitSet tokenFilter = new BitSet(); - - rankFilter.set(nextMum.first().vertexRank, nextMum.last().vertexRank + 1); - tokenFilter.set(nextMum.first().token, nextMum.last().token + 1); - - allMatches.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); + // 3. ordering by first token index + return firstMatch1.token - firstMatch2.token; + + } + }; } - return maximalUniqueMatches; - } - /** - * @author Gregor Middell - */ - static class MatchThreadElement { + public SortedSet> findMaximalUniqueMatches() { + final List> allMatches = new ArrayList<>(this); + final SortedSet> maximalUniqueMatches = new TreeSet<>(VertexMatch.setComparator()); + + while (true) { + SortedSet nextMum = null; + SortedSet candidate = null; + for (SortedSet successor : allMatches) { + if (candidate == null) { + continue; + } + if (candidate.size() > successor.size() || candidate.first().token == successor.first().token) { + nextMum = candidate; + break; + } + candidate = successor; + } + if (nextMum == null) { + nextMum = allMatches.stream().findFirst().orElse(null); + } + if (nextMum == null) { + break; + } + if (!maximalUniqueMatches.add(nextMum)) { + throw new IllegalStateException("Duplicate MUM"); + } - final MatchThreadElement previous; - final VariantGraph.Vertex 
vertex; - final int vertexRank; - final SuffixTree.Cursor cursor; + final BitSet rankFilter = new BitSet(); + final BitSet tokenFilter = new BitSet(); - MatchThreadElement(SuffixTree suffixTree) { - this(null, null, -1, suffixTree.cursor()); - } + rankFilter.set(nextMum.first().vertexRank, nextMum.last().vertexRank + 1); + tokenFilter.set(nextMum.first().token, nextMum.last().token + 1); - MatchThreadElement(MatchThreadElement previous, VariantGraph.Vertex vertex, int vertexRank, SuffixTree.Cursor cursor) { - this.previous = previous; - this.vertex = vertex; - this.vertexRank = vertexRank; - this.cursor = cursor; + allMatches.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); + } + return maximalUniqueMatches; } - MatchThreadElement advance(VariantGraph.Vertex vertex, int vertexRank) { - final Set tokens = vertex.tokens(); - if (!tokens.isEmpty()) { - final SuffixTree.Cursor next = cursor.move(tokens.stream().findFirst().get()); - if (next != null) { - return new MatchThreadElement(this, vertex, vertexRank, next); + /** + * @author Gregor Middell + */ + static class MatchThreadElement { + + final MatchThreadElement previous; + final VariantGraph.Vertex vertex; + final int vertexRank; + final SuffixTree.Cursor cursor; + + MatchThreadElement(SuffixTree suffixTree) { + this(null, null, -1, suffixTree.cursor()); } - } - return null; - } - List thread() { - final LinkedList thread = new LinkedList<>(); - MatchThreadElement current = this; - while (current.vertex != null) { - thread.addFirst(current); - current = current.previous; - } - return thread; - } + MatchThreadElement(MatchThreadElement previous, VariantGraph.Vertex vertex, int vertexRank, SuffixTree.Cursor cursor) { + this.previous = previous; + this.vertex = vertex; + this.vertexRank = vertexRank; + this.cursor = cursor; + } - @Override - public String toString() { - return "[" + Arrays.asList(vertexRank, vertex, cursor.matchedClass()).stream().map(Object::toString).collect(Collectors.joining(", ")) + "]"; 
+ MatchThreadElement advance(VariantGraph.Vertex vertex, int vertexRank) { + final Set tokens = vertex.tokens(); + if (!tokens.isEmpty()) { + final SuffixTree.Cursor next = cursor.move(tokens.stream().findFirst().get()); + if (next != null) { + return new MatchThreadElement(this, vertex, vertexRank, next); + } + } + return null; + } + + List thread() { + final LinkedList thread = new LinkedList<>(); + MatchThreadElement current = this; + while (current.vertex != null) { + thread.addFirst(current); + current = current.previous; + } + return thread; + } + + @Override + public String toString() { + return "[" + Arrays.asList(vertexRank, vertex, cursor.matchedClass()).stream().map(Object::toString).collect(Collectors.joining(", ")) + "]"; + } } - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java index 6cdc2b829..f3d025e89 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java @@ -38,66 +38,66 @@ */ public class MediteAlgorithm extends CollationAlgorithm.Base { - private final Comparator comparator; - private final Function, Integer> matchEvaluator; + private final Comparator comparator; + private final Function, Integer> matchEvaluator; - public MediteAlgorithm(Comparator comparator, Function, Integer> matchEvaluator) { - this.comparator = comparator; - this.matchEvaluator = matchEvaluator; - } + public MediteAlgorithm(Comparator comparator, Function, Integer> matchEvaluator) { + this.comparator = comparator; + this.matchEvaluator = matchEvaluator; + } - @Override - public void collate(VariantGraph graph, Iterable witness) { - final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray(); - final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); + @Override 
+ public void collate(VariantGraph graph, Iterable witness) { + final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray(); + final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); - final SuffixTree suffixTree = SuffixTree.build(comparator, tokens); - final MatchEvaluatorWrapper matchEvaluator = new MatchEvaluatorWrapper(this.matchEvaluator, tokens); + final SuffixTree suffixTree = SuffixTree.build(comparator, tokens); + final MatchEvaluatorWrapper matchEvaluator = new MatchEvaluatorWrapper(this.matchEvaluator, tokens); - final Matches matchCandidates = Matches.between(vertices, suffixTree, matchEvaluator); - final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator()); + final Matches matchCandidates = Matches.between(vertices, suffixTree, matchEvaluator); + final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator()); - while (true) { - final SortedSet> maximalUniqueMatches = matchCandidates.findMaximalUniqueMatches(); - if (maximalUniqueMatches.isEmpty()) { - break; - } + while (true) { + final SortedSet> maximalUniqueMatches = matchCandidates.findMaximalUniqueMatches(); + if (maximalUniqueMatches.isEmpty()) { + break; + } - final BitSet rankFilter = new BitSet(); - final BitSet tokenFilter = new BitSet(); + final BitSet rankFilter = new BitSet(); + final BitSet tokenFilter = new BitSet(); - for (SortedSet phrase : AlignmentDecisionGraph.filter(maximalUniqueMatches, matchEvaluator)) { - final VertexMatch.WithTokenIndex firstMatch = phrase.first(); - final VertexMatch.WithTokenIndex lastMatch = phrase.last(); + for (SortedSet phrase : AlignmentDecisionGraph.filter(maximalUniqueMatches, matchEvaluator)) { + final VertexMatch.WithTokenIndex firstMatch = phrase.first(); + final VertexMatch.WithTokenIndex lastMatch = phrase.last(); - matches.add(phrase); - IntStream.range(firstMatch.vertexRank, lastMatch.vertexRank + 1).forEach(rankFilter::set); - IntStream.range(firstMatch.token, 
lastMatch.token + 1).forEach(tokenFilter::set); - } + matches.add(phrase); + IntStream.range(firstMatch.vertexRank, lastMatch.vertexRank + 1).forEach(rankFilter::set); + IntStream.range(firstMatch.token, lastMatch.token + 1).forEach(tokenFilter::set); + } - matchCandidates.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); - } + matchCandidates.removeIf(VertexMatch.filter(rankFilter, tokenFilter)); + } - merge(graph, vertices, tokens, matches); - } + merge(graph, vertices, tokens, matches); + } - static class MatchEvaluatorWrapper implements Function, Integer> { + static class MatchEvaluatorWrapper implements Function, Integer> { - private final Function, Integer> wrapped; - private final Function tokenResolver; + private final Function, Integer> wrapped; + private final Function tokenResolver; - MatchEvaluatorWrapper(final Function, Integer> wrapped, final Token[] tokens) { - this.wrapped = wrapped; - this.tokenResolver = VertexMatch.tokenResolver(tokens); - } + MatchEvaluatorWrapper(final Function, Integer> wrapped, final Token[] tokens) { + this.wrapped = wrapped; + this.tokenResolver = VertexMatch.tokenResolver(tokens); + } - @Override - public Integer apply(SortedSet input) { - final SortedSet tokenPhrase = new TreeSet<>(); - for (VertexMatch.WithTokenIndex match : input) { - tokenPhrase.add(tokenResolver.apply(match)); - } - return wrapped.apply(tokenPhrase); + @Override + public Integer apply(SortedSet input) { + final SortedSet tokenPhrase = new TreeSet<>(); + for (VertexMatch.WithTokenIndex match : input) { + tokenPhrase.add(tokenResolver.apply(match)); + } + return wrapped.apply(tokenPhrase); + } } - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java index 12f195ea7..08f4261d8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java +++ 
b/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java @@ -37,240 +37,240 @@ */ class SuffixTree { - final Comparator comparator; - final Comparator sourceComparator; - final T[] source; - final Node root; - - @SafeVarargs - static SuffixTree build(Comparator comparator, T... source) { - return new SuffixTree<>(comparator, source).build(); - } - - @SafeVarargs - private SuffixTree(Comparator comparator, T... source) { - this.comparator = comparator; - this.sourceComparator = new SentinelAwareComparator(comparator); - this.source = source; - this.root = new Node(); - } - - public Cursor cursor() { - return new Cursor(); - } - - public Iterable match(final Iterable str) { - return () -> new Iterator() { - - final Iterator it = str.iterator(); - Optional cursor = Optional.ofNullable(it.hasNext() ? cursor().move(it.next()) : null); - - @Override - public boolean hasNext() { - return cursor.isPresent(); - } - - @Override - public EquivalenceClass next() { - final EquivalenceClass next = cursor.get().matchedClass(); - cursor = Optional.ofNullable(it.hasNext() ? cursor.get().move(it.next()) : null); - return next; - } - - }; - } - - - private SuffixTree build() { - for (int suffixStart = 0; suffixStart <= source.length; suffixStart++) { - root.addSuffix(suffixStart); + final Comparator comparator; + final Comparator sourceComparator; + final T[] source; + final Node root; + + @SafeVarargs + static SuffixTree build(Comparator comparator, T... 
source) { + return new SuffixTree<>(comparator, source).build(); } - compactNodes(root); - return this; - } - - private void compactNodes(Node node) { - for (Node child : node.children) { - while (child.children.size() == 1) { - final Node firstGrandChild = child.children.iterator().next(); - child.incomingLabel.add(firstGrandChild.incomingLabel.getFirst()); - child.children = firstGrandChild.children; - for (Node formerGrandchild : child.children) { - formerGrandchild.parent = child; - } - } - compactNodes(child); + + @SafeVarargs + private SuffixTree(Comparator comparator, T... source) { + this.comparator = comparator; + this.sourceComparator = new SentinelAwareComparator(comparator); + this.source = source; + this.root = new Node(); } - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - final Deque nodes = new ArrayDeque<>(Collections.singleton(root)); - while (!nodes.isEmpty()) { - final Node node = nodes.remove(); - sb.append(IntStream.range(0, node.depth()).mapToObj(i -> "\t").collect(Collectors.joining())).append(node).append("\n"); - node.children.forEach(nodes::addFirst); + + public Cursor cursor() { + return new Cursor(); } - return sb.toString(); - } - /** - * @author Gregor Middell - */ - class Node { + public Iterable match(final Iterable str) { + return () -> new Iterator() { - final LinkedList incomingLabel; + final Iterator it = str.iterator(); + Optional cursor = Optional.ofNullable(it.hasNext() ? cursor().move(it.next()) : null); - Node parent; - List children = new ArrayList<>(); + @Override + public boolean hasNext() { + return cursor.isPresent(); + } - public Node(Node parent, int firstIndex) { - this.parent = parent; - this.incomingLabel = new LinkedList<>(Collections.singleton(new EquivalenceClass(firstIndex))); - } + @Override + public EquivalenceClass next() { + final EquivalenceClass next = cursor.get().matchedClass(); + cursor = Optional.ofNullable(it.hasNext() ? 
cursor.get().move(it.next()) : null); + return next; + } - public Node() { - this.parent = null; - this.incomingLabel = null; + }; } - public int depth() { - int depth = 0; - for (Node parent = this.parent; parent != null; parent = parent.parent) { - depth++; - } - return depth; - } - - public void addSuffix(int start) { - addSuffix(this, start); + private SuffixTree build() { + for (int suffixStart = 0; suffixStart <= source.length; suffixStart++) { + root.addSuffix(suffixStart); + } + compactNodes(root); + return this; } - private Node addSuffix(Node node, int start) { - for (Node child : node.children) { - EquivalenceClass childClass = child.incomingLabel.getFirst(); - if (childClass.isMember(start)) { - childClass.add(start); - start++; - if (start == (source.length + 1)) { - return child; - } - return addSuffix(child, start); + private void compactNodes(Node node) { + for (Node child : node.children) { + while (child.children.size() == 1) { + final Node firstGrandChild = child.children.iterator().next(); + child.incomingLabel.add(firstGrandChild.incomingLabel.getFirst()); + child.children = firstGrandChild.children; + for (Node formerGrandchild : child.children) { + formerGrandchild.parent = child; + } + } + compactNodes(child); } - } - while (start <= source.length) { - Node child = new Node(node, start); - node.children.add(child); - node = child; - start++; - } - return node; } @Override public String toString() { - return Optional.ofNullable(incomingLabel).map(label -> label.stream().map(Object::toString).collect(Collectors.joining(", "))).orElse(""); + final StringBuilder sb = new StringBuilder(); + final Deque nodes = new ArrayDeque<>(Collections.singleton(root)); + while (!nodes.isEmpty()) { + final Node node = nodes.remove(); + sb.append(IntStream.range(0, node.depth()).mapToObj(i -> "\t").collect(Collectors.joining())).append(node).append("\n"); + node.children.forEach(nodes::addFirst); + } + return sb.toString(); } - } - class EquivalenceClass 
implements Comparable { + /** + * @author Gregor Middell + */ + class Node { - int[] members = new int[2]; - int length = 0; + final LinkedList incomingLabel; - EquivalenceClass(int first) { - members[length++] = first; - } + Node parent; + List children = new ArrayList<>(); - void add(int member) { - if (length == members.length) { - members = Arrays.copyOf(members, members.length * 2); - } - members[length++] = member; - } + public Node(Node parent, int firstIndex) { + this.parent = parent; + this.incomingLabel = new LinkedList<>(Collections.singleton(new EquivalenceClass(firstIndex))); + } - boolean isMember(int index) { - return sourceComparator.compare(index, members[0]) == 0; - } + public Node() { + this.parent = null; + this.incomingLabel = null; + } - public boolean isMember(T symbol) { - return (members[0] != source.length && comparator.compare(symbol, source[members[0]]) == 0); - } - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof SuffixTree.EquivalenceClass) { - return members[0] == ((SuffixTree.EquivalenceClass)obj).members[0]; - } - return super.equals(obj); - } + public int depth() { + int depth = 0; + for (Node parent = this.parent; parent != null; parent = parent.parent) { + depth++; + } + return depth; + } - @Override - public int hashCode() { - return members[0]; - } + public void addSuffix(int start) { + addSuffix(this, start); + } - @Override - public int compareTo(EquivalenceClass o) { - return (members[0] - o.members[0]); - } + private Node addSuffix(Node node, int start) { + for (Node child : node.children) { + EquivalenceClass childClass = child.incomingLabel.getFirst(); + if (childClass.isMember(start)) { + childClass.add(start); + start++; + if (start == (source.length + 1)) { + return child; + } + return addSuffix(child, start); + } + } + while (start <= source.length) { + Node child = new Node(node, start); + node.children.add(child); + node = child; + start++; + } + return node; + } - @Override - 
public String toString() { - return String.format("{%s}", Arrays.stream(members, 0, length) - .mapToObj(member -> "<[" + member + "] " + (member == source.length ? "$" : source[member].toString()) + ">") - .collect(Collectors.joining(", "))); + @Override + public String toString() { + return Optional.ofNullable(incomingLabel).map(label -> label.stream().map(Object::toString).collect(Collectors.joining(", "))).orElse(""); + } } - } + class EquivalenceClass implements Comparable { - class SentinelAwareComparator implements Comparator { + int[] members = new int[2]; + int length = 0; - final Comparator comparator; + EquivalenceClass(int first) { + members[length++] = first; + } - SentinelAwareComparator(Comparator comparator) { - this.comparator = comparator; - } + void add(int member) { + if (length == members.length) { + members = Arrays.copyOf(members, members.length * 2); + } + members[length++] = member; + } - @Override - public int compare(Integer o1, Integer o2) { - if (o1 == source.length || o2 == source.length) { - return (o2 - o1); - } - return comparator.compare(source[o1], source[o2]); - } - } + boolean isMember(int index) { + return sourceComparator.compare(index, members[0]) == 0; + } - public class Cursor { - final Node node; - final int offset; + public boolean isMember(T symbol) { + return (members[0] != source.length && comparator.compare(symbol, source[members[0]]) == 0); + } - Cursor() { - this(root, 0); - } + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof SuffixTree.EquivalenceClass) { + return members[0] == ((SuffixTree.EquivalenceClass) obj).members[0]; + } + return super.equals(obj); + } + + @Override + public int hashCode() { + return members[0]; + } + + @Override + public int compareTo(EquivalenceClass o) { + return (members[0] - o.members[0]); + } + + @Override + public String toString() { + return String.format("{%s}", Arrays.stream(members, 0, length) + .mapToObj(member -> "<[" + member + "] " + (member 
== source.length ? "$" : source[member].toString()) + ">") + .collect(Collectors.joining(", "))); + } - Cursor(Node node, int offset) { - this.node = node; - this.offset = offset; } - public Cursor move(T symbol) { - if (node.incomingLabel == null || (offset + 1) == node.incomingLabel.size()) { - for (Node child : node.children) { - final Cursor next = new Cursor(child, 0); - if (next.matchedClass().isMember(symbol)) { - return next; - } + class SentinelAwareComparator implements Comparator { + + final Comparator comparator; + + SentinelAwareComparator(Comparator comparator) { + this.comparator = comparator; + } + + @Override + public int compare(Integer o1, Integer o2) { + if (o1 == source.length || o2 == source.length) { + return (o2 - o1); + } + return comparator.compare(source[o1], source[o2]); } - return null; - } - return (node.incomingLabel.get(offset + 1).isMember(symbol) ? new Cursor(node, offset + 1) : null); } - EquivalenceClass matchedClass() { - return node.incomingLabel.get(offset); + public class Cursor { + final Node node; + final int offset; + + Cursor() { + this(root, 0); + } + + Cursor(Node node, int offset) { + this.node = node; + this.offset = offset; + } + + public Cursor move(T symbol) { + if (node.incomingLabel == null || (offset + 1) == node.incomingLabel.size()) { + for (Node child : node.children) { + final Cursor next = new Cursor(child, 0); + if (next.matchedClass().isMember(symbol)) { + return next; + } + } + return null; + } + return (node.incomingLabel.get(offset + 1).isMember(symbol) ? 
new Cursor(node, offset + 1) : null); + } + + EquivalenceClass matchedClass() { + return node.incomingLabel.get(offset); + } } - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java index 93c278abd..52af9f155 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java @@ -36,96 +36,96 @@ */ public class NeedlemanWunschAlgorithm extends CollationAlgorithm.Base { - private final Comparator comparator; - private final NeedlemanWunschScorer scorer = new NeedlemanWunschScorer() { + private final Comparator comparator; + private final NeedlemanWunschScorer scorer = new NeedlemanWunschScorer() { - @Override - public float score(VariantGraph.Vertex[] a, Token b) { - return Arrays.stream(a).map(VariantGraph.Vertex::tokens).flatMap(Set::stream).anyMatch(t -> comparator.compare(t, b) == 0) ? 1 : -1; + @Override + public float score(VariantGraph.Vertex[] a, Token b) { + return Arrays.stream(a).map(VariantGraph.Vertex::tokens).flatMap(Set::stream).anyMatch(t -> comparator.compare(t, b) == 0) ? 
1 : -1; + } + + @Override + public float gap() { + return -1; + } + }; + + public NeedlemanWunschAlgorithm(Comparator comparator) { + this.comparator = comparator; } @Override - public float gap() { - return -1; - } - }; - - public NeedlemanWunschAlgorithm(Comparator comparator) { - this.comparator = comparator; - } - - @Override - public void collate(VariantGraph against, Iterable witness) { - final VariantGraph.Vertex[][] ranks = VariantGraphRanking.of(against).asArray(); - final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); - - final Map alignments = new HashMap<>(); - for (Map.Entry alignment : align(ranks, tokens, scorer).entrySet()) { - boolean aligned = false; - final Token token = alignment.getValue(); - for (VariantGraph.Vertex vertex : alignment.getKey()) { - for (Token vertexToken : vertex.tokens()) { - if (comparator.compare(vertexToken, token) == 0) { - alignments.put(token, vertex); - aligned = true; - break; - } + public void collate(VariantGraph against, Iterable witness) { + final VariantGraph.Vertex[][] ranks = VariantGraphRanking.of(against).asArray(); + final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); + + final Map alignments = new HashMap<>(); + for (Map.Entry alignment : align(ranks, tokens, scorer).entrySet()) { + boolean aligned = false; + final Token token = alignment.getValue(); + for (VariantGraph.Vertex vertex : alignment.getKey()) { + for (Token vertexToken : vertex.tokens()) { + if (comparator.compare(vertexToken, token) == 0) { + alignments.put(token, vertex); + aligned = true; + break; + } + } + if (aligned) { + break; + } + } } - if (aligned) { - break; - } - } + + merge(against, witness, alignments); } - merge(against, witness, alignments); - } + public static Map align(A[] a, B[] b, NeedlemanWunschScorer scorer) { - public static Map align(A[] a, B[] b, NeedlemanWunschScorer scorer) { + final Map alignments = new HashMap<>(); + final 
float[][] matrix = new float[a.length + 1][b.length + 1]; - final Map alignments = new HashMap<>(); - final float[][] matrix = new float[a.length + 1][b.length + 1]; + int ac = 0; + int bc = 0; + while (ac < a.length) { + matrix[ac++][0] = scorer.gap() * ac; + } + while (bc < b.length) { + matrix[0][bc++] = scorer.gap() * bc; + } - int ac = 0; - int bc = 0; - while (ac < a.length) { - matrix[ac++][0] = scorer.gap() * ac; - } - while (bc < b.length) { - matrix[0][bc++] = scorer.gap() * bc; - } + ac = 1; + for (A aElement : a) { + bc = 1; + for (B bElement : b) { + final float k = matrix[ac - 1][bc - 1] + scorer.score(aElement, bElement); + final float l = matrix[ac - 1][bc] + scorer.gap(); + final float m = matrix[ac][bc - 1] + scorer.gap(); + matrix[ac][bc++] = Math.max(Math.max(k, l), m); + } + ac++; + } - ac = 1; - for (A aElement : a) { - bc = 1; - for (B bElement : b) { - final float k = matrix[ac - 1][bc - 1] + scorer.score(aElement, bElement); - final float l = matrix[ac - 1][bc] + scorer.gap(); - final float m = matrix[ac][bc - 1] + scorer.gap(); - matrix[ac][bc++] = Math.max(Math.max(k, l), m); - } - ac++; - } + ac = a.length; + bc = b.length; + while (ac > 0 && bc > 0) { + final float score = matrix[ac][bc]; + final float scoreDiag = matrix[ac - 1][bc - 1]; + final float scoreUp = matrix[ac][bc - 1]; + final float scoreLeft = matrix[ac - 1][bc]; + + if (score == scoreDiag + scorer.score(a[ac - 1], b[bc - 1])) { + // match + alignments.put(a[ac - 1], b[bc - 1]); + ac--; + bc--; + } else if (score == scoreLeft + scorer.gap()) { + ac--; + } else if (score == scoreUp + scorer.gap()) { + bc--; + } + } - ac = a.length; - bc = b.length; - while (ac > 0 && bc > 0) { - final float score = matrix[ac][bc]; - final float scoreDiag = matrix[ac - 1][bc - 1]; - final float scoreUp = matrix[ac][bc - 1]; - final float scoreLeft = matrix[ac - 1][bc]; - - if (score == scoreDiag + scorer.score(a[ac - 1], b[bc - 1])) { - // match - alignments.put(a[ac - 1], b[bc - 1]); - ac--; 
- bc--; - } else if (score == scoreLeft + scorer.gap()) { - ac--; - } else if (score == scoreUp + scorer.gap()) { - bc--; - } + return alignments; } - - return alignments; - } } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java index 3f81ed75d..ebfa55f43 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java @@ -24,7 +24,7 @@ */ public interface NeedlemanWunschScorer { - float score(A a, B b); + float score(A a, B b); - float gap(); + float gap(); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java index 6feff87fb..3c04cb3e6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java @@ -26,35 +26,35 @@ public class SimpleCollation { - private final List witnesses; - private final CollationAlgorithm algorithm; - private final boolean joined; - - public SimpleCollation(List witnesses, CollationAlgorithm algorithm, boolean joined) { - this.witnesses = witnesses; - this.algorithm = algorithm; - this.joined = joined; - } - - public List getWitnesses() { - return witnesses; - } - - public CollationAlgorithm getAlgorithm() { - return algorithm; - } - - public boolean isJoined() { - return joined; - } - - public VariantGraph collate(VariantGraph graph) { - for (SimpleWitness witness : witnesses) { - algorithm.collate(graph, witness); + private final List witnesses; + private final CollationAlgorithm algorithm; + private final boolean joined; + + public SimpleCollation(List witnesses, 
CollationAlgorithm algorithm, boolean joined) { + this.witnesses = witnesses; + this.algorithm = algorithm; + this.joined = joined; } - if (joined) { - VariantGraph.JOIN.apply(graph); + + public List getWitnesses() { + return witnesses; + } + + public CollationAlgorithm getAlgorithm() { + return algorithm; + } + + public boolean isJoined() { + return joined; + } + + public VariantGraph collate(VariantGraph graph) { + for (SimpleWitness witness : witnesses) { + algorithm.collate(graph, witness); + } + if (joined) { + VariantGraph.JOIN.apply(graph); + } + return graph; } - return graph; - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java index 1677df3a9..d32a5ea83 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java @@ -32,23 +32,23 @@ */ public class SimplePatternTokenizer { - static final String PUNCT = Pattern.quote(".?!,;:"); - - static Function> tokenizer(Pattern pattern) { - return input -> { - final Matcher matcher = pattern.matcher(input); - final List tokens = new LinkedList<>(); - while (matcher.find()) { - tokens.add(input.substring(matcher.start(), matcher.end())); - } - return tokens.stream(); - }; - } - - public static final Function> BY_WHITESPACE = tokenizer(Pattern.compile("\\s*?\\S+\\s*]")); - - public static final Function> BY_WS_AND_PUNCT = tokenizer(Pattern.compile("[\\s" + PUNCT + "]*?[^\\s" + PUNCT + "]+[\\s" + PUNCT + "]*")); - - public static final Function> BY_WS_OR_PUNCT = tokenizer(Pattern.compile("[" + PUNCT + "]+[\\s]*|[^" + PUNCT + "\\s]+[\\s]*")); + static final String PUNCT = Pattern.quote(".?!,;:"); + + static Function> tokenizer(Pattern pattern) { + return input -> { + final Matcher matcher = pattern.matcher(input); + final List tokens = new 
LinkedList<>(); + while (matcher.find()) { + tokens.add(input.substring(matcher.start(), matcher.end())); + } + return tokens.stream(); + }; + } + + public static final Function> BY_WHITESPACE = tokenizer(Pattern.compile("\\s*?\\S+\\s*]")); + + public static final Function> BY_WS_AND_PUNCT = tokenizer(Pattern.compile("[\\s" + PUNCT + "]*?[^\\s" + PUNCT + "]+[\\s" + PUNCT + "]*")); + + public static final Function> BY_WS_OR_PUNCT = tokenizer(Pattern.compile("[" + PUNCT + "]+[\\s]*|[^" + PUNCT + "\\s]+[\\s]*")); } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java index ce6511085..48852f1d0 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java @@ -29,53 +29,53 @@ import java.util.stream.StreamSupport; public class SimpleToken implements Token, Comparable { - private final SimpleWitness witness; - private final String content; - private final String normalized; + private final SimpleWitness witness; + private final String content; + private final String normalized; - public SimpleToken(SimpleWitness witness, String content, String normalized) { - this.witness = witness; - this.content = content; - this.normalized = normalized; - } + public SimpleToken(SimpleWitness witness, String content, String normalized) { + this.witness = witness; + this.content = content; + this.normalized = normalized; + } - public String getContent() { - return content; - } + public String getContent() { + return content; + } - @Override - public Witness getWitness() { - return witness; - } + @Override + public Witness getWitness() { + return witness; + } - public String getNormalized() { - return normalized; - } + public String getNormalized() { + return normalized; + } - @Override - public String toString() { - return new 
StringBuilder(witness.toString()).append(":").append(witness.getTokens().indexOf(this)).append(":'").append(normalized).append("'").toString(); - } + @Override + public String toString() { + return new StringBuilder(witness.toString()).append(":").append(witness.getTokens().indexOf(this)).append(":'").append(normalized).append("'").toString(); + } - public static String toString(Iterable tokens) { - return StreamSupport.stream(tokens.spliterator(), false) + public static String toString(Iterable tokens) { + return StreamSupport.stream(tokens.spliterator(), false) .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) .map(t -> (SimpleToken) t) .map(SimpleToken::getContent) .collect(Collectors.joining()) .trim(); - } - - @Override - public int compareTo(SimpleToken o) { - return witness.compare(this, o); - } + } - public static final Function, Integer> TOKEN_MATCH_EVALUATOR = input -> { - int value = 0; - for (VertexMatch.WithToken match : input) { - value += ((SimpleToken) match.token).getContent().length(); + @Override + public int compareTo(SimpleToken o) { + return witness.compare(this, o); } - return value; - }; + + public static final Function, Integer> TOKEN_MATCH_EVALUATOR = input -> { + int value = 0; + for (VertexMatch.WithToken match : input) { + value += ((SimpleToken) match.token).getContent().length(); + } + return value; + }; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java index 8e5193015..9d98627a9 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java @@ -27,37 +27,37 @@ */ public class SimpleTokenNormalizers { - public static final Function LOWER_CASE = String::toLowerCase; - - public static final Function TRIM_WS = String::trim; - - public static final 
Function TRIM_WS_PUNCT = input -> { - int start = 0; - int end = input.length() - 1; - while (start <= end && isWhitespaceOrPunctuation(input.charAt(start))) { - start++; - } - while (end >= start && isWhitespaceOrPunctuation(input.charAt(end))) { - end--; + public static final Function LOWER_CASE = String::toLowerCase; + + public static final Function TRIM_WS = String::trim; + + public static final Function TRIM_WS_PUNCT = input -> { + int start = 0; + int end = input.length() - 1; + while (start <= end && isWhitespaceOrPunctuation(input.charAt(start))) { + start++; + } + while (end >= start && isWhitespaceOrPunctuation(input.charAt(end))) { + end--; + } + return input.substring(start, end + 1); + }; + + public static boolean isWhitespaceOrPunctuation(char c) { + if (Character.isWhitespace(c)) { + return true; + } + switch (Character.getType(c)) { + case Character.START_PUNCTUATION: + case Character.END_PUNCTUATION: + case Character.OTHER_PUNCTUATION: + return true; + default: + return false; + } } - return input.substring(start, end + 1); - }; - public static boolean isWhitespaceOrPunctuation(char c) { - if (Character.isWhitespace(c)) { - return true; - } - switch (Character.getType(c)) { - case Character.START_PUNCTUATION: - case Character.END_PUNCTUATION: - case Character.OTHER_PUNCTUATION: - return true; - default: - return false; - } - } + public static final Function LC_TRIM_WS_PUNCT = LOWER_CASE.andThen(TRIM_WS_PUNCT); - public static final Function LC_TRIM_WS_PUNCT = LOWER_CASE.andThen(TRIM_WS_PUNCT); - - public static final Function LC_TRIM_WS = LOWER_CASE.andThen(TRIM_WS); + public static final Function LC_TRIM_WS = LOWER_CASE.andThen(TRIM_WS); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index 0d8b3e4ff..999856246 100644 --- 
a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -53,393 +53,393 @@ * @author Gregor Middell */ public class SimpleVariantGraphSerializer { - /** - * CollateX custom namespace. - */ - protected static final String COLLATEX_NS = "http://interedition.eu/collatex/ns/1.0"; - - /** - * The TEI P5 namespace. - */ - protected static final String TEI_NS = "http://www.tei-c.org/ns/1.0"; - - private final VariantGraph graph; - private final Function, String> tokensToString; - private final Map vertexIds = new HashMap<>(); - private VariantGraphRanking ranking; - - public SimpleVariantGraphSerializer(VariantGraph graph) { - this(graph, SIMPLE_TOKEN_TO_STRING); - } - - public SimpleVariantGraphSerializer(VariantGraph graph, Function, String> tokensToString) { - this.graph = graph; - this.tokensToString = tokensToString; - } - - public void toTEI(final XMLStreamWriter xml) throws XMLStreamException { - try { - ParallelSegmentationApparatus.generate(ranking(), new ParallelSegmentationApparatus.GeneratorCallback() { - @Override - public void start() { - try { - xml.writeStartElement("cx", "apparatus", COLLATEX_NS); - xml.writeNamespace("cx", COLLATEX_NS); - xml.writeNamespace("", TEI_NS); - } catch (XMLStreamException e) { - throw new RuntimeException(e); - } - } + /** + * CollateX custom namespace. + */ + protected static final String COLLATEX_NS = "http://interedition.eu/collatex/ns/1.0"; + + /** + * The TEI P5 namespace. 
+ */ + protected static final String TEI_NS = "http://www.tei-c.org/ns/1.0"; + + private final VariantGraph graph; + private final Function, String> tokensToString; + private final Map vertexIds = new HashMap<>(); + private VariantGraphRanking ranking; + + public SimpleVariantGraphSerializer(VariantGraph graph) { + this(graph, SIMPLE_TOKEN_TO_STRING); + } - @Override - public void segment(SortedMap> contents) { - final Map> segments = new LinkedHashMap<>(); - contents.forEach((witness, tokens) -> segments.computeIfAbsent(tokensToString.apply(tokens).trim(), k -> new HashSet<>()).add(witness)); - - final Set segmentContents = segments.keySet(); - try { - if (segmentContents.size() == 1) { - xml.writeCharacters(segmentContents.stream().findFirst().get()); - } else { - xml.writeStartElement("", "app", TEI_NS); - for (String segment : segmentContents) { - final StringBuilder witnesses = new StringBuilder(); - for (Witness witness : segments.get(segment)) { - witnesses.append(witness.getSigil()).append(" "); - } - if (segment.length() == 0) { - xml.writeEmptyElement("", "rdg", TEI_NS); - } else { - xml.writeStartElement("", "rdg", TEI_NS); + public SimpleVariantGraphSerializer(VariantGraph graph, Function, String> tokensToString) { + this.graph = graph; + this.tokensToString = tokensToString; + } + + public void toTEI(final XMLStreamWriter xml) throws XMLStreamException { + try { + ParallelSegmentationApparatus.generate(ranking(), new ParallelSegmentationApparatus.GeneratorCallback() { + @Override + public void start() { + try { + xml.writeStartElement("cx", "apparatus", COLLATEX_NS); + xml.writeNamespace("cx", COLLATEX_NS); + xml.writeNamespace("", TEI_NS); + } catch (XMLStreamException e) { + throw new RuntimeException(e); + } } - xml.writeAttribute("wit", witnesses.toString().trim()); + @Override + public void segment(SortedMap> contents) { + final Map> segments = new LinkedHashMap<>(); + contents.forEach((witness, tokens) -> 
segments.computeIfAbsent(tokensToString.apply(tokens).trim(), k -> new HashSet<>()).add(witness)); + + final Set segmentContents = segments.keySet(); + try { + if (segmentContents.size() == 1) { + xml.writeCharacters(segmentContents.stream().findFirst().get()); + } else { + xml.writeStartElement("", "app", TEI_NS); + for (String segment : segmentContents) { + final StringBuilder witnesses = new StringBuilder(); + for (Witness witness : segments.get(segment)) { + witnesses.append(witness.getSigil()).append(" "); + } + if (segment.length() == 0) { + xml.writeEmptyElement("", "rdg", TEI_NS); + } else { + xml.writeStartElement("", "rdg", TEI_NS); + } + + xml.writeAttribute("wit", witnesses.toString().trim()); + + if (segment.length() > 0) { + xml.writeCharacters(segment); + xml.writeEndElement(); + } + } + xml.writeEndElement(); + } + } catch (XMLStreamException e) { + throw new RuntimeException(e); + } + } - if (segment.length() > 0) { - xml.writeCharacters(segment); - xml.writeEndElement(); + @Override + public void end() { + try { + xml.writeEndElement(); + } catch (XMLStreamException e) { + throw new RuntimeException(e); + } } - } - xml.writeEndElement(); + }); + } catch (RuntimeException re) { + Throwable rootCause = re; + for (Throwable cause = re; cause != null; cause = cause.getCause()) { + rootCause = cause; } - } catch (XMLStreamException e) { - throw new RuntimeException(e); - } - } - - @Override - public void end() { - try { - xml.writeEndElement(); - } catch (XMLStreamException e) { - throw new RuntimeException(e); - } + if (rootCause instanceof XMLStreamException) { + throw (XMLStreamException) rootCause; + } + throw re; } - }); - } catch (RuntimeException re) { - Throwable rootCause = re; - for (Throwable cause = re; cause != null; cause = cause.getCause()) { - rootCause = cause; - } - if (rootCause instanceof XMLStreamException) { - throw (XMLStreamException) rootCause; - } - throw re; } - } - public void toCsv(final Writer out) throws IOException { - 
try { - ParallelSegmentationApparatus.generate(ranking(), new ParallelSegmentationApparatus.GeneratorCallback() { - @Override - public void start() { - try { - for (Iterator it = graph.witnesses().stream().sorted(Witness.SIGIL_COMPARATOR).iterator(); it.hasNext(); ) { - out.write(escapeCsvField(it.next().getSigil())); - if (it.hasNext()) { - out.write(","); - } - } - out.write("\r\n"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } + public void toCsv(final Writer out) throws IOException { + try { + ParallelSegmentationApparatus.generate(ranking(), new ParallelSegmentationApparatus.GeneratorCallback() { + @Override + public void start() { + try { + for (Iterator it = graph.witnesses().stream().sorted(Witness.SIGIL_COMPARATOR).iterator(); it.hasNext(); ) { + out.write(escapeCsvField(it.next().getSigil())); + if (it.hasNext()) { + out.write(","); + } + } + out.write("\r\n"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } - @Override - public void segment(SortedMap> contents) { - try { - for (Iterator witnessIt = contents.keySet().iterator(); witnessIt.hasNext();) { - out.write(escapeCsvField(tokensToString.apply(contents.getOrDefault(witnessIt.next(), Collections.emptySet())))); - if (witnessIt.hasNext()) { - out.write(","); - } - } - out.write("\r\n"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } + @Override + public void segment(SortedMap> contents) { + try { + for (Iterator witnessIt = contents.keySet().iterator(); witnessIt.hasNext(); ) { + out.write(escapeCsvField(tokensToString.apply(contents.getOrDefault(witnessIt.next(), Collections.emptySet())))); + if (witnessIt.hasNext()) { + out.write(","); + } + } + out.write("\r\n"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } - @Override - public void end() { - } - }); - } catch (Throwable t) { - for (Throwable cause = t; cause != null; cause = cause.getCause()) { - if (cause instanceof IOException) { - throw (IOException) 
cause; + @Override + public void end() { + } + }); + } catch (Throwable t) { + for (Throwable cause = t; cause != null; cause = cause.getCause()) { + if (cause instanceof IOException) { + throw (IOException) cause; + } + } + if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } + throw new RuntimeException(t); } - } - if (t instanceof RuntimeException) { - throw (RuntimeException) t; - } - throw new RuntimeException(t); } - } - static final Pattern CSV_SPECIAL_CHARS = Pattern.compile("[\r\n\",]"); + static final Pattern CSV_SPECIAL_CHARS = Pattern.compile("[\r\n\",]"); - static String escapeCsvField(String content) { - return (CSV_SPECIAL_CHARS.matcher(content).find() ? ("\"" + content.replaceAll("\"", "\"\"") + "\"") : content); - } - - public void toDot(Writer writer) { - final PrintWriter out = new PrintWriter(writer); - final String indent = " "; - final String connector = " -> "; + static String escapeCsvField(String content) { + return (CSV_SPECIAL_CHARS.matcher(content).find() ? 
("\"" + content.replaceAll("\"", "\"\"") + "\"") : content); + } - out.println("digraph G {"); + public void toDot(Writer writer) { + final PrintWriter out = new PrintWriter(writer); + final String indent = " "; + final String connector = " -> "; - for (VariantGraph.Vertex v : graph.vertices()) { - out.print(indent + id(v)); - out.print(" [label = \"" + toDotLabel(v) + "\"]"); - out.println(";"); - } + out.println("digraph G {"); - for (VariantGraph.Vertex v : graph.vertices()) { - for (Map.Entry> e : v.outgoing().entrySet()) { - out.print(indent + id(v) + connector + id(e.getKey())); - out.print(" [label = \"" + toDotLabel(e.getValue()) + "\"]"); - out.println(";"); + for (VariantGraph.Vertex v : graph.vertices()) { + out.print(indent + id(v)); + out.print(" [label = \"" + toDotLabel(v) + "\"]"); + out.println(";"); } - } - for (Tuple transposedTuple : transposedTuples()) { - final String leftId = id(transposedTuple.left); - final String rightId = id(transposedTuple.right); - out.print(indent + leftId + connector + rightId); - out.print(" [ color = \"lightgray\", style = \"dashed\" arrowhead = \"none\", arrowtail = \"none\" ]"); - out.println(";"); - } + for (VariantGraph.Vertex v : graph.vertices()) { + for (Map.Entry> e : v.outgoing().entrySet()) { + out.print(indent + id(v) + connector + id(e.getKey())); + out.print(" [label = \"" + toDotLabel(e.getValue()) + "\"]"); + out.println(";"); + } + } - out.print(indent + id(graph.getStart()) + connector + id(graph.getEnd())); - out.print(" [color = \"white\"]"); - out.println(";"); + for (Tuple transposedTuple : transposedTuples()) { + final String leftId = id(transposedTuple.left); + final String rightId = id(transposedTuple.right); + out.print(indent + leftId + connector + rightId); + out.print(" [ color = \"lightgray\", style = \"dashed\" arrowhead = \"none\", arrowtail = \"none\" ]"); + out.println(";"); + } - out.println("}"); + out.print(indent + id(graph.getStart()) + connector + id(graph.getEnd())); + 
out.print(" [color = \"white\"]"); + out.println(";"); - out.flush(); - } + out.println("}"); - private String id(VariantGraph.Vertex vertex) { - return ("v" + numericId(vertex)); - } + out.flush(); + } - private int numericId(VariantGraph.Vertex vertex) { - Integer id = vertexIds.get(vertex); - if (id == null) { - id = vertexIds.size(); - vertexIds.put(vertex, id); + private String id(VariantGraph.Vertex vertex) { + return ("v" + numericId(vertex)); } - return id; - } - String toDotLabel(Set e) { - return escapeDotLabel(e.stream().map(Witness::getSigil).distinct().sorted().collect(Collectors.joining(", "))); - } + private int numericId(VariantGraph.Vertex vertex) { + Integer id = vertexIds.get(vertex); + if (id == null) { + id = vertexIds.size(); + vertexIds.put(vertex, id); + } + return id; + } - String toDotLabel(VariantGraph.Vertex v) { - return escapeDotLabel(vertexToString.apply(v)); - } + String toDotLabel(Set e) { + return escapeDotLabel(e.stream().map(Witness::getSigil).distinct().sorted().collect(Collectors.joining(", "))); + } - static String escapeDotLabel(String string) { - return string.replaceAll("\"", "\\\\\"").replaceAll("[\n\r]+", "\u00B6"); - } + String toDotLabel(VariantGraph.Vertex v) { + return escapeDotLabel(vertexToString.apply(v)); + } - VariantGraphRanking ranking() { - if (ranking == null) { - ranking = VariantGraphRanking.of(graph); + static String escapeDotLabel(String string) { + return string.replaceAll("\"", "\\\\\"").replaceAll("[\n\r]+", "\u00B6"); } - return ranking; - } - - Set> transposedTuples() { - final Set> tuples = new HashSet<>(); - final Comparator vertexOrdering = ranking().comparator(); - - for (Set transposition : graph.transpositions()) { - final SortedMap> verticesByWitness = new TreeMap<>(Witness.SIGIL_COMPARATOR); - for (VariantGraph.Vertex vertex : transposition) { - for (Witness witness : vertex.witnesses()) { - verticesByWitness.computeIfAbsent(witness, w -> new TreeSet<>(vertexOrdering)).add(vertex); + + 
VariantGraphRanking ranking() { + if (ranking == null) { + ranking = VariantGraphRanking.of(graph); } - } - - Witness prev = null; - for (Witness witness : verticesByWitness.keySet()) { - if (prev != null) { - final Iterator prevIt = verticesByWitness.get(prev).iterator(); - final Iterator nextIt = verticesByWitness.get(witness).iterator(); - while (prevIt.hasNext() && nextIt.hasNext()) { - final VariantGraph.Vertex prevVertex = prevIt.next(); - final VariantGraph.Vertex nextVertex = nextIt.next(); - if (!prevVertex.equals(nextVertex)) { - tuples.add(new Tuple<>(prevVertex, nextVertex)); + return ranking; + } + + Set> transposedTuples() { + final Set> tuples = new HashSet<>(); + final Comparator vertexOrdering = ranking().comparator(); + + for (Set transposition : graph.transpositions()) { + final SortedMap> verticesByWitness = new TreeMap<>(Witness.SIGIL_COMPARATOR); + for (VariantGraph.Vertex vertex : transposition) { + for (Witness witness : vertex.witnesses()) { + verticesByWitness.computeIfAbsent(witness, w -> new TreeSet<>(vertexOrdering)).add(vertex); + } + } + + Witness prev = null; + for (Witness witness : verticesByWitness.keySet()) { + if (prev != null) { + final Iterator prevIt = verticesByWitness.get(prev).iterator(); + final Iterator nextIt = verticesByWitness.get(witness).iterator(); + while (prevIt.hasNext() && nextIt.hasNext()) { + final VariantGraph.Vertex prevVertex = prevIt.next(); + final VariantGraph.Vertex nextVertex = nextIt.next(); + if (!prevVertex.equals(nextVertex)) { + tuples.add(new Tuple<>(prevVertex, nextVertex)); + } + } + } + prev = witness; } - } } - prev = witness; - } + + return tuples; } - return tuples; - } + public void toGraphML(XMLStreamWriter xml) throws XMLStreamException { + xml.writeStartElement("", GRAPHML_TAG, GRAPHML_NS); + xml.writeNamespace("", GRAPHML_NS); + xml.writeAttribute(XMLNSXSI_ATT, GRAPHML_XMLNSXSI); + xml.writeAttribute(XSISL_ATT, GRAPHML_XSISL); - public void toGraphML(XMLStreamWriter xml) throws 
XMLStreamException { - xml.writeStartElement("", GRAPHML_TAG, GRAPHML_NS); - xml.writeNamespace("", GRAPHML_NS); - xml.writeAttribute(XMLNSXSI_ATT, GRAPHML_XMLNSXSI); - xml.writeAttribute(XSISL_ATT, GRAPHML_XSISL); + for (GraphMLProperty p : GraphMLProperty.values()) { + p.declare(xml); + } - for (GraphMLProperty p : GraphMLProperty.values()) { - p.declare(xml); - } + xml.writeStartElement(GRAPHML_NS, GRAPH_TAG); + xml.writeAttribute(ID_ATT, GRAPH_ID); + xml.writeAttribute(EDGEDEFAULT_ATT, EDGEDEFAULT_DEFAULT_VALUE); + xml.writeAttribute(PARSENODEIDS_ATT, PARSENODEIDS_DEFAULT_VALUE); + xml.writeAttribute(PARSEEDGEIDS_ATT, PARSEEDGEIDS_DEFAULT_VALUE); + xml.writeAttribute(PARSEORDER_ATT, PARSEORDER_DEFAULT_VALUE); + + final VariantGraphRanking ranking = ranking(); + for (VariantGraph.Vertex vertex : graph.vertices()) { + final int id = numericId(vertex); + xml.writeStartElement(GRAPHML_NS, NODE_TAG); + xml.writeAttribute(ID_ATT, "n" + id); + GraphMLProperty.NODE_NUMBER.write(Integer.toString(id), xml); + GraphMLProperty.NODE_RANK.write(Integer.toString(ranking.apply(vertex)), xml); + GraphMLProperty.NODE_TOKEN.write(vertexToString.apply(vertex), xml); + xml.writeEndElement(); + } - xml.writeStartElement(GRAPHML_NS, GRAPH_TAG); - xml.writeAttribute(ID_ATT, GRAPH_ID); - xml.writeAttribute(EDGEDEFAULT_ATT, EDGEDEFAULT_DEFAULT_VALUE); - xml.writeAttribute(PARSENODEIDS_ATT, PARSENODEIDS_DEFAULT_VALUE); - xml.writeAttribute(PARSEEDGEIDS_ATT, PARSEEDGEIDS_DEFAULT_VALUE); - xml.writeAttribute(PARSEORDER_ATT, PARSEORDER_DEFAULT_VALUE); - - final VariantGraphRanking ranking = ranking(); - for (VariantGraph.Vertex vertex : graph.vertices()) { - final int id = numericId(vertex); - xml.writeStartElement(GRAPHML_NS, NODE_TAG); - xml.writeAttribute(ID_ATT, "n" + id); - GraphMLProperty.NODE_NUMBER.write(Integer.toString(id), xml); - GraphMLProperty.NODE_RANK.write(Integer.toString(ranking.apply(vertex)), xml); - GraphMLProperty.NODE_TOKEN.write(vertexToString.apply(vertex), xml); - 
xml.writeEndElement(); - } + int edgeNumber = 0; + for (VariantGraph.Vertex v : graph.vertices()) { + for (Map.Entry> edge : v.outgoing().entrySet()) { + xml.writeStartElement(GRAPHML_NS, EDGE_TAG); + xml.writeAttribute(ID_ATT, "e" + edgeNumber); + xml.writeAttribute(SOURCE_ATT, "n" + numericId(v)); + xml.writeAttribute(TARGET_ATT, "n" + numericId(edge.getKey())); + GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); + GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_PATH, xml); + GraphMLProperty.EDGE_WITNESSES.write(edge.getValue().stream().map(Witness::getSigil).distinct().sorted().collect(Collectors.joining(", ")), xml); + xml.writeEndElement(); + } + } + + for (Tuple transposedTuple : transposedTuples()) { + xml.writeStartElement(GRAPHML_NS, EDGE_TAG); + xml.writeAttribute(ID_ATT, "e" + edgeNumber); + xml.writeAttribute(SOURCE_ATT, "n" + numericId(transposedTuple.left)); + xml.writeAttribute(TARGET_ATT, "n" + numericId(transposedTuple.right)); + GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); + GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_TRANSPOSITION, xml); + xml.writeEndElement(); + } - int edgeNumber = 0; - for (VariantGraph.Vertex v : graph.vertices()) { - for (Map.Entry> edge : v.outgoing().entrySet()) { - xml.writeStartElement(GRAPHML_NS, EDGE_TAG); - xml.writeAttribute(ID_ATT, "e" + edgeNumber); - xml.writeAttribute(SOURCE_ATT, "n" + numericId(v)); - xml.writeAttribute(TARGET_ATT, "n" + numericId(edge.getKey())); - GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); - GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_PATH, xml); - GraphMLProperty.EDGE_WITNESSES.write(edge.getValue().stream().map(Witness::getSigil).distinct().sorted().collect(Collectors.joining(", ")), xml); xml.writeEndElement(); - } - } - for (Tuple transposedTuple : transposedTuples()) { - xml.writeStartElement(GRAPHML_NS, EDGE_TAG); - xml.writeAttribute(ID_ATT, "e" + edgeNumber); - xml.writeAttribute(SOURCE_ATT, "n" + 
numericId(transposedTuple.left)); - xml.writeAttribute(TARGET_ATT, "n" + numericId(transposedTuple.right)); - GraphMLProperty.EDGE_NUMBER.write(Integer.toString(edgeNumber++), xml); - GraphMLProperty.EDGE_TYPE.write(EDGE_TYPE_TRANSPOSITION, xml); - xml.writeEndElement(); + xml.writeEndElement(); } - xml.writeEndElement(); - - xml.writeEndElement(); - } - - private static final String NODE_TAG = "node"; - private static final String TARGET_ATT = "target"; - private static final String SOURCE_ATT = "source"; - private static final String EDGE_TAG = "edge"; - private static final String EDGE_TYPE_PATH = "path"; - private static final String EDGE_TYPE_TRANSPOSITION = "transposition"; - private static final String EDGEDEFAULT_DEFAULT_VALUE = "directed"; - private static final String EDGEDEFAULT_ATT = "edgedefault"; - private static final String GRAPH_ID = "g0"; - private static final String GRAPH_TAG = "graph"; - private static final String GRAPHML_NS = "http://graphml.graphdrawing.org/xmlns"; - private static final String GRAPHML_TAG = "graphml"; - private static final String XMLNSXSI_ATT = "xmlns:xsi"; - private static final String XSISL_ATT = "xsi:schemaLocation"; - private static final String GRAPHML_XMLNSXSI = "http://www.w3.org/2001/XMLSchema-instance"; - private static final String GRAPHML_XSISL = "http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd"; - private static final String PARSENODEIDS_ATT = "parse.nodeids"; - private static final String PARSENODEIDS_DEFAULT_VALUE = "canonical"; - private static final String PARSEEDGEIDS_ATT = "parse.edgeids"; - private static final String PARSEEDGEIDS_DEFAULT_VALUE = "canonical"; - private static final String PARSEORDER_ATT = "parse.order"; - private static final String PARSEORDER_DEFAULT_VALUE = "nodesfirst"; - - private static final String ATTR_TYPE_ATT = "attr.type"; - private static final String ATTR_NAME_ATT = "attr.name"; - private static final String FOR_ATT = "for"; - 
private static final String ID_ATT = "id"; - private static final String KEY_TAG = "key"; - private static final String DATA_TAG = "data"; - - private enum GraphMLProperty { - NODE_NUMBER(NODE_TAG, "number", "int"), // - NODE_TOKEN(NODE_TAG, "tokens", "string"), // - NODE_RANK(NODE_TAG, "rank", "int"), // - EDGE_NUMBER(EDGE_TAG, "number", "int"), // - EDGE_TYPE(EDGE_TAG, "type", "string"), // - EDGE_WITNESSES(EDGE_TAG, "witnesses", "string"); - - private String name; - private String forElement; - private String type; - - private GraphMLProperty(String forElement, String name, String type) { - this.name = name; - this.forElement = forElement; - this.type = type; - } + private static final String NODE_TAG = "node"; + private static final String TARGET_ATT = "target"; + private static final String SOURCE_ATT = "source"; + private static final String EDGE_TAG = "edge"; + private static final String EDGE_TYPE_PATH = "path"; + private static final String EDGE_TYPE_TRANSPOSITION = "transposition"; + private static final String EDGEDEFAULT_DEFAULT_VALUE = "directed"; + private static final String EDGEDEFAULT_ATT = "edgedefault"; + private static final String GRAPH_ID = "g0"; + private static final String GRAPH_TAG = "graph"; + private static final String GRAPHML_NS = "http://graphml.graphdrawing.org/xmlns"; + private static final String GRAPHML_TAG = "graphml"; + private static final String XMLNSXSI_ATT = "xmlns:xsi"; + private static final String XSISL_ATT = "xsi:schemaLocation"; + private static final String GRAPHML_XMLNSXSI = "http://www.w3.org/2001/XMLSchema-instance"; + private static final String GRAPHML_XSISL = "http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd"; + private static final String PARSENODEIDS_ATT = "parse.nodeids"; + private static final String PARSENODEIDS_DEFAULT_VALUE = "canonical"; + private static final String PARSEEDGEIDS_ATT = "parse.edgeids"; + private static final String PARSEEDGEIDS_DEFAULT_VALUE = 
"canonical"; + private static final String PARSEORDER_ATT = "parse.order"; + private static final String PARSEORDER_DEFAULT_VALUE = "nodesfirst"; + + private static final String ATTR_TYPE_ATT = "attr.type"; + private static final String ATTR_NAME_ATT = "attr.name"; + private static final String FOR_ATT = "for"; + private static final String ID_ATT = "id"; + private static final String KEY_TAG = "key"; + private static final String DATA_TAG = "data"; + + private enum GraphMLProperty { + NODE_NUMBER(NODE_TAG, "number", "int"), // + NODE_TOKEN(NODE_TAG, "tokens", "string"), // + NODE_RANK(NODE_TAG, "rank", "int"), // + EDGE_NUMBER(EDGE_TAG, "number", "int"), // + EDGE_TYPE(EDGE_TAG, "type", "string"), // + EDGE_WITNESSES(EDGE_TAG, "witnesses", "string"); + + private String name; + private String forElement; + private String type; + + private GraphMLProperty(String forElement, String name, String type) { + this.name = name; + this.forElement = forElement; + this.type = type; + } - public void write(String data, XMLStreamWriter xml) throws XMLStreamException { - xml.writeStartElement(GRAPHML_NS, DATA_TAG); - xml.writeAttribute(KEY_TAG, "d" + ordinal()); - xml.writeCharacters(data); - xml.writeEndElement(); - } + public void write(String data, XMLStreamWriter xml) throws XMLStreamException { + xml.writeStartElement(GRAPHML_NS, DATA_TAG); + xml.writeAttribute(KEY_TAG, "d" + ordinal()); + xml.writeCharacters(data); + xml.writeEndElement(); + } - public void declare(XMLStreamWriter xml) throws XMLStreamException { - xml.writeEmptyElement(GRAPHML_NS, KEY_TAG); - xml.writeAttribute(ID_ATT, "d" + ordinal()); - xml.writeAttribute(FOR_ATT, forElement); - xml.writeAttribute(ATTR_NAME_ATT, name); - xml.writeAttribute(ATTR_TYPE_ATT, type); - } - } - - final Function vertexToString = new Function() { - @Override - public String apply(VariantGraph.Vertex input) { - return input.witnesses().stream().findFirst() - .map(witness -> 
tokensToString.apply(Arrays.asList(input.tokens().stream().filter(t -> witness.equals(t.getWitness())).toArray(Token[]::new)))) - .orElse(""); + public void declare(XMLStreamWriter xml) throws XMLStreamException { + xml.writeEmptyElement(GRAPHML_NS, KEY_TAG); + xml.writeAttribute(ID_ATT, "d" + ordinal()); + xml.writeAttribute(FOR_ATT, forElement); + xml.writeAttribute(ATTR_NAME_ATT, name); + xml.writeAttribute(ATTR_TYPE_ATT, type); + } } - }; - - static final Function, String> SIMPLE_TOKEN_TO_STRING = input -> StreamSupport.stream(input.spliterator(), false) - .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) - .map(t -> (SimpleToken) t) - .sorted() - .map(SimpleToken::getContent) - .collect(Collectors.joining()); + + final Function vertexToString = new Function() { + @Override + public String apply(VariantGraph.Vertex input) { + return input.witnesses().stream().findFirst() + .map(witness -> tokensToString.apply(Arrays.asList(input.tokens().stream().filter(t -> witness.equals(t.getWitness())).toArray(Token[]::new)))) + .orElse(""); + } + }; + + static final Function, String> SIMPLE_TOKEN_TO_STRING = input -> StreamSupport.stream(input.spliterator(), false) + .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getContent) + .collect(Collectors.joining()); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java index 51f6f81be..43d35f67f 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java @@ -34,71 +34,71 @@ public class SimpleWitness implements Iterable, Witness, Comparator { - private final String sigil; - private final List tokens = new ArrayList<>(); - - public SimpleWitness(String sigil) { - this.sigil = sigil; - } - - public 
SimpleWitness(String sigil, String content) { - this(sigil, content, SimplePatternTokenizer.BY_WS_OR_PUNCT, SimpleTokenNormalizers.LC_TRIM_WS); - } - - public SimpleWitness(String sigil, - String content, - Function> tokenizer, - Function normalizer) { - this(sigil); - setTokenContents(tokenizer.apply(content), normalizer); - } - - public List getTokens() { - return tokens; - } - - public void setTokenContents(Stream tokenContents, Function normalizer) { - setTokens(tokenContents.map(content -> new SimpleToken(SimpleWitness.this, content, normalizer.apply(content))).collect(Collectors.toList())); - } - - public void setTokens(List tokens) { - this.tokens.clear(); - this.tokens.addAll(tokens); - } - - @Override - public String getSigil() { - return sigil; - } - - @Override - public Iterator iterator() { - return Collections.unmodifiableList(tokens).iterator(); - } - - @Override - public String toString() { - return getSigil(); - } - - @Override - public int compare(SimpleToken o1, SimpleToken o2) { - final int o1Index = tokens.indexOf(o1); - final int o2Index = tokens.indexOf(o2); - if (o1Index < 0) { - throw new IllegalArgumentException(o1.toString()); + private final String sigil; + private final List tokens = new ArrayList<>(); + + public SimpleWitness(String sigil) { + this.sigil = sigil; + } + + public SimpleWitness(String sigil, String content) { + this(sigil, content, SimplePatternTokenizer.BY_WS_OR_PUNCT, SimpleTokenNormalizers.LC_TRIM_WS); + } + + public SimpleWitness(String sigil, + String content, + Function> tokenizer, + Function normalizer) { + this(sigil); + setTokenContents(tokenizer.apply(content), normalizer); + } + + public List getTokens() { + return tokens; + } + + public void setTokenContents(Stream tokenContents, Function normalizer) { + setTokens(tokenContents.map(content -> new SimpleToken(SimpleWitness.this, content, normalizer.apply(content))).collect(Collectors.toList())); } - if (o2Index < 0) { - throw new IllegalArgumentException(); + + 
public void setTokens(List tokens) { + this.tokens.clear(); + this.tokens.addAll(tokens); + } + + @Override + public String getSigil() { + return sigil; + } + + @Override + public Iterator iterator() { + return Collections.unmodifiableList(tokens).iterator(); + } + + @Override + public String toString() { + return getSigil(); + } + + @Override + public int compare(SimpleToken o1, SimpleToken o2) { + final int o1Index = tokens.indexOf(o1); + final int o2Index = tokens.indexOf(o2); + if (o1Index < 0) { + throw new IllegalArgumentException(o1.toString()); + } + if (o2Index < 0) { + throw new IllegalArgumentException(); + } + return (o1Index - o2Index); } - return (o1Index - o2Index); - } - public static final Pattern PUNCT = Pattern.compile("\\p{Punct}"); + public static final Pattern PUNCT = Pattern.compile("\\p{Punct}"); - public static final Function TOKEN_NORMALIZER = input -> { - final String normalized = PUNCT.matcher(input.trim().toLowerCase()).replaceAll(""); - return (normalized == null || normalized.length() == 0 ? input : normalized); - }; + public static final Function TOKEN_NORMALIZER = input -> { + final String normalized = PUNCT.matcher(input.trim().toLowerCase()).replaceAll(""); + return (normalized == null || normalized.length() == 0 ? 
input : normalized); + }; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java index 286c8338d..9e463d307 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java @@ -33,78 +33,78 @@ // large TEI files, with segments and expansions public class SimpleWitnessTeiBuilder { - private static QName w = new QName("http://www.tei-c.org/ns/1.0", "w"); - private static QName seg = new QName("http://www.tei-c.org/ns/1.0", "seg"); - private static QName p = new QName("http://www.tei-c.org/ns/1.0", "p"); + private static QName w = new QName("http://www.tei-c.org/ns/1.0", "w"); + private static QName seg = new QName("http://www.tei-c.org/ns/1.0", "seg"); + private static QName p = new QName("http://www.tei-c.org/ns/1.0", "p"); - public SimpleWitness read(InputStream input) throws XMLStreamException { - SimpleWitness witness = new SimpleWitness("id"); - List tokenContents = new ArrayList<>(); - XMLInputFactory factory = XMLInputFactory.newInstance(); - XMLEventReader reader = factory.createXMLEventReader(input); - XMLEvent event = null; - int numberOfParagraphs = 0; - while ((event = reader.peek()) != null && numberOfParagraphs < 10) { - // System.out.println(event.toString()); - if (event.isStartElement() && event.asStartElement().getName().equals(w)) { - tokenContents.add(handleWElement(reader)); - } else if (event.isStartElement() && event.asStartElement().getName().equals(seg)) { - tokenContents.add(handleSegElement(reader)); - } else if (event.isStartElement() && event.asStartElement().getName().equals(p)) { - reader.next(); - numberOfParagraphs++; - } else { - reader.next(); - } + public SimpleWitness read(InputStream input) throws XMLStreamException { + SimpleWitness witness = new 
SimpleWitness("id"); + List tokenContents = new ArrayList<>(); + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader reader = factory.createXMLEventReader(input); + XMLEvent event = null; + int numberOfParagraphs = 0; + while ((event = reader.peek()) != null && numberOfParagraphs < 10) { + // System.out.println(event.toString()); + if (event.isStartElement() && event.asStartElement().getName().equals(w)) { + tokenContents.add(handleWElement(reader)); + } else if (event.isStartElement() && event.asStartElement().getName().equals(seg)) { + tokenContents.add(handleSegElement(reader)); + } else if (event.isStartElement() && event.asStartElement().getName().equals(p)) { + reader.next(); + numberOfParagraphs++; + } else { + reader.next(); + } + } + witness.setTokenContents(tokenContents.stream(), SimpleTokenNormalizers.LC_TRIM_WS_PUNCT); + return witness; } - witness.setTokenContents(tokenContents.stream(), SimpleTokenNormalizers.LC_TRIM_WS_PUNCT); - return witness; - } - private static String handleWElement(XMLEventReader reader) throws XMLStreamException { - XMLEvent event = reader.nextEvent(); - // Do what you need to do with the start element, e.g. initialize - // data structures - // System.out.println("W tag is triggered here!"); - StringBuffer textBuffer = new StringBuffer(); - while ((event = reader.peek()) != null) { - if (event.isEndElement() && event.asEndElement().getName().equals(w)) { - // Do what you need to do at the end, e.g. add data - // collected from sub elements, etc. - event = reader.nextEvent(); - break; - } else { - // Do what you need to do for start or child elements, e.g. - // dispatch to another handler function - event = reader.nextEvent(); - textBuffer.append(event.toString()); - // System.out.println("Text :"+event.toString()); - } + private static String handleWElement(XMLEventReader reader) throws XMLStreamException { + XMLEvent event = reader.nextEvent(); + // Do what you need to do with the start element, e.g. 
initialize + // data structures + // System.out.println("W tag is triggered here!"); + StringBuffer textBuffer = new StringBuffer(); + while ((event = reader.peek()) != null) { + if (event.isEndElement() && event.asEndElement().getName().equals(w)) { + // Do what you need to do at the end, e.g. add data + // collected from sub elements, etc. + event = reader.nextEvent(); + break; + } else { + // Do what you need to do for start or child elements, e.g. + // dispatch to another handler function + event = reader.nextEvent(); + textBuffer.append(event.toString()); + // System.out.println("Text :"+event.toString()); + } + } + return textBuffer.toString(); } - return textBuffer.toString(); - } - private static String handleSegElement(XMLEventReader reader) throws XMLStreamException { - XMLEvent event = reader.nextEvent(); - // Do what you need to do with the start element, e.g. initialize - // data structures - // System.out.println("Seg tag is triggered here!"); - StringBuffer textBuffer = new StringBuffer(); - while ((event = reader.peek()) != null) { - if (event.isEndElement() && event.asEndElement().getName().equals(seg)) { - // Do what you need to do at the end, e.g. add data - // collected from sub elements, etc. - event = reader.nextEvent(); - break; - } else { - // Do what you need to do for start or child elements, e.g. - // dispatch to another handler function - event = reader.nextEvent(); - if (event.getEventType() == XMLEvent.CHARACTERS) { - textBuffer.append(event.toString().trim()); + private static String handleSegElement(XMLEventReader reader) throws XMLStreamException { + XMLEvent event = reader.nextEvent(); + // Do what you need to do with the start element, e.g. 
initialize + // data structures + // System.out.println("Seg tag is triggered here!"); + StringBuffer textBuffer = new StringBuffer(); + while ((event = reader.peek()) != null) { + if (event.isEndElement() && event.asEndElement().getName().equals(seg)) { + // Do what you need to do at the end, e.g. add data + // collected from sub elements, etc. + event = reader.nextEvent(); + break; + } else { + // Do what you need to do for start or child elements, e.g. + // dispatch to another handler function + event = reader.nextEvent(); + if (event.getEventType() == XMLEvent.CHARACTERS) { + textBuffer.append(event.toString().trim()); + } + } } - } + return textBuffer.toString(); } - return textBuffer.toString(); - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Algorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Algorithm.java index b4ded56a9..59ec02eef 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Algorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Algorithm.java @@ -7,49 +7,61 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public enum Algorithm -{ - /** Karkkainen-Sanders. */ +public enum Algorithm { + /** + * Karkkainen-Sanders. + */ SKEW("Kärkkäinen-Sanders"), - /** Karkkainen-Sanders, with decorators allowing arbitrary input. */ + /** + * Karkkainen-Sanders, with decorators allowing arbitrary input. + */ SKEW_D("Kärkkäinen-Sanders (decorated for arbitrary input symbols)"), - /** Yuta Mori's divsufsort algorithm. */ + /** + * Yuta Mori's divsufsort algorithm. + */ DIVSUFSORT("Mori's algorithm"), - /** Yuta Mori's implementation of SA-IS. */ + /** + * Yuta Mori's implementation of SA-IS. 
+ */ SAIS("SA-IS algorithm"), - /** Klaus-Bernd Schürmann's bucket pointer refinement algorithm */ + /** + * Klaus-Bernd Schürmann's bucket pointer refinement algorithm + */ BPR("Klaus-Bernd Schürmann's bpr algorithm"), - /** Deep-Shallow algorithm by Manzini and Ferragina. */ + /** + * Deep-Shallow algorithm by Manzini and Ferragina. + */ DEEP_SHALLOW("Manzini-Ferragina"), - /** "Larrson-Sadakane qsufsort algorithm */ + /** + * "Larrson-Sadakane qsufsort algorithm + */ QSUFSORT("Larrson-Sadakane qsufsort algorithm"); - /** Full name of the algorithm. */ + /** + * Full name of the algorithm. + */ private final String name; /* - * + * */ - private Algorithm(String name) - { + private Algorithm(String name) { this.name = name; } /** * @return Same as {@link #getInstance()}, but returns the algorithm instance - * decorated to work with any range or distribution of input symbols - * (respecting each algorithm's constraints). + * decorated to work with any range or distribution of input symbols + * (respecting each algorithm's constraints). */ - public ISuffixArrayBuilder getDecoratedInstance() - { - switch (this) - { + public ISuffixArrayBuilder getDecoratedInstance() { + switch (this) { case SKEW: return new DensePositiveDecorator(new ExtraTrailingCellsDecorator( getInstance(), SuffixArrays.MAX_EXTRA_TRAILING_SPACE)); @@ -62,10 +74,8 @@ public ISuffixArrayBuilder getDecoratedInstance() /** * @return Create and return an algorithm instance. */ - public ISuffixArrayBuilder getInstance() - { - switch (this) - { + public ISuffixArrayBuilder getInstance() { + switch (this) { case SKEW: return new Skew(); @@ -94,13 +104,11 @@ public ISuffixArrayBuilder getInstance() * instance will overwrite input. *

        * If not, create default instance - * + * * @return Create and return low memory consuming instance. */ - public ISuffixArrayBuilder getMemoryConservingInstance() - { - switch (this) - { + public ISuffixArrayBuilder getMemoryConservingInstance() { + switch (this) { case QSUFSORT: return new QSufSort(false); case BPR: @@ -115,8 +123,7 @@ public ISuffixArrayBuilder getMemoryConservingInstance() /** * Return the full name of the algorithm. */ - public String getName() - { + public String getName() { return name; } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/BPR.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/BPR.java index 731f0eb01..30d083c9d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/BPR.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/BPR.java @@ -6,7 +6,7 @@ *

        * A straightforward reimplementation of the bucket pointer refinement algorithm given in: * - * Klaus-Bernd Schürmann, Suffix Arrays in Theory and Practice, Faculty of Technology of + * Klaus-Bernd Schürmann, Suffix Arrays in Theory and Practice, Faculty of Technology of * Bielefeld University, Germany, 2007 * *

        @@ -19,40 +19,33 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public class BPR implements ISuffixArrayBuilder -{ - private final static class Alphabet - { +public class BPR implements ISuffixArrayBuilder { + private final static class Alphabet { int size; - int [] charArray; - int [] alphaMapping; - int [] charFreq; + int[] charArray; + int[] alphaMapping; + int[] charFreq; - Alphabet(int [] thisString, int stringLength) - { + Alphabet(int[] thisString, int stringLength) { int tmpChar; size = 0; - alphaMapping = new int [KBS_MAX_ALPHABET_SIZE]; - charFreq = new int [KBS_MAX_ALPHABET_SIZE]; - for (int i = 0; i < stringLength; i++) - { + alphaMapping = new int[KBS_MAX_ALPHABET_SIZE]; + charFreq = new int[KBS_MAX_ALPHABET_SIZE]; + for (int i = 0; i < stringLength; i++) { tmpChar = thisString[i]; Tools.assertAlways(tmpChar >= 0, "Input must be positive"); - if (charFreq[tmpChar] == 0) - { + if (charFreq[tmpChar] == 0) { size++; } charFreq[tmpChar]++; } - charArray = new int [size + 1]; + charArray = new int[size + 1]; charArray[size] = 0; int k = 0; - for (int i = 0; i < KBS_MAX_ALPHABET_SIZE; i++) - { + for (int i = 0; i < KBS_MAX_ALPHABET_SIZE; i++) { alphaMapping[i] = -1; - if (charFreq[i] > 0) - { + if (charFreq[i] > 0) { charArray[k] = i; alphaMapping[i] = k; k++; @@ -73,21 +66,19 @@ private final static class Alphabet */ private final boolean preserveInput; - private int [] seq; + private int[] seq; private int length; private Alphabet alphabet; - private int [] suffixArray; - private int [] sufPtrMap; + private int[] suffixArray; + private int[] sufPtrMap; private int start; - public BPR() - { + public BPR() { this(true); } - public BPR(boolean preserveInput) - { + public BPR(boolean preserveInput) { this.preserveInput = preserveInput; } @@ -106,23 +97,19 @@ public BPR(boolean preserveInput) *

        */ @Override - public int [] buildSuffixArray(int [] input, int start, int length) - { + public int[] buildSuffixArray(int[] input, int start, int length) { Tools.assertAlways(input != null, "input must not be null"); Tools.assertAlways(input.length >= start + length + KBS_STRING_EXTENSION_SIZE, "input is too short"); Tools.assertAlways(length >= 2, "input length must be >= 2"); this.start = start; - if (preserveInput) - { + if (preserveInput) { - seq = new int [length + KBS_STRING_EXTENSION_SIZE]; + seq = new int[length + KBS_STRING_EXTENSION_SIZE]; this.start = 0; System.arraycopy(input, start, seq, 0, length); - } - else - { + } else { seq = input; } @@ -131,24 +118,15 @@ public BPR(boolean preserveInput) int alphaSize = alphabet.size; int q; - if (alphaSize <= 9) - { + if (alphaSize <= 9) { q = 7; - } - else if (9 < alphaSize && alphaSize <= 13) - { + } else if (9 < alphaSize && alphaSize <= 13) { q = 6; - } - else if (13 < alphaSize && alphaSize <= 21) - { + } else if (13 < alphaSize && alphaSize <= 21) { q = 5; - } - else if (21 < alphaSize && alphaSize <= 46) - { + } else if (21 < alphaSize && alphaSize <= 46) { q = 4; - } - else - { + } else { q = 3; } @@ -157,11 +135,10 @@ else if (21 < alphaSize && alphaSize <= 46) } /** - * + * */ - private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) - { - int [] buckets = determine_Buckets_Sarray_Sptrmap(q); + private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) { + int[] buckets = determine_Buckets_Sarray_Sptrmap(q); /* Sorting of all buckets */ int mappedCharPtr = 0; @@ -169,38 +146,33 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) int bucketsInLevel3Bucket = kbs_power_Ulong(alphabetSize, q - 3); int bucketsInLevel2Bucket = bucketsInLevel3Bucket * alphabetSize; int bucketsInLevel1Bucket = bucketsInLevel2Bucket * alphabetSize; - int [] alphaOrder = getCharWeightedOrder_Alphabet(buckets, bucketsInLevel2Bucket); - int [] isNotSortedLevel1Char = new int 
[alphabetSize]; + int[] alphaOrder = getCharWeightedOrder_Alphabet(buckets, bucketsInLevel2Bucket); + int[] isNotSortedLevel1Char = new int[alphabetSize]; Arrays.fill(isNotSortedLevel1Char, 1); /* Sort all level-1 buckets */ - int [] leftPtrList = new int [alphabetSize]; - int [] rightPtrList = new int [alphabetSize]; - int [] leftPtrList2 = new int [alphabetSize * alphabetSize]; - int [] rightPtrList2 = new int [alphabetSize * alphabetSize]; + int[] leftPtrList = new int[alphabetSize]; + int[] rightPtrList = new int[alphabetSize]; + int[] leftPtrList2 = new int[alphabetSize * alphabetSize]; + int[] rightPtrList2 = new int[alphabetSize * alphabetSize]; int i; int j; int c1 = 0; - for (i = 0; i < alphabetSize; i++) - { + for (i = 0; i < alphabetSize; i++) { c1 = alphaOrder[i]; /* sort buckets cd to cz */ - for (j = i + 1; j < alphabetSize; j++) - { + for (j = i + 1; j < alphabetSize; j++) { int c2 = alphaOrder[j]; int l; - for (l = i; l < alphabetSize; l++) - { + for (l = i; l < alphabetSize; l++) { int c3 = alphaOrder[l]; int tmpUlong = c1 * bucketsInLevel1Bucket + c2 * bucketsInLevel2Bucket + c3 * bucketsInLevel3Bucket; int k; - for (k = tmpUlong; k < tmpUlong + bucketsInLevel3Bucket; k++) - { + for (k = tmpUlong; k < tmpUlong + bucketsInLevel3Bucket; k++) { int leftPtr = buckets[k]; int rightPtr = buckets[k + 1] - 1; - if (rightPtr - leftPtr > 0) - { + if (rightPtr - leftPtr > 0) { if (rightPtr - leftPtr < INSSORT_LIMIT) insSortUpdateRecurse_SaBucket( leftPtr, rightPtr, q, q); else partitionUpdateRecurse_SaBucket(leftPtr, rightPtr, q, q); @@ -210,30 +182,25 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) } /* copy left buckets of cx */ - for (j = i; j < alphabetSize; j++) - { + for (j = i; j < alphabetSize; j++) { int cp1 = alphaOrder[j]; leftPtrList[cp1] = buckets[cp1 * bucketsInLevel1Bucket + c1 * bucketsInLevel2Bucket]; int k; - for (k = i + 1; k < alphabetSize; k++) - { + for (k = i + 1; k < alphabetSize; k++) { int cp2 = 
alphaOrder[k]; leftPtrList2[cp2 * alphabetSize + cp1] = buckets[cp2 * bucketsInLevel1Bucket + cp1 * bucketsInLevel2Bucket + c1 * bucketsInLevel3Bucket]; } } - if (c1 == 0) - { + if (c1 == 0) { int cp1 = seq[start + mappedCharPtr + length - 1]; int cp2 = seq[start + mappedCharPtr + length - 2]; - if (isNotSortedLevel1Char[cp1] != 0) - { + if (isNotSortedLevel1Char[cp1] != 0) { leftPtrList[cp1]++; leftPtrList2[cp1 * alphabetSize]++; - if (isNotSortedLevel1Char[cp2] != 0 && cp2 != c1) - { + if (isNotSortedLevel1Char[cp2] != 0 && cp2 != c1) { suffixArray[leftPtrList2[cp2 * alphabetSize + cp1]] = length - 2; sufPtrMap[length - 2] = leftPtrList2[cp2 * alphabetSize + cp1]; leftPtrList2[cp2 * alphabetSize + cp1]++; @@ -242,16 +209,13 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) } int leftPtr = buckets[c1 * bucketsInLevel1Bucket]; - while (leftPtr < leftPtrList[c1]) - { + while (leftPtr < leftPtrList[c1]) { int cp1; int tmpUlong = suffixArray[leftPtr]; if (tmpUlong != 0 && isNotSortedLevel1Char[cp1 = seq[start + mappedCharPtr + tmpUlong - - 1]] != 0) - { - if (isNotSortedLevel1Char[seq[start + mappedCharPtr + tmpUlong + 1]] != 0) - { + - 1]] != 0) { + if (isNotSortedLevel1Char[seq[start + mappedCharPtr + tmpUlong + 1]] != 0) { int tmpUlongPtr = leftPtrList[cp1]; sufPtrMap[tmpUlong - 1] = tmpUlongPtr; suffixArray[tmpUlongPtr] = tmpUlong - 1; @@ -260,8 +224,7 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) int cp2; if (tmpUlong > 1 && isNotSortedLevel1Char[cp2 = seq[start + mappedCharPtr - + tmpUlong - 2]] != 0 && cp2 != c1) - { + + tmpUlong - 2]] != 0 && cp2 != c1) { int tmpUlongPtr = leftPtrList2[cp2 * alphabetSize + cp1]++; sufPtrMap[tmpUlong - 2] = tmpUlongPtr; suffixArray[tmpUlongPtr] = tmpUlong - 2; @@ -271,14 +234,12 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) } /* copy right buckets of cx */ - for (j = i; j < alphabetSize; j++) - { + for (j = i; j < alphabetSize; j++) { int cp1 = 
alphaOrder[j]; int k; rightPtrList[cp1] = buckets[cp1 * bucketsInLevel1Bucket + (c1 + 1) * bucketsInLevel2Bucket]; - for (k = i + 1; k < alphabetSize; k++) - { + for (k = i + 1; k < alphabetSize; k++) { int cp2 = alphaOrder[k]; rightPtrList2[cp2 * alphabetSize + cp1] = buckets[cp2 * bucketsInLevel1Bucket + cp1 * bucketsInLevel2Bucket + (c1 + 1) @@ -286,18 +247,15 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) } } int rightPtr = buckets[(c1 + 1) * bucketsInLevel1Bucket]; - while (leftPtr < rightPtr) - { + while (leftPtr < rightPtr) { int cp1; rightPtr--; int tmpUlong = suffixArray[rightPtr]; if (tmpUlong != 0 && isNotSortedLevel1Char[cp1 = seq[start + mappedCharPtr + tmpUlong - - 1]] != 0) - { + - 1]] != 0) { rightPtrList[cp1]--; - if (isNotSortedLevel1Char[seq[start + mappedCharPtr + tmpUlong + 1]] != 0) - { + if (isNotSortedLevel1Char[seq[start + mappedCharPtr + tmpUlong + 1]] != 0) { int tmpUlongPtr = rightPtrList[cp1]; sufPtrMap[tmpUlong - 1] = tmpUlongPtr; suffixArray[tmpUlongPtr] = tmpUlong - 1; @@ -305,8 +263,7 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) int cp2; if (tmpUlong > 1 && isNotSortedLevel1Char[cp2 = seq[start + mappedCharPtr - + tmpUlong - 2]] != 0 && cp2 != c1) - { + + tmpUlong - 2]] != 0 && cp2 != c1) { int tmpUlongPtr = --rightPtrList2[cp2 * alphabetSize + cp1]; sufPtrMap[tmpUlong - 2] = tmpUlongPtr; suffixArray[tmpUlongPtr] = tmpUlong - 2; @@ -321,25 +278,22 @@ private void kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(int q) /** * Stably sorts a bucket at a refinement level regarding sort keys that are bucket * pointers in sufPtrMap with offset. - * - * @param leftPtr points to the leftmost suffix of the current bucket. + * + * @param leftPtr points to the leftmost suffix of the current bucket. * @param rightPtr points to the rightmost suffix of the current bucket. - * @param offset is the length of the common prefix of the suffixes (a multiple of q). 
- * @param q is the initial prefix length used for the bucket sort. It also determines - * the increase of offset. + * @param offset is the length of the common prefix of the suffixes (a multiple of q). + * @param q is the initial prefix length used for the bucket sort. It also determines + * the increase of offset. */ private void insSortUpdateRecurse_SaBucket(int leftPtr, int rightPtr, int offset, - int q) - { + int q) { int rightTmpPtr = leftPtr + 1; - while (rightTmpPtr <= rightPtr) - { + while (rightTmpPtr <= rightPtr) { int tempValue = suffixArray[rightTmpPtr]; int tempHashValue = sufPtrMap[suffixArray[rightTmpPtr] + offset]; int leftTmpPtr = rightTmpPtr; while (leftTmpPtr > leftPtr - && sufPtrMap[suffixArray[leftTmpPtr - 1] + offset] > tempHashValue) - { + && sufPtrMap[suffixArray[leftTmpPtr - 1] + offset] > tempHashValue) { suffixArray[leftTmpPtr] = suffixArray[leftTmpPtr - 1]; leftTmpPtr--; } @@ -352,16 +306,15 @@ private void insSortUpdateRecurse_SaBucket(int leftPtr, int rightPtr, int offset /** * The function determines the subbuckets after refining this bucket and recursively * calls the refinement function for the subbuckets. - * - * @param leftPtr points to the leftmost suffix of the current bucket. + * + * @param leftPtr points to the leftmost suffix of the current bucket. * @param rightPtr points to the rightmost suffix of the current bucket. - * @param offset is the length of the common prefix of the suffixes (a multiple of q). - * @param q is the initial prefix length used for the bucket sort. It also determines - * the increase of offset. + * @param offset is the length of the common prefix of the suffixes (a multiple of q). + * @param q is the initial prefix length used for the bucket sort. It also determines + * the increase of offset. */ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, - int offset, int q) - { + int offset, int q) { /* * for all buckets with resp. 
pointer > rightPtr determine buckets via setting * sufPtrMap @@ -370,10 +323,8 @@ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, int rightIntervalPtr = rightPtr; int tmpPtr; while (leftPtr <= leftIntervalPtr - && rightPtr < (tmpPtr = sufPtrMap[suffixArray[leftIntervalPtr] + offset])) - { - do - { + && rightPtr < (tmpPtr = sufPtrMap[suffixArray[leftIntervalPtr] + offset])) { + do { sufPtrMap[suffixArray[leftIntervalPtr]] = rightIntervalPtr; leftIntervalPtr--; } @@ -394,8 +345,7 @@ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, rightIntervalPtr = leftIntervalPtr; while (leftPtr <= leftIntervalPtr && leftPtr <= sufPtrMap[suffixArray[leftIntervalPtr] + offset] - && sufPtrMap[suffixArray[leftIntervalPtr] + offset] <= rightPtr) - { + && sufPtrMap[suffixArray[leftIntervalPtr] + offset] <= rightPtr) { sufPtrMap[suffixArray[leftIntervalPtr]] = rightIntervalPtr; leftIntervalPtr--; } @@ -412,11 +362,9 @@ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, int middleRightPtr = rightIntervalPtr; int middleLeftPtr = leftIntervalPtr; rightIntervalPtr = leftIntervalPtr; - while (leftPtr <= leftIntervalPtr) - { + while (leftPtr <= leftIntervalPtr) { int tmpPtr2 = sufPtrMap[suffixArray[leftIntervalPtr] + offset]; - do - { + do { sufPtrMap[suffixArray[leftIntervalPtr]] = rightIntervalPtr; leftIntervalPtr--; } @@ -426,25 +374,20 @@ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, } int newOffset = offset + q; - if (sufPtrMap[suffixArray[leftPtr]] == rightPtr) - { + if (sufPtrMap[suffixArray[leftPtr]] == rightPtr) { newOffset = computeDiffDepthBucket_SaBucket(leftPtr, rightPtr, newOffset, q); } int leftTmpPtr = leftPtr; - while (leftTmpPtr < middleLeftPtr) - { + while (leftTmpPtr < middleLeftPtr) { int rightTmpPtr = sufPtrMap[suffixArray[leftTmpPtr]]; int tmpLong = rightTmpPtr - leftTmpPtr; - if (tmpLong > 0) - { - if (tmpLong == 1) - { + if (tmpLong > 0) { + if (tmpLong == 1) { 
computeBucketSize2_SaBucket(leftTmpPtr, rightTmpPtr, newOffset, q); leftTmpPtr = rightTmpPtr + 1; continue; } - if (tmpLong == 2) - { + if (tmpLong == 2) { computeBucketSize3_SaBucket(leftTmpPtr, rightTmpPtr, newOffset, q); leftTmpPtr = rightTmpPtr + 1; continue; @@ -454,42 +397,31 @@ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, leftTmpPtr = rightTmpPtr + 1; } /* for buckets refering to this bucket, the offset can be doubled */ - if (middleRightPtr > middleLeftPtr + 1) - { - if (middleRightPtr - middleLeftPtr == 2) - { + if (middleRightPtr > middleLeftPtr + 1) { + if (middleRightPtr - middleLeftPtr == 2) { computeBucketSize2_SaBucket(middleLeftPtr + 1, middleRightPtr, Math.max( 2 * offset, newOffset), q); - } - else - { - if (middleRightPtr - middleLeftPtr == 3) - { + } else { + if (middleRightPtr - middleLeftPtr == 3) { computeBucketSize3_SaBucket(middleLeftPtr + 1, middleRightPtr, Math .max(2 * offset, newOffset), q); - } - else - { + } else { insSortUpdateRecurse_SaBucket(middleLeftPtr + 1, middleRightPtr, Math .max(2 * offset, newOffset), q); } } } leftTmpPtr = middleRightPtr + 1; - while (leftTmpPtr < rightPtr) - { + while (leftTmpPtr < rightPtr) { int rightTmpPtr = sufPtrMap[suffixArray[leftTmpPtr]]; int tmpLong = rightTmpPtr - leftTmpPtr; - if (tmpLong > 0) - { - if (tmpLong == 1) - { + if (tmpLong > 0) { + if (tmpLong == 1) { computeBucketSize2_SaBucket(leftTmpPtr, rightTmpPtr, newOffset, q); leftTmpPtr = rightTmpPtr + 1; continue; } - if (tmpLong == 2) - { + if (tmpLong == 2) { computeBucketSize3_SaBucket(leftTmpPtr, rightTmpPtr, newOffset, q); leftTmpPtr = rightTmpPtr + 1; continue; @@ -503,57 +435,49 @@ private void updatePtrAndRefineBuckets_SaBucket(int leftPtr, int rightPtr, /** * Completely sorts buckets of size 3. - * - * @param leftPtr points to the leftmost suffix of the current bucket. + * + * @param leftPtr points to the leftmost suffix of the current bucket. 
* @param rightPtr points to the rightmost suffix of the current bucket. - * @param q is the initial prefix length used for the bucket sort. It also determines - * the increase of offset. - * @param offset is the length of the common prefix of the suffixes rounded down to a - * multiple of q. + * @param q is the initial prefix length used for the bucket sort. It also determines + * the increase of offset. + * @param offset is the length of the common prefix of the suffixes rounded down to a + * multiple of q. */ - private void computeBucketSize3_SaBucket(int leftPtr, int rightPtr, int offset, int q) - { + private void computeBucketSize3_SaBucket(int leftPtr, int rightPtr, int offset, int q) { int newOffset = offset; while (sufPtrMap[suffixArray[leftPtr] + newOffset] == sufPtrMap[suffixArray[leftPtr + 1] + newOffset] && sufPtrMap[suffixArray[leftPtr + 1] + newOffset] == sufPtrMap[suffixArray[rightPtr] - + newOffset]) - { + + newOffset]) { newOffset += q; } if (sufPtrMap[suffixArray[leftPtr] + newOffset] > sufPtrMap[suffixArray[rightPtr] - + newOffset]) - { + + newOffset]) { int swapTmp = suffixArray[leftPtr]; suffixArray[leftPtr] = suffixArray[rightPtr]; suffixArray[rightPtr] = swapTmp; } if (sufPtrMap[suffixArray[leftPtr] + newOffset] > sufPtrMap[suffixArray[leftPtr + 1] - + newOffset]) - { + + newOffset]) { int swapTmp = suffixArray[leftPtr]; suffixArray[leftPtr] = suffixArray[leftPtr + 1]; suffixArray[leftPtr + 1] = swapTmp; } if (sufPtrMap[suffixArray[leftPtr + 1] + newOffset] > sufPtrMap[suffixArray[rightPtr] - + newOffset]) - { + + newOffset]) { int swapTmp = suffixArray[rightPtr]; suffixArray[rightPtr] = suffixArray[leftPtr + 1]; suffixArray[leftPtr + 1] = swapTmp; } if (sufPtrMap[suffixArray[leftPtr] + newOffset] == sufPtrMap[suffixArray[leftPtr + 1] - + newOffset]) - { + + newOffset]) { int suffix1 = suffixArray[leftPtr] + newOffset + q; int suffix2 = suffixArray[leftPtr + 1] + newOffset + q; - while (sufPtrMap[suffix1] == sufPtrMap[suffix2]) - { + while 
(sufPtrMap[suffix1] == sufPtrMap[suffix2]) { suffix1 += q; suffix2 += q; } - if (sufPtrMap[suffix1] > sufPtrMap[suffix2]) - { + if (sufPtrMap[suffix1] > sufPtrMap[suffix2]) { int tmpSwap = suffixArray[leftPtr]; suffixArray[leftPtr] = suffixArray[leftPtr + 1]; suffixArray[leftPtr + 1] = tmpSwap; @@ -564,18 +488,15 @@ private void computeBucketSize3_SaBucket(int leftPtr, int rightPtr, int offset, return; } if (sufPtrMap[suffixArray[leftPtr + 1] + newOffset] == sufPtrMap[suffixArray[rightPtr] - + newOffset]) - { + + newOffset]) { sufPtrMap[suffixArray[leftPtr]] = leftPtr; int suffix1 = suffixArray[leftPtr + 1] + newOffset + q; int suffix2 = suffixArray[rightPtr] + newOffset + q; - while (sufPtrMap[suffix1] == sufPtrMap[suffix2]) - { + while (sufPtrMap[suffix1] == sufPtrMap[suffix2]) { suffix1 += q; suffix2 += q; } - if (sufPtrMap[suffix1] > sufPtrMap[suffix2]) - { + if (sufPtrMap[suffix1] > sufPtrMap[suffix2]) { int tmpSwap = suffixArray[rightPtr]; suffixArray[rightPtr] = suffixArray[leftPtr + 1]; suffixArray[leftPtr + 1] = tmpSwap; @@ -591,25 +512,22 @@ private void computeBucketSize3_SaBucket(int leftPtr, int rightPtr, int offset, /** * Completely sorts buckets of size 2. - * - * @param leftPtr points to the leftmost suffix of the current bucket. + * + * @param leftPtr points to the leftmost suffix of the current bucket. * @param rightPtr points to the rightmost suffix of the current bucket. - * @param offset is the length of the common prefix of the suffixes rounded down to a - * multiple of q. - * @param q is the initial prefix length used for the bucket sort. It also determines - * the increase of offset. + * @param offset is the length of the common prefix of the suffixes rounded down to a + * multiple of q. + * @param q is the initial prefix length used for the bucket sort. It also determines + * the increase of offset. 
*/ - private void computeBucketSize2_SaBucket(int leftPtr, int rightPtr, int offset, int q) - { + private void computeBucketSize2_SaBucket(int leftPtr, int rightPtr, int offset, int q) { int suffix1 = suffixArray[leftPtr] + offset; int suffix2 = suffixArray[rightPtr] + offset; - while (sufPtrMap[suffix1] == sufPtrMap[suffix2]) - { + while (sufPtrMap[suffix1] == sufPtrMap[suffix2]) { suffix1 += q; suffix2 += q; } - if (sufPtrMap[suffix1] > sufPtrMap[suffix2]) - { + if (sufPtrMap[suffix1] > sufPtrMap[suffix2]) { int tmpSwap = suffixArray[leftPtr]; suffixArray[leftPtr] = suffixArray[rightPtr]; suffixArray[rightPtr] = tmpSwap; @@ -621,28 +539,24 @@ private void computeBucketSize2_SaBucket(int leftPtr, int rightPtr, int offset, /** * Computes about the LCP of all suffixes in this bucket. It will be the newoffset. - * - * @param leftPtr points to the leftmost suffix of the current bucket. + * + * @param leftPtr points to the leftmost suffix of the current bucket. * @param rightPtr points to the rightmost suffix of the current bucket. - * @param offset is the length of the common prefix of the suffixes rounded down to a - * multiple of q. - * @param q is the initial prefix length used for the bucket sort. It also determines - * the increase of offset. + * @param offset is the length of the common prefix of the suffixes rounded down to a + * multiple of q. + * @param q is the initial prefix length used for the bucket sort. It also determines + * the increase of offset. * @return the LCP of suffixes in this bucket (newoffset). 
*/ private int computeDiffDepthBucket_SaBucket(int leftPtr, int rightPtr, int offset, - int q) - { + int q) { int lcp = offset; - while (true) - { + while (true) { int runPtr = leftPtr; int a = suffixArray[rightPtr]; int tmpPtr = sufPtrMap[a + lcp]; - while (runPtr < rightPtr) - { - if (sufPtrMap[suffixArray[runPtr] + lcp] != tmpPtr) - { + while (runPtr < rightPtr) { + if (sufPtrMap[suffixArray[runPtr] + lcp] != tmpPtr) { return lcp; } runPtr++; @@ -655,28 +569,25 @@ private int computeDiffDepthBucket_SaBucket(int leftPtr, int rightPtr, int offse * Ternary partitioning of buckets with Lomuto's scheme. Subbuckets of size 2 and 3 * are directly sorted and partitions smaller than a given threshold are sorted by * insertion sort. - * - * @param leftPtr points to the leftmost position of the current bucket. + * + * @param leftPtr points to the leftmost position of the current bucket. * @param rightPtr points to the rightmost position of the current bucket. - * @param offset is the length of the common prefix of the suffixes (a multiple of q). - * @param q is the initial prefix length used for the bucket sort. It also determines - * the increase of offset. + * @param offset is the length of the common prefix of the suffixes (a multiple of q). + * @param q is the initial prefix length used for the bucket sort. It also determines + * the increase of offset. */ private void partitionUpdateRecurse_SaBucket(int leftPtr, int rightPtr, int offset, - int q) - { + int q) { int pivot; int tmpSize = rightPtr - leftPtr; - if (tmpSize < 10000) - { + if (tmpSize < 10000) { tmpSize = tmpSize / 4; pivot = sufPtrMap[suffixArray[leftPtr + tmpSize] + offset]; int pivotb = sufPtrMap[suffixArray[leftPtr + 2 * tmpSize] + offset]; int pivotc = sufPtrMap[suffixArray[rightPtr - tmpSize] + offset]; int medNumber = medianOfThreeUlong(pivot, pivotb, pivotc); int pivotPtr = leftPtr + tmpSize; - if (medNumber > 0) - { + if (medNumber > 0) { pivotPtr = (medNumber == 1) ? 
(leftPtr + 2 * tmpSize) : (rightPtr - tmpSize); pivot = (medNumber == 1) ? pivotb : pivotc; @@ -684,25 +595,20 @@ private void partitionUpdateRecurse_SaBucket(int leftPtr, int rightPtr, int offs int swapTmp = suffixArray[pivotPtr]; suffixArray[pivotPtr] = suffixArray[leftPtr]; suffixArray[leftPtr] = swapTmp; - } - else - { - int [] keyPtrList = new int [9]; + } else { + int[] keyPtrList = new int[9]; tmpSize = tmpSize / 10; int i; - for (i = 0; i < 9; i++) - { + for (i = 0; i < 9; i++) { keyPtrList[i] = leftPtr + (i + 1) * tmpSize; } /* insertion sort */ - for (i = 1; i < 9; i++) - { + for (i = 1; i < 9; i++) { int tempValue = keyPtrList[i]; int tempHashValue = sufPtrMap[suffixArray[tempValue] + offset]; int j = i - 1; while (j >= 0 - && sufPtrMap[suffixArray[keyPtrList[j]] + offset] > tempHashValue) - { + && sufPtrMap[suffixArray[keyPtrList[j]] + offset] > tempHashValue) { keyPtrList[j + 1] = keyPtrList[j]; j--; } @@ -716,28 +622,23 @@ private void partitionUpdateRecurse_SaBucket(int leftPtr, int rightPtr, int offs int pivotRightPtr = leftPtr + 1; while (pivotRightPtr <= rightPtr - && sufPtrMap[suffixArray[pivotRightPtr] + offset] == pivot) - { + && sufPtrMap[suffixArray[pivotRightPtr] + offset] == pivot) { ++pivotRightPtr; } int smallerPivotPtr = pivotRightPtr; while (smallerPivotPtr <= rightPtr - && sufPtrMap[suffixArray[smallerPivotPtr] + offset] < pivot) - { + && sufPtrMap[suffixArray[smallerPivotPtr] + offset] < pivot) { smallerPivotPtr++; } int frontPtr = smallerPivotPtr - 1; - while (frontPtr++ < rightPtr) - { + while (frontPtr++ < rightPtr) { int sortkey = sufPtrMap[suffixArray[frontPtr] + offset]; - if (sortkey <= pivot) - { + if (sortkey <= pivot) { int swapTmp = suffixArray[frontPtr]; suffixArray[frontPtr] = suffixArray[smallerPivotPtr]; suffixArray[smallerPivotPtr] = swapTmp; - if (sortkey == pivot) - { + if (sortkey == pivot) { suffixArray[smallerPivotPtr] = suffixArray[pivotRightPtr]; suffixArray[pivotRightPtr++] = swapTmp; } @@ -746,25 +647,18 @@ 
private void partitionUpdateRecurse_SaBucket(int leftPtr, int rightPtr, int offs } /* vector swap the pivot elements */ int numberSmaller = smallerPivotPtr - pivotRightPtr; - if (numberSmaller > 0) - { + if (numberSmaller > 0) { int swapsize = Math.min((pivotRightPtr - leftPtr), numberSmaller); int pivotRightTmpPtr = leftPtr + swapsize - 1; vectorSwap(leftPtr, pivotRightTmpPtr, smallerPivotPtr - 1); /* recursively sort < partition */ - if (numberSmaller == 1) - { + if (numberSmaller == 1) { sufPtrMap[suffixArray[leftPtr]] = leftPtr; - } - else - { - if (numberSmaller == 2) - { + } else { + if (numberSmaller == 2) { computeBucketSize2_SaBucket(leftPtr, leftPtr + 1, offset, q); - } - else - { + } else { if (numberSmaller == 3) computeBucketSize3_SaBucket(leftPtr, leftPtr + 2, offset, q); else partitionUpdateRecurse_SaBucket(leftPtr, leftPtr + numberSmaller @@ -776,47 +670,34 @@ else partitionUpdateRecurse_SaBucket(leftPtr, leftPtr + numberSmaller /* update pivots and recursively sort = partition */ int leftTmpPtr = leftPtr + numberSmaller; smallerPivotPtr--; - if (leftTmpPtr == smallerPivotPtr) - { + if (leftTmpPtr == smallerPivotPtr) { sufPtrMap[suffixArray[leftTmpPtr]] = leftTmpPtr; if (leftTmpPtr == rightPtr) return; - } - else - { + } else { int newOffset = (pivot == rightPtr) ? 
(2 * offset) : offset + q; - if (leftTmpPtr + 1 == smallerPivotPtr) - { + if (leftTmpPtr + 1 == smallerPivotPtr) { computeBucketSize2_SaBucket(leftTmpPtr, smallerPivotPtr, newOffset, q); if (rightPtr == smallerPivotPtr) return; - } - else - { - if (leftTmpPtr + 2 == smallerPivotPtr) - { + } else { + if (leftTmpPtr + 2 == smallerPivotPtr) { computeBucketSize3_SaBucket(leftTmpPtr, smallerPivotPtr, newOffset, q); if (rightPtr == smallerPivotPtr) return; - } - else - { - if (rightPtr == smallerPivotPtr) - { + } else { + if (rightPtr == smallerPivotPtr) { newOffset = computeDiffDepthBucket_SaBucket(leftPtr + numberSmaller, rightPtr, newOffset, q); partitionUpdateRecurse_SaBucket(leftTmpPtr, rightPtr, newOffset, q); return; } - while (leftTmpPtr <= smallerPivotPtr) - { + while (leftTmpPtr <= smallerPivotPtr) { sufPtrMap[suffixArray[leftTmpPtr]] = smallerPivotPtr; leftTmpPtr++; } - if (smallerPivotPtr < leftPtr + numberSmaller + INSSORT_LIMIT) - { + if (smallerPivotPtr < leftPtr + numberSmaller + INSSORT_LIMIT) { insSortUpdateRecurse_SaBucket(leftPtr + numberSmaller, smallerPivotPtr, newOffset, q); - } - else partitionUpdateRecurse_SaBucket(leftPtr + numberSmaller, + } else partitionUpdateRecurse_SaBucket(leftPtr + numberSmaller, smallerPivotPtr, newOffset, q); } } @@ -824,18 +705,15 @@ else partitionUpdateRecurse_SaBucket(leftPtr + numberSmaller, /* recursively sort > partition */ ++smallerPivotPtr; - if (smallerPivotPtr == rightPtr) - { + if (smallerPivotPtr == rightPtr) { sufPtrMap[suffixArray[rightPtr]] = rightPtr; return; } - if (smallerPivotPtr + 1 == rightPtr) - { + if (smallerPivotPtr + 1 == rightPtr) { computeBucketSize2_SaBucket(smallerPivotPtr, rightPtr, offset, q); return; } - if (smallerPivotPtr + 2 == rightPtr) - { + if (smallerPivotPtr + 2 == rightPtr) { computeBucketSize3_SaBucket(smallerPivotPtr, rightPtr, offset, q); return; } @@ -844,15 +722,13 @@ else partitionUpdateRecurse_SaBucket(leftPtr + numberSmaller, } /** - * @param leftPtr points to the 
leftmost suffix of the first swap space. - * @param rightPtr points to the rightmost suffix of the first swap space. + * @param leftPtr points to the leftmost suffix of the first swap space. + * @param rightPtr points to the rightmost suffix of the first swap space. * @param swapEndPtr points to the leftmost suffix of the second swap space. */ - private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) - { + private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) { int swapTmp = suffixArray[swapEndPtr]; - while (leftPtr < rightPtr) - { + while (leftPtr < rightPtr) { suffixArray[swapEndPtr] = suffixArray[rightPtr]; swapEndPtr--; suffixArray[rightPtr] = suffixArray[swapEndPtr]; @@ -866,36 +742,31 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) /** * Sorts the alphabet concerning some weight concerning cc bucket size and alphabet * frequency Only works for mapped string with alphabet [0,alphaSize] - * - * @param buckets - the bucket table + * + * @param buckets - the bucket table * @param bucketsInLevel2Bucket - number of subbuckets of level-2 buckets * @return the order of the alphabet according to the weight on buckets with same - * first and second character + * first and second character */ - private int [] getCharWeightedOrder_Alphabet(int [] buckets, int bucketsInLevel2Bucket) - { + private int[] getCharWeightedOrder_Alphabet(int[] buckets, int bucketsInLevel2Bucket) { int alphabetSize = alphabet.size; - int [] charWeight = new int [alphabetSize]; + int[] charWeight = new int[alphabetSize]; int tmpBucketFactor = bucketsInLevel2Bucket * (alphabetSize + 1); int i; - for (i = 0; i < alphabetSize; i++) - { + for (i = 0; i < alphabetSize; i++) { charWeight[i] = alphabet.charFreq[i]; charWeight[i] -= buckets[i * tmpBucketFactor + bucketsInLevel2Bucket] - buckets[i * tmpBucketFactor]; } - int [] targetCharArray = new int [alphabetSize + 1]; - for (i = 0; i < alphabetSize; i++) - { + int[] targetCharArray = new int[alphabetSize 
+ 1]; + for (i = 0; i < alphabetSize; i++) { targetCharArray[i] = i; } - for (i = 1; i < alphabet.size; i++) - { + for (i = 1; i < alphabet.size; i++) { int tmpWeight = charWeight[i]; int j = i; - while (j > 0 && tmpWeight < charWeight[targetCharArray[j - 1]]) - { + while (j > 0 && tmpWeight < charWeight[targetCharArray[j - 1]]) { targetCharArray[j] = targetCharArray[j - 1]; j--; } @@ -907,19 +778,15 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) /** * Constructs all buckets w.r.t. q-gram size q, the up to prefix q sorted suffix * array, and the bucket-pointer table. - * + * * @param q size of q-gram. * @return Buckets containing pointers into the suffix array. */ - private int [] determine_Buckets_Sarray_Sptrmap(int q) - { + private int[] determine_Buckets_Sarray_Sptrmap(int q) { - if (kbs_getExp_Ulong(2, alphabet.size) >= 0) - { + if (kbs_getExp_Ulong(2, alphabet.size) >= 0) { return determinePower2Alpha_Buckets_Sarray_Sptrmap(q); - } - else - { + } else { return determineAll_Buckets_Sarray_Sptrmap(q); } } @@ -927,16 +794,15 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) /** * Constructs all buckets w.r.t. q-gram size q, the up to prefix q sorted suffix * array, and the bucket-pointer table. - * + * * @param q size of q-gram. * @return Buckets containing pointers into the suffix array. 
* @see #determine_Buckets_Sarray_Sptrmap */ - private int [] determineAll_Buckets_Sarray_Sptrmap(int q) - { - int [] buckets = determineAll_Buckets_Sarray(q); + private int[] determineAll_Buckets_Sarray_Sptrmap(int q) { + int[] buckets = determineAll_Buckets_Sarray(q); int strLen = length; - sufPtrMap = new int [strLen + 2 * q + 1]; + sufPtrMap = new int[strLen + 2 * q + 1]; /* computation of first hashvalue */ int alphabetSize = alphabet.size; @@ -944,16 +810,14 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) int tempPower = 1; int hashCode = 0; int i; - for (i = q - 1; i >= 0; i--) - { + for (i = q - 1; i >= 0; i--) { hashCode += seq[start + mappedUcharArray + i] * tempPower; tempPower *= alphabetSize; } int tempModulo = kbs_power_Ulong(alphabetSize, q - 1); mappedUcharArray += q; int j; - for (j = 0; j < strLen - 1; j++) - { + for (j = 0; j < strLen - 1; j++) { sufPtrMap[j] = (buckets[hashCode + 1]) - 1; hashCode -= (seq[start + mappedUcharArray - q]) * tempModulo; hashCode *= alphabetSize; @@ -963,8 +827,7 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) sufPtrMap[j] = buckets[hashCode]; /* set the values in sufPtrMap[strLen..strLen+2*d] to [-1, -2, ..., -2*d] */ int beginPtr = -1; - for (j = strLen; j <= strLen + 2 * q; j++) - { + for (j = strLen; j <= strLen + 2 * q; j++) { sufPtrMap[j] = beginPtr--; } return buckets; @@ -974,34 +837,30 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) * Constructs all buckets w.r.t. q-gram size and the up to prefix q sorted suffix * array. Call determine_Buckets_Sarray(const Kbs_Ustring *const ustring, register * const Kbs_Ulong q, Kbs_Ulong **suffixArrayPtr) instead - * + * * @param q size of q-gram. * @return Buckets containing pointers into the suffix array. 
* @see #determine_Buckets_Sarray_Sptrmap(int) */ - private int [] determineAll_Buckets_Sarray(int q) - { + private int[] determineAll_Buckets_Sarray(int q) { int strLen = length; int alphabetSize = alphabet.size; int numberBuckets = kbs_power_Ulong(alphabetSize, q); - int [] buckets = new int [numberBuckets + 1]; - for (int i = 0; i < q; i++) - { + int[] buckets = new int[numberBuckets + 1]; + for (int i = 0; i < q; i++) { seq[start + length + i] = alphabet.charArray[0]; } - for (int i = 0; i < KBS_STRING_EXTENSION_SIZE - q; i++) - { + for (int i = 0; i < KBS_STRING_EXTENSION_SIZE - q; i++) { seq[start + length + i + q] = 0; } /* computation of first hashvalue */ - int [] alphaMap = alphabet.alphaMapping; + int[] alphaMap = alphabet.alphaMapping; int mappedUcharArray = 0; int hashCode = 0; int tempPower = 1; int i; - for (i = q - 1; i >= 0; i--) - { + for (i = q - 1; i >= 0; i--) { hashCode += (seq[start + mappedUcharArray + i] = alphaMap[seq[start + mappedUcharArray + i]]) * tempPower; @@ -1013,8 +872,7 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) mappedUcharArray += q; buckets[hashCode]++; int j; - for (j = 1; j < strLen; j++) - { + for (j = 1; j < strLen; j++) { hashCode -= (seq[start + mappedUcharArray - q]) * tempModulo; hashCode *= alphabetSize; hashCode += seq[start + mappedUcharArray] = alphaMap[seq[start @@ -1023,31 +881,27 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) buckets[hashCode]++; } /* update the alphabet for mapped string */ - for (j = 0; j < alphabetSize; j++) - { + for (j = 0; j < alphabetSize; j++) { alphabet.charFreq[j] = alphabet.charFreq[alphabet.charArray[j]]; alphabet.charArray[j] = j; alphaMap[j] = j; } - for (; j < KBS_MAX_ALPHABET_SIZE; j++) - { + for (; j < KBS_MAX_ALPHABET_SIZE; j++) { alphaMap[j] = -1; } - this.suffixArray = new int [strLen + 1]; + this.suffixArray = new int[strLen + 1]; /* computation of the bucket pointers, pointers into the suffix array */ - for (j = 1; j <= 
numberBuckets; j++) - { + for (j = 1; j <= numberBuckets; j++) { buckets[j] = buckets[j - 1] + buckets[j]; } /* computation of the suffix array (buckets that are copied later are left out) */ - int [] charRank = getCharWeightedRank_Alphabet(buckets, q); + int[] charRank = getCharWeightedRank_Alphabet(buckets, q); mappedUcharArray = q; hashCode = firstHashCode; - for (j = 0; j < strLen - 1; j++) - { + for (j = 0; j < strLen - 1; j++) { int c1; buckets[hashCode]--; if ((c1 = charRank[seq[start + mappedUcharArray - q]]) < charRank[seq[start @@ -1069,26 +923,23 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) /** * Constructs all buckets w.r.t. q-gram size q, the up to prefix length q sorted * suffix array, and the bucket-pointer table. - * + * * @param q size of q-gram. * @return Buckets containing pointers into the suffix array. * @see #determine_Buckets_Sarray_Sptrmap */ - private int [] determinePower2Alpha_Buckets_Sarray_Sptrmap(int q) - { + private int[] determinePower2Alpha_Buckets_Sarray_Sptrmap(int q) { int strLen = length; int exp2 = kbs_getExp_Ulong(2, alphabet.size); - if (exp2 < 0) - { + if (exp2 < 0) { throw new RuntimeException("value out of bounds"); } - int [] buckets = determinePower2Alpha_Buckets_Sarray(q); - this.sufPtrMap = new int [strLen + 2 * q + 1]; + int[] buckets = determinePower2Alpha_Buckets_Sarray(q); + this.sufPtrMap = new int[strLen + 2 * q + 1]; int mappedUcharArray = 0; int hashCode = 0; int j; - for (j = 0; j < q; j++) - { + for (j = 0; j < q; j++) { hashCode = hashCode << exp2; hashCode += seq[start + mappedUcharArray + j]; } @@ -1097,8 +948,7 @@ private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) tempModulo = tempModulo << (exp2 * (q - 1)); tempModulo = ~tempModulo; mappedUcharArray += q; - for (j = 0; j < strLen - 1; j++) - { + for (j = 0; j < strLen - 1; j++) { sufPtrMap[j] = (buckets[hashCode + 1]) - 1; hashCode = hashCode & tempModulo; hashCode = hashCode << exp2; @@ -1107,36 +957,29 @@ 
private void vectorSwap(int leftPtr, int rightPtr, int swapEndPtr) } sufPtrMap[j] = buckets[hashCode]; int beginPtr = -1; - for (j = strLen; j <= strLen + 2 * q; j++) - { + for (j = strLen; j <= strLen + 2 * q; j++) { sufPtrMap[j] = beginPtr--; } return buckets; } - private int kbs_power_Ulong(int base, int exp) - { + private int kbs_power_Ulong(int base, int exp) { int p; - if (exp == 0) - { + if (exp == 0) { return 1; } - if (exp == 1) - { + if (exp == 1) { return base; } - if (base == 4) - { - if (exp > 15) - { + if (base == 4) { + if (exp > 15) { throw new RuntimeException(); } return 4 << (2 * (exp - 1)); } p = 1; - for (; exp > 0; --exp) - { + for (; exp > 0; --exp) { p = p * base; } return p; @@ -1146,29 +989,25 @@ private int kbs_power_Ulong(int base, int exp) * Constructs all buckets w.r.t. q-gram size q and the up to prefix q sorted suffix * array. Precondition: ustring->alphabet->alphaSize = 2^x for some x; otherwise, call * determine_Buckets_Sarray. - * + * * @param q size of q-gram. * @return Buckets containing pointers into the suffix array. 
* @see #determine_Buckets_Sarray_Sptrmap(int) */ - private int [] determinePower2Alpha_Buckets_Sarray(int q) - { + private int[] determinePower2Alpha_Buckets_Sarray(int q) { int exp2 = kbs_getExp_Ulong(2, alphabet.size); int strLen = length; int mappedUcharArray = 0; - for (int i = 0; i < q; i++) - { + for (int i = 0; i < q; i++) { seq[start + length + i] = alphabet.charArray[0]; } - for (int i = length + q; i < length + KBS_STRING_EXTENSION_SIZE - q; i++) - { + for (int i = length + q; i < length + KBS_STRING_EXTENSION_SIZE - q; i++) { seq[start + i] = 0; } int numberBuckets = kbs_power_Ulong(alphabet.size, q); - int [] buckets = new int [numberBuckets + 1]; + int[] buckets = new int[numberBuckets + 1]; int hashCode = 0; - for (int j = 0; j < q; j++) - { + for (int j = 0; j < q; j++) { hashCode = hashCode << exp2; hashCode += (seq[start + mappedUcharArray + j] = alphabet.alphaMapping[seq[start + mappedUcharArray + j]]); @@ -1182,44 +1021,39 @@ private int kbs_power_Ulong(int base, int exp) mappedUcharArray += q; buckets[hashCode]++; - for (int j = 1; j < strLen; j++) - { + for (int j = 1; j < strLen; j++) { hashCode = hashCode & tempModulo; hashCode = hashCode << exp2; hashCode = hashCode | (seq[start + mappedUcharArray] = alphabet.alphaMapping[seq[start - + mappedUcharArray]]); + + mappedUcharArray]]); mappedUcharArray++; buckets[hashCode]++; } /* update the alphabet for mapped string */ int j; - for (j = 0; j < alphabet.size; j++) - { + for (j = 0; j < alphabet.size; j++) { alphabet.charFreq[j] = alphabet.charFreq[alphabet.charArray[j]]; alphabet.charArray[j] = j; alphabet.alphaMapping[j] = j; } - for (; j < KBS_MAX_ALPHABET_SIZE; j++) - { + for (; j < KBS_MAX_ALPHABET_SIZE; j++) { alphabet.alphaMapping[j] = -1; } - this.suffixArray = new int [strLen + 1]; + this.suffixArray = new int[strLen + 1]; /* computation of the bucket pointers, pointers into the suffix array */ - for (j = 1; j <= numberBuckets; j++) - { + for (j = 1; j <= numberBuckets; j++) { buckets[j] 
= buckets[j - 1] + buckets[j]; } /* computation of the suffix array */ - int [] charRank = getCharWeightedRank_Alphabet(buckets, q); + int[] charRank = getCharWeightedRank_Alphabet(buckets, q); mappedUcharArray = q; hashCode = firstHashCode; - for (j = 0; j < strLen - 1; j++) - { + for (j = 0; j < strLen - 1; j++) { int c1; buckets[hashCode]--; if ((c1 = charRank[seq[start + mappedUcharArray - q]]) < charRank[seq[start @@ -1241,23 +1075,21 @@ private int kbs_power_Ulong(int base, int exp) /** * Sorts the alphabet regarding some weight according to cc bucket size and alphabet * frequency Only works for mapped string with alphabet [0,alphaSize] - * + * * @param buckets - the bucket table - * @param q - the initial q-gram size + * @param q - the initial q-gram size * @return the rank of each character */ - private int [] getCharWeightedRank_Alphabet(int [] buckets, int q) - { + private int[] getCharWeightedRank_Alphabet(int[] buckets, int q) { int alphabetSize = alphabet.size; - int [] charWeight = new int [alphabetSize]; + int[] charWeight = new int[alphabetSize]; int bucketsInLevel2Bucket = kbs_power_Ulong(alphabetSize, q - 2); int tmpBucketFactor = bucketsInLevel2Bucket * (alphabetSize + 1); int i; charWeight[0] = alphabet.charFreq[0]; charWeight[0] -= buckets[bucketsInLevel2Bucket - 1]; - for (i = 1; i < alphabetSize - 1; i++) - { + for (i = 1; i < alphabetSize - 1; i++) { charWeight[i] = alphabet.charFreq[i]; charWeight[i] -= buckets[i * tmpBucketFactor + bucketsInLevel2Bucket - 1] - buckets[i * tmpBucketFactor - 1]; @@ -1267,49 +1099,40 @@ private int kbs_power_Ulong(int base, int exp) + bucketsInLevel2Bucket - 1] - buckets[(alphabetSize - 1) * tmpBucketFactor - 1]; - int [] targetCharArray = new int [alphabetSize]; - for (i = 0; i < alphabetSize; i++) - { + int[] targetCharArray = new int[alphabetSize]; + for (i = 0; i < alphabetSize; i++) { targetCharArray[i] = i; } /* insertion sort by charWeight */ - for (i = 1; i < alphabet.size; i++) - { + for (i = 1; i < 
alphabet.size; i++) { int tmpWeight = charWeight[i]; int j = i; - while (j > 0 && tmpWeight < charWeight[targetCharArray[j - 1]]) - { + while (j > 0 && tmpWeight < charWeight[targetCharArray[j - 1]]) { targetCharArray[j] = targetCharArray[j - 1]; j--; } targetCharArray[j] = i; } - int [] charRank = new int [alphabetSize + 1]; - for (i = 0; i < alphabetSize; i++) - { + int[] charRank = new int[alphabetSize + 1]; + for (i = 0; i < alphabetSize; i++) { charRank[targetCharArray[i]] = i; } return charRank; } /** - * + * */ - private int kbs_getExp_Ulong(int base, int value) - { + private int kbs_getExp_Ulong(int base, int value) { int exp = 0; int tmpValue = 1; - while (tmpValue < value) - { + while (tmpValue < value) { tmpValue *= base; exp++; } - if (tmpValue == value) - { + if (tmpValue == value) { return exp; - } - else - { + } else { return -1; } @@ -1321,14 +1144,11 @@ private int kbs_getExp_Ulong(int base, int value) * @param c third key * @return 0 if a is the median, 1 if b is the median, 2 if c is the median. */ - private int medianOfThreeUlong(int a, int b, int c) - { - if (a == b || a == c) - { + private int medianOfThreeUlong(int a, int b, int c) { + if (a == b || a == c) { return 0; } - if (b == c) - { + if (b == c) { return 2; } return a < b ? (b < c ? 1 : (a < c ? 2 : 0)) : (b > c ? 1 : (a < c ? 0 : 2)); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/CharSequenceAdapter.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/CharSequenceAdapter.java index f548eaf41..6dbeffc8e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/CharSequenceAdapter.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/CharSequenceAdapter.java @@ -2,47 +2,42 @@ /** * An adapter for constructing suffix arrays on character sequences. 
- * - * @see SuffixArrays#create(CharSequence) - * @see SuffixArrays#create(CharSequence, ISuffixArrayBuilder) * * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) + * @see SuffixArrays#create(CharSequence) + * @see SuffixArrays#create(CharSequence, ISuffixArrayBuilder) */ -final class CharSequenceAdapter -{ +final class CharSequenceAdapter { private final ISuffixArrayBuilder delegate; /** * Last mapped input in {@link #buildSuffixArray(CharSequence)}. */ - int [] input; + int[] input; /** * Construct an adapter with a given underlying suffix array construction strategy. * The suffix array builder should accept non-negative characters, with a possibly * large alphabet size. - * + * * @see DensePositiveDecorator */ - public CharSequenceAdapter(ISuffixArrayBuilder builder) - { + public CharSequenceAdapter(ISuffixArrayBuilder builder) { this.delegate = builder; } /** * Construct a suffix array for a given character sequence. */ - public int [] buildSuffixArray(CharSequence sequence) - { + public int[] buildSuffixArray(CharSequence sequence) { /* * Allocate slightly more space, some suffix construction strategies need it and * we don't want to waste space for multiple symbol mappings. 
*/ - this.input = new int [sequence.length() + SuffixArrays.MAX_EXTRA_TRAILING_SPACE]; - for (int i = sequence.length() - 1; i >= 0; i--) - { + this.input = new int[sequence.length() + SuffixArrays.MAX_EXTRA_TRAILING_SPACE]; + for (int i = sequence.length() - 1; i >= 0; i--) { input[i] = sequence.charAt(i); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DeepShallow.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DeepShallow.java index 9a90d3cdb..452570195 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DeepShallow.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DeepShallow.java @@ -17,200 +17,200 @@ * @author Dawid Weiss (Carrot Search) */ public class DeepShallow implements ISuffixArrayBuilder { - private static class SplitGroupResult { - final int equal; - final int lower; - - public SplitGroupResult(int equal, int lower) { - this.equal = equal; - this.lower = lower; - } - } - - private static class Node { - int skip; - int key; - Node right; - // original author uses down as a pointer to another Node, but sometimes he stores - // int values in it. Because of that, we have two following variables (luckily we - // could do so :)). - Node down; - int downInt; - } - - /** - * TODO: What is this magic constant? Do not make it public and do not reuse it anywhere where it isn't needed - * (especially not in the tests). If this algorithm has special considerations, we can run algorithm-specific tests - * with an appropriate decorator. 
- */ - final static int OVERSHOOT = 575; - private final static int SETMASK = 1 << 30; - private final static int CLEARMASK = ~SETMASK; - private final static int MARKER = 1 << 31; - - /** - * recursion limit for mk quicksort: - */ - private final static int MK_QS_TRESH = 20; - - private final static int MAX_TRESH = 30; - - /** - * limit for shallow_sort - */ - private final static int SHALLOW_LIMIT = 550; - - /** - * maximum offset considered when searching a pseudo anchor - */ - private final static int MAX_PSEUDO_ANCHOR_OFFSET = 0; - - /** - * maximum ratio bucket_size/group_size accepted for pseudo anchor_sorting - */ - private final static int B2G_RATIO = 1000; - - /** - * Update anchor ranks when determining rank for pseudo-sorting - */ - private final static boolean UPDATE_ANCHOR_RANKS = false; - - /** - * blind sort is used for groups of size ≤ Text_size/Blind_sort_ratio - */ - private final static int BLIND_SORT_RATIO = 2000; - - private final static int STACK_SIZE = 100; - - private int[] text; - private int textSize; - private int[] suffixArray; - private int anchorDist; // distance between anchors - private int anchorNum; - private int[] anchorOffset; - private int[] anchorRank; - private final int[] ftab = new int[66049]; - private final int[] bucketRanked = new int[66049]; - private final int[] runningOrder = new int[257]; - private final int[] lcpAux = new int[1 + MAX_TRESH]; - private int lcp; - private int cmpLeft; - private int cmpDone; - private int aux; - private int auxWritten; - private int stackSize; - private Node[] stack; - private int start; - - /** - * If true, {@link #buildSuffixArray(int[], int, int)} uses a copy of the input so it is left intact. - */ - private final boolean preserveInput; - - public DeepShallow() { - preserveInput = true; - } - - public DeepShallow(boolean preserveInput) { - this.preserveInput = preserveInput; - } - - /** - * {@inheritDoc} - *

        - * Additional constraints enforced by Deep-Shallow algorithm: - *

          - *
        • non-negative (≥0) symbols in the input
        • - *
        • maximal symbol value < 256
        • - *
        • input.lengthstart + length if {@link #preserveInput} is true
        • - *
        • input.lengthstart + length + {@link #OVERSHOOT} if {@link #preserveInput} is false
        • - *
        • length >= 2
        • - *
        - */ - @Override - public int[] buildSuffixArray(int[] input, int start, int length) { - Tools.assertAlways(input.length >= start + length, "Input array is too short"); - - MinMax mm = Tools.minmax(input, start, length); - Tools.assertAlways(mm.min >= 0, "input must not be negative"); - Tools.assertAlways(mm.max < 256, "max alphabet size is 256"); - - lcp = 1; - stack = new Node[length]; - this.start = start; - if (preserveInput) { - this.start = 0; - text = new int[length + OVERSHOOT]; - System.arraycopy(input, start, text, 0, length); - } else { - Tools.assertAlways(input.length >= start + length + OVERSHOOT, - "Input array length must have a trailing space of at least " + OVERSHOOT - + " bytes."); - text = input; - } - - for (int i = length; i < length + OVERSHOOT; i++) { - text[this.start + i] = 0; - } - - textSize = length; - suffixArray = new int[length]; - - int i, j, ss, sb, k, c1, c2, numQSorted = 0; - boolean[] bigDone = new boolean[257]; - int[] copyStart = new int[257]; - int[] copyEnd = new int[257]; - - // ------ init array containing positions of anchors - if (anchorDist == 0) { - anchorNum = 0; - } else { - anchorNum = 2 + (length - 1) / anchorDist; // see comment for helped_sort() - anchorRank = new int[anchorNum]; - anchorOffset = new int[anchorNum]; - for (i = 0; i < anchorNum; i++) { - anchorRank[i] = -1; // pos of anchors is initially unknown - anchorOffset[i] = anchorDist; // maximum possible value - } - } - - // ---------- init ftab ------------------ - // at first, clear values in ftab - for (i = 0; i < 66049; i++) - ftab[i] = 0; - - c1 = text[this.start + 0]; - for (i = 1; i <= textSize; i++) { - c2 = text[this.start + i]; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } - for (i = 1; i < 66049; i++) - ftab[i] += ftab[i - 1]; - - // -------- sort suffixes considering only the first two chars - c1 = text[this.start + 0]; - for (i = 0; i < textSize; i++) { - c2 = text[this.start + i + 1]; - j = (c1 << 8) + c2; - c1 = c2; - ftab[j]--; - 
suffixArray[ftab[j]] = i; - } + private static class SplitGroupResult { + final int equal; + final int lower; + + public SplitGroupResult(int equal, int lower) { + this.equal = equal; + this.lower = lower; + } + } + + private static class Node { + int skip; + int key; + Node right; + // original author uses down as a pointer to another Node, but sometimes he stores + // int values in it. Because of that, we have two following variables (luckily we + // could do so :)). + Node down; + int downInt; + } + + /** + * TODO: What is this magic constant? Do not make it public and do not reuse it anywhere where it isn't needed + * (especially not in the tests). If this algorithm has special considerations, we can run algorithm-specific tests + * with an appropriate decorator. + */ + final static int OVERSHOOT = 575; + private final static int SETMASK = 1 << 30; + private final static int CLEARMASK = ~SETMASK; + private final static int MARKER = 1 << 31; + + /** + * recursion limit for mk quicksort: + */ + private final static int MK_QS_TRESH = 20; + + private final static int MAX_TRESH = 30; + + /** + * limit for shallow_sort + */ + private final static int SHALLOW_LIMIT = 550; + + /** + * maximum offset considered when searching a pseudo anchor + */ + private final static int MAX_PSEUDO_ANCHOR_OFFSET = 0; + + /** + * maximum ratio bucket_size/group_size accepted for pseudo anchor_sorting + */ + private final static int B2G_RATIO = 1000; + + /** + * Update anchor ranks when determining rank for pseudo-sorting + */ + private final static boolean UPDATE_ANCHOR_RANKS = false; + + /** + * blind sort is used for groups of size ≤ Text_size/Blind_sort_ratio + */ + private final static int BLIND_SORT_RATIO = 2000; + + private final static int STACK_SIZE = 100; + + private int[] text; + private int textSize; + private int[] suffixArray; + private int anchorDist; // distance between anchors + private int anchorNum; + private int[] anchorOffset; + private int[] anchorRank; + private 
final int[] ftab = new int[66049]; + private final int[] bucketRanked = new int[66049]; + private final int[] runningOrder = new int[257]; + private final int[] lcpAux = new int[1 + MAX_TRESH]; + private int lcp; + private int cmpLeft; + private int cmpDone; + private int aux; + private int auxWritten; + private int stackSize; + private Node[] stack; + private int start; + + /** + * If true, {@link #buildSuffixArray(int[], int, int)} uses a copy of the input so it is left intact. + */ + private final boolean preserveInput; + + public DeepShallow() { + preserveInput = true; + } + + public DeepShallow(boolean preserveInput) { + this.preserveInput = preserveInput; + } + + /** + * {@inheritDoc} + *

        + * Additional constraints enforced by Deep-Shallow algorithm: + *

          + *
        • non-negative (≥0) symbols in the input
        • + *
        • maximal symbol value < 256
        • + *
        • input.lengthstart + length if {@link #preserveInput} is true
        • + *
        • input.lengthstart + length + {@link #OVERSHOOT} if {@link #preserveInput} is false
        • + *
        • length >= 2
        • + *
        + */ + @Override + public int[] buildSuffixArray(int[] input, int start, int length) { + Tools.assertAlways(input.length >= start + length, "Input array is too short"); + + MinMax mm = Tools.minmax(input, start, length); + Tools.assertAlways(mm.min >= 0, "input must not be negative"); + Tools.assertAlways(mm.max < 256, "max alphabet size is 256"); + + lcp = 1; + stack = new Node[length]; + this.start = start; + if (preserveInput) { + this.start = 0; + text = new int[length + OVERSHOOT]; + System.arraycopy(input, start, text, 0, length); + } else { + Tools.assertAlways(input.length >= start + length + OVERSHOOT, + "Input array length must have a trailing space of at least " + OVERSHOOT + + " bytes."); + text = input; + } + + for (int i = length; i < length + OVERSHOOT; i++) { + text[this.start + i] = 0; + } + + textSize = length; + suffixArray = new int[length]; + + int i, j, ss, sb, k, c1, c2, numQSorted = 0; + boolean[] bigDone = new boolean[257]; + int[] copyStart = new int[257]; + int[] copyEnd = new int[257]; + + // ------ init array containing positions of anchors + if (anchorDist == 0) { + anchorNum = 0; + } else { + anchorNum = 2 + (length - 1) / anchorDist; // see comment for helped_sort() + anchorRank = new int[anchorNum]; + anchorOffset = new int[anchorNum]; + for (i = 0; i < anchorNum; i++) { + anchorRank[i] = -1; // pos of anchors is initially unknown + anchorOffset[i] = anchorDist; // maximum possible value + } + } + + // ---------- init ftab ------------------ + // at first, clear values in ftab + for (i = 0; i < 66049; i++) + ftab[i] = 0; + + c1 = text[this.start + 0]; + for (i = 1; i <= textSize; i++) { + c2 = text[this.start + i]; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + for (i = 1; i < 66049; i++) + ftab[i] += ftab[i - 1]; + + // -------- sort suffixes considering only the first two chars + c1 = text[this.start + 0]; + for (i = 0; i < textSize; i++) { + c2 = text[this.start + i + 1]; + j = (c1 << 8) + c2; + c1 = c2; + ftab[j]--; + 
suffixArray[ftab[j]] = i; + } /* decide on the running order */ - calculateRunningOrder(); - for (i = 0; i < 257; i++) { - bigDone[i] = false; - } + calculateRunningOrder(); + for (i = 0; i < 257; i++) { + bigDone[i] = false; + } /* Really do the suffix sorting */ - for (i = 0; i <= 256; i++) { + for (i = 0; i <= 256; i++) { /*-- - Process big buckets, starting with the least full. + Process big buckets, starting with the least full. --*/ - ss = runningOrder[i]; + ss = runningOrder[i]; /*-- Complete the big bucket [ss] by sorting any unsorted small buckets [ss, j]. Hopefully @@ -218,1639 +218,1639 @@ public int[] buildSuffixArray(int[] input, int start, int length) { completed many of the small buckets [ss, j], so we don't have to sort them at all. --*/ - for (j = 0; j <= 256; j++) { - if (j != ss) { - sb = (ss << 8) + j; - if ((ftab[sb] & SETMASK) == 0) { - int lo = ftab[sb] & CLEARMASK; - int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - shallowSort(lo, hi - lo + 1); - numQSorted += (hi - lo + 1); - } - } - ftab[sb] |= SETMASK; - } - } - { - for (j = 0; j <= 256; j++) { - copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; - copyEnd[j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; - } - // take care of the virtual -1 char in position textSize+1 - if (ss == 0) { - k = textSize - 1; - c1 = text[this.start + k]; - if (!bigDone[c1]) - suffixArray[copyStart[c1]++] = k; - } - for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { - k = suffixArray[j] - 1; - if (k < 0) - continue; - c1 = text[this.start + k]; - if (!bigDone[c1]) - suffixArray[copyStart[c1]++] = k; - } - for (j = (ftab[(ss + 1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { - k = suffixArray[j] - 1; - if (k < 0) - continue; - c1 = text[this.start + k]; - if (!bigDone[c1]) - suffixArray[copyEnd[c1]--] = k; - } - } - for (j = 0; j <= 256; j++) - ftab[(j << 8) + ss] |= SETMASK; - bigDone[ss] = true; - }// endfor - - return suffixArray; - } - - /** - * This is the multikey quicksort from 
bentley-sedgewick modified so that it stops recursion when depth reaches - * {@link #SHALLOW_LIMIT} (that is when two or more suffixes have {@link #SHALLOW_LIMIT} chars in common). - */ - private void shallowSort(int a, int n) { - // call multikey quicksort - // skip 2 chars since suffixes come from the same bucket - shallowMkq32(a, n, 2); - - } - - /** - * recursive multikey quicksort from Bentley-Sedgewick. - *

        - * Stops when text_depth reaches {@link #SHALLOW_LIMIT} that is when we have found that the current set of strings - * have {@link #SHALLOW_LIMIT} chars in common - */ - private void shallowMkq32(int a, int n, int text_depth) { - - int partval, val; - int pa = 0, pb = 0, pc = 0, pd = 0, pl = 0, pm = 0, pn = 0;// pointers - int d, r; - int next_depth;// text pointer - boolean repeatFlag = true; - - // ---- On small arrays use insertions sort - if (n < MK_QS_TRESH) { - shallowInssortLcp(a, n, text_depth); - return; - } - - // ----------- choose pivot -------------- - while (repeatFlag) { - - repeatFlag = false; - pl = a; - pm = a + (n / 2); - pn = a + (n - 1); - if (n > 30) { // On big arrays, pseudomedian of 9 - d = (n / 8); - pl = med3(pl, pl + d, pl + 2 * d, text_depth); - pm = med3(pm - d, pm, pm + d, text_depth); - pn = med3(pn - 2 * d, pn - d, pn, text_depth); - } - pm = med3(pl, pm, pn, text_depth); - swap2(a, pm); - partval = ptr2char32(a, text_depth); - pa = pb = a + 1; - pc = pd = a + n - 1; - // -------- partition ----------------- - for (;;) { - while (pb <= pc && (val = ptr2char32(pb, text_depth)) <= partval) { - if (val == partval) { - swap2(pa, pb); - pa++; - } - pb++; - } - while (pb <= pc && (val = ptr2char32(pc, text_depth)) >= partval) { - if (val == partval) { - swap2(pc, pd); - pd--; - } - pc--; - } - if (pb > pc) - break; - swap2(pb, pc); - pb++; - pc--; - } - if (pa > pd) { - // all values were equal to partval: make it simpler - if ((next_depth = text_depth + 4) >= SHALLOW_LIMIT) { - helpedSort(a, n, next_depth); - return; - } else { - text_depth = next_depth; - repeatFlag = true; - } - } - - } - // partition a[] into the values smaller, equal, and larger that partval - pn = a + n; - r = min(pa - a, pb - pa); - vecswap2(a, pb - r, r); - r = min(pd - pc, pn - pd - 1); - vecswap2(pb, pn - r, r); - // --- sort smaller strings ------- - if ((r = pb - pa) > 1) - shallowMkq32(a, r, text_depth); - // --- sort strings starting with partval 
----- - if ((next_depth = text_depth + 4) < SHALLOW_LIMIT) - shallowMkq32(a + r, pa - pd + n - 1, next_depth); - else - helpedSort(a + r, pa - pd + n - 1, next_depth); - if ((r = pd - pc) > 1) - shallowMkq32(a + n - r, r, text_depth); - - } - - private void vecswap2(int a, int b, int n) { - while (n-- > 0) { - int t = suffixArray[a]; - suffixArray[a++] = suffixArray[b]; - suffixArray[b++] = t; - } - } - - private static int min(int i, int j) { - return i < j ? i : j; - } - - /** - * this is the insertion sort routine called by multikey-quicksort for sorting small groups. During insertion sort - * the comparisons are done calling cmp_unrolled_shallow_lcp() and two strings are equal if the coincides for - * SHALLOW_LIMIT characters. After this first phase we sort groups of "equal_string" using helped_sort(). - *

        - */ - private void shallowInssortLcp(int a, int n, int text_depth) { - int i, j, j1, lcp_new, r, ai, lcpi; - int cmp_from_limit; - int text_depth_ai;// pointer - // --------- initialize ---------------- - - lcpAux[0] = -1; // set lcp[-1] = -1 - for (i = 0; i < n; i++) { - lcpAux[lcp + i] = 0; - } - cmp_from_limit = SHALLOW_LIMIT - text_depth; - - // ----- start insertion sort ----------- - for (i = 1; i < n; i++) { - ai = suffixArray[a + i]; - lcpi = 0; - text_depth_ai = ai + text_depth; - j = i; - j1 = j - 1; // j1 is a shorhand for j-1 - while (true) { - - // ------ compare ai with a[j-1] -------- - cmpLeft = cmp_from_limit - lcpi; - r = cmpUnrolledShallowLcp(lcpi + suffixArray[a + j1] + text_depth, lcpi - + text_depth_ai); - lcp_new = cmp_from_limit - cmpLeft; // lcp between ai and a[j1] - assert (r != 0 || lcp_new >= cmp_from_limit); - - if (r <= 0) { // we have a[j-1] <= ai - lcpAux[lcp + j1] = lcp_new; // ai will be written in a[j]; update - // lcp[j-1] - break; - } - - // --- we have a[j-1]>ai. a[j-1] and maybe other will be moved down - // --- use lcp to move down as many elements of a[] as possible - lcpi = lcp_new; - do { - suffixArray[a + j] = suffixArray[a + j1]; // move down a[j-1] - lcpAux[lcp + j] = lcpAux[lcp + j1]; // move down lcp[j-1] - j = j1; - j1--; // update j and j1=j-1 - } while (lcpi < lcpAux[lcp + j1]); // recall that lcp[-1]=-1 - - if (lcpi > lcpAux[lcp + j1]) - break; // ai will be written in position j - - // if we get here lcpi==lcp[j1]: we will compare them at next iteration - - } // end for(j=i ... - suffixArray[a + j] = ai; - lcpAux[lcp + j] = lcpi; - } // end for(i=1 ... - // ----- done with insertion sort. 
now sort groups of equal strings - for (i = 0; i < n - 1; i = j + 1) { - for (j = i; j < n; j++) - if (lcpAux[lcp + j] < cmp_from_limit) - break; - if (j - i > 0) - helpedSort(a + i, j - i + 1, SHALLOW_LIMIT); - } - } - - /** - * Function to compare two strings originating from the *b1 and *b2 The size of the unrolled loop must be at most - * equal to the costant CMP_OVERSHOOT defined in common.h When the function is called cmpLeft must contain the - * maximum number of comparisons the algorithm can do before returning 0 (equal strings) At exit cmpLeft has been - * decreased by the # of comparisons done - */ - private int cmpUnrolledShallowLcp(int b1, int b2) { - - int c1, c2; - - // execute blocks of 16 comparisons until a difference - // is found or we run out of the string - do { - // 1 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - return c1 - c2; - } - b1++; - b2++; - // 2 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 1; - return c1 - c2; - } - b1++; - b2++; - // 3 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 2; - return c1 - c2; - } - b1++; - b2++; - // 4 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 3; - return c1 - c2; - } - b1++; - b2++; - // 5 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 4; - return c1 - c2; - } - b1++; - b2++; - // 6 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 5; - return c1 - c2; - } - b1++; - b2++; - // 7 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 6; - return c1 - c2; - } - b1++; - b2++; - // 8 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 7; - return c1 - c2; - } - b1++; - b2++; - // 9 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 8; - return 
c1 - c2; - } - b1++; - b2++; - // 10 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 9; - return c1 - c2; - } - b1++; - b2++; - // 11 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 10; - return c1 - c2; - } - b1++; - b2++; - // 12 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 11; - return c1 - c2; - } - b1++; - b2++; - // 13 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 12; - return c1 - c2; - } - b1++; - b2++; - // 14 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 13; - return c1 - c2; - } - b1++; - b2++; - // 15 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 14; - return c1 - c2; - } - b1++; - b2++; - // 16 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpLeft -= 15; - return c1 - c2; - } - b1++; - b2++; - // if we have done enough comparisons the strings are considered equal - cmpLeft -= 16; - if (cmpLeft <= 0) - return 0; - // assert( b1 0) { // anchor <= a[i] < (sorted suffix) - if (curr_sb != getSmallBucket(text_pos + diff)) { - if (diff < min_forw_offset) { - min_forw_offset = diff; - best_forw_anchor = anchor; - forw_anchor_index = i; - } - } else { // the sorted suffix belongs to the same bucket of a[0]..a[n-1] - if (diff < min_forw_offset_buc) { - min_forw_offset_buc = diff; - best_forw_anchor_buc = anchor; - forw_anchor_index_buc = i; - } - } - } else { // diff<0 => anchor <= (sorted suffix) < a[i] - if (diff > max_back_offset) { - max_back_offset = diff; - best_back_anchor = anchor; - back_anchor_index = i; - } - // try to find a sorted suffix > a[i] by looking at next anchor - aoffset = anchorOffset[++anchor]; - if (aoffset < anchorDist) { - diff = anchorDist + aoffset - toffset; - assert (diff > 0); - if (curr_sb != getSmallBucket(text_pos + diff)) { - if 
(diff < min_forw_offset) { - min_forw_offset = diff; - best_forw_anchor = anchor; - forw_anchor_index = i; - } - } else { - if (diff < min_forw_offset_buc) { - min_forw_offset_buc = diff; - best_forw_anchor_buc = anchor; - forw_anchor_index_buc = i; - } - } - } - } - } - } - - // ------ if forward anchor_sort is possible, do it! -------- - if (best_forw_anchor >= 0 && min_forw_offset < depth - 1) { - anchor_pos = suffixArray[a + forw_anchor_index] + min_forw_offset; - anchor_rank = anchorRank[best_forw_anchor]; - generalAnchorSort(a, n, anchor_pos, anchor_rank, min_forw_offset); - if (anchorDist > 0) - updateAnchors(a, n); - return; - } - - boolean fail = false; - if (best_back_anchor >= 0) { - int T0, Ti;// text pointers - int j; - - // make sure that the offset is legal for all a[i] - for (i = 0; i < n; i++) { - if (suffixArray[a + i] + max_back_offset < 0) - fail = true; - // goto fail; // illegal offset, give up - } - // make sure that a[0] .. a[n-1] are preceded by the same substring - T0 = suffixArray[a]; - for (i = 1; i < n; i++) { - Ti = suffixArray[a + i]; - for (j = max_back_offset; j <= -1; j++) - if (text[this.start + T0 + j] != text[this.start + Ti + j]) - fail = true; - // goto fail; // mismatch, give up - } - if (!fail) { - // backward anchor sorting is possible - anchor_pos = suffixArray[a + back_anchor_index] + max_back_offset; - anchor_rank = anchorRank[best_back_anchor]; - generalAnchorSort(a, n, anchor_pos, anchor_rank, max_back_offset); - if (anchorDist > 0) - updateAnchors(a, n); - return; - } - } - if (fail) { - if (best_forw_anchor_buc >= 0 && min_forw_offset_buc < depth - 1) { - int equal = 0, lower = 0, upper = 0; - - anchor_pos = suffixArray[a + forw_anchor_index_buc] + min_forw_offset_buc; - anchor_rank = anchorRank[best_forw_anchor_buc]; - - // establish how many suffixes can be sorted using anchor_sort() - SplitGroupResult res = splitGroup(a, n, depth, min_forw_offset_buc, - forw_anchor_index_buc, lower); - equal = res.equal; - lower = 
res.lower; - if (equal == n) { - generalAnchorSort(a, n, anchor_pos, anchor_rank, min_forw_offset_buc); - } else { - // -- a[0] ... a[n-1] are split into 3 groups: lower, equal, upper - upper = n - equal - lower; - // printf("Warning! lo=%d eq=%d up=%d a=%x\n",lower,equal,upper,(int)a); - // sort the equal group - if (equal > 1) - generalAnchorSort(a + lower, equal, anchor_pos, anchor_rank, - min_forw_offset_buc); - - // sort upper and lower groups using deep_sort - if (lower > 1) - pseudoOrDeepSort(a, lower, depth); - if (upper > 1) - pseudoOrDeepSort(a + lower + equal, upper, depth); - } // end if(equal==n) ... else - if (anchorDist > 0) - updateAnchors(a, n); - return; - } // end hard case - - } - // --------------------------------------------------------------- - // If we get here it means that everything failed - // In this case we simply deep_sort a[0] ... a[n-1] - // --------------------------------------------------------------- - pseudoOrDeepSort(a, n, depth); - - } - - /** - * This function takes as input an array a[0] .. a[n-1] of suffixes which share the first "depth" chars. "pivot" in - * an index in 0..n-1 and offset and integer>0. The function splits a[0] .. a[n-1] into 3 groups: first the suffixes - * which are smaller than a[pivot], then those which are equal to a[pivot] and finally those which are greater than - * a[pivot]. Here, smaller, equal, larger refer to a lexicographic ordering limited to the first depth+offest chars - * (since the first depth chars are equal we only look at the chars in position depth, depth+1, ... depth+offset-1). - * The function returns the number "num" of suffixes equal to a[pivot], and stores in *first the first of these - * suffixes. So at the end the smaller suffixes are in a[0] ... a[first-1], the equal suffixes in a[first] ... - * a[first+num-1], the larger suffixes in a[first+num] ... 
a[n-1] The splitting is done using a modified mkq() - */ - private SplitGroupResult splitGroup(int a, int n, int depth, int offset, int pivot, int first) { - int r, partval; - int pa, pb, pc, pd, pa_old, pd_old;// pointers - int pivot_pos; - int text_depth, text_limit;// pointers - - // --------- initialization ------------------------------------ - pivot_pos = suffixArray[a + pivot]; // starting position in T[] of pivot - text_depth = depth; - text_limit = text_depth + offset; - - // ------------------------------------------------------------- - // In the following for() loop: - // [pa ... pd] is the current working region, - // pb moves from pa towards pd - // pc moves from pd towards pa - // ------------------------------------------------------------- - pa = a; - pd = a + n - 1; - - for (; pa != pd && (text_depth < text_limit); text_depth++) { - // ------ the pivot char is text[this.start + pivot_pos+depth] where - // depth = text_depth-text. This is text_depth[pivot_pos] - partval = text[this.start + text_depth + pivot_pos]; - // ----- partition ------------ - pb = pa_old = pa; - pc = pd_old = pd; - for (;;) { - while (pb <= pc && (r = ptr2char(pb, text_depth) - partval) <= 0) { - if (r == 0) { - swap2(pa, pb); - pa++; - } - pb++; - } - while (pb <= pc && (r = ptr2char(pc, text_depth) - partval) >= 0) { - if (r == 0) { - swap2(pc, pd); - pd--; - } - pc--; - } - if (pb > pc) - break; - swap2(pb, pc); - pb++; - pc--; - } - r = min(pa - pa_old, pb - pa); - vecswap2(pa_old, pb - r, r); - r = min(pd - pc, pd_old - pd); - vecswap2(pb, pd_old + 1 - r, r); - // ------ compute new boundaries ----- - pa = pa_old + (pb - pa); // there are pb-pa chars < partval - pd = pd_old - (pd - pc); // there are pd-pc chars > partval - - } - - first = pa - a; // index in a[] of the first suf. equal to pivot - // return pd-pa+1; // return number of suffixes equal to pivot - return new SplitGroupResult(pd - pa + 1, first); - - } - - /** - * given a SORTED array of suffixes a[0] .. 
a[n-1] updates anchorRank[] and anchorOffset[] - */ - private void updateAnchors(int a, int n) { - int i, anchor, toffset, aoffset, text_pos; - - for (i = 0; i < n; i++) { - text_pos = suffixArray[a + i]; - // get anchor preceeding text_pos=a[i] - anchor = text_pos / anchorDist; - toffset = text_pos % anchorDist; // distance of a[i] from anchor - aoffset = anchorOffset[anchor]; // dist of sorted suf from anchor - if (toffset < aoffset) { - anchorOffset[anchor] = toffset; - anchorRank[anchor] = a + i; - } - } - - } - - /** - * This routines sorts a[0] ... a[n-1] using the fact that in their common prefix, after offset characters, there is - * a suffix whose rank is known. In this routine we call this suffix anchor (and we denote its position and rank - * with anchor_pos and anchor_rank respectively) but it is not necessarily an anchor (=does not necessarily starts - * at position multiple of anchorDist) since this function is called by pseudo_anchor_sort(). The routine works by - * scanning the suffixes before and after the anchor in order to find (and mark) those which are suffixes of a[0] - * ... a[n-1]. After that, the ordering of a[0] ... 
a[n-1] is derived with a sigle scan of the marked - * suffixes.******************************************************************* - */ - private void generalAnchorSort(int a, int n, int anchor_pos, int anchor_rank, int offset) { - int sb, lo, hi; - int curr_lo, curr_hi, to_be_found, i, j; - int item; - int ris; - // void *ris; + for (j = 0; j <= 256; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if ((ftab[sb] & SETMASK) == 0) { + int lo = ftab[sb] & CLEARMASK; + int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + shallowSort(lo, hi - lo + 1); + numQSorted += (hi - lo + 1); + } + } + ftab[sb] |= SETMASK; + } + } + { + for (j = 0; j <= 256; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd[j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + // take care of the virtual -1 char in position textSize+1 + if (ss == 0) { + k = textSize - 1; + c1 = text[this.start + k]; + if (!bigDone[c1]) + suffixArray[copyStart[c1]++] = k; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = suffixArray[j] - 1; + if (k < 0) + continue; + c1 = text[this.start + k]; + if (!bigDone[c1]) + suffixArray[copyStart[c1]++] = k; + } + for (j = (ftab[(ss + 1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = suffixArray[j] - 1; + if (k < 0) + continue; + c1 = text[this.start + k]; + if (!bigDone[c1]) + suffixArray[copyEnd[c1]--] = k; + } + } + for (j = 0; j <= 256; j++) + ftab[(j << 8) + ss] |= SETMASK; + bigDone[ss] = true; + }// endfor + + return suffixArray; + } + + /** + * This is the multikey quicksort from bentley-sedgewick modified so that it stops recursion when depth reaches + * {@link #SHALLOW_LIMIT} (that is when two or more suffixes have {@link #SHALLOW_LIMIT} chars in common). + */ + private void shallowSort(int a, int n) { + // call multikey quicksort + // skip 2 chars since suffixes come from the same bucket + shallowMkq32(a, n, 2); + + } + + /** + * recursive multikey quicksort from Bentley-Sedgewick. + *

        + * Stops when text_depth reaches {@link #SHALLOW_LIMIT} that is when we have found that the current set of strings + * have {@link #SHALLOW_LIMIT} chars in common + */ + private void shallowMkq32(int a, int n, int text_depth) { + + int partval, val; + int pa = 0, pb = 0, pc = 0, pd = 0, pl = 0, pm = 0, pn = 0;// pointers + int d, r; + int next_depth;// text pointer + boolean repeatFlag = true; + + // ---- On small arrays use insertions sort + if (n < MK_QS_TRESH) { + shallowInssortLcp(a, n, text_depth); + return; + } + + // ----------- choose pivot -------------- + while (repeatFlag) { + + repeatFlag = false; + pl = a; + pm = a + (n / 2); + pn = a + (n - 1); + if (n > 30) { // On big arrays, pseudomedian of 9 + d = (n / 8); + pl = med3(pl, pl + d, pl + 2 * d, text_depth); + pm = med3(pm - d, pm, pm + d, text_depth); + pn = med3(pn - 2 * d, pn - d, pn, text_depth); + } + pm = med3(pl, pm, pn, text_depth); + swap2(a, pm); + partval = ptr2char32(a, text_depth); + pa = pb = a + 1; + pc = pd = a + n - 1; + // -------- partition ----------------- + for (; ; ) { + while (pb <= pc && (val = ptr2char32(pb, text_depth)) <= partval) { + if (val == partval) { + swap2(pa, pb); + pa++; + } + pb++; + } + while (pb <= pc && (val = ptr2char32(pc, text_depth)) >= partval) { + if (val == partval) { + swap2(pc, pd); + pd--; + } + pc--; + } + if (pb > pc) + break; + swap2(pb, pc); + pb++; + pc--; + } + if (pa > pd) { + // all values were equal to partval: make it simpler + if ((next_depth = text_depth + 4) >= SHALLOW_LIMIT) { + helpedSort(a, n, next_depth); + return; + } else { + text_depth = next_depth; + repeatFlag = true; + } + } + + } + // partition a[] into the values smaller, equal, and larger that partval + pn = a + n; + r = min(pa - a, pb - pa); + vecswap2(a, pb - r, r); + r = min(pd - pc, pn - pd - 1); + vecswap2(pb, pn - r, r); + // --- sort smaller strings ------- + if ((r = pb - pa) > 1) + shallowMkq32(a, r, text_depth); + // --- sort strings starting with partval 
----- + if ((next_depth = text_depth + 4) < SHALLOW_LIMIT) + shallowMkq32(a + r, pa - pd + n - 1, next_depth); + else + helpedSort(a + r, pa - pd + n - 1, next_depth); + if ((r = pd - pc) > 1) + shallowMkq32(a + n - r, r, text_depth); + + } + + private void vecswap2(int a, int b, int n) { + while (n-- > 0) { + int t = suffixArray[a]; + suffixArray[a++] = suffixArray[b]; + suffixArray[b++] = t; + } + } + + private static int min(int i, int j) { + return i < j ? i : j; + } + + /** + * this is the insertion sort routine called by multikey-quicksort for sorting small groups. During insertion sort + * the comparisons are done calling cmp_unrolled_shallow_lcp() and two strings are equal if the coincides for + * SHALLOW_LIMIT characters. After this first phase we sort groups of "equal_string" using helped_sort(). + *

        + */ + private void shallowInssortLcp(int a, int n, int text_depth) { + int i, j, j1, lcp_new, r, ai, lcpi; + int cmp_from_limit; + int text_depth_ai;// pointer + // --------- initialize ---------------- + + lcpAux[0] = -1; // set lcp[-1] = -1 + for (i = 0; i < n; i++) { + lcpAux[lcp + i] = 0; + } + cmp_from_limit = SHALLOW_LIMIT - text_depth; + + // ----- start insertion sort ----------- + for (i = 1; i < n; i++) { + ai = suffixArray[a + i]; + lcpi = 0; + text_depth_ai = ai + text_depth; + j = i; + j1 = j - 1; // j1 is a shorhand for j-1 + while (true) { + + // ------ compare ai with a[j-1] -------- + cmpLeft = cmp_from_limit - lcpi; + r = cmpUnrolledShallowLcp(lcpi + suffixArray[a + j1] + text_depth, lcpi + + text_depth_ai); + lcp_new = cmp_from_limit - cmpLeft; // lcp between ai and a[j1] + assert (r != 0 || lcp_new >= cmp_from_limit); + + if (r <= 0) { // we have a[j-1] <= ai + lcpAux[lcp + j1] = lcp_new; // ai will be written in a[j]; update + // lcp[j-1] + break; + } + + // --- we have a[j-1]>ai. a[j-1] and maybe other will be moved down + // --- use lcp to move down as many elements of a[] as possible + lcpi = lcp_new; + do { + suffixArray[a + j] = suffixArray[a + j1]; // move down a[j-1] + lcpAux[lcp + j] = lcpAux[lcp + j1]; // move down lcp[j-1] + j = j1; + j1--; // update j and j1=j-1 + } while (lcpi < lcpAux[lcp + j1]); // recall that lcp[-1]=-1 + + if (lcpi > lcpAux[lcp + j1]) + break; // ai will be written in position j + + // if we get here lcpi==lcp[j1]: we will compare them at next iteration + + } // end for(j=i ... + suffixArray[a + j] = ai; + lcpAux[lcp + j] = lcpi; + } // end for(i=1 ... + // ----- done with insertion sort. 
now sort groups of equal strings + for (i = 0; i < n - 1; i = j + 1) { + for (j = i; j < n; j++) + if (lcpAux[lcp + j] < cmp_from_limit) + break; + if (j - i > 0) + helpedSort(a + i, j - i + 1, SHALLOW_LIMIT); + } + } + + /** + * Function to compare two strings originating from the *b1 and *b2 The size of the unrolled loop must be at most + * equal to the costant CMP_OVERSHOOT defined in common.h When the function is called cmpLeft must contain the + * maximum number of comparisons the algorithm can do before returning 0 (equal strings) At exit cmpLeft has been + * decreased by the # of comparisons done + */ + private int cmpUnrolledShallowLcp(int b1, int b2) { + + int c1, c2; + + // execute blocks of 16 comparisons until a difference + // is found or we run out of the string + do { + // 1 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + return c1 - c2; + } + b1++; + b2++; + // 2 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 1; + return c1 - c2; + } + b1++; + b2++; + // 3 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 2; + return c1 - c2; + } + b1++; + b2++; + // 4 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 3; + return c1 - c2; + } + b1++; + b2++; + // 5 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 4; + return c1 - c2; + } + b1++; + b2++; + // 6 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 5; + return c1 - c2; + } + b1++; + b2++; + // 7 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 6; + return c1 - c2; + } + b1++; + b2++; + // 8 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 7; + return c1 - c2; + } + b1++; + b2++; + // 9 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 8; + return 
c1 - c2; + } + b1++; + b2++; + // 10 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 9; + return c1 - c2; + } + b1++; + b2++; + // 11 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 10; + return c1 - c2; + } + b1++; + b2++; + // 12 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 11; + return c1 - c2; + } + b1++; + b2++; + // 13 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 12; + return c1 - c2; + } + b1++; + b2++; + // 14 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 13; + return c1 - c2; + } + b1++; + b2++; + // 15 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 14; + return c1 - c2; + } + b1++; + b2++; + // 16 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpLeft -= 15; + return c1 - c2; + } + b1++; + b2++; + // if we have done enough comparisons the strings are considered equal + cmpLeft -= 16; + if (cmpLeft <= 0) + return 0; + // assert( b1 0) { // anchor <= a[i] < (sorted suffix) + if (curr_sb != getSmallBucket(text_pos + diff)) { + if (diff < min_forw_offset) { + min_forw_offset = diff; + best_forw_anchor = anchor; + forw_anchor_index = i; + } + } else { // the sorted suffix belongs to the same bucket of a[0]..a[n-1] + if (diff < min_forw_offset_buc) { + min_forw_offset_buc = diff; + best_forw_anchor_buc = anchor; + forw_anchor_index_buc = i; + } + } + } else { // diff<0 => anchor <= (sorted suffix) < a[i] + if (diff > max_back_offset) { + max_back_offset = diff; + best_back_anchor = anchor; + back_anchor_index = i; + } + // try to find a sorted suffix > a[i] by looking at next anchor + aoffset = anchorOffset[++anchor]; + if (aoffset < anchorDist) { + diff = anchorDist + aoffset - toffset; + assert (diff > 0); + if (curr_sb != getSmallBucket(text_pos + diff)) { + if 
(diff < min_forw_offset) { + min_forw_offset = diff; + best_forw_anchor = anchor; + forw_anchor_index = i; + } + } else { + if (diff < min_forw_offset_buc) { + min_forw_offset_buc = diff; + best_forw_anchor_buc = anchor; + forw_anchor_index_buc = i; + } + } + } + } + } + } + + // ------ if forward anchor_sort is possible, do it! -------- + if (best_forw_anchor >= 0 && min_forw_offset < depth - 1) { + anchor_pos = suffixArray[a + forw_anchor_index] + min_forw_offset; + anchor_rank = anchorRank[best_forw_anchor]; + generalAnchorSort(a, n, anchor_pos, anchor_rank, min_forw_offset); + if (anchorDist > 0) + updateAnchors(a, n); + return; + } + + boolean fail = false; + if (best_back_anchor >= 0) { + int T0, Ti;// text pointers + int j; + + // make sure that the offset is legal for all a[i] + for (i = 0; i < n; i++) { + if (suffixArray[a + i] + max_back_offset < 0) + fail = true; + // goto fail; // illegal offset, give up + } + // make sure that a[0] .. a[n-1] are preceded by the same substring + T0 = suffixArray[a]; + for (i = 1; i < n; i++) { + Ti = suffixArray[a + i]; + for (j = max_back_offset; j <= -1; j++) + if (text[this.start + T0 + j] != text[this.start + Ti + j]) + fail = true; + // goto fail; // mismatch, give up + } + if (!fail) { + // backward anchor sorting is possible + anchor_pos = suffixArray[a + back_anchor_index] + max_back_offset; + anchor_rank = anchorRank[best_back_anchor]; + generalAnchorSort(a, n, anchor_pos, anchor_rank, max_back_offset); + if (anchorDist > 0) + updateAnchors(a, n); + return; + } + } + if (fail) { + if (best_forw_anchor_buc >= 0 && min_forw_offset_buc < depth - 1) { + int equal = 0, lower = 0, upper = 0; + + anchor_pos = suffixArray[a + forw_anchor_index_buc] + min_forw_offset_buc; + anchor_rank = anchorRank[best_forw_anchor_buc]; + + // establish how many suffixes can be sorted using anchor_sort() + SplitGroupResult res = splitGroup(a, n, depth, min_forw_offset_buc, + forw_anchor_index_buc, lower); + equal = res.equal; + lower = 
res.lower; + if (equal == n) { + generalAnchorSort(a, n, anchor_pos, anchor_rank, min_forw_offset_buc); + } else { + // -- a[0] ... a[n-1] are split into 3 groups: lower, equal, upper + upper = n - equal - lower; + // printf("Warning! lo=%d eq=%d up=%d a=%x\n",lower,equal,upper,(int)a); + // sort the equal group + if (equal > 1) + generalAnchorSort(a + lower, equal, anchor_pos, anchor_rank, + min_forw_offset_buc); + + // sort upper and lower groups using deep_sort + if (lower > 1) + pseudoOrDeepSort(a, lower, depth); + if (upper > 1) + pseudoOrDeepSort(a + lower + equal, upper, depth); + } // end if(equal==n) ... else + if (anchorDist > 0) + updateAnchors(a, n); + return; + } // end hard case + + } + // --------------------------------------------------------------- + // If we get here it means that everything failed + // In this case we simply deep_sort a[0] ... a[n-1] + // --------------------------------------------------------------- + pseudoOrDeepSort(a, n, depth); + + } + + /** + * This function takes as input an array a[0] .. a[n-1] of suffixes which share the first "depth" chars. "pivot" in + * an index in 0..n-1 and offset and integer>0. The function splits a[0] .. a[n-1] into 3 groups: first the suffixes + * which are smaller than a[pivot], then those which are equal to a[pivot] and finally those which are greater than + * a[pivot]. Here, smaller, equal, larger refer to a lexicographic ordering limited to the first depth+offest chars + * (since the first depth chars are equal we only look at the chars in position depth, depth+1, ... depth+offset-1). + * The function returns the number "num" of suffixes equal to a[pivot], and stores in *first the first of these + * suffixes. So at the end the smaller suffixes are in a[0] ... a[first-1], the equal suffixes in a[first] ... + * a[first+num-1], the larger suffixes in a[first+num] ... 
a[n-1] The splitting is done using a modified mkq() + */ + private SplitGroupResult splitGroup(int a, int n, int depth, int offset, int pivot, int first) { + int r, partval; + int pa, pb, pc, pd, pa_old, pd_old;// pointers + int pivot_pos; + int text_depth, text_limit;// pointers + + // --------- initialization ------------------------------------ + pivot_pos = suffixArray[a + pivot]; // starting position in T[] of pivot + text_depth = depth; + text_limit = text_depth + offset; + + // ------------------------------------------------------------- + // In the following for() loop: + // [pa ... pd] is the current working region, + // pb moves from pa towards pd + // pc moves from pd towards pa + // ------------------------------------------------------------- + pa = a; + pd = a + n - 1; + + for (; pa != pd && (text_depth < text_limit); text_depth++) { + // ------ the pivot char is text[this.start + pivot_pos+depth] where + // depth = text_depth-text. This is text_depth[pivot_pos] + partval = text[this.start + text_depth + pivot_pos]; + // ----- partition ------------ + pb = pa_old = pa; + pc = pd_old = pd; + for (; ; ) { + while (pb <= pc && (r = ptr2char(pb, text_depth) - partval) <= 0) { + if (r == 0) { + swap2(pa, pb); + pa++; + } + pb++; + } + while (pb <= pc && (r = ptr2char(pc, text_depth) - partval) >= 0) { + if (r == 0) { + swap2(pc, pd); + pd--; + } + pc--; + } + if (pb > pc) + break; + swap2(pb, pc); + pb++; + pc--; + } + r = min(pa - pa_old, pb - pa); + vecswap2(pa_old, pb - r, r); + r = min(pd - pc, pd_old - pd); + vecswap2(pb, pd_old + 1 - r, r); + // ------ compute new boundaries ----- + pa = pa_old + (pb - pa); // there are pb-pa chars < partval + pd = pd_old - (pd - pc); // there are pd-pc chars > partval + + } + + first = pa - a; // index in a[] of the first suf. equal to pivot + // return pd-pa+1; // return number of suffixes equal to pivot + return new SplitGroupResult(pd - pa + 1, first); + + } + + /** + * given a SORTED array of suffixes a[0] .. 
a[n-1] updates anchorRank[] and anchorOffset[] + */ + private void updateAnchors(int a, int n) { + int i, anchor, toffset, aoffset, text_pos; + + for (i = 0; i < n; i++) { + text_pos = suffixArray[a + i]; + // get anchor preceeding text_pos=a[i] + anchor = text_pos / anchorDist; + toffset = text_pos % anchorDist; // distance of a[i] from anchor + aoffset = anchorOffset[anchor]; // dist of sorted suf from anchor + if (toffset < aoffset) { + anchorOffset[anchor] = toffset; + anchorRank[anchor] = a + i; + } + } + + } + + /** + * This routines sorts a[0] ... a[n-1] using the fact that in their common prefix, after offset characters, there is + * a suffix whose rank is known. In this routine we call this suffix anchor (and we denote its position and rank + * with anchor_pos and anchor_rank respectively) but it is not necessarily an anchor (=does not necessarily starts + * at position multiple of anchorDist) since this function is called by pseudo_anchor_sort(). The routine works by + * scanning the suffixes before and after the anchor in order to find (and mark) those which are suffixes of a[0] + * ... a[n-1]. After that, the ordering of a[0] ... a[n-1] is derived with a sigle scan of the marked + * suffixes.******************************************************************* + */ + private void generalAnchorSort(int a, int n, int anchor_pos, int anchor_rank, int offset) { + int sb, lo, hi; + int curr_lo, curr_hi, to_be_found, i, j; + int item; + int ris; + // void *ris; /* ---------- get bucket of anchor ---------- */ - sb = getSmallBucket(anchor_pos); - lo = bucketFirst(sb); - hi = bucketLast(sb); - // ------ sort pointers a[0] ... a[n-1] as plain integers - // qsort(a, n, sizeof(Int32), integer_cmp); - Arrays.sort(suffixArray, a, a + n); - - // ------------------------------------------------------------------ - // now we scan the bucket containing the anchor in search of suffixes - // corresponding to the ones we have to sort. 
When we find one of - // such suffixes we mark it. We go on untill n sfx's have been marked - // ------------------------------------------------------------------ - curr_hi = curr_lo = anchor_rank; - - mark(curr_lo); - // scan suffixes preceeding and following the anchor - for (to_be_found = n - 1; to_be_found > 0;) { - // invariant: the next positions to check are curr_lo-1 and curr_hi+1 - assert (curr_lo > lo || curr_hi < hi); - while (curr_lo > lo) { - item = suffixArray[--curr_lo] - offset; - ris = Arrays.binarySearch(suffixArray, a, a + n, item); - // ris = bsearch(&item,a,n,sizeof(Int32), integer_cmp); - if (ris != 0) { - mark(curr_lo); - to_be_found--; - } else - break; - } - while (curr_hi < hi) { - item = suffixArray[++curr_hi] - offset; - ris = Arrays.binarySearch(suffixArray, a, a + n, item); - if (ris != 0) { - mark(curr_hi); - to_be_found--; - } else - break; - } - } - // sort a[] using the marked suffixes - for (j = 0, i = curr_lo; i <= curr_hi; i++) - if (isMarked(i)) { - unmark(i); - suffixArray[a + j++] = suffixArray[i] - offset; - } - - } - - /** - * + sb = getSmallBucket(anchor_pos); + lo = bucketFirst(sb); + hi = bucketLast(sb); + // ------ sort pointers a[0] ... a[n-1] as plain integers + // qsort(a, n, sizeof(Int32), integer_cmp); + Arrays.sort(suffixArray, a, a + n); + + // ------------------------------------------------------------------ + // now we scan the bucket containing the anchor in search of suffixes + // corresponding to the ones we have to sort. When we find one of + // such suffixes we mark it. 
We go on untill n sfx's have been marked + // ------------------------------------------------------------------ + curr_hi = curr_lo = anchor_rank; + + mark(curr_lo); + // scan suffixes preceeding and following the anchor + for (to_be_found = n - 1; to_be_found > 0; ) { + // invariant: the next positions to check are curr_lo-1 and curr_hi+1 + assert (curr_lo > lo || curr_hi < hi); + while (curr_lo > lo) { + item = suffixArray[--curr_lo] - offset; + ris = Arrays.binarySearch(suffixArray, a, a + n, item); + // ris = bsearch(&item,a,n,sizeof(Int32), integer_cmp); + if (ris != 0) { + mark(curr_lo); + to_be_found--; + } else + break; + } + while (curr_hi < hi) { + item = suffixArray[++curr_hi] - offset; + ris = Arrays.binarySearch(suffixArray, a, a + n, item); + if (ris != 0) { + mark(curr_hi); + to_be_found--; + } else + break; + } + } + // sort a[] using the marked suffixes + for (j = 0, i = curr_lo; i <= curr_hi; i++) + if (isMarked(i)) { + unmark(i); + suffixArray[a + j++] = suffixArray[i] - offset; + } + + } + + /** + * */ - private void unmark(int i) { - suffixArray[i] &= ~MARKER; + private void unmark(int i) { + suffixArray[i] &= ~MARKER; - } + } - /** - * + /** + * */ - private boolean isMarked(int i) { - return (suffixArray[i] & MARKER) != 0; - } + private boolean isMarked(int i) { + return (suffixArray[i] & MARKER) != 0; + } - /** - * + /** + * */ - private void mark(int i) { - suffixArray[i] |= MARKER; + private void mark(int i) { + suffixArray[i] |= MARKER; - } + } - /** - * + /** + * */ - private int bucketLast(int sb) { - return (ftab[sb + 1] & CLEARMASK) - 1; - } + private int bucketLast(int sb) { + return (ftab[sb + 1] & CLEARMASK) - 1; + } - /** - * + /** + * */ - private int bucketFirst(int sb) { - return ftab[sb] & CLEARMASK; - } + private int bucketFirst(int sb) { + return ftab[sb] & CLEARMASK; + } - /** - * + /** + * */ - private int bucketSize(int sb) { - return (ftab[sb + 1] & CLEARMASK) - (ftab[sb] & CLEARMASK); - } + private int bucketSize(int 
sb) { + return (ftab[sb + 1] & CLEARMASK) - (ftab[sb] & CLEARMASK); + } - /** - * + /** + * */ - private int getSmallBucket(int pos) { - return (text[this.start + pos] << 8) + text[this.start + pos + 1]; - } + private int getSmallBucket(int pos) { + return (text[this.start + pos] << 8) + text[this.start + pos + 1]; + } - /** - * + /** + * */ - @SuppressWarnings("unused") + @SuppressWarnings("unused") private void pseudoOrDeepSort(int a, int n, int depth) { - int offset, text_pos, sb, pseudo_anchor_pos, max_offset, size; - - // ------- search for a useful pseudo-anchor ------------- - if (MAX_PSEUDO_ANCHOR_OFFSET > 0) { - max_offset = min(depth - 1, MAX_PSEUDO_ANCHOR_OFFSET); - text_pos = suffixArray[a]; - for (offset = 1; offset < max_offset; offset++) { - pseudo_anchor_pos = text_pos + offset; - sb = getSmallBucket(pseudo_anchor_pos); - // check if pseudo_anchor is in a sorted bucket - if (isSortedBucket(sb)) { - size = bucketSize(sb); // size of group - if (size > B2G_RATIO * n) - continue; // discard large groups - // sort a[0] ... a[n-1] using pseudo_anchor - pseudoAnchorSort(a, n, pseudo_anchor_pos, offset); - return; - } - } - } - deepSort(a, n, depth); - } - - /** - * + int offset, text_pos, sb, pseudo_anchor_pos, max_offset, size; + + // ------- search for a useful pseudo-anchor ------------- + if (MAX_PSEUDO_ANCHOR_OFFSET > 0) { + max_offset = min(depth - 1, MAX_PSEUDO_ANCHOR_OFFSET); + text_pos = suffixArray[a]; + for (offset = 1; offset < max_offset; offset++) { + pseudo_anchor_pos = text_pos + offset; + sb = getSmallBucket(pseudo_anchor_pos); + // check if pseudo_anchor is in a sorted bucket + if (isSortedBucket(sb)) { + size = bucketSize(sb); // size of group + if (size > B2G_RATIO * n) + continue; // discard large groups + // sort a[0] ... 
a[n-1] using pseudo_anchor + pseudoAnchorSort(a, n, pseudo_anchor_pos, offset); + return; + } + } + } + deepSort(a, n, depth); + } + + /** + * + */ + private boolean isSortedBucket(int sb) { + return (ftab[sb] & SETMASK) != 0; + } + + /** + * routine for deep-sorting the suffixes a[0] ... a[n-1] knowing that they have a common prefix of length "depth" + */ + private void deepSort(int a, int n, int depth) { + int blind_limit; + + blind_limit = textSize / BLIND_SORT_RATIO; + if (n <= blind_limit) + blindSsort(a, n, depth); // small_group + else + qsUnrolledLcp(a, n, depth, blind_limit); + + } + + /** + * ternary quicksort (seward-like) with lcp information */ - private boolean isSortedBucket(int sb) { - return (ftab[sb] & SETMASK) != 0; - } - - /** - * routine for deep-sorting the suffixes a[0] ... a[n-1] knowing that they have a common prefix of length "depth" - */ - private void deepSort(int a, int n, int depth) { - int blind_limit; - - blind_limit = textSize / BLIND_SORT_RATIO; - if (n <= blind_limit) - blindSsort(a, n, depth); // small_group - else - qsUnrolledLcp(a, n, depth, blind_limit); - - } - - /** - * ternary quicksort (seward-like) with lcp information - */ - private void qsUnrolledLcp(int a, int n, int depth, int blind_limit) { - int text_depth, text_pos_pivot;// pointers - int[] stack_lo = new int[STACK_SIZE]; - int[] stack_hi = new int[STACK_SIZE]; - int[] stack_d = new int[STACK_SIZE]; - int sp, r, r3, med; - int i, j, lo, hi, ris, lcp_lo, lcp_hi; - // ----- init quicksort -------------- - r = sp = 0; - // Pushd(0,n-1,depth); - stack_lo[sp] = 0; - stack_hi[sp] = n - 1; - stack_d[sp] = depth; - sp++; - // end Pushd - - // ----- repeat untill stack is empty ------ - while (sp > 0) { - assert (sp < STACK_SIZE); - // Popd(lo,hi,depth); - sp--; - lo = stack_lo[sp]; - hi = stack_hi[sp]; - depth = stack_d[sp]; - // end popd - text_depth = depth; - - // --- use shellsort for small groups - if (hi - lo < blind_limit) { - blindSsort(a + lo, hi - lo + 1, depth); 
- continue; - } + private void qsUnrolledLcp(int a, int n, int depth, int blind_limit) { + int text_depth, text_pos_pivot;// pointers + int[] stack_lo = new int[STACK_SIZE]; + int[] stack_hi = new int[STACK_SIZE]; + int[] stack_d = new int[STACK_SIZE]; + int sp, r, r3, med; + int i, j, lo, hi, ris, lcp_lo, lcp_hi; + // ----- init quicksort -------------- + r = sp = 0; + // Pushd(0,n-1,depth); + stack_lo[sp] = 0; + stack_hi[sp] = n - 1; + stack_d[sp] = depth; + sp++; + // end Pushd + + // ----- repeat untill stack is empty ------ + while (sp > 0) { + assert (sp < STACK_SIZE); + // Popd(lo,hi,depth); + sp--; + lo = stack_lo[sp]; + hi = stack_hi[sp]; + depth = stack_d[sp]; + // end popd + text_depth = depth; + + // --- use shellsort for small groups + if (hi - lo < blind_limit) { + blindSsort(a + lo, hi - lo + 1, depth); + continue; + } /* * Random partitioning. Guidance for the magic constants 7621 and 32768 is * taken from Sedgewick's algorithms book, chapter 35. */ - r = ((r * 7621) + 1) % 32768; - r3 = r % 3; - if (r3 == 0) - med = lo; - else if (r3 == 1) - med = (lo + hi) >> 1; - else - med = hi; - - // --- partition ---- - swap(med, hi, a); // put the pivot at the right-end - text_pos_pivot = text_depth + suffixArray[a + hi]; - i = lo - 1; - j = hi; - lcp_lo = lcp_hi = Integer.MAX_VALUE; - while (true) { - while (++i < hi) { - ris = cmpUnrolledLcp(text_depth + suffixArray[a + i], text_pos_pivot); - if (ris > 0) { - if (cmpDone < lcp_hi) - lcp_hi = cmpDone; - break; - } else if (cmpDone < lcp_lo) - lcp_lo = cmpDone; - } - while (--j > lo) { - ris = cmpUnrolledLcp(text_depth + suffixArray[a + j], text_pos_pivot); - if (ris < 0) { - if (cmpDone < lcp_lo) - lcp_lo = cmpDone; - break; - } else if (cmpDone < lcp_hi) - lcp_hi = cmpDone; - } - if (i >= j) - break; - swap(i, j, a); - } - swap(i, hi, a); // put pivot at the middle - - // --------- insert subproblems in stack; smallest last - if (i - lo < hi - i) { - // Pushd(i + 1, hi, depth + lcp_hi); - stack_lo[sp] = i 
+ 1; - stack_hi[sp] = hi; - stack_d[sp] = depth + lcp_hi; - sp++; - // end pushd - if (i - lo > 1) { - // Pushd(lo, i - 1, depth + lcp_lo); - stack_lo[sp] = lo; - stack_hi[sp] = i - 1; - stack_d[sp] = depth + lcp_lo; - sp++; - // end push - } - - } else { - // Pushd(lo, i - 1, depth + lcp_lo); - stack_lo[sp] = lo; - stack_hi[sp] = i - 1; - stack_d[sp] = depth + lcp_lo; - sp++; - // end pushd - if (hi - i > 1) { - // Pushd(i + 1, hi, depth + lcp_hi); - stack_lo[sp] = i + 1; - stack_hi[sp] = hi; - stack_d[sp] = depth + lcp_hi; - sp++; - // end pushd - } - } - } - - } - - /** - * Function to compare two strings originating from the *b1 and *b2 The size of the unrolled loop must be at most - * equal to the costant CMP_OVERSHOOT defined in common.h the function return the result of the comparison (+ or -) - * and writes in cmpDone the number of successfull comparisons done - */ - private int cmpUnrolledLcp(int b1, int b2) { - - int c1, c2; - cmpDone = 0; - - // execute blocks of 16 comparisons untill a difference - // is found or we run out of the string - do { - // 1 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - return (c1 - c2); - } - b1++; - b2++; - // 2 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 1; - return (c1 - c2); - } - b1++; - b2++; - // 3 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 2; - return (c1 - c2); - } - b1++; - b2++; - // 4 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 3; - return (c1 - c2); - } - b1++; - b2++; - // 5 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 4; - return (c1 - c2); - } - b1++; - b2++; - // 6 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 5; - return (c1 - c2); - } - b1++; - b2++; - // 7 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone 
+= 6; - return (c1 - c2); - } - b1++; - b2++; - // 8 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 7; - return (c1 - c2); - } - b1++; - b2++; - // 9 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 8; - return (c1 - c2); - } - b1++; - b2++; - // 10 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 9; - return (c1 - c2); - } - b1++; - b2++; - // 11 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 10; - return (c1 - c2); - } - b1++; - b2++; - // 12 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 11; - return (c1 - c2); - } - b1++; - b2++; - // 13 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 12; - return (c1 - c2); - } - b1++; - b2++; - // 14 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 13; - return (c1 - c2); - } - b1++; - b2++; - // 15 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 14; - return (c1 - c2); - } - b1++; - b2++; - // 16 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmpDone += 15; - return (c1 - c2); - } - b1++; - b2++; - - cmpDone += 16; - - } while (b1 < textSize && b2 < textSize); - - return b2 - b1; - - } - - /** - * + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) + med = lo; + else if (r3 == 1) + med = (lo + hi) >> 1; + else + med = hi; + + // --- partition ---- + swap(med, hi, a); // put the pivot at the right-end + text_pos_pivot = text_depth + suffixArray[a + hi]; + i = lo - 1; + j = hi; + lcp_lo = lcp_hi = Integer.MAX_VALUE; + while (true) { + while (++i < hi) { + ris = cmpUnrolledLcp(text_depth + suffixArray[a + i], text_pos_pivot); + if (ris > 0) { + if (cmpDone < lcp_hi) + lcp_hi = cmpDone; + break; + } else if (cmpDone < lcp_lo) + lcp_lo = cmpDone; + } 
+ while (--j > lo) { + ris = cmpUnrolledLcp(text_depth + suffixArray[a + j], text_pos_pivot); + if (ris < 0) { + if (cmpDone < lcp_lo) + lcp_lo = cmpDone; + break; + } else if (cmpDone < lcp_hi) + lcp_hi = cmpDone; + } + if (i >= j) + break; + swap(i, j, a); + } + swap(i, hi, a); // put pivot at the middle + + // --------- insert subproblems in stack; smallest last + if (i - lo < hi - i) { + // Pushd(i + 1, hi, depth + lcp_hi); + stack_lo[sp] = i + 1; + stack_hi[sp] = hi; + stack_d[sp] = depth + lcp_hi; + sp++; + // end pushd + if (i - lo > 1) { + // Pushd(lo, i - 1, depth + lcp_lo); + stack_lo[sp] = lo; + stack_hi[sp] = i - 1; + stack_d[sp] = depth + lcp_lo; + sp++; + // end push + } + + } else { + // Pushd(lo, i - 1, depth + lcp_lo); + stack_lo[sp] = lo; + stack_hi[sp] = i - 1; + stack_d[sp] = depth + lcp_lo; + sp++; + // end pushd + if (hi - i > 1) { + // Pushd(i + 1, hi, depth + lcp_hi); + stack_lo[sp] = i + 1; + stack_hi[sp] = hi; + stack_d[sp] = depth + lcp_hi; + sp++; + // end pushd + } + } + } + + } + + /** + * Function to compare two strings originating from the *b1 and *b2 The size of the unrolled loop must be at most + * equal to the costant CMP_OVERSHOOT defined in common.h the function return the result of the comparison (+ or -) + * and writes in cmpDone the number of successfull comparisons done */ - private void swap(int i, int j, int a) { - int tmp = suffixArray[a + i]; - suffixArray[a + i] = suffixArray[a + j]; - suffixArray[a + j] = tmp; - } - - /** - * routine for deep-sorting the suffixes a[0] ... 
a[n-1] knowing that they have a common prefix of length "depth" - */ - private void blindSsort(int a, int n, int depth) { - int i, j, aj, lcp; - Node nh, root, h; - - // ---- sort suffixes in order of increasing length - // qsort(a, n, sizeof(Int32), neg_integer_cmp); - Arrays.sort(suffixArray, a, a + n); - for (int left = 0, right = n - 1; left < right; left++, right--) { - // exchange the first and last - int temp = suffixArray[a + left]; - suffixArray[a + left] = suffixArray[a + right]; - suffixArray[a + right] = temp; - } - - // --- skip suffixes which have already reached the end-of-text - for (j = 0; j < n; j++) - if (suffixArray[a + j] + depth < textSize) - break; - if (j >= n - 1) - return; // everything is already sorted! - - // ------ init stack ------- - // stack = (node **) malloc(n*sizeof(node *)); - - // ------- init root with the first unsorted suffix - nh = new Node(); - nh.skip = -1; - nh.right = null; - // nh.down = (void *) a[j]; - nh.downInt = suffixArray[a + j]; - root = nh; - - // ------- insert suffixes a[j+1] ... 
a[n-1] - for (i = j + 1; i < n; i++) { - h = findCompanion(root, suffixArray[a + i]); - aj = h.downInt; - lcp = compareSuffixes(aj, suffixArray[a + i], depth); - insertSuffix(root, suffixArray[a + i], lcp, text[this.start + aj + lcp]); - } - - // ---- traverse the trie and get suffixes in lexicographic order - aux = a; - auxWritten = j; - traverseTrie(root); - - } - - /** - * this procedures traverse the trie in depth first order so that the suffixes (stored in the leaf) are recovered in - * lexicographic order - */ - private void traverseTrie(Node h) { - Node p, nextp; - - if (h.skip < 0) - suffixArray[aux + auxWritten++] = h.downInt; - else { - p = h.down; - do { - nextp = p.right; - if (nextp != null) { - // if there are 2 nodes with equal keys - // they must be considered in inverted order - if (nextp.key == p.key) { - traverseTrie(nextp); - traverseTrie(p); - p = nextp.right; - continue; - } - } - traverseTrie(p); - p = nextp; - } while (p != null); - } - - } - - /** - * insert a suffix in the trie rooted at *p. 
we know that the trie already contains a string which share the first n - * chars with suf - */ - private void insertSuffix(Node h, int suf, int n, int mmchar) { - int c, s; - Node p, pp; - - s = suf; - - // --------- insert a new node before node *h if necessary - if (h.skip != n) { - p = new Node(); - p.key = mmchar; - p.skip = h.skip; // p inherits skip and children of *h - p.down = h.down; - p.downInt = h.downInt; - p.right = null; - h.skip = n; - h.down = p; // now *h has p as the only child - } - - // -------- search the position of s[n] among *h offsprings - c = text[this.start + s + n]; - pp = h.down; - while (pp != null) { - if (pp.key >= c) - break; - pp = pp.right; - } - // ------- insert new node containing suf - p = new Node(); - p.skip = -1; - p.key = c; - p.right = pp; - pp = p; - p.downInt = suf; - return; - - } - - /** - * this function returns the lcp between suf1 and suf2 (that is returns n such that suf1[n]!=suf2[n] but - * suf1[i]==suf2[i] for i=0..n-1 However, it is possible that suf1 is a prefix of suf2 (not vice-versa because of - * the initial sorting of suffixes in order of descreasing length) in this case the function returns - * n=length(suf1)-1. So in this case suf1[n]==suf2[n] (and suf1[n+1] does not exists). 
- */ - private int compareSuffixes(int suf1, int suf2, int depth) { - int limit; - int s1, s2; - - s1 = depth + suf1; - s2 = depth + suf2; - limit = textSize - suf1 - depth; - return depth + getLcpUnrolled(s1, s2, limit); - } - - /** - * + private int cmpUnrolledLcp(int b1, int b2) { + + int c1, c2; + cmpDone = 0; + + // execute blocks of 16 comparisons untill a difference + // is found or we run out of the string + do { + // 1 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + return (c1 - c2); + } + b1++; + b2++; + // 2 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 1; + return (c1 - c2); + } + b1++; + b2++; + // 3 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 2; + return (c1 - c2); + } + b1++; + b2++; + // 4 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 3; + return (c1 - c2); + } + b1++; + b2++; + // 5 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 4; + return (c1 - c2); + } + b1++; + b2++; + // 6 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 5; + return (c1 - c2); + } + b1++; + b2++; + // 7 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 6; + return (c1 - c2); + } + b1++; + b2++; + // 8 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 7; + return (c1 - c2); + } + b1++; + b2++; + // 9 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 8; + return (c1 - c2); + } + b1++; + b2++; + // 10 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 9; + return (c1 - c2); + } + b1++; + b2++; + // 11 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 10; + return (c1 - c2); + } + b1++; + b2++; + // 12 + c1 = text[this.start + 
b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 11; + return (c1 - c2); + } + b1++; + b2++; + // 13 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 12; + return (c1 - c2); + } + b1++; + b2++; + // 14 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 13; + return (c1 - c2); + } + b1++; + b2++; + // 15 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 14; + return (c1 - c2); + } + b1++; + b2++; + // 16 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmpDone += 15; + return (c1 - c2); + } + b1++; + b2++; + + cmpDone += 16; + + } while (b1 < textSize && b2 < textSize); + + return b2 - b1; + + } + + /** + * */ - private int getLcpUnrolled(int b1, int b2, int cmp_limit) { - int cmp2do; - int c1, c2; - - // execute blocks of 16 comparisons untill a difference - // is found or we reach cmp_limit comparisons - cmp2do = cmp_limit; - do { - // 1 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - break; - } - b1++; - b2++; - // 2 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 1; - break; - } - b1++; - b2++; - // 3 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 2; - break; - } - b1++; - b2++; - // 4 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 3; - break; - } - b1++; - b2++; - // 5 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 4; - break; - } - b1++; - b2++; - // 6 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 5; - break; - } - b1++; - b2++; - // 7 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 6; - break; - } - b1++; - b2++; - // 8 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 
7; - break; - } - b1++; - b2++; - // 9 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 8; - break; - } - b1++; - b2++; - // 10 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 9; - break; - } - b1++; - b2++; - // 11 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 10; - break; - } - b1++; - b2++; - // 12 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 11; - break; - } - b1++; - b2++; - // 13 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 12; - break; - } - b1++; - b2++; - // 14 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 13; - break; - } - b1++; - b2++; - // 15 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 14; - break; - } - b1++; - b2++; - // 16 - c1 = text[this.start + b1]; - c2 = text[this.start + b2]; - if (c1 != c2) { - cmp2do -= 15; - break; - } - b1++; - b2++; - - cmp2do -= 16; - } while (cmp2do > 0); - - if (cmp_limit - cmp2do < cmp_limit) - return cmp_limit - cmp2do; - - return cmp_limit - 1; - } - - /** - * this function traverses the trie rooted at head following the string s. 
Returns the leaf "corresponding" to the - * string s - */ - private Node findCompanion(Node head, int s) { - int c; - Node p; - int t; - - stackSize = 0; // init stack - while (head.skip >= 0) { - stack[stackSize++] = head; - t = head.skip; - if (s + t >= textSize) // s[t] does not exist: mismatch - return getLeaf(head); - c = text[this.start + s + t]; - p = head.down; - boolean repeat = true; - while (repeat) { - if (c == p.key) { // found branch corresponding to c - head = p; - repeat = false; - } else if (c < p.key) // no branch corresponding to c: mismatch - { - return getLeaf(head); - } - if (repeat && (p = (p.right)) == null) // no other branches: mismatch - { - return getLeaf(head); - } - } - } - stack[stackSize++] = head; - return head; - } - - /** - * this function returns a leaf below "head". any leaf will do for the algorithm: we take the easiest to reach - */ - private Node getLeaf(Node head) { - Tools.assertAlways(head.skip >= 0, ""); - do { - head = head.down; - } while (head.skip >= 0); - return head; - } - - /** - * + private void swap(int i, int j, int a) { + int tmp = suffixArray[a + i]; + suffixArray[a + i] = suffixArray[a + j]; + suffixArray[a + j] = tmp; + } + + /** + * routine for deep-sorting the suffixes a[0] ... 
a[n-1] knowing that they have a common prefix of length "depth" */ - @SuppressWarnings("unused") + private void blindSsort(int a, int n, int depth) { + int i, j, aj, lcp; + Node nh, root, h; + + // ---- sort suffixes in order of increasing length + // qsort(a, n, sizeof(Int32), neg_integer_cmp); + Arrays.sort(suffixArray, a, a + n); + for (int left = 0, right = n - 1; left < right; left++, right--) { + // exchange the first and last + int temp = suffixArray[a + left]; + suffixArray[a + left] = suffixArray[a + right]; + suffixArray[a + right] = temp; + } + + // --- skip suffixes which have already reached the end-of-text + for (j = 0; j < n; j++) + if (suffixArray[a + j] + depth < textSize) + break; + if (j >= n - 1) + return; // everything is already sorted! + + // ------ init stack ------- + // stack = (node **) malloc(n*sizeof(node *)); + + // ------- init root with the first unsorted suffix + nh = new Node(); + nh.skip = -1; + nh.right = null; + // nh.down = (void *) a[j]; + nh.downInt = suffixArray[a + j]; + root = nh; + + // ------- insert suffixes a[j+1] ... 
a[n-1] + for (i = j + 1; i < n; i++) { + h = findCompanion(root, suffixArray[a + i]); + aj = h.downInt; + lcp = compareSuffixes(aj, suffixArray[a + i], depth); + insertSuffix(root, suffixArray[a + i], lcp, text[this.start + aj + lcp]); + } + + // ---- traverse the trie and get suffixes in lexicographic order + aux = a; + auxWritten = j; + traverseTrie(root); + + } + + /** + * this procedures traverse the trie in depth first order so that the suffixes (stored in the leaf) are recovered in + * lexicographic order + */ + private void traverseTrie(Node h) { + Node p, nextp; + + if (h.skip < 0) + suffixArray[aux + auxWritten++] = h.downInt; + else { + p = h.down; + do { + nextp = p.right; + if (nextp != null) { + // if there are 2 nodes with equal keys + // they must be considered in inverted order + if (nextp.key == p.key) { + traverseTrie(nextp); + traverseTrie(p); + p = nextp.right; + continue; + } + } + traverseTrie(p); + p = nextp; + } while (p != null); + } + + } + + /** + * insert a suffix in the trie rooted at *p. 
we know that the trie already contains a string which share the first n + * chars with suf + */ + private void insertSuffix(Node h, int suf, int n, int mmchar) { + int c, s; + Node p, pp; + + s = suf; + + // --------- insert a new node before node *h if necessary + if (h.skip != n) { + p = new Node(); + p.key = mmchar; + p.skip = h.skip; // p inherits skip and children of *h + p.down = h.down; + p.downInt = h.downInt; + p.right = null; + h.skip = n; + h.down = p; // now *h has p as the only child + } + + // -------- search the position of s[n] among *h offsprings + c = text[this.start + s + n]; + pp = h.down; + while (pp != null) { + if (pp.key >= c) + break; + pp = pp.right; + } + // ------- insert new node containing suf + p = new Node(); + p.skip = -1; + p.key = c; + p.right = pp; + pp = p; + p.downInt = suf; + return; + + } + + /** + * this function returns the lcp between suf1 and suf2 (that is returns n such that suf1[n]!=suf2[n] but + * suf1[i]==suf2[i] for i=0..n-1 However, it is possible that suf1 is a prefix of suf2 (not vice-versa because of + * the initial sorting of suffixes in order of descreasing length) in this case the function returns + * n=length(suf1)-1. So in this case suf1[n]==suf2[n] (and suf1[n+1] does not exists). 
+ */ + private int compareSuffixes(int suf1, int suf2, int depth) { + int limit; + int s1, s2; + + s1 = depth + suf1; + s2 = depth + suf2; + limit = textSize - suf1 - depth; + return depth + getLcpUnrolled(s1, s2, limit); + } + + /** + * + */ + private int getLcpUnrolled(int b1, int b2, int cmp_limit) { + int cmp2do; + int c1, c2; + + // execute blocks of 16 comparisons untill a difference + // is found or we reach cmp_limit comparisons + cmp2do = cmp_limit; + do { + // 1 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + break; + } + b1++; + b2++; + // 2 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 1; + break; + } + b1++; + b2++; + // 3 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 2; + break; + } + b1++; + b2++; + // 4 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 3; + break; + } + b1++; + b2++; + // 5 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 4; + break; + } + b1++; + b2++; + // 6 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 5; + break; + } + b1++; + b2++; + // 7 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 6; + break; + } + b1++; + b2++; + // 8 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 7; + break; + } + b1++; + b2++; + // 9 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 8; + break; + } + b1++; + b2++; + // 10 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 9; + break; + } + b1++; + b2++; + // 11 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 10; + break; + } + b1++; + b2++; + // 12 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 11; + break; + } + b1++; 
+ b2++; + // 13 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 12; + break; + } + b1++; + b2++; + // 14 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 13; + break; + } + b1++; + b2++; + // 15 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 14; + break; + } + b1++; + b2++; + // 16 + c1 = text[this.start + b1]; + c2 = text[this.start + b2]; + if (c1 != c2) { + cmp2do -= 15; + break; + } + b1++; + b2++; + + cmp2do -= 16; + } while (cmp2do > 0); + + if (cmp_limit - cmp2do < cmp_limit) + return cmp_limit - cmp2do; + + return cmp_limit - 1; + } + + /** + * this function traverses the trie rooted at head following the string s. Returns the leaf "corresponding" to the + * string s + */ + private Node findCompanion(Node head, int s) { + int c; + Node p; + int t; + + stackSize = 0; // init stack + while (head.skip >= 0) { + stack[stackSize++] = head; + t = head.skip; + if (s + t >= textSize) // s[t] does not exist: mismatch + return getLeaf(head); + c = text[this.start + s + t]; + p = head.down; + boolean repeat = true; + while (repeat) { + if (c == p.key) { // found branch corresponding to c + head = p; + repeat = false; + } else if (c < p.key) // no branch corresponding to c: mismatch + { + return getLeaf(head); + } + if (repeat && (p = (p.right)) == null) // no other branches: mismatch + { + return getLeaf(head); + } + } + } + stack[stackSize++] = head; + return head; + } + + /** + * this function returns a leaf below "head". 
any leaf will do for the algorithm: we take the easiest to reach + */ + private Node getLeaf(Node head) { + Tools.assertAlways(head.skip >= 0, ""); + do { + head = head.down; + } while (head.skip >= 0); + return head; + } + + /** + * + */ + @SuppressWarnings("unused") private void pseudoAnchorSort(int a, int n, int pseudo_anchor_pos, int offset) { - int pseudo_anchor_rank; - - // ---------- compute rank ------------ - if (UPDATE_ANCHOR_RANKS && anchorDist > 0) - pseudo_anchor_rank = getRankUpdateAnchors(pseudo_anchor_pos); - else - pseudo_anchor_rank = getRank(pseudo_anchor_pos); - // ---------- check rank -------------- - assert (suffixArray[pseudo_anchor_rank] == pseudo_anchor_pos); - // ---------- do the sorting ---------- - generalAnchorSort(a, n, pseudo_anchor_pos, pseudo_anchor_rank, offset); - - } - - /** - * compute the rank of the suffix starting at pos. It is required that the suffix is in an already sorted bucket - */ - private int getRank(int pos) { - int sb, lo, hi, j; - - sb = getSmallBucket(pos); - if (!isSortedBucket(sb)) { - throw new RuntimeException("Illegal call to get_rank! (get_rank1)"); - } - lo = bucketFirst(sb); - hi = bucketLast(sb); - for (j = lo; j <= hi; j++) - if (suffixArray[j] == pos) - return j; - throw new RuntimeException("Illegal call to get_rank! (get_rank2)"); - } - - /** - * compute the rank of the suffix starting at pos. At the same time check if the rank of the suffixes in the bucket - * containing pos can be used to update some entries in anchorOffset[] and anchorRank[] It is required that the - * suffix is in an already sorted bucket - */ - private int getRankUpdateAnchors(int pos) { - int sb, lo, hi, j, toffset, aoffset, anchor, rank; - - // --- get bucket and verify it is a sorted one - sb = getSmallBucket(pos); - if (!(isSortedBucket(sb))) { - throw new RuntimeException("Illegal call to get_rank! 
(get_rank_update_anchors)"); - } - // --- if the bucket has been already ranked just compute rank; - if (bucketRanked[sb] != 0) - return getRank(pos); - // --- rank all the bucket - bucketRanked[sb] = 1; - rank = -1; - lo = bucketFirst(sb); - hi = bucketLast(sb); - for (j = lo; j <= hi; j++) { - // see if we can update an anchor - toffset = suffixArray[j] % anchorDist; - anchor = suffixArray[j] / anchorDist; - aoffset = anchorOffset[anchor]; // dist of sorted suf from anchor - if (toffset < aoffset) { - anchorOffset[anchor] = toffset; - anchorRank[anchor] = j; - } - // see if we have found the rank of pos, if so store it in rank - if (suffixArray[j] == pos) { - assert (rank == -1); - rank = j; - } - } - assert (rank >= 0); - return rank; - } - - private void swap2(int a, int b) { - int tmp = suffixArray[a]; - suffixArray[a] = suffixArray[b]; - suffixArray[b] = tmp; - - } - - /* - * #define ptr2char32(i) (getword32(*(i) + text_depth)) - */ - private int ptr2char32(int a, int depth) { - return getword32(suffixArray[a] + depth); - } - - /* - * #define getword32(s) ((unsigned)( (*(s) << 24) | ((*((s)+1)) << 16) \ | ((*((s)+2)) - * << 8) | (*((s)+3)) )) - */ - private int getword32(int s) { - return text[this.start + s] << 24 | text[this.start + s + 1] << 16 - | text[this.start + s + 2] << 8 | text[this.start + s + 3]; - } - - private int ptr2char(int i, int text_depth) { - return text[this.start + suffixArray[i] + text_depth]; - } - - private int med3(int a, int b, int c, int depth) { - int va = ptr2char(a, depth); - int vb = ptr2char(b, depth); - if (va == vb) { - return a; - } - int vc = ptr2char(c, depth); - if (vc == va || vc == vb) { - return c; - } - return va < vb ? (vb < vc ? b : (va < vc ? c : a)) : (vb > vc ? b : (va < vc ? 
a : c)); - } - - private void calculateRunningOrder() { - int i, j; - for (i = 0; i <= 256; i++) - runningOrder[i] = i; - { - int vv; - int h = 1; - do - h = 3 * h + 1; - while (h <= 257); - do { - h = h / 3; - for (i = h; i <= 256; i++) { - vv = runningOrder[i]; - j = i; - while (bigFreq(runningOrder[j - h]) > bigFreq(vv)) { - runningOrder[j] = runningOrder[j - h]; - j = j - h; - if (j <= (h - 1)) - break; - } - runningOrder[j] = vv; - } - } while (h != 1); - } - } - - /** - * + int pseudo_anchor_rank; + + // ---------- compute rank ------------ + if (UPDATE_ANCHOR_RANKS && anchorDist > 0) + pseudo_anchor_rank = getRankUpdateAnchors(pseudo_anchor_pos); + else + pseudo_anchor_rank = getRank(pseudo_anchor_pos); + // ---------- check rank -------------- + assert (suffixArray[pseudo_anchor_rank] == pseudo_anchor_pos); + // ---------- do the sorting ---------- + generalAnchorSort(a, n, pseudo_anchor_pos, pseudo_anchor_rank, offset); + + } + + /** + * compute the rank of the suffix starting at pos. It is required that the suffix is in an already sorted bucket + */ + private int getRank(int pos) { + int sb, lo, hi, j; + + sb = getSmallBucket(pos); + if (!isSortedBucket(sb)) { + throw new RuntimeException("Illegal call to get_rank! (get_rank1)"); + } + lo = bucketFirst(sb); + hi = bucketLast(sb); + for (j = lo; j <= hi; j++) + if (suffixArray[j] == pos) + return j; + throw new RuntimeException("Illegal call to get_rank! (get_rank2)"); + } + + /** + * compute the rank of the suffix starting at pos. 
At the same time check if the rank of the suffixes in the bucket + * containing pos can be used to update some entries in anchorOffset[] and anchorRank[] It is required that the + * suffix is in an already sorted bucket + */ + private int getRankUpdateAnchors(int pos) { + int sb, lo, hi, j, toffset, aoffset, anchor, rank; + + // --- get bucket and verify it is a sorted one + sb = getSmallBucket(pos); + if (!(isSortedBucket(sb))) { + throw new RuntimeException("Illegal call to get_rank! (get_rank_update_anchors)"); + } + // --- if the bucket has been already ranked just compute rank; + if (bucketRanked[sb] != 0) + return getRank(pos); + // --- rank all the bucket + bucketRanked[sb] = 1; + rank = -1; + lo = bucketFirst(sb); + hi = bucketLast(sb); + for (j = lo; j <= hi; j++) { + // see if we can update an anchor + toffset = suffixArray[j] % anchorDist; + anchor = suffixArray[j] / anchorDist; + aoffset = anchorOffset[anchor]; // dist of sorted suf from anchor + if (toffset < aoffset) { + anchorOffset[anchor] = toffset; + anchorRank[anchor] = j; + } + // see if we have found the rank of pos, if so store it in rank + if (suffixArray[j] == pos) { + assert (rank == -1); + rank = j; + } + } + assert (rank >= 0); + return rank; + } + + private void swap2(int a, int b) { + int tmp = suffixArray[a]; + suffixArray[a] = suffixArray[b]; + suffixArray[b] = tmp; + + } + + /* + * #define ptr2char32(i) (getword32(*(i) + text_depth)) + */ + private int ptr2char32(int a, int depth) { + return getword32(suffixArray[a] + depth); + } + + /* + * #define getword32(s) ((unsigned)( (*(s) << 24) | ((*((s)+1)) << 16) \ | ((*((s)+2)) + * << 8) | (*((s)+3)) )) + */ + private int getword32(int s) { + return text[this.start + s] << 24 | text[this.start + s + 1] << 16 + | text[this.start + s + 2] << 8 | text[this.start + s + 3]; + } + + private int ptr2char(int i, int text_depth) { + return text[this.start + suffixArray[i] + text_depth]; + } + + private int med3(int a, int b, int c, int depth) { + 
int va = ptr2char(a, depth); + int vb = ptr2char(b, depth); + if (va == vb) { + return a; + } + int vc = ptr2char(c, depth); + if (vc == va || vc == vb) { + return c; + } + return va < vb ? (vb < vc ? b : (va < vc ? c : a)) : (vb > vc ? b : (va < vc ? a : c)); + } + + private void calculateRunningOrder() { + int i, j; + for (i = 0; i <= 256; i++) + runningOrder[i] = i; + { + int vv; + int h = 1; + do + h = 3 * h + 1; + while (h <= 257); + do { + h = h / 3; + for (i = h; i <= 256; i++) { + vv = runningOrder[i]; + j = i; + while (bigFreq(runningOrder[j - h]) > bigFreq(vv)) { + runningOrder[j] = runningOrder[j - h]; + j = j - h; + if (j <= (h - 1)) + break; + } + runningOrder[j] = vv; + } + } while (h != 1); + } + } + + /** + * */ - private int bigFreq(int b) { - return ftab[((b) + 1) << 8] - ftab[(b) << 8]; - } - - public static void main(String[] args) { - for (int i = 0; i < 5; i++) { - System.gc(); - } - int size = 1000000; - final Runtime rt = Runtime.getRuntime(); - long before, after; - Node[] nodes = new Node[size]; - before = rt.totalMemory() - rt.freeMemory(); - for (int i = 0; i < size; i++) { - nodes[i] = new Node(); - } - after = rt.totalMemory() - rt.freeMemory(); - - double a = 1.0 * (after - before) / size; - - System.out.println(before + " " + after + " " + size + " " + a); - - } + private int bigFreq(int b) { + return ftab[((b) + 1) << 8] - ftab[(b) << 8]; + } + + public static void main(String[] args) { + for (int i = 0; i < 5; i++) { + System.gc(); + } + int size = 1000000; + final Runtime rt = Runtime.getRuntime(); + long before, after; + Node[] nodes = new Node[size]; + before = rt.totalMemory() - rt.freeMemory(); + for (int i = 0; i < size; i++) { + nodes[i] = new Node(); + } + after = rt.totalMemory() - rt.freeMemory(); + + double a = 1.0 * (after - before) / size; + + System.out.println(before + " " + after + " " + size + " " + a); + + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveDecorator.java 
b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveDecorator.java index ba14dbd3c..43a515240 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveDecorator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveDecorator.java @@ -9,43 +9,34 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public final class DensePositiveDecorator implements ISuffixArrayBuilder -{ +public final class DensePositiveDecorator implements ISuffixArrayBuilder { private final ISuffixArrayBuilder delegate; /* - * + * */ - public DensePositiveDecorator(ISuffixArrayBuilder delegate) - { + public DensePositiveDecorator(ISuffixArrayBuilder delegate) { this.delegate = delegate; } /* - * + * */ @Override - public int [] buildSuffixArray(int [] input, final int start, final int length) - { + public int[] buildSuffixArray(int[] input, final int start, final int length) { final MinMax minmax = Tools.minmax(input, start, length); final ISymbolMapper mapper; - if (minmax.range() > 0x10000) - { + if (minmax.range() > 0x10000) { throw new RuntimeException("Large symbol space not implemented yet."); - } - else - { + } else { mapper = new DensePositiveMapper(input, start, length); } mapper.map(input, start, length); - try - { + try { return delegate.buildSuffixArray(input, start, length); - } - finally - { + } finally { mapper.undo(input, start, length); } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveMapper.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveMapper.java index ff7f6abdc..307e72ae6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveMapper.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DensePositiveMapper.java @@ -8,44 +8,38 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ 
-final class DensePositiveMapper implements ISymbolMapper -{ +final class DensePositiveMapper implements ISymbolMapper { private final int offset; - private final int [] forward; - private final int [] backward; + private final int[] forward; + private final int[] backward; /* - * + * */ - public DensePositiveMapper(int [] input, int start, int length) - { + public DensePositiveMapper(int[] input, int start, int length) { final MinMax minmax = Tools.minmax(input, start, length); final int min = minmax.min; final int max = minmax.max; - final int [] forward = new int [max - min + 1]; + final int[] forward = new int[max - min + 1]; final int offset = -min; // Mark all symbols present in the alphabet. final int end = start + length; - for (int i = start; i < end; i++) - { + for (int i = start; i < end; i++) { forward[input[i] + offset] = 1; } - + // Collect present symbols, assign unique codes. int k = 1; - for (int i = 0; i < forward.length; i++) - { - if (forward[i] != 0) - { + for (int i = 0; i < forward.length; i++) { + if (forward[i] != 0) { forward[i] = k++; } } - final int [] backward = new int [k]; - for (int i = start; i < end; i++) - { + final int[] backward = new int[k]; + for (int i = start; i < end; i++) { final int v = forward[input[i] + offset]; backward[v] = input[i]; } @@ -56,25 +50,21 @@ public DensePositiveMapper(int [] input, int start, int length) } /* - * + * */ @Override - public void map(int [] input, final int start, final int length) - { - for (int i = start, l = length; l > 0; l--, i++) - { + public void map(int[] input, final int start, final int length) { + for (int i = start, l = length; l > 0; l--, i++) { input[i] = forward[input[i] + offset]; } } /* - * + * */ @Override - public void undo(int [] input, final int start, final int length) - { - for (int i = start, l = length; l > 0; l--, i++) - { + public void undo(int[] input, final int start, final int length) { + for (int i = start, l = length; l > 0; l--, i++) { input[i] = 
backward[input[i]]; } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DivSufSort.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DivSufSort.java index 447dcf0ea..28fc496ed 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DivSufSort.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/DivSufSort.java @@ -17,18 +17,15 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public final class DivSufSort implements ISuffixArrayBuilder -{ +public final class DivSufSort implements ISuffixArrayBuilder { /* - * + * */ - private final static class StackElement - { + private final static class StackElement { final int a, b, c, e; int d; - StackElement(int a, int b, int c, int d, int e) - { + StackElement(int a, int b, int c, int d, int e) { this.a = a; this.b = b; this.c = c; @@ -36,38 +33,32 @@ private final static class StackElement this.e = e; } - StackElement(int a, int b, int c, int d) - { + StackElement(int a, int b, int c, int d) { this(a, b, c, d, 0); } } /* - * + * */ - private final static class TRBudget - { + private final static class TRBudget { int chance; int remain; int incval; int count; - private TRBudget(int chance, int incval) - { + private TRBudget(int chance, int incval) { this.chance = chance; this.remain = incval; this.incval = incval; } - private int check(int size) - { - if (size <= this.remain) - { + private int check(int size) { + if (size <= this.remain) { this.remain -= size; return 1; } - if (this.chance == 0) - { + if (this.chance == 0) { this.count += size; return 0; } @@ -78,22 +69,19 @@ private int check(int size) } /* - * + * */ - private static final class TRPartitionResult - { + private static final class TRPartitionResult { final int a; final int b; - public TRPartitionResult(int a, int b) - { + public TRPartitionResult(int a, int b) { this.a = a; this.b = b; } } - public DivSufSort() - { + public 
DivSufSort() { ALPHABET_SIZE = DEFAULT_ALPHABET_SIZE; BUCKET_A_SIZE = ALPHABET_SIZE; BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; @@ -102,8 +90,7 @@ public DivSufSort() /** * @param alphabetSize */ - public DivSufSort(int alphabetSize) - { + public DivSufSort(int alphabetSize) { ALPHABET_SIZE = alphabetSize; BUCKET_A_SIZE = ALPHABET_SIZE; BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; @@ -119,46 +106,46 @@ public DivSufSort(int alphabetSize) private final static int TR_STACKSIZE = 64; private final static int TR_INSERTIONSORT_THRESHOLD = 8; - private final static int [] sqq_table = - { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, - 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, - 101, 102, 103, 104, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, 125, 126, 128, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, - 150, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 160, 161, 162, - 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, - 175, 176, 176, 177, 178, 178, 179, 180, 181, 181, 182, 183, 183, 184, 185, 185, - 186, 187, 187, 188, 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, - 197, 197, 198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, - 207, 208, 208, 209, 209, 210, 211, 211, 212, 212, 213, 214, 214, 215, 215, 216, - 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, 224, 224, 225, 225, - 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 234, 234, - 235, 235, 236, 236, 237, 237, 238, 238, 239, 240, 240, 241, 241, 242, 242, 243, - 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, - 251, 252, 252, 253, 253, 254, 254, 255 - }; - - private final static int [] lg_table = - { - -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, - 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; + private final static int[] sqq_table = + { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, + 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, + 101, 102, 103, 104, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 123, 124, 125, 126, 128, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, + 150, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 160, 161, 162, + 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, + 175, 176, 176, 177, 178, 178, 179, 180, 181, 181, 182, 183, 183, 184, 185, 185, + 186, 187, 187, 188, 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, + 197, 197, 198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, + 207, 208, 208, 209, 209, 210, 211, 211, 212, 212, 213, 214, 214, 215, 215, 216, + 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, 224, 224, 225, 225, + 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 234, 234, + 235, 235, 236, 236, 237, 237, 238, 238, 239, 240, 240, 241, 241, 242, 242, 243, + 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, + 251, 252, 
252, 253, 253, 254, 254, 255 + }; + + private final static int[] lg_table = + { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; /* fields */ private final int ALPHABET_SIZE; private final int BUCKET_A_SIZE; private final int BUCKET_B_SIZE; - private int [] SA; - private int [] T; + private int[] SA; + private int[] T; private int start; /** @@ -173,19 +160,18 @@ public DivSufSort(int alphabetSize) *

        */ @Override - public final int [] buildSuffixArray(int [] input, int start, int length) - { + public final int[] buildSuffixArray(int[] input, int start, int length) { Tools.assertAlways(input != null, "input must not be null"); Tools.assertAlways(length >= 2, "input length must be >= 2"); MinMax mm = Tools.minmax(input, start, length); Tools.assertAlways(mm.min >= 0, "input must not be negative"); Tools.assertAlways(mm.max < ALPHABET_SIZE, "max alphabet size is " + ALPHABET_SIZE); - final int [] ret = new int [length]; + final int[] ret = new int[length]; this.SA = ret; this.T = input; - int [] bucket_A = new int [BUCKET_A_SIZE]; - int [] bucket_B = new int [BUCKET_B_SIZE]; + int[] bucket_A = new int[BUCKET_A_SIZE]; + int[] bucket_B = new int[BUCKET_B_SIZE]; this.start = start; /* Suffixsort. */ int m = sortTypeBstar(bucket_A, bucket_B, length); @@ -196,24 +182,19 @@ public DivSufSort(int alphabetSize) /** * Constructs the suffix array by using the sorted order of type B* suffixes. */ - private final void constructSuffixArray(int [] bucket_A, int [] bucket_B, int n, int m) - { + private final void constructSuffixArray(int[] bucket_A, int[] bucket_B, int n, int m) { int i, j, k; // ptr int s, c0, c1, c2; // (_c1)]) - if (0 < m) - { + if (0 < m) { /* * Construct the sorted order of type B suffixes by using the sorted order of * type B suffixes. */ - for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) - { + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { /* Scan the suffix array from right to left. 
*/ - for (i = bucket_B[(c1) * ALPHABET_SIZE + (c1 + 1)], j = bucket_A[c1 + 1] - 1, k = 0, c2 = -1; i <= j; --j) - { - if (0 < (s = SA[j])) - { + for (i = bucket_B[(c1) * ALPHABET_SIZE + (c1 + 1)], j = bucket_A[c1 + 1] - 1, k = 0, c2 = -1; i <= j; --j) { + if (0 < (s = SA[j])) { // Tools.assertAlways(T[s] == c1, ""); // Tools.assertAlways(((s + 1) < n) && (T[s] <= T[s + // 1]), @@ -221,23 +202,18 @@ private final void constructSuffixArray(int [] bucket_A, int [] bucket_B, int n, // Tools.assertAlways(T[s - 1] <= T[s], ""); SA[j] = ~s; c0 = T[start + --s]; - if ((0 < s) && (T[start + s - 1] > c0)) - { + if ((0 < s) && (T[start + s - 1] > c0)) { s = ~s; } - if (c0 != c2) - { - if (0 <= c2) - { + if (c0 != c2) { + if (0 <= c2) { bucket_B[(c1) * ALPHABET_SIZE + (c2)] = k; } k = bucket_B[(c1) * ALPHABET_SIZE + (c2 = c0)]; } // Tools.assertAlways(k < j, ""); SA[k--] = s; - } - else - { + } else { // Tools.assertAlways(((s == 0) && (T[s] == c1)) // || (s < 0), ""); SA[j] = ~s; @@ -252,26 +228,20 @@ private final void constructSuffixArray(int [] bucket_A, int [] bucket_B, int n, k = bucket_A[c2 = T[start + n - 1]]; SA[k++] = (T[start + n - 2] < c2) ? ~(n - 1) : (n - 1); /* Scan the suffix array from left to right. 
*/ - for (i = 0, j = n; i < j; ++i) - { - if (0 < (s = SA[i])) - { + for (i = 0, j = n; i < j; ++i) { + if (0 < (s = SA[i])) { // Tools.assertAlways(T[s - 1] >= T[s], ""); c0 = T[start + --s]; - if ((s == 0) || (T[start + s - 1] < c0)) - { + if ((s == 0) || (T[start + s - 1] < c0)) { s = ~s; } - if (c0 != c2) - { + if (c0 != c2) { bucket_A[c2] = k; k = bucket_A[c2 = c0]; } // Tools.assertAlways(i < k, ""); SA[k++] = s; - } - else - { + } else { // Tools.assertAlways(s < 0, ""); SA[i] = ~s; } @@ -279,10 +249,9 @@ private final void constructSuffixArray(int [] bucket_A, int [] bucket_B, int n, } /** - * - */ - private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) - { + * + */ + private final int sortTypeBstar(int[] bucket_A, int[] bucket_B, int n) { int PAb, ISAb, buf; int i, j, k, t, m, bufsize; @@ -293,22 +262,18 @@ private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) * A, B and B suffix. Moreover, store the beginning position of all type B * suffixes into the array SA. */ - for (i = n - 1, m = n, c0 = T[start + n - 1]; 0 <= i;) - { + for (i = n - 1, m = n, c0 = T[start + n - 1]; 0 <= i; ) { /* type A suffix. */ - do - { + do { ++bucket_A[c1 = c0]; } while ((0 <= --i) && ((c0 = T[start + i]) >= c1)); - if (0 <= i) - { + if (0 <= i) { /* type B suffix. */ ++bucket_B[(c0) * ALPHABET_SIZE + (c1)]; SA[--m] = i; /* type B suffix. */ - for (--i, c1 = c0; (0 <= i) && ((c0 = T[start + i]) <= c1); --i, c1 = c0) - { + for (--i, c1 = c0; (0 <= i) && ((c0 = T[start + i]) <= c1); --i, c1 = c0) { ++bucket_B[(c1) * ALPHABET_SIZE + (c0)]; } } @@ -321,26 +286,22 @@ private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) // begins with the same first two characters. // Calculate the index of start/end point of each bucket. 
- for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) - { + for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { t = i + bucket_A[c0]; bucket_A[c0] = i + j; /* start point */ i = t + bucket_B[(c0) * ALPHABET_SIZE + (c0)]; - for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) - { + for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { j += bucket_B[(c0) * ALPHABET_SIZE + (c1)]; bucket_B[(c0) * ALPHABET_SIZE + (c1)] = j; // end point i += bucket_B[(c1) * ALPHABET_SIZE + (c0)]; } } - if (0 < m) - { + if (0 < m) { // Sort the type B* suffixes by their first two characters. PAb = n - m;// SA ISAb = m;// SA - for (i = m - 2; 0 <= i; --i) - { + for (i = m - 2; 0 <= i; --i) { t = SA[PAb + i]; c0 = T[start + t]; c1 = T[start + t + 1]; @@ -356,38 +317,30 @@ private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) buf = m;// SA bufsize = n - (2 * m); - for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) - { - for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) - { + for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { i = bucket_B[(c0) * ALPHABET_SIZE + (c1)]; - if (1 < (j - i)) - { + if (1 < (j - i)) { ssSort(PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); } } } // Compute ranks of type B* substrings. - for (i = m - 1; 0 <= i; --i) - { - if (0 <= SA[i]) - { + for (i = m - 1; 0 <= i; --i) { + if (0 <= SA[i]) { j = i; - do - { + do { SA[ISAb + SA[i]] = i; } while ((0 <= --i) && (0 <= SA[i])); SA[i + 1] = i - j; - if (i <= 0) - { + if (i <= 0) { break; } } j = i; - do - { + do { SA[ISAb + (SA[i] = ~SA[i])] = j; } while (SA[--i] < 0); @@ -397,16 +350,12 @@ private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) // trsort. trSort(ISAb, m, 1); // Set the sorted order of type B* suffixes. 
- for (i = n - 1, j = m, c0 = T[start + n - 1]; 0 <= i;) - { - for (--i, c1 = c0; (0 <= i) && ((c0 = T[start + i]) >= c1); --i, c1 = c0) - { + for (i = n - 1, j = m, c0 = T[start + n - 1]; 0 <= i; ) { + for (--i, c1 = c0; (0 <= i) && ((c0 = T[start + i]) >= c1); --i, c1 = c0) { } - if (0 <= i) - { + if (0 <= i) { t = i; - for (--i, c1 = c0; (0 <= i) && ((c0 = T[start + i]) <= c1); --i, c1 = c0) - { + for (--i, c1 = c0; (0 <= i) && ((c0 = T[start + i]) <= c1); --i, c1 = c0) { } SA[SA[ISAb + --j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; } @@ -415,17 +364,14 @@ private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) // Calculate the index of start/end point of each bucket. bucket_B[(ALPHABET_SIZE - 1) * ALPHABET_SIZE + (ALPHABET_SIZE - 1)] = n; // end // point - for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) - { + for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { i = bucket_A[c0 + 1] - 1; - for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) - { + for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { t = i - bucket_B[(c1) * ALPHABET_SIZE + (c0)]; bucket_B[(c1) * ALPHABET_SIZE + (c0)] = i; // end point // Move all type B* suffixes to the correct position. 
- for (i = t, j = bucket_B[(c0) * ALPHABET_SIZE + (c1)]; j <= k; --i, --k) - { + for (i = t, j = bucket_B[(c0) * ALPHABET_SIZE + (c1)]; j <= k; --i, --k) { SA[i] = SA[k]; } } @@ -442,71 +388,57 @@ private final int sortTypeBstar(int [] bucket_A, int [] bucket_B, int n) * */ private final void ssSort(final int PA, int first, int last, int buf, int bufsize, - int depth, int n, boolean lastsuffix) - { + int depth, int n, boolean lastsuffix) { int a, b, middle, curbuf;// SA pointer int j, k, curbufsize, limit; int i; - if (lastsuffix) - { + if (lastsuffix) { ++first; } if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) - && (bufsize < (limit = ssIsqrt(last - first)))) - { - if (SS_BLOCKSIZE < limit) - { + && (bufsize < (limit = ssIsqrt(last - first)))) { + if (SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } buf = middle = last - limit; bufsize = limit; - } - else - { + } else { middle = last; limit = 0; } - for (a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) - { + for (a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { ssMintroSort(PA, a, a + SS_BLOCKSIZE, depth); curbufsize = last - (a + SS_BLOCKSIZE); curbuf = a + SS_BLOCKSIZE; - if (curbufsize <= bufsize) - { + if (curbufsize <= bufsize) { curbufsize = bufsize; curbuf = buf; } - for (b = a, k = SS_BLOCKSIZE, j = i; (j & 1) != 0; b -= k, k <<= 1, j >>= 1) - { + for (b = a, k = SS_BLOCKSIZE, j = i; (j & 1) != 0; b -= k, k <<= 1, j >>= 1) { ssSwapMerge(PA, b - k, b, b + k, curbuf, curbufsize, depth); } } ssMintroSort(PA, a, middle, depth); - for (k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) - { - if ((i & 1) != 0) - { + for (k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if ((i & 1) != 0) { ssSwapMerge(PA, a - k, a, middle, buf, bufsize, depth); a -= k; } } - if (limit != 0) - { + if (limit != 0) { ssMintroSort(PA, middle, last, depth); ssInplaceMerge(PA, first, middle, last, depth); } - if (lastsuffix) - { + if (lastsuffix) { int p1 = SA[PA + SA[first - 1]]; int 
p11 = n - 2; for (a = first, i = SA[first - 1]; (a < last) - && ((SA[a] < 0) || (0 < ssCompare(p1, p11, PA + SA[a], depth))); ++a) - { + && ((SA[a] < 0) || (0 < ssCompare(p1, p11, PA + SA[a], depth))); ++a) { SA[a - 1] = SA[a]; } SA[a - 1] = i; @@ -518,13 +450,11 @@ private final void ssSort(final int PA, int first, int last, int buf, int bufsiz * special version of ss_compare for handling * ss_compare(T, &(PAi[0]), PA + *a, depth) situation. */ - private final int ssCompare(int pa, int pb, int p2, int depth) - { + private final int ssCompare(int pa, int pb, int p2, int depth) { int U1, U2, U1n, U2n;// pointers to T for (U1 = depth + pa, U2 = depth + SA[p2], U1n = pb + 2, U2n = SA[p2 + 1] + 2; (U1 < U1n) - && (U2 < U2n) && (T[start + U1] == T[start + U2]); ++U1, ++U2) - { + && (U2 < U2n) && (T[start + U1] == T[start + U2]); ++U1, ++U2) { } return U1 < U1n ? (U2 < U2n ? T[start + U1] - T[start + U2] : 1) : (U2 < U2n ? -1 @@ -532,15 +462,13 @@ private final int ssCompare(int pa, int pb, int p2, int depth) } /** - * + * */ - private final int ssCompare(int p1, int p2, int depth) - { + private final int ssCompare(int p1, int p2, int depth) { int U1, U2, U1n, U2n;// pointers to T for (U1 = depth + SA[p1], U2 = depth + SA[p2], U1n = SA[p1 + 1] + 2, U2n = SA[p2 + 1] + 2; (U1 < U1n) - && (U2 < U2n) && (T[start + U1] == T[start + U2]); ++U1, ++U2) - { + && (U2 < U2n) && (T[start + U1] == T[start + U2]); ++U1, ++U2) { } return U1 < U1n ? (U2 < U2n ? T[start + U1] - T[start + U2] : 1) : (U2 < U2n ? 
-1 @@ -549,66 +477,51 @@ private final int ssCompare(int p1, int p2, int depth) } /** - * + * */ - private final void ssInplaceMerge(int PA, int first, int middle, int last, int depth) - { + private final void ssInplaceMerge(int PA, int first, int middle, int last, int depth) { // PA, middle, first, last are pointers to SA int p, a, b;// pointer to SA int len, half; int q, r; int x; - for (;;) - { - if (SA[last - 1] < 0) - { + for (; ; ) { + if (SA[last - 1] < 0) { x = 1; p = PA + ~SA[last - 1]; - } - else - { + } else { x = 0; p = PA + SA[last - 1]; } - for (a = first, len = middle - first, half = len >> 1, r = -1; 0 < len; len = half, half >>= 1) - { + for (a = first, len = middle - first, half = len >> 1, r = -1; 0 < len; len = half, half >>= 1) { b = a + half; q = ssCompare(PA + ((0 <= SA[b]) ? SA[b] : ~SA[b]), p, depth); - if (q < 0) - { + if (q < 0) { a = b + 1; half -= (len & 1) ^ 1; - } - else - { + } else { r = q; } } - if (a < middle) - { - if (r == 0) - { + if (a < middle) { + if (r == 0) { SA[a] = ~SA[a]; } ssRotate(a, middle, last); last -= middle - a; middle = a; - if (first == middle) - { + if (first == middle) { break; } } --last; - if (x != 0) - { - while (SA[--last] < 0) - { + if (x != 0) { + while (SA[--last] < 0) { // nop } } - if (middle == last) - { + if (middle == last) { break; } } @@ -616,37 +529,30 @@ private final void ssInplaceMerge(int PA, int first, int middle, int last, int d } /** - * - */ - private final void ssRotate(int first, int middle, int last) - { + * + */ + private final void ssRotate(int first, int middle, int last) { // first, middle, last are pointers in SA int a, b, t;// pointers in SA int l, r; l = middle - first; r = last - middle; - for (; (0 < l) && (0 < r);) - { - if (l == r) - { + for (; (0 < l) && (0 < r); ) { + if (l == r) { ssBlockSwap(first, middle, l); break; } - if (l < r) - { + if (l < r) { a = last - 1; b = middle - 1; t = SA[a]; - do - { + do { SA[a--] = SA[b]; SA[b--] = SA[a]; - if (b < first) - { + if (b 
< first) { SA[a] = t; last = a; - if ((r -= l + 1) <= l) - { + if ((r -= l + 1) <= l) { break; } a -= 1; @@ -655,22 +561,17 @@ private final void ssRotate(int first, int middle, int last) } } while (true); - } - else - { + } else { a = first; b = middle; t = SA[a]; - do - { + do { SA[a++] = SA[b]; SA[b++] = SA[a]; - if (last <= b) - { + if (last <= b) { SA[a] = t; first = a + 1; - if ((l -= r + 1) <= r) - { + if ((l -= r + 1) <= r) { break; } a += 1; @@ -684,27 +585,23 @@ private final void ssRotate(int first, int middle, int last) } /** - * + * */ - private final void ssBlockSwap(int a, int b, int n) - { + private final void ssBlockSwap(int a, int b, int n) { // a, b -- pointer to SA int t; - for (; 0 < n; --n, ++a, ++b) - { + for (; 0 < n; --n, ++a, ++b) { t = SA[a]; SA[a] = SA[b]; SA[b] = t; } } - private final static int getIDX(int a) - { + private final static int getIDX(int a) { return (0 <= (a)) ? (a) : (~(a)); } - private final static int min(int a, int b) - { + private final static int min(int a, int b) { return a < b ? a : b; } @@ -712,140 +609,110 @@ private final static int min(int a, int b) * D&C based merge. 
*/ private final void ssSwapMerge(int PA, int first, int middle, int last, int buf, - int bufsize, int depth) - { + int bufsize, int depth) { // Pa, first, middle, last and buf - pointers in SA array final int STACK_SIZE = SS_SMERGE_STACKSIZE; - StackElement [] stack = new StackElement [STACK_SIZE]; + StackElement[] stack = new StackElement[STACK_SIZE]; int l, r, lm, rm;// pointers in SA int m, len, half; int ssize; int check, next; - for (check = 0, ssize = 0;;) - { + for (check = 0, ssize = 0; ; ) { - if ((last - middle) <= bufsize) - { - if ((first < middle) && (middle < last)) - { + if ((last - middle) <= bufsize) { + if ((first < middle) && (middle < last)) { ssMergeBackward(PA, first, middle, last, buf, depth); } if (((check & 1) != 0) || (((check & 2) != 0) && (ssCompare(PA + getIDX(SA[first - 1]), PA - + SA[first], depth) == 0))) - { + + SA[first], depth) == 0))) { SA[first] = ~SA[first]; } if (((check & 4) != 0) - && ((ssCompare(PA + getIDX(SA[last - 1]), PA + SA[last], depth) == 0))) - { + && ((ssCompare(PA + getIDX(SA[last - 1]), PA + SA[last], depth) == 0))) { SA[last] = ~SA[last]; } - if (ssize > 0) - { + if (ssize > 0) { StackElement se = stack[--ssize]; first = se.a; middle = se.b; last = se.c; check = se.d; - } - else - { + } else { return; } continue; } - if ((middle - first) <= bufsize) - { - if (first < middle) - { + if ((middle - first) <= bufsize) { + if (first < middle) { ssMergeForward(PA, first, middle, last, buf, depth); } if (((check & 1) != 0) || (((check & 2) != 0) && (ssCompare(PA + getIDX(SA[first - 1]), PA - + SA[first], depth) == 0))) - { + + SA[first], depth) == 0))) { SA[first] = ~SA[first]; } if (((check & 4) != 0) - && ((ssCompare(PA + getIDX(SA[last - 1]), PA + SA[last], depth) == 0))) - { + && ((ssCompare(PA + getIDX(SA[last - 1]), PA + SA[last], depth) == 0))) { SA[last] = ~SA[last]; } - if (ssize > 0) - { + if (ssize > 0) { StackElement se = stack[--ssize]; first = se.a; middle = se.b; last = se.c; check = se.d; - } - else - 
{ + } else { return; } continue; } - for (m = 0, len = min(middle - first, last - middle), half = len >> 1; 0 < len; len = half, half >>= 1) - { + for (m = 0, len = min(middle - first, last - middle), half = len >> 1; 0 < len; len = half, half >>= 1) { if (ssCompare(PA + getIDX(SA[middle + m + half]), PA - + getIDX(SA[middle - m - half - 1]), depth) < 0) - { + + getIDX(SA[middle - m - half - 1]), depth) < 0) { m += half + 1; half -= (len & 1) ^ 1; } } - if (0 < m) - { + if (0 < m) { lm = middle - m; rm = middle + m; ssBlockSwap(lm, middle, m); l = r = middle; next = 0; - if (rm < last) - { - if (SA[rm] < 0) - { + if (rm < last) { + if (SA[rm] < 0) { SA[rm] = ~SA[rm]; - if (first < lm) - { - for (; SA[--l] < 0;) - { + if (first < lm) { + for (; SA[--l] < 0; ) { } next |= 4; } next |= 1; - } - else if (first < lm) - { - for (; SA[r] < 0; ++r) - { + } else if (first < lm) { + for (; SA[r] < 0; ++r) { } next |= 2; } } - if ((l - first) <= (last - r)) - { + if ((l - first) <= (last - r)) { stack[ssize++] = new StackElement(r, rm, last, (next & 3) | (check & 4)); middle = lm; last = l; check = (check & 3) | (next & 4); - } - else - { - if (((next & 2) != 0) && (r == middle)) - { + } else { + if (((next & 2) != 0) && (r == middle)) { next ^= 6; } stack[ssize++] = new StackElement(first, lm, l, (check & 3) @@ -855,36 +722,28 @@ else if (first < lm) middle = rm; check = (next & 3) | (check & 4); } - } - else - { - if (ssCompare(PA + getIDX(SA[middle - 1]), PA + SA[middle], depth) == 0) - { + } else { + if (ssCompare(PA + getIDX(SA[middle - 1]), PA + SA[middle], depth) == 0) { SA[middle] = ~SA[middle]; } if (((check & 1) != 0) || (((check & 2) != 0) && (ssCompare(PA + getIDX(SA[first - 1]), PA - + SA[first], depth) == 0))) - { + + SA[first], depth) == 0))) { SA[first] = ~SA[first]; } if (((check & 4) != 0) - && ((ssCompare(PA + getIDX(SA[last - 1]), PA + SA[last], depth) == 0))) - { + && ((ssCompare(PA + getIDX(SA[last - 1]), PA + SA[last], depth) == 0))) { SA[last] = 
~SA[last]; } - if (ssize > 0) - { + if (ssize > 0) { StackElement se = stack[--ssize]; first = se.a; middle = se.b; last = se.c; check = se.d; - } - else - { + } else { return; } @@ -898,8 +757,7 @@ else if (first < lm) * Merge-forward with internal buffer. */ private final void ssMergeForward(int PA, int first, int middle, int last, int buf, - int depth) - { + int depth) { // PA, first, middle, last, buf are pointers to SA int a, b, c, bufend;// pointers to SA int t, r; @@ -907,33 +765,24 @@ private final void ssMergeForward(int PA, int first, int middle, int last, int b bufend = buf + (middle - first) - 1; ssBlockSwap(buf, first, middle - first); - for (t = SA[a = first], b = buf, c = middle;;) - { + for (t = SA[a = first], b = buf, c = middle; ; ) { r = ssCompare(PA + SA[b], PA + SA[c], depth); - if (r < 0) - { - do - { + if (r < 0) { + do { SA[a++] = SA[b]; - if (bufend <= b) - { + if (bufend <= b) { SA[bufend] = t; return; } SA[b++] = SA[a]; } while (SA[b] < 0); - } - else if (r > 0) - { - do - { + } else if (r > 0) { + do { SA[a++] = SA[c]; SA[c++] = SA[a]; - if (last <= c) - { - while (b < bufend) - { + if (last <= c) { + while (b < bufend) { SA[a++] = SA[b]; SA[b++] = SA[a]; } @@ -943,15 +792,11 @@ else if (r > 0) } } while (SA[c] < 0); - } - else - { + } else { SA[c] = ~SA[c]; - do - { + do { SA[a++] = SA[b]; - if (bufend <= b) - { + if (bufend <= b) { SA[bufend] = t; return; } @@ -959,14 +804,11 @@ else if (r > 0) } while (SA[b] < 0); - do - { + do { SA[a++] = SA[c]; SA[c++] = SA[a]; - if (last <= c) - { - while (b < bufend) - { + if (last <= c) { + while (b < bufend) { SA[a++] = SA[b]; SA[b++] = SA[a]; } @@ -985,8 +827,7 @@ else if (r > 0) * Merge-backward with internal buffer. 
*/ private final void ssMergeBackward(int PA, int first, int middle, int last, int buf, - int depth) - { + int depth) { // PA, first, middle, last, buf are pointers in SA int p1, p2;// pointers in SA int a, b, c, bufend;// pointers in SA @@ -996,33 +837,23 @@ private final void ssMergeBackward(int PA, int first, int middle, int last, int ssBlockSwap(buf, middle, last - middle); x = 0; - if (SA[bufend] < 0) - { + if (SA[bufend] < 0) { p1 = PA + ~SA[bufend]; x |= 1; - } - else - { + } else { p1 = PA + SA[bufend]; } - if (SA[middle - 1] < 0) - { + if (SA[middle - 1] < 0) { p2 = PA + ~SA[middle - 1]; x |= 2; - } - else - { + } else { p2 = PA + SA[middle - 1]; } - for (t = SA[a = last - 1], b = bufend, c = middle - 1;;) - { + for (t = SA[a = last - 1], b = bufend, c = middle - 1; ; ) { r = ssCompare(p1, p2, depth); - if (0 < r) - { - if ((x & 1) != 0) - { - do - { + if (0 < r) { + if ((x & 1) != 0) { + do { SA[a--] = SA[b]; SA[b--] = SA[a]; } @@ -1030,28 +861,20 @@ private final void ssMergeBackward(int PA, int first, int middle, int last, int x ^= 1; } SA[a--] = SA[b]; - if (b <= buf) - { + if (b <= buf) { SA[buf] = t; break; } SA[b--] = SA[a]; - if (SA[b] < 0) - { + if (SA[b] < 0) { p1 = PA + ~SA[b]; x |= 1; - } - else - { + } else { p1 = PA + SA[b]; } - } - else if (r < 0) - { - if ((x & 2) != 0) - { - do - { + } else if (r < 0) { + if ((x & 2) != 0) { + do { SA[a--] = SA[c]; SA[c--] = SA[a]; } @@ -1060,10 +883,8 @@ else if (r < 0) } SA[a--] = SA[c]; SA[c--] = SA[a]; - if (c < first) - { - while (buf < b) - { + if (c < first) { + while (buf < b) { SA[a--] = SA[b]; SA[b--] = SA[a]; } @@ -1071,22 +892,15 @@ else if (r < 0) SA[b] = t; break; } - if (SA[c] < 0) - { + if (SA[c] < 0) { p2 = PA + ~SA[c]; x |= 2; - } - else - { + } else { p2 = PA + SA[c]; } - } - else - { - if ((x & 1) != 0) - { - do - { + } else { + if ((x & 1) != 0) { + do { SA[a--] = SA[b]; SA[b--] = SA[a]; } @@ -1094,16 +908,13 @@ else if (r < 0) x ^= 1; } SA[a--] = ~SA[b]; - if (b <= buf) - { + if (b <= 
buf) { SA[buf] = t; break; } SA[b--] = SA[a]; - if ((x & 2) != 0) - { - do - { + if ((x & 2) != 0) { + do { SA[a--] = SA[c]; SA[c--] = SA[a]; } @@ -1112,10 +923,8 @@ else if (r < 0) } SA[a--] = SA[c]; SA[c--] = SA[a]; - if (c < first) - { - while (buf < b) - { + if (c < first) { + while (buf < b) { SA[a--] = SA[b]; SA[b--] = SA[a]; } @@ -1123,22 +932,16 @@ else if (r < 0) SA[b] = t; break; } - if (SA[b] < 0) - { + if (SA[b] < 0) { p1 = PA + ~SA[b]; x |= 1; - } - else - { + } else { p1 = PA + SA[b]; } - if (SA[c] < 0) - { + if (SA[c] < 0) { p2 = PA + ~SA[c]; x |= 2; - } - else - { + } else { p2 = PA + SA[c]; } } @@ -1148,28 +951,22 @@ else if (r < 0) /** * Insertionsort for small size groups */ - private final void ssInsertionSort(int PA, int first, int last, int depth) - { + private final void ssInsertionSort(int PA, int first, int last, int depth) { // PA, first, last are pointers in SA int i, j;// pointers in SA int t, r; - for (i = last - 2; first <= i; --i) - { - for (t = SA[i], j = i + 1; 0 < (r = ssCompare(PA + t, PA + SA[j], depth));) - { - do - { + for (i = last - 2; first <= i; --i) { + for (t = SA[i], j = i + 1; 0 < (r = ssCompare(PA + t, PA + SA[j], depth)); ) { + do { SA[j - 1] = SA[j]; } while ((++j < last) && (SA[j] < 0)); - if (last <= j) - { + if (last <= j) { break; } } - if (r == 0) - { + if (r == 0) { SA[j] = ~SA[j]; } SA[j - 1] = t; @@ -1178,36 +975,28 @@ private final void ssInsertionSort(int PA, int first, int last, int depth) } /** - * + * */ - private final static int ssIsqrt(int x) - { + private final static int ssIsqrt(int x) { int y, e; - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) - { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } e = ((x & 0xffff0000) != 0) ? (((x & 0xff000000) != 0) ? 24 + lg_table[(x >> 24) & 0xff] : 16 + lg_table[(x >> 16) & 0xff]) : (((x & 0x0000ff00) != 0) ? 
8 + lg_table[(x >> 8) & 0xff] - : 0 + lg_table[(x >> 0) & 0xff]); + : 0 + lg_table[(x >> 0) & 0xff]); - if (e >= 16) - { + if (e >= 16) { y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); - if (e >= 24) - { + if (e >= 24) { y = (y + 1 + x / y) >> 1; } y = (y + 1 + x / y) >> 1; - } - else if (e >= 8) - { + } else if (e >= 8) { y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; - } - else - { + } else { return sqq_table[x] >> 4; } @@ -1215,35 +1004,28 @@ else if (e >= 8) } /* Multikey introsort for medium size groups. */ - private final void ssMintroSort(int PA, int first, int last, int depth) - { + private final void ssMintroSort(int PA, int first, int last, int depth) { final int STACK_SIZE = SS_MISORT_STACKSIZE; - StackElement [] stack = new StackElement [STACK_SIZE]; + StackElement[] stack = new StackElement[STACK_SIZE]; int Td;// T ptr int a, b, c, d, e, f;// SA ptr int s, t; int ssize; int limit; int v, x = 0; - for (ssize = 0, limit = ssIlg(last - first);;) - { + for (ssize = 0, limit = ssIlg(last - first); ; ) { - if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) - { - if (1 < (last - first)) - { + if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) { + if (1 < (last - first)) { ssInsertionSort(PA, first, last, depth); } - if (ssize > 0) - { + if (ssize > 0) { StackElement se = stack[--ssize]; first = se.a; last = se.b; depth = se.c; limit = se.d; - } - else - { + } else { return; } @@ -1251,19 +1033,14 @@ private final void ssMintroSort(int PA, int first, int last, int depth) } Td = depth; - if (limit-- == 0) - { + if (limit-- == 0) { ssHeapSort(Td, PA, first, last - first); } - if (limit < 0) - { - for (a = first + 1, v = T[start + Td + SA[PA + SA[first]]]; a < last; ++a) - { - if ((x = T[start + Td + SA[PA + SA[a]]]) != v) - { - if (1 < (a - first)) - { + if (limit < 0) { + for (a = first + 1, v = T[start + Td + SA[PA + SA[first]]]; a < last; ++a) { + if ((x = T[start + Td + SA[PA + SA[a]]]) != v) { + if (1 < (a - first)) { break; } v 
= x; @@ -1271,36 +1048,26 @@ private final void ssMintroSort(int PA, int first, int last, int depth) } } - if (T[start + Td + SA[PA + SA[first]] - 1] < v) - { + if (T[start + Td + SA[PA + SA[first]] - 1] < v) { first = ssPartition(PA, first, a, depth); } - if ((a - first) <= (last - a)) - { - if (1 < (a - first)) - { + if ((a - first) <= (last - a)) { + if (1 < (a - first)) { stack[ssize++] = new StackElement(a, last, depth, -1); last = a; depth += 1; limit = ssIlg(a - first); - } - else - { + } else { first = a; limit = -1; } - } - else - { - if (1 < (last - a)) - { + } else { + if (1 < (last - a)) { stack[ssize++] = new StackElement(first, a, depth + 1, ssIlg(a - first)); first = a; limit = -1; - } - else - { + } else { last = a; depth += 1; limit = ssIlg(a - first); @@ -1315,75 +1082,57 @@ private final void ssMintroSort(int PA, int first, int last, int depth) swapInSA(first, a); // partition - for (b = first; (++b < last) && ((x = T[start + Td + SA[PA + SA[b]]]) == v);) - { - } - if (((a = b) < last) && (x < v)) - { - for (; (++b < last) && ((x = T[start + Td + SA[PA + SA[b]]]) <= v);) - { - if (x == v) - { + for (b = first; (++b < last) && ((x = T[start + Td + SA[PA + SA[b]]]) == v); ) { + } + if (((a = b) < last) && (x < v)) { + for (; (++b < last) && ((x = T[start + Td + SA[PA + SA[b]]]) <= v); ) { + if (x == v) { swapInSA(b, a); ++a; } } } - for (c = last; (b < --c) && ((x = T[start + Td + SA[PA + SA[c]]]) == v);) - { + for (c = last; (b < --c) && ((x = T[start + Td + SA[PA + SA[c]]]) == v); ) { } - if ((b < (d = c)) && (x > v)) - { - for (; (b < --c) && ((x = T[start + Td + SA[PA + SA[c]]]) >= v);) - { - if (x == v) - { + if ((b < (d = c)) && (x > v)) { + for (; (b < --c) && ((x = T[start + Td + SA[PA + SA[c]]]) >= v); ) { + if (x == v) { swapInSA(c, d); --d; } } } - for (; b < c;) - { + for (; b < c; ) { swapInSA(b, c); - for (; (++b < c) && ((x = T[start + Td + SA[PA + SA[b]]]) <= v);) - { - if (x == v) - { + for (; (++b < c) && ((x = T[start + Td + 
SA[PA + SA[b]]]) <= v); ) { + if (x == v) { swapInSA(b, a); ++a; } } - for (; (b < --c) && ((x = T[start + Td + SA[PA + SA[c]]]) >= v);) - { - if (x == v) - { + for (; (b < --c) && ((x = T[start + Td + SA[PA + SA[c]]]) >= v); ) { + if (x == v) { swapInSA(c, d); --d; } } } - if (a <= d) - { + if (a <= d) { c = b - 1; - if ((s = a - first) > (t = b - a)) - { + if ((s = a - first) > (t = b - a)) { s = t; } - for (e = first, f = b - s; 0 < s; --s, ++e, ++f) - { + for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { swapInSA(e, f); } - if ((s = d - c) > (t = last - d - 1)) - { + if ((s = d - c) > (t = last - d - 1)) { s = t; } - for (e = b, f = last - s; 0 < s; --s, ++e, ++f) - { + for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { swapInSA(e, f); } @@ -1392,22 +1141,16 @@ private final void ssMintroSort(int PA, int first, int last, int depth) b = (v <= T[start + Td + SA[PA + SA[a]] - 1]) ? a : ssPartition(PA, a, c, depth); - if ((a - first) <= (last - c)) - { - if ((last - c) <= (c - b)) - { + if ((a - first) <= (last - c)) { + if ((last - c) <= (c - b)) { stack[ssize++] = new StackElement(b, c, depth + 1, ssIlg(c - b)); stack[ssize++] = new StackElement(c, last, depth, limit); last = a; - } - else if ((a - first) <= (c - b)) - { + } else if ((a - first) <= (c - b)) { stack[ssize++] = new StackElement(c, last, depth, limit); stack[ssize++] = new StackElement(b, c, depth + 1, ssIlg(c - b)); last = a; - } - else - { + } else { stack[ssize++] = new StackElement(c, last, depth, limit); stack[ssize++] = new StackElement(first, a, depth, limit); first = b; @@ -1415,23 +1158,16 @@ else if ((a - first) <= (c - b)) depth += 1; limit = ssIlg(c - b); } - } - else - { - if ((a - first) <= (c - b)) - { + } else { + if ((a - first) <= (c - b)) { stack[ssize++] = new StackElement(b, c, depth + 1, ssIlg(c - b)); stack[ssize++] = new StackElement(first, a, depth, limit); first = c; - } - else if ((last - c) <= (c - b)) - { + } else if ((last - c) <= (c - b)) { stack[ssize++] = new 
StackElement(first, a, depth, limit); stack[ssize++] = new StackElement(b, c, depth + 1, ssIlg(c - b)); first = c; - } - else - { + } else { stack[ssize++] = new StackElement(first, a, depth, limit); stack[ssize++] = new StackElement(c, last, depth, limit); first = b; @@ -1441,12 +1177,9 @@ else if ((last - c) <= (c - b)) } } - } - else - { + } else { limit += 1; - if (T[start + Td + SA[PA + SA[first]] - 1] < v) - { + if (T[start + Td + SA[PA + SA[first]] - 1] < v) { first = ssPartition(PA, first, last, depth); limit = ssIlg(last - first); } @@ -1460,20 +1193,15 @@ else if ((last - c) <= (c - b)) /** * Returns the pivot element. */ - private final int ssPivot(int Td, int PA, int first, int last) - { + private final int ssPivot(int Td, int PA, int first, int last) { int middle;// SA pointer int t = last - first; middle = first + t / 2; - if (t <= 512) - { - if (t <= 32) - { + if (t <= 512) { + if (t <= 32) { return ssMedian3(Td, PA, first, middle, last - 1); - } - else - { + } else { t >>= 2; return ssMedian5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); } @@ -1488,24 +1216,20 @@ private final int ssPivot(int Td, int PA, int first, int last) /** * Returns the median of five elements */ - private final int ssMedian5(int Td, int PA, int v1, int v2, int v3, int v4, int v5) - { + private final int ssMedian5(int Td, int PA, int v1, int v2, int v3, int v4, int v5) { int t; - if (T[start + Td + SA[PA + SA[v2]]] > T[start + Td + SA[PA + SA[v3]]]) - { + if (T[start + Td + SA[PA + SA[v2]]] > T[start + Td + SA[PA + SA[v3]]]) { t = v2; v2 = v3; v3 = t; } - if (T[start + Td + SA[PA + SA[v4]]] > T[start + Td + SA[PA + SA[v5]]]) - { + if (T[start + Td + SA[PA + SA[v4]]] > T[start + Td + SA[PA + SA[v5]]]) { t = v4; v4 = v5; v5 = t; } - if (T[start + Td + SA[PA + SA[v2]]] > T[start + Td + SA[PA + SA[v4]]]) - { + if (T[start + Td + SA[PA + SA[v2]]] > T[start + Td + SA[PA + SA[v4]]]) { t = v2; v2 = v4; v4 = t; @@ -1513,14 +1237,12 @@ private final int ssMedian5(int Td, 
int PA, int v1, int v2, int v3, int v4, int v3 = v5; v5 = t; } - if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v3]]]) - { + if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v3]]]) { t = v1; v1 = v3; v3 = t; } - if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v4]]]) - { + if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v4]]]) { t = v1; v1 = v4; v4 = t; @@ -1528,8 +1250,7 @@ private final int ssMedian5(int Td, int PA, int v1, int v2, int v3, int v4, int v3 = v5; v5 = t; } - if (T[start + Td + SA[PA + SA[v3]]] > T[start + Td + SA[PA + SA[v4]]]) - { + if (T[start + Td + SA[PA + SA[v3]]] > T[start + Td + SA[PA + SA[v4]]]) { return v4; } return v3; @@ -1538,22 +1259,16 @@ private final int ssMedian5(int Td, int PA, int v1, int v2, int v3, int v4, int /** * Returns the median of three elements. */ - private final int ssMedian3(int Td, int PA, int v1, int v2, int v3) - { - if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v2]]]) - { + private final int ssMedian3(int Td, int PA, int v1, int v2, int v3) { + if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v2]]]) { int t = v1; v1 = v2; v2 = t; } - if (T[start + Td + SA[PA + SA[v2]]] > T[start + Td + SA[PA + SA[v3]]]) - { - if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v3]]]) - { + if (T[start + Td + SA[PA + SA[v2]]] > T[start + Td + SA[PA + SA[v3]]]) { + if (T[start + Td + SA[PA + SA[v1]]] > T[start + Td + SA[PA + SA[v3]]]) { return v1; - } - else - { + } else { return v3; } } @@ -1563,29 +1278,23 @@ private final int ssMedian3(int Td, int PA, int v1, int v2, int v3) /** * Binary partition for substrings. 
*/ - private final int ssPartition(int PA, int first, int last, int depth) - { + private final int ssPartition(int PA, int first, int last, int depth) { int a, b;// SA pointer int t; - for (a = first - 1, b = last;;) - { - for (; (++a < b) && ((SA[PA + SA[a]] + depth) >= (SA[PA + SA[a] + 1] + 1));) - { + for (a = first - 1, b = last; ; ) { + for (; (++a < b) && ((SA[PA + SA[a]] + depth) >= (SA[PA + SA[a] + 1] + 1)); ) { SA[a] = ~SA[a]; } - for (; (a < --b) && ((SA[PA + SA[b]] + depth) < (SA[PA + SA[b] + 1] + 1));) - { + for (; (a < --b) && ((SA[PA + SA[b]] + depth) < (SA[PA + SA[b] + 1] + 1)); ) { } - if (b <= a) - { + if (b <= a) { break; } t = ~SA[b]; SA[b] = SA[a]; SA[a] = t; } - if (first < a) - { + if (first < a) { SA[first] = ~SA[first]; } return a; @@ -1594,32 +1303,26 @@ private final int ssPartition(int PA, int first, int last, int depth) /** * Simple top-down heapsort. */ - private final void ssHeapSort(int Td, int PA, int sa, int size) - { + private final void ssHeapSort(int Td, int PA, int sa, int size) { int i, m, t; m = size; - if ((size % 2) == 0) - { + if ((size % 2) == 0) { m--; if (T[start + Td + SA[PA + SA[sa + (m / 2)]]] < T[start + Td - + SA[PA + SA[sa + m]]]) - { + + SA[PA + SA[sa + m]]]) { swapInSA(sa + m, sa + (m / 2)); } } - for (i = m / 2 - 1; 0 <= i; --i) - { + for (i = m / 2 - 1; 0 <= i; --i) { ssFixDown(Td, PA, sa, i, m); } - if ((size % 2) == 0) - { + if ((size % 2) == 0) { swapInSA(sa, sa + m); ssFixDown(Td, PA, sa, 0, m); } - for (i = m - 1; 0 < i; --i) - { + for (i = m - 1; 0 < i; --i) { t = SA[sa]; SA[sa] = SA[sa + i]; ssFixDown(Td, PA, sa, 0, i); @@ -1629,25 +1332,21 @@ private final void ssHeapSort(int Td, int PA, int sa, int size) } /** - * + * */ - private final void ssFixDown(int Td, int PA, int sa, int i, int size) - { + private final void ssFixDown(int Td, int PA, int sa, int i, int size) { int j, k; int v; int c, d, e; for (v = SA[sa + i], c = T[start + Td + SA[PA + v]]; (j = 2 * i + 1) < size; SA[sa - + i] = SA[sa + k], i 
= k) - { + + i] = SA[sa + k], i = k) { d = T[start + Td + SA[PA + SA[sa + (k = j++)]]]; - if (d < (e = T[start + Td + SA[PA + SA[sa + j]]])) - { + if (d < (e = T[start + Td + SA[PA + SA[sa + j]]])) { k = j; d = e; } - if (d <= c) - { + if (d <= c) { break; } } @@ -1658,18 +1357,16 @@ private final void ssFixDown(int Td, int PA, int sa, int i, int size) /** * */ - private final static int ssIlg(int n) - { + private final static int ssIlg(int n) { return ((n & 0xff00) != 0) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]; } /** - * + * */ - private final void swapInSA(int a, int b) - { + private final void swapInSA(int a, int b) { int tmp = SA[a]; SA[a] = SA[b]; SA[b] = tmp; @@ -1678,59 +1375,44 @@ private final void swapInSA(int a, int b) /** * Tandem repeat sort */ - private final void trSort(int ISA, int n, int depth) - { + private final void trSort(int ISA, int n, int depth) { TRBudget budget = new TRBudget(trIlg(n) * 2 / 3, n); int ISAd; int first, last;// SA pointers int t, skip, unsorted; - for (ISAd = ISA + depth; -n < SA[0]; ISAd += ISAd - ISA) - { + for (ISAd = ISA + depth; -n < SA[0]; ISAd += ISAd - ISA) { first = 0; skip = 0; unsorted = 0; - do - { - if ((t = SA[first]) < 0) - { + do { + if ((t = SA[first]) < 0) { first -= t; skip += t; - } - else - { - if (skip != 0) - { + } else { + if (skip != 0) { SA[first + skip] = skip; skip = 0; } last = SA[ISA + t] + 1; - if (1 < (last - first)) - { + if (1 < (last - first)) { budget.count = 0; trIntroSort(ISA, ISAd, first, last, budget); - if (budget.count != 0) - { + if (budget.count != 0) { unsorted += budget.count; - } - else - { + } else { skip = first - last; } - } - else if ((last - first) == 1) - { + } else if ((last - first) == 1) { skip = -1; } first = last; } } while (first < n); - if (skip != 0) - { + if (skip != 0) { SA[first + skip] = skip; } - if (unsorted == 0) - { + if (unsorted == 0) { break; } } @@ -1740,77 +1422,58 @@ else if ((last - first) == 1) * */ private final 
TRPartitionResult trPartition(int ISAd, int first, int middle, - int last, int pa, int pb, int v) - { + int last, int pa, int pb, int v) { int a, b, c, d, e, f;// ptr int t, s, x = 0; - for (b = middle - 1; (++b < last) && ((x = SA[ISAd + SA[b]]) == v);) - { + for (b = middle - 1; (++b < last) && ((x = SA[ISAd + SA[b]]) == v); ) { } - if (((a = b) < last) && (x < v)) - { - for (; (++b < last) && ((x = SA[ISAd + SA[b]]) <= v);) - { - if (x == v) - { + if (((a = b) < last) && (x < v)) { + for (; (++b < last) && ((x = SA[ISAd + SA[b]]) <= v); ) { + if (x == v) { swapInSA(a, b); ++a; } } } - for (c = last; (b < --c) && ((x = SA[ISAd + SA[c]]) == v);) - { + for (c = last; (b < --c) && ((x = SA[ISAd + SA[c]]) == v); ) { } - if ((b < (d = c)) && (x > v)) - { - for (; (b < --c) && ((x = SA[ISAd + SA[c]]) >= v);) - { - if (x == v) - { + if ((b < (d = c)) && (x > v)) { + for (; (b < --c) && ((x = SA[ISAd + SA[c]]) >= v); ) { + if (x == v) { swapInSA(c, d); --d; } } } - for (; b < c;) - { + for (; b < c; ) { swapInSA(c, b); - for (; (++b < c) && ((x = SA[ISAd + SA[b]]) <= v);) - { - if (x == v) - { + for (; (++b < c) && ((x = SA[ISAd + SA[b]]) <= v); ) { + if (x == v) { swapInSA(a, b); ++a; } } - for (; (b < --c) && ((x = SA[ISAd + SA[c]]) >= v);) - { - if (x == v) - { + for (; (b < --c) && ((x = SA[ISAd + SA[c]]) >= v); ) { + if (x == v) { swapInSA(c, d); --d; } } } - if (a <= d) - { + if (a <= d) { c = b - 1; - if ((s = a - first) > (t = b - a)) - { + if ((s = a - first) > (t = b - a)) { s = t; } - for (e = first, f = b - s; 0 < s; --s, ++e, ++f) - { + for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { swapInSA(e, f); } - if ((s = d - c) > (t = last - d - 1)) - { + if ((s = d - c) > (t = last - d - 1)) { s = t; } - for (e = b, f = last - s; 0 < s; --s, ++e, ++f) - { + for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { swapInSA(e, f); } first += (b - a); @@ -1819,247 +1482,181 @@ private final TRPartitionResult trPartition(int ISAd, int first, int middle, return new 
TRPartitionResult(first, last); } - private final void trIntroSort(int ISA, int ISAd, int first, int last, TRBudget budget) - { + private final void trIntroSort(int ISA, int ISAd, int first, int last, TRBudget budget) { final int STACK_SIZE = TR_STACKSIZE; - StackElement [] stack = new StackElement [STACK_SIZE]; + StackElement[] stack = new StackElement[STACK_SIZE]; int a = 0, b = 0, c;// pointers int v, x = 0; int incr = ISAd - ISA; int limit, next; int ssize, trlink = -1; - for (ssize = 0, limit = trIlg(last - first);;) - { - if (limit < 0) - { - if (limit == -1) - { + for (ssize = 0, limit = trIlg(last - first); ; ) { + if (limit < 0) { + if (limit == -1) { /* tandem repeat partition */ TRPartitionResult res = trPartition(ISAd - incr, first, first, last, a, b, last - 1); a = res.a; b = res.b; /* update ranks */ - if (a < last) - { - for (c = first, v = a - 1; c < a; ++c) - { + if (a < last) { + for (c = first, v = a - 1; c < a; ++c) { SA[ISA + SA[c]] = v; } } - if (b < last) - { - for (c = a, v = b - 1; c < b; ++c) - { + if (b < last) { + for (c = a, v = b - 1; c < b; ++c) { SA[ISA + SA[c]] = v; } } /* push */ - if (1 < (b - a)) - { + if (1 < (b - a)) { stack[ssize++] = new StackElement(0, a, b, 0, 0); stack[ssize++] = new StackElement(ISAd - incr, first, last, -2, trlink); trlink = ssize - 2; } - if ((a - first) <= (last - b)) - { - if (1 < (a - first)) - { + if ((a - first) <= (last - b)) { + if (1 < (a - first)) { stack[ssize++] = new StackElement(ISAd, b, last, trIlg(last - b), trlink); last = a; limit = trIlg(a - first); - } - else if (1 < (last - b)) - { + } else if (1 < (last - b)) { first = b; limit = trIlg(last - b); - } - else - { - if (ssize > 0) - { + } else { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } - } - else - { - if (1 < (last - b)) - { + } else { + if (1 < (last - b)) { stack[ssize++] = new StackElement(ISAd, first, a, trIlg(a - 
first), trlink); first = b; limit = trIlg(last - b); - } - else if (1 < (a - first)) - { + } else if (1 < (a - first)) { last = a; limit = trIlg(a - first); - } - else - { - if (ssize > 0) - { + } else { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } } - } - else if (limit == -2) - { + } else if (limit == -2) { /* tandem repeat copy */ StackElement se = stack[--ssize]; a = se.b; b = se.c; - if (stack[ssize].d == 0) - { + if (stack[ssize].d == 0) { trCopy(ISA, first, a, b, last, ISAd - ISA); - } - else - { - if (0 <= trlink) - { + } else { + if (0 <= trlink) { stack[trlink].d = -1; } trPartialCopy(ISA, first, a, b, last, ISAd - ISA); } - if (ssize > 0) - { + if (ssize > 0) { se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } - } - else - { + } else { /* sorted partition */ - if (0 <= SA[first]) - { + if (0 <= SA[first]) { a = first; - do - { + do { SA[ISA + SA[a]] = a; } while ((++a < last) && (0 <= SA[a])); first = a; } - if (first < last) - { + if (first < last) { a = first; - do - { + do { SA[a] = ~SA[a]; } while (SA[++a] < 0); next = (SA[ISA + SA[a]] != SA[ISAd + SA[a]]) ? 
trIlg(a - first + 1) : -1; - if (++a < last) - { - for (b = first, v = a - 1; b < a; ++b) - { + if (++a < last) { + for (b = first, v = a - 1; b < a; ++b) { SA[ISA + SA[b]] = v; } } /* push */ - if (budget.check(a - first) != 0) - { - if ((a - first) <= (last - a)) - { + if (budget.check(a - first) != 0) { + if ((a - first) <= (last - a)) { stack[ssize++] = new StackElement(ISAd, a, last, -3, trlink); ISAd += incr; last = a; limit = next; - } - else - { - if (1 < (last - a)) - { + } else { + if (1 < (last - a)) { stack[ssize++] = new StackElement(ISAd + incr, first, a, next, trlink); first = a; limit = -3; - } - else - { + } else { ISAd += incr; last = a; limit = next; } } - } - else - { - if (0 <= trlink) - { + } else { + if (0 <= trlink) { stack[trlink].d = -1; } - if (1 < (last - a)) - { + if (1 < (last - a)) { first = a; limit = -3; - } - else - { - if (ssize > 0) - { + } else { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } } - } - else - { - if (ssize > 0) - { + } else { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } @@ -2067,21 +1664,17 @@ else if (limit == -2) continue; } - if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) - { + if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) { trInsertionSort(ISAd, first, last); limit = -3; continue; } - if (limit-- == 0) - { + if (limit-- == 0) { trHeapSort(ISAd, first, last - first); - for (a = last - 1; first < a; a = b) - { + for (a = last - 1; first < a; a = b) { for (x = SA[ISAd + SA[a]], b = a - 1; (first <= b) - && (SA[ISAd + SA[b]] == x); --b) - { + && (SA[ISAd + SA[b]] == x); --b) { SA[b] = ~SA[b]; } } @@ -2098,64 +1691,47 @@ else if (limit == -2) a = res.a; b = res.b; - if ((last - first) != (b - a)) - { + if ((last - first) != (b - a)) { next = (SA[ISA + SA[a]] != v) ? 
trIlg(b - a) : -1; /* update ranks */ - for (c = first, v = a - 1; c < a; ++c) - { + for (c = first, v = a - 1; c < a; ++c) { SA[ISA + SA[c]] = v; } - if (b < last) - { - for (c = a, v = b - 1; c < b; ++c) - { + if (b < last) { + for (c = a, v = b - 1; c < b; ++c) { SA[ISA + SA[c]] = v; } } /* push */ - if ((1 < (b - a)) && ((budget.check(b - a) != 0))) - { - if ((a - first) <= (last - b)) - { - if ((last - b) <= (b - a)) - { - if (1 < (a - first)) - { + if ((1 < (b - a)) && ((budget.check(b - a) != 0))) { + if ((a - first) <= (last - b)) { + if ((last - b) <= (b - a)) { + if (1 < (a - first)) { stack[ssize++] = new StackElement(ISAd + incr, a, b, next, trlink); stack[ssize++] = new StackElement(ISAd, b, last, limit, trlink); last = a; - } - else if (1 < (last - b)) - { + } else if (1 < (last - b)) { stack[ssize++] = new StackElement(ISAd + incr, a, b, next, trlink); first = b; - } - else - { + } else { ISAd += incr; first = a; last = b; limit = next; } - } - else if ((a - first) <= (b - a)) - { - if (1 < (a - first)) - { + } else if ((a - first) <= (b - a)) { + if (1 < (a - first)) { stack[ssize++] = new StackElement(ISAd, b, last, limit, trlink); stack[ssize++] = new StackElement(ISAd + incr, a, b, next, trlink); last = a; - } - else - { + } else { stack[ssize++] = new StackElement(ISAd, b, last, limit, trlink); ISAd += incr; @@ -2163,9 +1739,7 @@ else if ((a - first) <= (b - a)) last = b; limit = next; } - } - else - { + } else { stack[ssize++] = new StackElement(ISAd, b, last, limit, trlink); stack[ssize++] = new StackElement(ISAd, first, a, limit, @@ -2175,45 +1749,32 @@ else if ((a - first) <= (b - a)) last = b; limit = next; } - } - else - { - if ((a - first) <= (b - a)) - { - if (1 < (last - b)) - { + } else { + if ((a - first) <= (b - a)) { + if (1 < (last - b)) { stack[ssize++] = new StackElement(ISAd + incr, a, b, next, trlink); stack[ssize++] = new StackElement(ISAd, first, a, limit, trlink); first = b; - } - else if (1 < (a - first)) - { + } else if (1 
< (a - first)) { stack[ssize++] = new StackElement(ISAd + incr, a, b, next, trlink); last = a; - } - else - { + } else { ISAd += incr; first = a; last = b; limit = next; } - } - else if ((last - b) <= (b - a)) - { - if (1 < (last - b)) - { + } else if ((last - b) <= (b - a)) { + if (1 < (last - b)) { stack[ssize++] = new StackElement(ISAd, first, a, limit, trlink); stack[ssize++] = new StackElement(ISAd + incr, a, b, next, trlink); first = b; - } - else - { + } else { stack[ssize++] = new StackElement(ISAd, first, a, limit, trlink); ISAd += incr; @@ -2221,9 +1782,7 @@ else if ((last - b) <= (b - a)) last = b; limit = next; } - } - else - { + } else { stack[ssize++] = new StackElement(ISAd, first, a, limit, trlink); stack[ssize++] = new StackElement(ISAd, b, last, limit, @@ -2234,97 +1793,66 @@ else if ((last - b) <= (b - a)) limit = next; } } - } - else - { - if ((1 < (b - a)) && (0 <= trlink)) - { + } else { + if ((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } - if ((a - first) <= (last - b)) - { - if (1 < (a - first)) - { + if ((a - first) <= (last - b)) { + if (1 < (a - first)) { stack[ssize++] = new StackElement(ISAd, b, last, limit, trlink); last = a; - } - else if (1 < (last - b)) - { + } else if (1 < (last - b)) { first = b; - } - else - { - if (ssize > 0) - { + } else { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } - } - else - { - if (1 < (last - b)) - { + } else { + if (1 < (last - b)) { stack[ssize++] = new StackElement(ISAd, first, a, limit, trlink); first = b; - } - else if (1 < (a - first)) - { + } else if (1 < (a - first)) { last = a; - } - else - { - if (ssize > 0) - { + } else { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } } } - } - else - { - if (budget.check(last - first) != 0) - { + } else { + if 
(budget.check(last - first) != 0) { limit = trIlg(last - first); ISAd += incr; - } - else - { - if (0 <= trlink) - { + } else { + if (0 <= trlink) { stack[trlink].d = -1; } - if (ssize > 0) - { + if (ssize > 0) { StackElement se = stack[--ssize]; ISAd = se.a; first = se.b; last = se.c; limit = se.d; trlink = se.e; - } - else - { + } else { return; } } @@ -2337,22 +1865,17 @@ else if (1 < (a - first)) /** * Returns the pivot element. */ - private final int trPivot(int ISAd, int first, int last) - { + private final int trPivot(int ISAd, int first, int last) { int middle; int t; t = last - first; middle = first + t / 2; - if (t <= 512) - { - if (t <= 32) - { + if (t <= 512) { + if (t <= 32) { return trMedian3(ISAd, first, middle, last - 1); - } - else - { + } else { t >>= 2; return trMedian5(ISAd, first, first + t, middle, last - 1 - t, last - 1); } @@ -2367,23 +1890,19 @@ private final int trPivot(int ISAd, int first, int last) /** * Returns the median of five elements. */ - private final int trMedian5(int ISAd, int v1, int v2, int v3, int v4, int v5) - { + private final int trMedian5(int ISAd, int v1, int v2, int v3, int v4, int v5) { int t; - if (SA[ISAd + SA[v2]] > SA[ISAd + SA[v3]]) - { + if (SA[ISAd + SA[v2]] > SA[ISAd + SA[v3]]) { t = v2; v2 = v3; v3 = t; } - if (SA[ISAd + SA[v4]] > SA[ISAd + SA[v5]]) - { + if (SA[ISAd + SA[v4]] > SA[ISAd + SA[v5]]) { t = v4; v4 = v5; v5 = t; } - if (SA[ISAd + SA[v2]] > SA[ISAd + SA[v4]]) - { + if (SA[ISAd + SA[v2]] > SA[ISAd + SA[v4]]) { t = v2; v2 = v4; v4 = t; @@ -2391,14 +1910,12 @@ private final int trMedian5(int ISAd, int v1, int v2, int v3, int v4, int v5) v3 = v5; v5 = t; } - if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v3]]) - { + if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v3]]) { t = v1; v1 = v3; v3 = t; } - if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v4]]) - { + if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v4]]) { t = v1; v1 = v4; v4 = t; @@ -2406,8 +1923,7 @@ private final int trMedian5(int ISAd, int v1, int v2, int v3, int v4, int v5) v3 = v5; 
v5 = t; } - if (SA[ISAd + SA[v3]] > SA[ISAd + SA[v4]]) - { + if (SA[ISAd + SA[v3]] > SA[ISAd + SA[v4]]) { return v4; } return v3; @@ -2416,22 +1932,16 @@ private final int trMedian5(int ISAd, int v1, int v2, int v3, int v4, int v5) /** * Returns the median of three elements. */ - private final int trMedian3(int ISAd, int v1, int v2, int v3) - { - if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v2]]) - { + private final int trMedian3(int ISAd, int v1, int v2, int v3) { + if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v2]]) { int t = v1; v1 = v2; v2 = t; } - if (SA[ISAd + SA[v2]] > SA[ISAd + SA[v3]]) - { - if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v3]]) - { + if (SA[ISAd + SA[v2]] > SA[ISAd + SA[v3]]) { + if (SA[ISAd + SA[v1]] > SA[ISAd + SA[v3]]) { return v1; - } - else - { + } else { return v3; } } @@ -2439,33 +1949,27 @@ private final int trMedian3(int ISAd, int v1, int v2, int v3) } /** - * + * */ - private final void trHeapSort(int ISAd, int sa, int size) - { + private final void trHeapSort(int ISAd, int sa, int size) { int i, m, t; m = size; - if ((size % 2) == 0) - { + if ((size % 2) == 0) { m--; - if (SA[ISAd + SA[sa + m / 2]] < SA[ISAd + SA[sa + m]]) - { + if (SA[ISAd + SA[sa + m / 2]] < SA[ISAd + SA[sa + m]]) { swapInSA(sa + m, sa + m / 2); } } - for (i = m / 2 - 1; 0 <= i; --i) - { + for (i = m / 2 - 1; 0 <= i; --i) { trFixDown(ISAd, sa, i, m); } - if ((size % 2) == 0) - { + if ((size % 2) == 0) { swapInSA(sa, sa + m); trFixDown(ISAd, sa, 0, m); } - for (i = m - 1; 0 < i; --i) - { + for (i = m - 1; 0 < i; --i) { t = SA[sa]; SA[sa] = SA[sa + i]; trFixDown(ISAd, sa, 0, i); @@ -2475,25 +1979,21 @@ private final void trHeapSort(int ISAd, int sa, int size) } /** - * + * */ - private final void trFixDown(int ISAd, int sa, int i, int size) - { + private final void trFixDown(int ISAd, int sa, int i, int size) { int j, k; int v; int c, d, e; for (v = SA[sa + i], c = SA[ISAd + v]; (j = 2 * i + 1) < size; SA[sa + i] = SA[sa - + k], i = k) - { + + k], i = k) { d = SA[ISAd + SA[sa + (k = 
j++)]]; - if (d < (e = SA[ISAd + SA[sa + j]])) - { + if (d < (e = SA[ISAd + SA[sa + j]])) { k = j; d = e; } - if (d <= c) - { + if (d <= c) { break; } } @@ -2503,27 +2003,21 @@ private final void trFixDown(int ISAd, int sa, int i, int size) /** */ - private final void trInsertionSort(int ISAd, int first, int last) - { + private final void trInsertionSort(int ISAd, int first, int last) { int a, b;// SA ptr int t, r; - for (a = first + 1; a < last; ++a) - { - for (t = SA[a], b = a - 1; 0 > (r = SA[ISAd + t] - SA[ISAd + SA[b]]);) - { - do - { + for (a = first + 1; a < last; ++a) { + for (t = SA[a], b = a - 1; 0 > (r = SA[ISAd + t] - SA[ISAd + SA[b]]); ) { + do { SA[b + 1] = SA[b]; } while ((first <= --b) && (SA[b] < 0)); - if (b < first) - { + if (b < first) { break; } } - if (r == 0) - { + if (r == 0) { SA[b] = ~SA[b]; } SA[b + 1] = t; @@ -2533,22 +2027,18 @@ private final void trInsertionSort(int ISAd, int first, int last) /** */ - private final void trPartialCopy(int ISA, int first, int a, int b, int last, int depth) - { + private final void trPartialCopy(int ISA, int first, int a, int b, int last, int depth) { int c, d, e;// ptr int s, v; int rank, lastrank, newrank = -1; v = b - 1; lastrank = -1; - for (c = first, d = a - 1; c <= d; ++c) - { - if ((0 <= (s = SA[c] - depth)) && (SA[ISA + s] == v)) - { + for (c = first, d = a - 1; c <= d; ++c) { + if ((0 <= (s = SA[c] - depth)) && (SA[ISA + s] == v)) { SA[++d] = s; rank = SA[ISA + s + depth]; - if (lastrank != rank) - { + if (lastrank != rank) { lastrank = rank; newrank = d; } @@ -2557,29 +2047,23 @@ private final void trPartialCopy(int ISA, int first, int a, int b, int last, int } lastrank = -1; - for (e = d; first <= e; --e) - { + for (e = d; first <= e; --e) { rank = SA[ISA + SA[e]]; - if (lastrank != rank) - { + if (lastrank != rank) { lastrank = rank; newrank = e; } - if (newrank != rank) - { + if (newrank != rank) { SA[ISA + SA[e]] = newrank; } } lastrank = -1; - for (c = last - 1, e = d + 1, d = b; e < d; 
--c) - { - if ((0 <= (s = SA[c] - depth)) && (SA[ISA + s] == v)) - { + for (c = last - 1, e = d + 1, d = b; e < d; --c) { + if ((0 <= (s = SA[c] - depth)) && (SA[ISA + s] == v)) { SA[--d] = s; rank = SA[ISA + s + depth]; - if (lastrank != rank) - { + if (lastrank != rank) { lastrank = rank; newrank = d; } @@ -2593,26 +2077,21 @@ private final void trPartialCopy(int ISA, int first, int a, int b, int last, int * sort suffixes of middle partition by using sorted order of suffixes of left and * right partition. */ - private final void trCopy(int ISA, int first, int a, int b, int last, int depth) - { + private final void trCopy(int ISA, int first, int a, int b, int last, int depth) { int c, d, e;// ptr int s, v; v = b - 1; - for (c = first, d = a - 1; c <= d; ++c) - { + for (c = first, d = a - 1; c <= d; ++c) { s = SA[c] - depth; - if ((0 <= s) && (SA[ISA + s] == v)) - { + if ((0 <= s) && (SA[ISA + s] == v)) { SA[++d] = s; SA[ISA + s] = d; } } - for (c = last - 1, e = d + 1, d = b; e < d; --c) - { + for (c = last - 1, e = d + 1, d = b; e < d; --c) { s = SA[c] - depth; - if ((0 <= s) && (SA[ISA + s] == v)) - { + if ((0 <= s) && (SA[ISA + s] == v)) { SA[--d] = s; SA[ISA + s] = d; } @@ -2622,12 +2101,11 @@ private final void trCopy(int ISA, int first, int a, int b, int last, int depth) /** * */ - private final static int trIlg(int n) - { + private final static int trIlg(int n) { return ((n & 0xffff0000) != 0) ? (((n & 0xff000000) != 0) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) : (((n & 0x0000ff00) != 0) ? 
8 + lg_table[(n >> 8) & 0xff] - : 0 + lg_table[(n >> 0) & 0xff]); + : 0 + lg_table[(n >> 0) & 0xff]); } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ExtraTrailingCellsDecorator.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ExtraTrailingCellsDecorator.java index 51804d48a..cd190322e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ExtraTrailingCellsDecorator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ExtraTrailingCellsDecorator.java @@ -10,35 +10,31 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public final class ExtraTrailingCellsDecorator implements ISuffixArrayBuilder -{ +public final class ExtraTrailingCellsDecorator implements ISuffixArrayBuilder { private final ISuffixArrayBuilder delegate; private final int extraCells; /** * @see SuffixArrays#MAX_EXTRA_TRAILING_SPACE */ - public ExtraTrailingCellsDecorator(ISuffixArrayBuilder delegate, int extraCells) - { + public ExtraTrailingCellsDecorator(ISuffixArrayBuilder delegate, int extraCells) { this.delegate = delegate; this.extraCells = extraCells; } /* - * + * */ @Override - public int [] buildSuffixArray(int [] input, final int start, final int length) - { - if (start == 0 && start + length + extraCells < input.length) - { + public int[] buildSuffixArray(int[] input, final int start, final int length) { + if (start == 0 && start + length + extraCells < input.length) { return delegate.buildSuffixArray(input, start, length); } - final int [] shifted = new int [input.length + extraCells]; + final int[] shifted = new int[input.length + extraCells]; System.arraycopy(input, start, shifted, 0, length); - final int [] SA = delegate.buildSuffixArray(shifted, 0, length); + final int[] SA = delegate.buildSuffixArray(shifted, 0, length); return SA; } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/GenericArrayAdapter.java 
b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/GenericArrayAdapter.java index e709ed8a3..9dcd17ce1 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/GenericArrayAdapter.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/GenericArrayAdapter.java @@ -11,60 +11,60 @@ */ class GenericArrayAdapter { - private final ISuffixArrayBuilder delegate; - int[] input; - TreeMap tokIDs; - private final Comparator comparator; + private final ISuffixArrayBuilder delegate; + int[] input; + TreeMap tokIDs; + private final Comparator comparator; - public GenericArrayAdapter(ISuffixArrayBuilder builder) { - // TODO make sure T is comparable - this.delegate = builder; - this.comparator = null; - } + public GenericArrayAdapter(ISuffixArrayBuilder builder) { + // TODO make sure T is comparable + this.delegate = builder; + this.comparator = null; + } - public GenericArrayAdapter(ISuffixArrayBuilder builder, Comparator comparator) { - // TODO make sure that comparator != null or T is comparable - this.delegate = builder; - this.comparator = comparator; - } + public GenericArrayAdapter(ISuffixArrayBuilder builder, Comparator comparator) { + // TODO make sure that comparator != null or T is comparable + this.delegate = builder; + this.comparator = comparator; + } - /** - * Construct a suffix array for a given generic token array. - */ - public int[] buildSuffixArray(T[] tokens) { - final int length = tokens.length; + /** + * Construct a suffix array for a given generic token array. + */ + public int[] buildSuffixArray(T[] tokens) { + final int length = tokens.length; /* * Allocate slightly more space, some suffix construction strategies need it and * we don't want to waste space for multiple symbol mappings. 
*/ - this.input = new int[length + SuffixArrays.MAX_EXTRA_TRAILING_SPACE]; + this.input = new int[length + SuffixArrays.MAX_EXTRA_TRAILING_SPACE]; - //System.out.println("Renaming tokens ..."); + //System.out.println("Renaming tokens ..."); /* * Here we create a mapping for the token to an integer id which we * can use in the suffax array construction algorithm. */ - this.tokIDs = new TreeMap(comparator); + this.tokIDs = new TreeMap(comparator); - // put and order all tokens in tokIDs - for (int i = 0; i < length; i++) { - tokIDs.put(tokens[i], null); // null is temporary placeholder value - } + // put and order all tokens in tokIDs + for (int i = 0; i < length; i++) { + tokIDs.put(tokens[i], null); // null is temporary placeholder value + } - // assign each token an ascending id - int _id = 1; - for (Entry entry : tokIDs.entrySet()) { - entry.setValue(_id++); - } + // assign each token an ascending id + int _id = 1; + for (Entry entry : tokIDs.entrySet()) { + entry.setValue(_id++); + } - // fill input array with ids - for (int i = 0; i < length; i++) { - input[i] = tokIDs.get(tokens[i]); - } + // fill input array with ids + for (int i = 0; i < length; i++) { + input[i] = tokIDs.get(tokens[i]); + } - //System.out.println("Renaming tokens done."); + //System.out.println("Renaming tokens done."); - return delegate.buildSuffixArray(input, 0, length); - } + return delegate.buildSuffixArray(input, 0, length); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISuffixArrayBuilder.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISuffixArrayBuilder.java index 2d2898240..53117080e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISuffixArrayBuilder.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISuffixArrayBuilder.java @@ -2,14 +2,12 @@ /** * An algorithm that can produce a suffix array for a sequence of integer symbols. 
- * - * @see #buildSuffixArray(int[], int, int) * * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) + * @see #buildSuffixArray(int[], int, int) */ -public interface ISuffixArrayBuilder -{ +public interface ISuffixArrayBuilder { /** * Computes suffix array for sequence of symbols (integers). The processed sequence is * a subsequence of input determined by start and @@ -20,19 +18,19 @@ public interface ISuffixArrayBuilder * after start + length to store special marker symbols. Also, some * algorithms may require non-negative symbols in the input. For such constrained * algorithms, use various decorators and adapters available in this package. - * - * @param input A sequence of input symbols, int-coded. - * @param start The starting index (inclusive) in input. + * + * @param input A sequence of input symbols, int-coded. + * @param start The starting index (inclusive) in input. * @param length Number of symbols to process. * @return An array of indices such that the suffix of input at index - * result[i] is lexicographically larger or equal to any other - * suffix that precede it. Note that the output array may be larger than - * input.length, in which case only the first - * input.length elements are of relevance. - *

        - * The returned array contains suffix indexes starting from 0 (so - * start needs to be added manually to access a given suffix in - * input). + * result[i] is lexicographically larger or equal to any other + * suffix that precede it. Note that the output array may be larger than + * input.length, in which case only the first + * input.length elements are of relevance. + *

        + * The returned array contains suffix indexes starting from 0 (so + * start needs to be added manually to access a given suffix in + * input). */ - int [] buildSuffixArray(int [] input, int start, int length); + int[] buildSuffixArray(int[] input, int start, int length); } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISymbolMapper.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISymbolMapper.java index a3205468e..3a167548b 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISymbolMapper.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/ISymbolMapper.java @@ -6,8 +6,8 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -interface ISymbolMapper -{ - void map(int [] input, int start, int length); - void undo(int [] input, int start, int length); +interface ISymbolMapper { + void map(int[] input, int start, int length); + + void undo(int[] input, int start, int length); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/MinMax.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/MinMax.java index bc40b434d..8168aaf0e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/MinMax.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/MinMax.java @@ -2,25 +2,21 @@ /** * Holder for minimum and maximum. 
- * - * @see Tools#minmax(int[],int,int) * * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) + * @see Tools#minmax(int[], int, int) */ -final class MinMax -{ +final class MinMax { public final int min; public final int max; - - MinMax(int min, int max) - { + + MinMax(int min, int max) { this.min = min; this.max = max; } - public int range() - { + public int range() { return max - min; } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/QSufSort.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/QSufSort.java index 7f53ad8e3..09be102f6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/QSufSort.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/QSufSort.java @@ -3,10 +3,10 @@ /** *

        * Straightforward reimplementation of the qsufsort algorithm given in: - * + *

        *

          * <code>
        - * Larsson, N. Jesper and Sadakane, Kunihiko. Faster Suffix Sorting. 
        + * Larsson, N. Jesper and Sadakane, Kunihiko. Faster Suffix Sorting.
          * Report number LU-CS-TR:99-214, LUNDFD6/(NFCS-3140)/1--20/(1999). Department of Computer Science, Lund University"
          * </code>
          * 
        @@ -22,18 +22,25 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public class QSufSort implements ISuffixArrayBuilder -{ - /** group array, ultimately suffix array. */ +public class QSufSort implements ISuffixArrayBuilder { + /** + * group array, ultimately suffix array. + */ private int I[]; - /** inverse array, ultimately inverse of I. */ + /** + * inverse array, ultimately inverse of I. + */ private int V[]; - /** number of symbols aggregated by transform. */ + /** + * number of symbols aggregated by transform. + */ private int r; - /** length of already-sorted prefixes. */ + /** + * length of already-sorted prefixes. + */ private int h; /** @@ -48,8 +55,7 @@ public class QSufSort implements ISuffixArrayBuilder * Default constructor, uses the input array of symbols to preserve memory (and * destroys it). */ - public QSufSort() - { + public QSufSort() { this.preserveInput = true; } @@ -57,8 +63,7 @@ public QSufSort() * If true, the algorithm will use a copy of the input so it is left * intact. */ - public QSufSort(boolean preserveInput) - { + public QSufSort(boolean preserveInput) { this.preserveInput = preserveInput; } @@ -73,26 +78,22 @@ public QSufSort(boolean preserveInput) *

        */ @Override - public final int [] buildSuffixArray(int [] input, int start, int length) - { + public final int[] buildSuffixArray(int[] input, int start, int length) { Tools.assertAlways(input.length >= start + length + 1, "no extra space after input end"); MinMax minmax = Tools.minmax(input, start, length); Tools.assertAlways(minmax.min >= 0, "input must not be negative"); - I = new int [length + 1]; + I = new int[length + 1]; this.start = start; - if (preserveInput) - { - V = new int [length + 1]; + if (preserveInput) { + V = new int[length + 1]; this.start = 0; System.arraycopy(input, start, V, 0, length); - } - else - { + } else { V = input; } suffixsort(length, minmax.max + 1, minmax.min); - final int [] tmp = I; + final int[] tmp = I; V = I = null; return tmp; } @@ -105,18 +106,14 @@ public QSufSort(boolean preserveInput) * Original contents of x[n] is disregarded, the n -th * symbol being regarded as end-of-string smaller than all other symbols. */ - private void suffixsort(int n, int k, int l) - { + private void suffixsort(int n, int k, int l) { int pi, pk; // I pointers int i, j, s, sl; - if (n >= k - l) - { /* if bucketing possible, */ + if (n >= k - l) { /* if bucketing possible, */ j = transform(n, k, l, n); bucketsort(n, j); /* bucketsort on first r positions. */ - } - else - { + } else { transform(n, k, l, Integer.MAX_VALUE); for (i = 0; i <= n; ++i) I[i] = i; /* initialize I with suffix numbers. */ @@ -124,21 +121,15 @@ private void suffixsort(int n, int k, int l) sort_split(0, n + 1); /* quicksort on first r positions. */ } h = r; /* number of symbols aggregated by transform. */ - while (I[0] >= -n) - { + while (I[0] >= -n) { pi = 0; /* pi is first position of group. */ sl = 0; /* sl is negated length of sorted groups. */ - do - { - if ((s = I[pi]) < 0) - { + do { + if ((s = I[pi]) < 0) { pi -= s; /* skip over sorted group. */ sl += s; /* add negated length to sl. 
*/ - } - else - { - if (sl != 0) - { + } else { + if (sl != 0) { I[pi + sl] = sl; /* combine sorted groups before pi. */ sl = 0; } @@ -149,15 +140,13 @@ private void suffixsort(int n, int k, int l) } while (pi <= n); if (sl != 0) /* if the array ends with a sorted group. */ - I[pi + sl] = sl; /* combine sorted groups at end of I. */ + I[pi + sl] = sl; /* combine sorted groups at end of I. */ h = 2 * h; /* double sorted-depth. */ } - for (i = 0; i <= n; ++i) - { + for (i = 0; i <= n; ++i) { /* reconstruct suffix array from inverse. */ - if (V[start + i] > 0) - { + if (V[start + i] > 0) { I[V[start + i] - 1] = i; } } @@ -171,13 +160,11 @@ private void suffixsort(int n, int k, int l) * "Engineering a Sort Function", Software -- Practice and Experience 23(11), * 1249-1265 (November 1993). This function is based on Program 7. */ - private void sort_split(int p, int n) - { + private void sort_split(int p, int n) { int pa, pb, pc, pd, pl, pm, pn;// pointers int f, v, s, t; - if (n < 7) - { /* multi-selection sort smallest arrays. */ + if (n < 7) { /* multi-selection sort smallest arrays. */ select_sort_split(p, n); return; } @@ -185,21 +172,16 @@ private void sort_split(int p, int n) v = choose_pivot(p, n); pa = pb = p; pc = pd = p + n - 1; - while (true) - { /* split-end partition. */ - while (pb <= pc && (f = KEY(pb)) <= v) - { - if (f == v) - { + while (true) { /* split-end partition. */ + while (pb <= pc && (f = KEY(pb)) <= v) { + if (f == v) { SWAP(pa, pb); ++pa; } ++pb; } - while (pc >= pb && (f = KEY(pc)) >= v) - { - if (f == v) - { + while (pc >= pb && (f = KEY(pc)) >= v) { + if (f == v) { SWAP(pc, pd); --pd; } @@ -230,8 +212,7 @@ private void sort_split(int p, int n) * {@link #sort_split(int, int)}. Sets group numbers for a group whose lowest position * in {@link #I} is pl and highest position is pm. */ - private void update_group(int pl, int pm) - { + private void update_group(int pl, int pm) { int g; g = pm; /* group number. 
*/ @@ -247,18 +228,15 @@ private void update_group(int pl, int pm) /** * Subroutine for {@link #sort_split(int, int)} , algorithm by Bentley & McIlroy. */ - private int choose_pivot(int p, int n) - { + private int choose_pivot(int p, int n) { int pl, pm, pn;// pointers int s; pm = p + (n >> 1); /* small arrays, middle element. */ - if (n > 7) - { + if (n > 7) { pl = p; pn = p + n - 1; - if (n > 40) - { /* big arrays, pseudomedian of 9. */ + if (n > 40) { /* big arrays, pseudomedian of 9. */ s = n >> 3; pl = MED3(pl, pl + s, pl + s + s); pm = MED3(pm - s, pm, pm + s); @@ -273,32 +251,26 @@ private int choose_pivot(int p, int n) * Quadratic sorting method to use for small subarrays. To be able to update group * numbers consistently, a variant of selection sorting is used. */ - private void select_sort_split(int p, int n) - { + private void select_sort_split(int p, int n) { int pa, pb, pi, pn; int f, v; pa = p; /* pa is start of group being picked out. */ pn = p + n - 1; /* pn is last position of subarray. */ - while (pa < pn) - { + while (pa < pn) { for (pi = pb = pa + 1, f = KEY(pa); pi <= pn; ++pi) - if ((v = KEY(pi)) < f) - { + if ((v = KEY(pi)) < f) { f = v; /* f is smallest key found. */ SWAP(pi, pa); /* place smallest element at beginning. */ pb = pa + 1; /* pb is position for elements equal to f. */ - } - else if (v == f) - { /* if equal to smallest key. */ + } else if (v == f) { /* if equal to smallest key. */ SWAP(pi, pb); /* place next to other smallest elements. */ ++pb; } update_group(pa, pb - 1); /* update group values for new group. */ pa = pb; /* continue sorting rest of the subarray. */ } - if (pa == pn) - { /* check if last part is single element. */ + if (pa == pn) { /* check if last part is single element. */ V[start + I[pa]] = pa; I[pa] = -1; /* sorted group. */ } @@ -313,34 +285,28 @@ else if (v == f) * n+1. I is array of size n+1 whose contents * are disregarded. 
*/ - private void bucketsort(int n, int k) - { + private void bucketsort(int n, int k) { int pi;// pointer int i, c, d, g; for (pi = 0; pi < k; ++pi) I[pi] = -1; /* mark linked lists empty. */ - for (i = 0; i <= n; ++i) - { + for (i = 0; i <= n; ++i) { V[start + i] = I[c = V[start + i]]; /* insert in linked list. */ I[c] = i; } - for (pi = k - 1, i = n; pi >= 0; --pi) - { + for (pi = k - 1, i = n; pi >= 0; --pi) { d = V[start + (c = I[pi])]; /* c is position, d is next in list. */ V[start + c] = g = i; /* last position equals group number. */ - if (d >= 0) - { /* if more than one element in group. */ + if (d >= 0) { /* if more than one element in group. */ I[i--] = c; /* p is permutation for the sorted x. */ - do - { + do { d = V[start + (c = d)]; /* next in linked list. */ V[start + c] = g; /* group number in x. */ I[i--] = c; /* permutation in p. */ } while (d >= 0); - } - else I[i--] = -1; /* one element, sorted group. */ + } else I[i--] = -1; /* one element, sorted group. */ } } @@ -359,72 +325,61 @@ private void bucketsort(int n, int k) * k-l>n, compaction is never done; if q is * {@link Integer#MAX_VALUE} , the maximum number of symbols are aggregated into one. *

        - * + * * @return an integer j in the range 1...q representing the - * size of the new alphabet. If j<=n+1 , the alphabet is - * compacted. The global variable r is set to the number of old - * symbols grouped into one. Only V[n] is 0. + * size of the new alphabet. If j<=n+1 , the alphabet is + * compacted. The global variable r is set to the number of old + * symbols grouped into one. Only V[n] is 0. */ - private int transform(int n, int k, int l, int q) - { + private int transform(int n, int k, int l, int q) { int b, c, d, e, i, j, m, s; int pi, pj;// pointers for (s = 0, i = k - l; i != 0; i >>= 1) ++s; /* s is number of bits in old symbol. */ e = Integer.MAX_VALUE >> s; /* e is for overflow checking. */ - for (b = d = r = 0; r < n && d <= e && (c = d << s | (k - l)) <= q; ++r) - { + for (b = d = r = 0; r < n && d <= e && (c = d << s | (k - l)) <= q; ++r) { b = b << s | (V[start + r] - l + 1); /* b is start of x in chunk alphabet. */ d = c; /* d is max symbol in chunk alphabet. */ } m = (1 << (r - 1) * s) - 1; /* m masks off top old symbol from chunk. */ V[start + n] = l - 1; /* emulate zero terminator. */ - if (d <= n) - { /* if bucketing possible, compact alphabet. */ + if (d <= n) { /* if bucketing possible, compact alphabet. */ for (pi = 0; pi <= d; ++pi) I[pi] = 0; /* zero transformation table. */ - for (pi = r, c = b; pi <= n; ++pi) - { + for (pi = r, c = b; pi <= n; ++pi) { I[c] = 1; /* mark used chunk symbol. */ c = (c & m) << s | (V[start + pi] - l + 1); /* * shift in next old symbol in * chunk. */ } - for (i = 1; i < r; ++i) - { /* handle last r-1 positions. */ + for (i = 1; i < r; ++i) { /* handle last r-1 positions. */ I[c] = 1; /* mark used chunk symbol. */ c = (c & m) << s; /* shift in next old symbol in chunk. */ } for (pi = 0, j = 1; pi <= d; ++pi) if (I[pi] != 0) I[pi] = j++; /* j is new alphabet size. 
*/ - for (pi = 0, pj = r, c = b; pj <= n; ++pi, ++pj) - { + for (pi = 0, pj = r, c = b; pj <= n; ++pi, ++pj) { V[start + pi] = I[c]; /* transform to new alphabet. */ c = (c & m) << s | (V[start + pj] - l + 1); /* * shift in next old symbol in * chunk. */ } - while (pi < n) - { /* handle last r-1 positions. */ + while (pi < n) { /* handle last r-1 positions. */ V[start + pi++] = I[c]; /* transform to new alphabet. */ c = (c & m) << s; /* shift right-end zero in chunk. */ } - } - else - { /* bucketing not possible, don't compact. */ - for (pi = 0, pj = r, c = b; pj <= n; ++pi, ++pj) - { + } else { /* bucketing not possible, don't compact. */ + for (pi = 0, pj = r, c = b; pj <= n; ++pi, ++pj) { V[start + pi] = c; /* transform to new alphabet. */ c = (c & m) << s | (V[start + pj] - l + 1); /* * shift in next old symbol in * chunk. */ } - while (pi < n) - { /* handle last r-1 positions. */ + while (pi < n) { /* handle last r-1 positions. */ V[start + pi++] = c; /* transform to new alphabet. */ c = (c & m) << s; /* shift right-end zero in chunk. */ } @@ -434,20 +389,17 @@ private int transform(int n, int k, int l, int q) return j; /* return new alphabet size. */ } - private int KEY(int p) - { + private int KEY(int p) { return V[start + I[p] + h]; } - private void SWAP(int a, int b) - { + private void SWAP(int a, int b) { int tmp = I[a]; I[a] = I[b]; I[b] = tmp; } - private int MED3(int a, int b, int c) - { + private int MED3(int a, int b, int c) { return (KEY(a) < KEY(b) ? (KEY(b) < KEY(c) ? (b) : KEY(a) < KEY(c) ? (c) : (a)) : (KEY(b) > KEY(c) ? (b) : KEY(a) > KEY(c) ? (c) : (a))); } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SAIS.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SAIS.java index 51aa126eb..ee05fd9f1 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SAIS.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SAIS.java @@ -39,354 +39,567 @@ *

        * Ge Nong, Sen Zhang and Wai Hong Chan, Two Efficient Algorithms for Linear Suffix Array * Construction, 2008. - * - * @see "http://yuta.256.googlepages.com/sais" * * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) + * @see "http://yuta.256.googlepages.com/sais" */ -public final class SAIS implements ISuffixArrayBuilder -{ - private static interface BaseArray - { - public int get(int i); - public void set(int i, int val); - public int update(int i, int val); - } - - private static final class ByteArray implements BaseArray - { - private byte[] m_A; - private int m_pos; - - ByteArray(byte[] A, int pos) { m_A = A; m_pos = pos; } - public int get(int i) { return m_A[m_pos + i] & 0xff; } - public void set(int i, int val) { m_A[m_pos + i] = (byte)(val & 0xff); } - public int update(int i, int val) { return m_A[m_pos + i] += val & 0xff; } - } - - private static final class CharArray implements BaseArray - { - private char[] m_A; - private int m_pos; - CharArray(char[] A, int pos) { m_A = A; m_pos = pos; } - public int get(int i) { return m_A[m_pos + i] & 0xffff; } - public void set(int i, int val) { m_A[m_pos + i] = (char)(val & 0xffff); } - public int update(int i, int val) { return m_A[m_pos + i] += val & 0xffff; } - } - - private static final class ShortArray implements BaseArray - { - private short[] m_A; - private int m_pos; - ShortArray(short[] A, int pos) { m_A = A; m_pos = pos; } - public int get(int i) { return m_A[m_pos + i] & 0xffff; } - public void set(int i, int val) { m_A[m_pos + i] = (short)(val & 0xffff); } - public int update(int i, int val) { return m_A[m_pos + i] += val & 0xffff; } - } - - private static final class IntArray implements BaseArray - { - private int[] m_A; - private int m_pos; - IntArray(int[] A, int pos) { m_A = A; m_pos = pos; } - public int get(int i) { return m_A[m_pos + i]; } - public void set(int i, int val) { m_A[m_pos + i] = val; } - public int update(int i, int val) { return m_A[m_pos + i] += 
val; } - } - - private static final class StringArray implements BaseArray - { - private String m_A; - private int m_pos; - StringArray(String A, int pos) { m_A = A; m_pos = pos; } - public int get(int i) { return (int)(m_A.charAt(m_pos + i) & 0xffff); } - public void set(int i, int val) { } - public int update(int i, int val) { return 0; } - } - - /* find the start or end of each bucket */ - private static void getCounts(BaseArray T, BaseArray C, int n, int k) { - for(int i = 0; i < k; ++i) { C.set(i, 0); } - for(int i = 0; i < n; ++i) { C.update(T.get(i), 1); } - } - - private static void getBuckets(BaseArray C, BaseArray B, int k, boolean end) { - int i, sum = 0; - if (end != false) { for(i = 0; i < k; ++i) { sum += C.get(i); B.set(i, sum); } } - else { for(i = 0; i < k; ++i) { sum += C.get(i); B.set(i, sum - C.get(i)); } } - } - - /* compute SA and BWT */ - private static void induceSA(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) - { - int b, i, j; - int c0, c1; +public final class SAIS implements ISuffixArrayBuilder { + private static interface BaseArray { + public int get(int i); + + public void set(int i, int val); + + public int update(int i, int val); + } + + private static final class ByteArray implements BaseArray { + private byte[] m_A; + private int m_pos; + + ByteArray(byte[] A, int pos) { + m_A = A; + m_pos = pos; + } + + public int get(int i) { + return m_A[m_pos + i] & 0xff; + } + + public void set(int i, int val) { + m_A[m_pos + i] = (byte) (val & 0xff); + } + + public int update(int i, int val) { + return m_A[m_pos + i] += val & 0xff; + } + } + + private static final class CharArray implements BaseArray { + private char[] m_A; + private int m_pos; + + CharArray(char[] A, int pos) { + m_A = A; + m_pos = pos; + } + + public int get(int i) { + return m_A[m_pos + i] & 0xffff; + } + + public void set(int i, int val) { + m_A[m_pos + i] = (char) (val & 0xffff); + } + + public int update(int i, int val) { + return m_A[m_pos + i] += val 
& 0xffff; + } + } + + private static final class ShortArray implements BaseArray { + private short[] m_A; + private int m_pos; + + ShortArray(short[] A, int pos) { + m_A = A; + m_pos = pos; + } + + public int get(int i) { + return m_A[m_pos + i] & 0xffff; + } + + public void set(int i, int val) { + m_A[m_pos + i] = (short) (val & 0xffff); + } + + public int update(int i, int val) { + return m_A[m_pos + i] += val & 0xffff; + } + } + + private static final class IntArray implements BaseArray { + private int[] m_A; + private int m_pos; + + IntArray(int[] A, int pos) { + m_A = A; + m_pos = pos; + } + + public int get(int i) { + return m_A[m_pos + i]; + } + + public void set(int i, int val) { + m_A[m_pos + i] = val; + } + + public int update(int i, int val) { + return m_A[m_pos + i] += val; + } + } + + private static final class StringArray implements BaseArray { + private String m_A; + private int m_pos; + + StringArray(String A, int pos) { + m_A = A; + m_pos = pos; + } + + public int get(int i) { + return (int) (m_A.charAt(m_pos + i) & 0xffff); + } + + public void set(int i, int val) { + } + + public int update(int i, int val) { + return 0; + } + } + + /* find the start or end of each bucket */ + private static void getCounts(BaseArray T, BaseArray C, int n, int k) { + for (int i = 0; i < k; ++i) { + C.set(i, 0); + } + for (int i = 0; i < n; ++i) { + C.update(T.get(i), 1); + } + } + + private static void getBuckets(BaseArray C, BaseArray B, int k, boolean end) { + int i, sum = 0; + if (end != false) { + for (i = 0; i < k; ++i) { + sum += C.get(i); + B.set(i, sum); + } + } else { + for (i = 0; i < k; ++i) { + sum += C.get(i); + B.set(i, sum - C.get(i)); + } + } + } + + /* compute SA and BWT */ + private static void induceSA(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) { + int b, i, j; + int c0, c1; /* compute SAl */ - if(C == B) { getCounts(T, C, n, k); } - getBuckets(C, B, k, false); /* find starts of buckets */ - j = n - 1; - b = B.get(c1 = 
T.get(j)); - SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; - for(i = 0; i < n; ++i) { - j = SA[i]; SA[i] = ~j; - if(0 < j) { - if((c0 = T.get(--j)) != c1) { B.set(c1, b); b = B.get(c1 = c0); } + if (C == B) { + getCounts(T, C, n, k); + } + getBuckets(C, B, k, false); /* find starts of buckets */ + j = n - 1; + b = B.get(c1 = T.get(j)); SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; - } - } + for (i = 0; i < n; ++i) { + j = SA[i]; + SA[i] = ~j; + if (0 < j) { + if ((c0 = T.get(--j)) != c1) { + B.set(c1, b); + b = B.get(c1 = c0); + } + SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; + } + } /* compute SAs */ - if(C == B) { getCounts(T, C, n, k); } - getBuckets(C, B, k, true); /* find ends of buckets */ - for(i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { - if(0 < (j = SA[i])) { - if((c0 = T.get(--j)) != c1) { B.set(c1, b); b = B.get(c1 = c0); } - SA[--b] = ((j == 0) || (T.get(j - 1) > c1)) ? ~j : j; - } else { - SA[i] = ~j; - } + if (C == B) { + getCounts(T, C, n, k); + } + getBuckets(C, B, k, true); /* find ends of buckets */ + for (i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { + if (0 < (j = SA[i])) { + if ((c0 = T.get(--j)) != c1) { + B.set(c1, b); + b = B.get(c1 = c0); + } + SA[--b] = ((j == 0) || (T.get(j - 1) > c1)) ? ~j : j; + } else { + SA[i] = ~j; + } + } } - } - - private static int computeBWT(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) { - int b, i, j, pidx = -1; - int c0, c1; + + private static int computeBWT(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) { + int b, i, j, pidx = -1; + int c0, c1; /* compute SAl */ - if(C == B) { getCounts(T, C, n, k); } - getBuckets(C, B, k, false); /* find starts of buckets */ - j = n - 1; - b = B.get(c1 = T.get(j)); - SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? 
~j : j; - for(i = 0; i < n; ++i) { - if(0 < (j = SA[i])) { - SA[i] = ~(c0 = T.get(--j)); - if(c0 != c1) { B.set(c1, b); b = B.get(c1 = c0); } + if (C == B) { + getCounts(T, C, n, k); + } + getBuckets(C, B, k, false); /* find starts of buckets */ + j = n - 1; + b = B.get(c1 = T.get(j)); SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; - } else if(j != 0) { - SA[i] = ~j; - } - } + for (i = 0; i < n; ++i) { + if (0 < (j = SA[i])) { + SA[i] = ~(c0 = T.get(--j)); + if (c0 != c1) { + B.set(c1, b); + b = B.get(c1 = c0); + } + SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; + } else if (j != 0) { + SA[i] = ~j; + } + } /* compute SAs */ - if(C == B) { getCounts(T, C, n, k); } - getBuckets(C, B, k, true); /* find ends of buckets */ - for(i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { - if(0 < (j = SA[i])) { - SA[i] = (c0 = T.get(--j)); - if(c0 != c1) { B.set(c1, b); b = B.get(c1 = c0); } - SA[--b] = ((0 < j) && (T.get(j - 1) > c1)) ? ~((int)T.get(j - 1)) : j; - } else if(j != 0) { - SA[i] = ~j; - } else { - pidx = i; - } + if (C == B) { + getCounts(T, C, n, k); + } + getBuckets(C, B, k, true); /* find ends of buckets */ + for (i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { + if (0 < (j = SA[i])) { + SA[i] = (c0 = T.get(--j)); + if (c0 != c1) { + B.set(c1, b); + b = B.get(c1 = c0); + } + SA[--b] = ((0 < j) && (T.get(j - 1) > c1)) ? 
~((int) T.get(j - 1)) : j; + } else if (j != 0) { + SA[i] = ~j; + } else { + pidx = i; + } + } + return pidx; } - return pidx; - } - /* find the suffix array SA of T[0..n-1] in {0..k-1}^n - use a working space (excluding T and SA) of at most 2n+O(1) for a constant alphabet */ - private static int SA_IS(BaseArray T, int[] SA, int fs, int n, int k, boolean isbwt) { - BaseArray C, B, RA; - int i, j, c, m, p, q, plen, qlen, name, pidx = 0; - int c0, c1; - boolean diff; + /* find the suffix array SA of T[0..n-1] in {0..k-1}^n + use a working space (excluding T and SA) of at most 2n+O(1) for a constant alphabet */ + private static int SA_IS(BaseArray T, int[] SA, int fs, int n, int k, boolean isbwt) { + BaseArray C, B, RA; + int i, j, c, m, p, q, plen, qlen, name, pidx = 0; + int c0, c1; + boolean diff; /* stage 1: reduce the problem by at least 1/2 sort all the S-substrings */ - if(k <= fs) { - C = new IntArray(SA, n); - B = (k <= (fs - k)) ? new IntArray(SA, n + k) : C; - } else { - B = C = new IntArray(new int[k], 0); - } - getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ - for(i = 0; i < n; ++i) { SA[i] = 0; } - for(i = n - 2, c = 0, c1 = T.get(n - 1); 0 <= i; --i, c1 = c0) { - if((c0 = T.get(i)) < (c1 + c)) { c = 1; } - else if(c != 0) { SA[B.update(c1, -1)] = i + 1; c = 0; } - } - induceSA(T, SA, C, B, n, k); - C = null; B = null; + if (k <= fs) { + C = new IntArray(SA, n); + B = (k <= (fs - k)) ? 
new IntArray(SA, n + k) : C; + } else { + B = C = new IntArray(new int[k], 0); + } + getCounts(T, C, n, k); + getBuckets(C, B, k, true); /* find ends of buckets */ + for (i = 0; i < n; ++i) { + SA[i] = 0; + } + for (i = n - 2, c = 0, c1 = T.get(n - 1); 0 <= i; --i, c1 = c0) { + if ((c0 = T.get(i)) < (c1 + c)) { + c = 1; + } else if (c != 0) { + SA[B.update(c1, -1)] = i + 1; + c = 0; + } + } + induceSA(T, SA, C, B, n, k); + C = null; + B = null; /* compact all the sorted substrings into the first m items of SA 2*m must be not larger than n (proveable) */ - for(i = 0, m = 0; i < n; ++i) { - p = SA[i]; - if((0 < p) && (T.get(p - 1) > (c0 = T.get(p)))) { - for(j = p + 1; (j < n) && (c0 == (c1 = T.get(j))); ++j) { } - if((j < n) && (c0 < c1)) { SA[m++] = p; } - } - } - j = m + (n >> 1); - for(i = m; i < j; ++i) { SA[i] = 0; } /* init the name array buffer */ + for (i = 0, m = 0; i < n; ++i) { + p = SA[i]; + if ((0 < p) && (T.get(p - 1) > (c0 = T.get(p)))) { + for (j = p + 1; (j < n) && (c0 == (c1 = T.get(j))); ++j) { + } + if ((j < n) && (c0 < c1)) { + SA[m++] = p; + } + } + } + j = m + (n >> 1); + for (i = m; i < j; ++i) { + SA[i] = 0; + } /* init the name array buffer */ /* store the length of all substrings */ - for(i = n - 2, j = n, c = 0, c1 = T.get(n - 1); 0 <= i; --i, c1 = c0) { - if((c0 = T.get(i)) < (c1 + c)) { c = 1; } - else if(c != 0) { SA[m + ((i + 1) >> 1)] = j - i - 1; j = i + 1; c = 0; } - } + for (i = n - 2, j = n, c = 0, c1 = T.get(n - 1); 0 <= i; --i, c1 = c0) { + if ((c0 = T.get(i)) < (c1 + c)) { + c = 1; + } else if (c != 0) { + SA[m + ((i + 1) >> 1)] = j - i - 1; + j = i + 1; + c = 0; + } + } /* find the lexicographic names of all substrings */ - for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) { - p = SA[i]; plen = SA[m + (p >> 1)]; diff = true; - if(plen == qlen) { - for(j = 0; (j < plen) && (T.get(p + j) == T.get(q + j)); ++j) { } - if(j == plen) { diff = false; } - } - if(diff != false) { ++name; q = p; qlen = plen; } - SA[m + (p >> 1)] = 
name; - } + for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i) { + p = SA[i]; + plen = SA[m + (p >> 1)]; + diff = true; + if (plen == qlen) { + for (j = 0; (j < plen) && (T.get(p + j) == T.get(q + j)); ++j) { + } + if (j == plen) { + diff = false; + } + } + if (diff != false) { + ++name; + q = p; + qlen = plen; + } + SA[m + (p >> 1)] = name; + } /* stage 2: solve the reduced problem recurse if names are not yet unique */ - if(name < m) { - RA = new IntArray(SA, n + fs - m); - for(i = m + (n >> 1) - 1, j = n + fs - 1; m <= i; --i) { - if(SA[i] != 0) { SA[j--] = SA[i] - 1; } - } - SA_IS(RA, SA, fs + n - m * 2, m, name, false); - RA = null; - for(i = n - 2, j = m * 2 - 1, c = 0, c1 = T.get(n - 1); 0 <= i; --i, c1 = c0) { - if((c0 = T.get(i)) < (c1 + c)) { c = 1; } - else if(c != 0) { SA[j--] = i + 1; c = 0; } /* get p1 */ - } - for(i = 0; i < m; ++i) { SA[i] = SA[SA[i] + m]; } /* get index */ - } + if (name < m) { + RA = new IntArray(SA, n + fs - m); + for (i = m + (n >> 1) - 1, j = n + fs - 1; m <= i; --i) { + if (SA[i] != 0) { + SA[j--] = SA[i] - 1; + } + } + SA_IS(RA, SA, fs + n - m * 2, m, name, false); + RA = null; + for (i = n - 2, j = m * 2 - 1, c = 0, c1 = T.get(n - 1); 0 <= i; --i, c1 = c0) { + if ((c0 = T.get(i)) < (c1 + c)) { + c = 1; + } else if (c != 0) { + SA[j--] = i + 1; + c = 0; + } /* get p1 */ + } + for (i = 0; i < m; ++i) { + SA[i] = SA[SA[i] + m]; + } /* get index */ + } /* stage 3: induce the result for the original problem */ - if(k <= fs) { - C = new IntArray(SA, n); - B = (k <= (fs - k)) ? new IntArray(SA, n + k) : C; - } else { - B = C = new IntArray(new int[k], 0); - } + if (k <= fs) { + C = new IntArray(SA, n); + B = (k <= (fs - k)) ? 
new IntArray(SA, n + k) : C; + } else { + B = C = new IntArray(new int[k], 0); + } /* put all left-most S characters into their buckets */ - getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ - for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */ - for(i = m - 1; 0 <= i; --i) { - j = SA[i]; SA[i] = 0; - SA[B.update(T.get(j), -1)] = j; + getCounts(T, C, n, k); + getBuckets(C, B, k, true); /* find ends of buckets */ + for (i = m; i < n; ++i) { + SA[i] = 0; + } /* init SA[m..n-1] */ + for (i = m - 1; 0 <= i; --i) { + j = SA[i]; + SA[i] = 0; + SA[B.update(T.get(j), -1)] = j; + } + if (isbwt == false) { + induceSA(T, SA, C, B, n, k); + } else { + pidx = computeBWT(T, SA, C, B, n, k); + } + C = null; + B = null; + return pidx; } - if(isbwt == false) { induceSA(T, SA, C, B, n, k); } - else { pidx = computeBWT(T, SA, C, B, n, k); } - C = null; B = null; - return pidx; - } - /** Suffixsorting **/ + /** + * Suffixsorting * + */ /* byte */ - public static - int - suffixsort(byte[] T, int[] SA, int n) { - if((T == null) || (SA == null) || (T.length < n) || (SA.length < n)) { return -1; } - if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } - return SA_IS(new ByteArray(T, 0), SA, 0, n, 256, false); - } - /* char */ - public static - int - suffixsort(char[] T, int[] SA, int n) { - if((T == null) || (SA == null) || (T.length < n) || (SA.length < n)) { return -1; } - if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } - return SA_IS(new CharArray(T, 0), SA, 0, n, 65536, false); - } - /* short */ - public static - int - suffixsort(short[] T, int[] SA, int n, int k) { - if((T == null) || (SA == null) || - (T.length < n) || (SA.length < n) || - (k <= 0) || (65536 < k)) { return -1; } - if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } - return SA_IS(new ShortArray(T, 0), SA, 0, n, k, false); - } - /* int */ - public static - int - suffixsort(int[] T, int[] SA, int n, int k) { - if((T == null) || (SA == null) || - (T.length < n) || (SA.length < n) || - 
(k <= 0)) { return -1; } - if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } - return SA_IS(new IntArray(T, 0), SA, 0, n, k, false); - } - /* String */ - public static - int - suffixsort(String T, int[] SA, int n) { - if((T == null) || (SA == null) || - (T.length() < n) || (SA.length < n)) { return -1; } - if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } - return SA_IS(new StringArray(T, 0), SA, 0, n, 65536, false); - } - - /** Burrows-Wheeler Transform **/ + public static int + suffixsort(byte[] T, int[] SA, int n) { + if ((T == null) || (SA == null) || (T.length < n) || (SA.length < n)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + SA[0] = 0; + } + return 0; + } + return SA_IS(new ByteArray(T, 0), SA, 0, n, 256, false); + } + + /* char */ + public static int + suffixsort(char[] T, int[] SA, int n) { + if ((T == null) || (SA == null) || (T.length < n) || (SA.length < n)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + SA[0] = 0; + } + return 0; + } + return SA_IS(new CharArray(T, 0), SA, 0, n, 65536, false); + } + + /* short */ + public static int + suffixsort(short[] T, int[] SA, int n, int k) { + if ((T == null) || (SA == null) || + (T.length < n) || (SA.length < n) || + (k <= 0) || (65536 < k)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + SA[0] = 0; + } + return 0; + } + return SA_IS(new ShortArray(T, 0), SA, 0, n, k, false); + } + + /* int */ + public static int + suffixsort(int[] T, int[] SA, int n, int k) { + if ((T == null) || (SA == null) || + (T.length < n) || (SA.length < n) || + (k <= 0)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + SA[0] = 0; + } + return 0; + } + return SA_IS(new IntArray(T, 0), SA, 0, n, k, false); + } + + /* String */ + public static int + suffixsort(String T, int[] SA, int n) { + if ((T == null) || (SA == null) || + (T.length() < n) || (SA.length < n)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + SA[0] = 0; + } + return 0; + } + return SA_IS(new StringArray(T, 0), SA, 0, n, 65536, false); + } 
+ + /** + * Burrows-Wheeler Transform * + */ /* byte */ - public static - int - bwtransform(byte[] T, byte[] U, int[] A, int n) { - int i, pidx; - if((T == null) || (U == null) || (A == null) || - (T.length < n) || (U.length < n) || (A.length < n)) { return -1; } - if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - pidx = SA_IS(new ByteArray(T, 0), A, 0, n, 256, true); - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = (byte)(A[i] & 0xff); } - for(i += 1; i < n; ++i) { U[i] = (byte)(A[i] & 0xff); } - return pidx + 1; - } - /* char */ - public static - int - bwtransform(char[] T, char[] U, int[] A, int n) { - int i, pidx; - if((T == null) || (U == null) || (A == null) || - (T.length < n) || (U.length < n) || (A.length < n)) { return -1; } - if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - pidx = SA_IS(new CharArray(T, 0), A, 0, n, 65536, true); - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = (char)(A[i] & 0xffff); } - for(i += 1; i < n; ++i) { U[i] = (char)(A[i] & 0xffff); } - return pidx + 1; - } - /* short */ - public static - int - bwtransform(short[] T, short[] U, int[] A, int n, int k) { - int i, pidx; - if((T == null) || (U == null) || (A == null) || - (T.length < n) || (U.length < n) || (A.length < n) || - (0 <= k) || (65536 < k)) { return -1; } - if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - pidx = SA_IS(new ShortArray(T, 0), A, 0, n, k, true); - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = (short)(A[i] & 0xffff); } - for(i += 1; i < n; ++i) { U[i] = (short)(A[i] & 0xffff); } - return pidx + 1; - } - /* int */ - public static - int - bwtransform(int[] T, int[] U, int[] A, int n, int k) { - int i, pidx; - if((T == null) || (U == null) || (A == null) || - (T.length < n) || (U.length < n) || (A.length < n) || - (0 <= k)) { return -1; } - if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - pidx = SA_IS(new IntArray(T, 0), A, 0, n, k, true); - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = A[i]; } 
- for(i += 1; i < n; ++i) { U[i] = A[i]; } - return pidx + 1; - } - - @Override - public int [] buildSuffixArray(int [] input, int start, int length) - { - // TODO: [dw] add constraints here. - final int [] SA = new int [length]; - MinMax mm = Tools.minmax(input, start, length); - suffixsort(input, SA, length, mm.max + 1); - return SA; - } + public static int + bwtransform(byte[] T, byte[] U, int[] A, int n) { + int i, pidx; + if ((T == null) || (U == null) || (A == null) || + (T.length < n) || (U.length < n) || (A.length < n)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + U[0] = T[0]; + } + return n; + } + pidx = SA_IS(new ByteArray(T, 0), A, 0, n, 256, true); + U[0] = T[n - 1]; + for (i = 0; i < pidx; ++i) { + U[i + 1] = (byte) (A[i] & 0xff); + } + for (i += 1; i < n; ++i) { + U[i] = (byte) (A[i] & 0xff); + } + return pidx + 1; + } + + /* char */ + public static int + bwtransform(char[] T, char[] U, int[] A, int n) { + int i, pidx; + if ((T == null) || (U == null) || (A == null) || + (T.length < n) || (U.length < n) || (A.length < n)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + U[0] = T[0]; + } + return n; + } + pidx = SA_IS(new CharArray(T, 0), A, 0, n, 65536, true); + U[0] = T[n - 1]; + for (i = 0; i < pidx; ++i) { + U[i + 1] = (char) (A[i] & 0xffff); + } + for (i += 1; i < n; ++i) { + U[i] = (char) (A[i] & 0xffff); + } + return pidx + 1; + } + + /* short */ + public static int + bwtransform(short[] T, short[] U, int[] A, int n, int k) { + int i, pidx; + if ((T == null) || (U == null) || (A == null) || + (T.length < n) || (U.length < n) || (A.length < n) || + (0 <= k) || (65536 < k)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + U[0] = T[0]; + } + return n; + } + pidx = SA_IS(new ShortArray(T, 0), A, 0, n, k, true); + U[0] = T[n - 1]; + for (i = 0; i < pidx; ++i) { + U[i + 1] = (short) (A[i] & 0xffff); + } + for (i += 1; i < n; ++i) { + U[i] = (short) (A[i] & 0xffff); + } + return pidx + 1; + } + + /* int */ + public static int + 
bwtransform(int[] T, int[] U, int[] A, int n, int k) { + int i, pidx; + if ((T == null) || (U == null) || (A == null) || + (T.length < n) || (U.length < n) || (A.length < n) || + (0 <= k)) { + return -1; + } + if (n <= 1) { + if (n == 1) { + U[0] = T[0]; + } + return n; + } + pidx = SA_IS(new IntArray(T, 0), A, 0, n, k, true); + U[0] = T[n - 1]; + for (i = 0; i < pidx; ++i) { + U[i + 1] = A[i]; + } + for (i += 1; i < n; ++i) { + U[i] = A[i]; + } + return pidx + 1; + } + + @Override + public int[] buildSuffixArray(int[] input, int start, int length) { + // TODO: [dw] add constraints here. + final int[] SA = new int[length]; + MinMax mm = Tools.minmax(input, start, length); + suffixsort(input, SA, length, mm.max + 1); + return SA; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Skew.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Skew.java index f7e60ad73..f170e5f93 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Skew.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Skew.java @@ -6,7 +6,7 @@ *

        * Straightforward reimplementation of the recursive algorithm given in: * J. Kärkkäinen and P. Sanders. Simple linear work suffix array construction. - * In Proc. 13th International Conference on Automata, Languages and Programming, + * In Proc. 13th International Conference on Automata, Languages and Programming, * Springer, 2003 * *

        @@ -19,21 +19,18 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public final class Skew implements ISuffixArrayBuilder -{ +public final class Skew implements ISuffixArrayBuilder { /** * Lexicographic order for pairs. */ - private final static boolean leq(int a1, int a2, int b1, int b2) - { + private final static boolean leq(int a1, int a2, int b1, int b2) { return (a1 < b1 || (a1 == b1 && a2 <= b2)); } /** * Lexicographic order for triples. */ - private final static boolean leq(int a1, int a2, int a3, int b1, int b2, int b3) - { + private final static boolean leq(int a1, int a2, int a3, int b1, int b2, int b3) { return (a1 < b1 || (a1 == b1 && leq(a2, a3, b2, b3))); } @@ -41,9 +38,8 @@ private final static boolean leq(int a1, int a2, int a3, int b1, int b2, int b3) * Stably sort indexes from src[0..n-1] to dst[0..n-1] with values in 0..K from v. A * constant offset of vi is added to indexes from src. */ - private final static void radixPass(int [] src, int [] dst, int [] v, int vi, - final int n, final int K, int start, int [] cnt) - { + private final static void radixPass(int[] src, int[] dst, int[] v, int vi, + final int n, final int K, int start, int[] cnt) { // check counter array's size. assert cnt.length >= K + 1; Arrays.fill(cnt, 0, K + 1, 0); @@ -53,8 +49,7 @@ private final static void radixPass(int [] src, int [] dst, int [] v, int vi, cnt[v[start + vi + src[i]]]++; // exclusive prefix sums - for (int i = 0, sum = 0; i <= K; i++) - { + for (int i = 0, sum = 0; i <= K; i++) { final int t = cnt[i]; cnt[i] = sum; sum += t; @@ -69,16 +64,15 @@ private final static void radixPass(int [] src, int [] dst, int [] v, int vi, * Find the suffix array SA of s[0..n-1] in {1..K}^n. require s[n] = s[n+1] = s[n+2] = * 0, n >= 2. 
*/ - static final int[] suffixArray(int [] s, int [] SA, int n, final int K, int start, int [] cnt) - { + static final int[] suffixArray(int[] s, int[] SA, int n, final int K, int start, int[] cnt) { final int n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2; - final int [] s12 = new int [n02 + 3]; + final int[] s12 = new int[n02 + 3]; s12[n02] = s12[n02 + 1] = s12[n02 + 2] = 0; - final int [] SA12 = new int [n02 + 3]; + final int[] SA12 = new int[n02 + 3]; SA12[n02] = SA12[n02 + 1] = SA12[n02 + 2] = 0; - final int [] s0 = new int [n0]; - final int [] SA0 = new int [n0]; + final int[] s0 = new int[n0]; + final int[] SA0 = new int[n0]; /* * generate positions of mod 1 and mod 2 suffixes the "+(n0-n1)" adds a dummy mod @@ -95,39 +89,31 @@ static final int[] suffixArray(int [] s, int [] SA, int n, final int K, int star // find lexicographic names of triples int name = 0, c0 = -1, c1 = -1, c2 = -1; - for (int i = 0; i < n02; i++) - { + for (int i = 0; i < n02; i++) { if (s[start + SA12[i]] != c0 || s[start + SA12[i] + 1] != c1 - || s[start + SA12[i] + 2] != c2) - { + || s[start + SA12[i] + 2] != c2) { name++; c0 = s[start + SA12[i]]; c1 = s[start + SA12[i] + 1]; c2 = s[start + SA12[i] + 2]; } - if ((SA12[i] % 3) == 1) - { + if ((SA12[i] % 3) == 1) { // left half s12[SA12[i] / 3] = name; - } - else - { + } else { // right half s12[SA12[i] / 3 + n0] = name; } } // recurse if names are not yet unique - if (name < n02) - { + if (name < n02) { cnt = suffixArray(s12, SA12, n02, name, start, cnt); // store unique names in s12 using the suffix array for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1; - } - else - { + } else { // generate the suffix array of s12 directly for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i; @@ -139,8 +125,7 @@ static final int[] suffixArray(int [] s, int [] SA, int n, final int K, int star radixPass(s0, SA0, s, 0, n0, K, start, cnt); // merge sorted SA0 suffixes and sorted SA12 suffixes - for (int p = 0, t = n0 - n1, k = 0; k < n; 
k++) - { + for (int p = 0, t = n0 - n1, k = 0; k < n; k++) { // pos of current offset 12 suffix final int i = (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2); // pos of current offset 0 suffix @@ -148,27 +133,21 @@ static final int[] suffixArray(int [] s, int [] SA, int n, final int K, int star if (SA12[t] < n0 ? leq(s[start + i], s12[SA12[t] + n0], s[start + j], s12[j / 3]) : leq(s[start + i], s[start + i + 1], s12[SA12[t] - n0 + 1], - s[start + j], s[start + j + 1], s12[j / 3 + n0])) - { + s[start + j], s[start + j + 1], s12[j / 3 + n0])) { // suffix from SA12 is smaller SA[k] = i; t++; - if (t == n02) - { + if (t == n02) { // done --- only SA0 suffixes left for (k++; p < n0; p++, k++) SA[k] = SA0[p]; } - } - else - { + } else { SA[k] = j; p++; - if (p == n0) - { + if (p == n0) { // done --- only SA12 suffixes left - for (k++; t < n02; t++, k++) - { + for (k++; t < n02; t++, k++) { SA[k] = (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2); } } @@ -181,14 +160,12 @@ static final int[] suffixArray(int [] s, int [] SA, int n, final int K, int star /** * Ensure array is large enough or reallocate (no copying). */ - private static final int [] ensureSize(int [] tab, int length) - { - if (tab.length < length) - { + private static final int[] ensureSize(int[] tab, int length) { + if (tab.length < length) { tab = null; - tab = new int [length]; + tab = new int[length]; } - + return tab; } @@ -205,33 +182,32 @@ static final int[] suffixArray(int [] s, int [] SA, int n, final int K, int star *

        * If the input contains zero or negative values, or has no extra trailing cells, * adapters can be used in the following way: - * + *

        *

              * return new {@link DensePositiveDecorator}(
              *      new {@link ExtraTrailingCellsDecorator}(
              *          new {@link Skew}(), 3));
              * 
        - * + * * @see ExtraTrailingCellsDecorator * @see DensePositiveDecorator */ @Override - public int [] buildSuffixArray(int [] input, int start, int length) - { + public int[] buildSuffixArray(int[] input, int start, int length) { Tools.assertAlways(input != null, "input must not be null"); Tools.assertAlways(length >= 2, "input length must be >= 2"); Tools.assertAlways(input.length >= start + length + 3, "no extra space after input end"); assert Tools.allPositive(input, start, length); final int alphabetSize = Tools.max(input, start, length); - final int [] SA = new int [length + 3]; + final int[] SA = new int[length + 3]; // Preserve the tail of the input (destroyed when constructing the array). - final int [] tail = new int [3]; + final int[] tail = new int[3]; System.arraycopy(input, start + length, tail, 0, 3); Arrays.fill(input, start + length, start + length + 3, 0); - suffixArray(input, SA, length, alphabetSize, start, new int [alphabetSize + 2]); + suffixArray(input, SA, length, alphabetSize, start, new int[alphabetSize + 2]); // Reconstruct the input's tail. System.arraycopy(tail, 0, input, start + length, 3); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java index c7788a2ec..78513047d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixArrays.java @@ -28,26 +28,23 @@ * @author Dawid Weiss (Carrot Search) * @author Anton Olsson for friprogramvarusyndikatet.se */ -public final class SuffixArrays -{ +public final class SuffixArrays { /** * Maximum required trailing space in the input array (certain algorithms need it). */ final static int MAX_EXTRA_TRAILING_SPACE = DeepShallow.OVERSHOOT; /* - * + * */ - private SuffixArrays() - { + private SuffixArrays() { // no instances. 
} /** * Create a suffix array for a given character sequence with the default algorithm. */ - public static int [] create(CharSequence s) - { + public static int[] create(CharSequence s) { return create(s, defaultAlgorithm()); } @@ -55,40 +52,36 @@ private SuffixArrays() * Create a suffix array for a given character sequence, using the provided suffix * array building strategy. */ - public static int [] create(CharSequence s, ISuffixArrayBuilder builder) - { + public static int[] create(CharSequence s, ISuffixArrayBuilder builder) { return new CharSequenceAdapter(builder).buildSuffixArray(s); } /** * Create a suffix array and an LCP array for a given character sequence. - * + * * @see #computeLCP(int[], int, int, int[]) */ - public static SuffixData createWithLCP(CharSequence s) - { + public static SuffixData createWithLCP(CharSequence s) { return createWithLCP(s, defaultAlgorithm()); } /** * Create a suffix array and an LCP array for a given character sequence, use the * given algorithm for building the suffix array. - * + * * @see #computeLCP(int[], int, int, int[]) */ - public static SuffixData createWithLCP(CharSequence s, ISuffixArrayBuilder builder) - { + public static SuffixData createWithLCP(CharSequence s, ISuffixArrayBuilder builder) { final CharSequenceAdapter adapter = new CharSequenceAdapter(builder); - final int [] sa = adapter.buildSuffixArray(s); - final int [] lcp = computeLCP(adapter.input, 0, s.length(), sa); + final int[] sa = adapter.buildSuffixArray(s); + final int[] lcp = computeLCP(adapter.input, 0, s.length(), sa); return new SuffixData(sa, lcp); } /** * Create a suffix array and an LCP array for a given input sequence of symbols. 
*/ - public static SuffixData createWithLCP(int [] input, int start, int length) - { + public static SuffixData createWithLCP(int[] input, int start, int length) { final ISuffixArrayBuilder builder = new DensePositiveDecorator( new ExtraTrailingCellsDecorator(defaultAlgorithm(), 3)); return createWithLCP(input, start, length, builder); @@ -98,11 +91,10 @@ public static SuffixData createWithLCP(int [] input, int start, int length) * Create a suffix array and an LCP array for a given input sequence of symbols and a * custom suffix array building strategy. */ - public static SuffixData createWithLCP(int [] input, int start, int length, - ISuffixArrayBuilder builder) - { - final int [] sa = builder.buildSuffixArray(input, start, length); - final int [] lcp = computeLCP(input, start, length, sa); + public static SuffixData createWithLCP(int[] input, int start, int length, + ISuffixArrayBuilder builder) { + final int[] sa = builder.buildSuffixArray(input, start, length); + final int[] lcp = computeLCP(input, start, length, sa); return new SuffixData(sa, lcp); } @@ -129,27 +121,21 @@ public static SuffixData createWithLCP(T[] input, ISuffixArrayBuilder builde * computation in suffix arrays and its applications. In Proc. 12th Symposium on Combinatorial * Pattern Matching (CPM ’01), pages 181–192. Springer-Verlag LNCS n. 2089, 2001. 
*/ - public static int [] computeLCP(int [] input, final int start, final int length, - int [] sa) - { - final int [] rank = new int [length]; + public static int[] computeLCP(int[] input, final int start, final int length, + int[] sa) { + final int[] rank = new int[length]; for (int i = 0; i < length; i++) rank[sa[i]] = i; int h = 0; - final int [] lcp = new int [length]; - for (int i = 0; i < length; i++) - { + final int[] lcp = new int[length]; + for (int i = 0; i < length; i++) { int k = rank[i]; - if (k == 0) - { + if (k == 0) { lcp[k] = -1; - } - else - { + } else { final int j = sa[k - 1]; while (i + h < length && j + h < length - && input[start + i + h] == input[start + j + h]) - { + && input[start + i + h] == input[start + j + h]) { h++; } lcp[k] = h; @@ -164,20 +150,17 @@ public static SuffixData createWithLCP(T[] input, ISuffixArrayBuilder builde * @return Return a new instance of the default algorithm for use in other methods. At * the moment {@link QSufSort} is used. */ - private static ISuffixArrayBuilder defaultAlgorithm() - { + private static ISuffixArrayBuilder defaultAlgorithm() { return new QSufSort(); } /** * Utility method converting all suffixes of a given sequence to a list of strings. 
*/ - public static List toString(CharSequence input, int [] suffixes) - { + public static List toString(CharSequence input, int[] suffixes) { final String full = input.toString(); final ArrayList result = new ArrayList<>(); - for (int i = 0; i < input.length(); i++) - { + for (int i = 0; i < input.length(); i++) { result.add(full.subSequence(suffixes[i], full.length())); } return result; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixData.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixData.java index 91de3df8b..6d44d1810 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixData.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/SuffixData.java @@ -2,29 +2,25 @@ /** * A holder structure for a suffix array and longest common prefix array of - * a given sequence. + * a given sequence. * * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public final class SuffixData -{ - private final int [] suffixArray; - private final int [] lcp; +public final class SuffixData { + private final int[] suffixArray; + private final int[] lcp; - SuffixData(int [] sa, int [] lcp) - { + SuffixData(int[] sa, int[] lcp) { this.suffixArray = sa; this.lcp = lcp; } - public int [] getSuffixArray() - { + public int[] getSuffixArray() { return suffixArray; } - public int [] getLCP() - { + public int[] getLCP() { return lcp; } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Tools.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Tools.java index fea26f726..00c135265 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Tools.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Tools.java @@ -6,10 +6,8 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -final class Tools -{ - private Tools() - { +final class Tools { 
+ private Tools() { // No instances. } @@ -17,12 +15,9 @@ private Tools() * Check if all symbols in the given range are greater than 0, return * true if so, false otherwise. */ - static final boolean allPositive(int [] input, int start, int length) - { - for (int i = length - 1, index = start; i >= 0; i--, index++) - { - if (input[index] <= 0) - { + static final boolean allPositive(int[] input, int start, int length) { + for (int i = length - 1, index = start; i >= 0; i--, index++) { + if (input[index] <= 0) { return false; } } @@ -33,16 +28,13 @@ static final boolean allPositive(int [] input, int start, int length) /** * Determine the maximum value in a slice of an array. */ - static final int max(int [] input, int start, int length) - { + static final int max(int[] input, int start, int length) { assert length >= 1; int max = input[start]; - for (int i = length - 2, index = start + 1; i >= 0; i--, index++) - { + for (int i = length - 2, index = start + 1; i >= 0; i--, index++) { final int v = input[index]; - if (v > max) - { + if (v > max) { max = v; } } @@ -53,16 +45,13 @@ static final int max(int [] input, int start, int length) /** * Determine the minimum value in a slice of an array. */ - static final int min(int [] input, int start, int length) - { + static final int min(int[] input, int start, int length) { assert length >= 1; int min = input[start]; - for (int i = length - 2, index = start + 1; i >= 0; i--, index++) - { + for (int i = length - 2, index = start + 1; i >= 0; i--, index++) { final int v = input[index]; - if (v < min) - { + if (v < min) { min = v; } } @@ -73,19 +62,15 @@ static final int min(int [] input, int start, int length) /** * Calculate minimum and maximum value for a slice of an array. 
*/ - static MinMax minmax(int [] input, final int start, final int length) - { + static MinMax minmax(int[] input, final int start, final int length) { int max = input[start]; int min = max; - for (int i = length - 2, index = start + 1; i >= 0; i--, index++) - { + for (int i = length - 2, index = start + 1; i >= 0; i--, index++) { final int v = input[index]; - if (v > max) - { + if (v > max) { max = v; } - if (v < min) - { + if (v < min) { min = v; } } @@ -96,14 +81,12 @@ static MinMax minmax(int [] input, final int start, final int length) /** * Throw {@link AssertionError} if a condition is false. This should * be called when the assertion must be always verified (as in the case of verifying - * the algorithm's preconditions). For other, internal assertions, one should use + * the algorithm's preconditions). For other, internal assertions, one should use * assert keyword so that such assertions can be disabled at run-time (for * performance reasons). */ - static final void assertAlways(boolean condition, String msg) - { - if (!condition) - { + static final void assertAlways(boolean condition, String msg) { + if (!condition) { throw new AssertionError(msg); } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Traversals.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Traversals.java index 1cdc99b19..6371431cb 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Traversals.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixarray/Traversals.java @@ -10,20 +10,18 @@ * @author Michał Nowak (Carrot Search) * @author Dawid Weiss (Carrot Search) */ -public final class Traversals -{ +public final class Traversals { /** * Visitor interface for post-order traversal methods in {@link Traversals}. 
*/ - public interface IPostOrderVisitor - { + public interface IPostOrderVisitor { /** * Visits a node in the (virtual) suffix tree, labeled with length * objects starting at start in the input sequence. - * - * @param start The node label's starting offset in the input sequence. + * + * @param start The node label's starting offset in the input sequence. * @param length The node label's length (number of symbols). - * @param leaf true if this node is a leaf. + * @param leaf true if this node is a leaf. */ public void visitNode(int start, int length, boolean leaf); } @@ -32,8 +30,7 @@ public interface IPostOrderVisitor * Visitor interface for post-order traversal methods that compute an aggregated value * during the traversal. */ - public interface IPostOrderComputingVisitor - { + public interface IPostOrderComputingVisitor { /** * Aggregate two values into the result. The aggregation function should be * symmetric, that is: value1 + value2 = value2 + value1. @@ -42,10 +39,10 @@ public interface IPostOrderComputingVisitor /** * Compute the initial value for a leaf node. - * - * @param saIndex Index of the leaf node in the suffix array. + * + * @param saIndex Index of the leaf node in the suffix array. * @param symbolIndex The node label's starting offset in the input sequence. - * @param length The node label's length (number of symbols). + * @param length The node label's length (number of symbols). * @return Returns the initial function value for the leaf node. */ public E leafValue(int saIndex, int symbolIndex, int length); @@ -53,11 +50,11 @@ public interface IPostOrderComputingVisitor /** * Visits a node in the (virtual) suffix tree, labeled with length * objects starting at start in the input sequence. - * - * @param start The node label's starting offset in the input sequence. + * + * @param start The node label's starting offset in the input sequence. * @param length The node label's length (number of symbols). - * @param leaf true if this node is a leaf. 
- * @param value Aggregated value for all sub-nodes of the given node. + * @param leaf true if this node is a leaf. + * @param value Aggregated value for all sub-nodes of the given node. */ public void visitNode(int start, int length, boolean leaf, E value); } @@ -72,15 +69,14 @@ public interface IPostOrderComputingVisitor * The algorithm implemented here is from Efficient Substring Traversal with Suffix * Arrays by Toru Kasai, Hiroki Arimura and Setsuo Arikawa, Dept of Informatics, * Kyushu University, Japan. - * + * * @param sequenceLength Input sequence length for the suffix array and LCP array. - * @param sa Suffix array. - * @param lcp Corresponding LCP array for a given suffix array. - * @param visitor Callback visitor. + * @param sa Suffix array. + * @param lcp Corresponding LCP array for a given suffix array. + * @param visitor Callback visitor. */ - public static void postorder(final int sequenceLength, int [] sa, int [] lcp, - IPostOrderVisitor visitor) - { + public static void postorder(final int sequenceLength, int[] sa, int[] lcp, + IPostOrderVisitor visitor) { assert sequenceLength <= sa.length && sequenceLength <= lcp.length : "Input sequence length larger than suffix array or the LCP."; final Deque stack = new ArrayDeque(); @@ -91,12 +87,10 @@ public static void postorder(final int sequenceLength, int [] sa, int [] lcp, // Process every leaf. int top_h; - for (int i = 0; i <= sequenceLength; i++) - { + for (int i = 0; i <= sequenceLength; i++) { final int h = (sequenceLength == i ? -1 : lcp[i]); - while (true) - { + while (true) { top_h = stack.peek(); if (top_h <= h) break; stack.pop(); @@ -108,14 +102,12 @@ public static void postorder(final int sequenceLength, int [] sa, int [] lcp, visitor.visitNode(sa[leaf ? -(top_i + 1) : top_i], top_h, leaf); } - if (top_h < h) - { + if (top_h < h) { stack.push(i); stack.push(h); } - if (i < sequenceLength) - { + if (i < sequenceLength) { // Mark leaf nodes in the stack. 
stack.push(-(i + 1)); stack.push(sequenceLength - sa[i]); @@ -133,17 +125,16 @@ public static void postorder(final int sequenceLength, int [] sa, int [] lcp, * The algorithm implemented here is from Efficient Substring Traversal with Suffix * Arrays by Toru Kasai, Hiroki Arimura and Setsuo Arikawa, Dept of Informatics, * Kyushu University, Japan. - * + * * @param sequenceLength Input sequence length for the suffix array and LCP array. - * @param sa Suffix array. - * @param lcp Corresponding LCP array for a given suffix array. - * @param visitor Callback visitor computing aggregate values when traversing the - * tree. - * @param epsilon "Zero" value (epsilon) for computations. + * @param sa Suffix array. + * @param lcp Corresponding LCP array for a given suffix array. + * @param visitor Callback visitor computing aggregate values when traversing the + * tree. + * @param epsilon "Zero" value (epsilon) for computations. */ - public static void postorder(final int sequenceLength, int [] sa, int [] lcp, - E epsilon, IPostOrderComputingVisitor visitor) - { + public static void postorder(final int sequenceLength, int[] sa, int[] lcp, + E epsilon, IPostOrderComputingVisitor visitor) { assert sequenceLength <= sa.length && sequenceLength <= lcp.length : "Input sequence length larger than suffix array or the LCP."; final Deque stack = new ArrayDeque(); @@ -157,13 +148,11 @@ public static void postorder(final int sequenceLength, int [] sa, int [] lcp // Process every leaf. int top_h; E top_c; - for (int i = 0; i <= sequenceLength; i++) - { + for (int i = 0; i <= sequenceLength; i++) { final int h = (sequenceLength == i ? 
-1 : lcp[i]); E ci = epsilon; - while (true) - { + while (true) { top_h = stack.peek(); if (top_h <= h) break; stack.pop(); @@ -179,21 +168,17 @@ public static void postorder(final int sequenceLength, int [] sa, int [] lcp top_c = values.get(values.size() - 1); } - if (top_h < h) - { + if (top_h < h) { stack.push(i); stack.push(h); values.add(ci); - } - else - { + } else { assert top_h == h; final int index = values.size() - 1; values.set(index, visitor.aggregate(ci, values.get(index))); } - if (i < sequenceLength) - { + if (i < sequenceLength) { // Mark leaf nodes in the stack. stack.push(-(i + 1)); stack.push(sequenceLength - sa[i]); diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/ActivePoint.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/ActivePoint.java index 9c81f8f62..b9d8a67bd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/ActivePoint.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/ActivePoint.java @@ -4,202 +4,196 @@ * Represents the Active Point used in Ukonnen's algorithm. This consists of the * triple active node, active edge and active length, which is used to identify * the point at which the next insertion should be considered. - * - * @author Max Garfinkel - * + * * @param + * @author Max Garfinkel */ -class ActivePoint> { - - private Node activeNode; - private Edge activeEdge; - private int activeLength; - private final Node root; - - /** - * Initialize the active point to the root of a suffix tree. This sets the - * active point to {root,null,0} - * - * @param root - */ - ActivePoint(Node root) { - activeNode = root; - activeEdge = null; - activeLength = 0; - this.root = root; - } - - /** - * Sets the active point to a new node, edge, length tripple. 
- * - * @param node - * @param edge - * @param length - */ - void setPosition(Node node, Edge edge, int length) { - activeNode = node; - activeEdge = edge; - activeLength = length; - } - - /** - * Sets the active edge. - * - * @param edge - * The edge to which we set the active edge. - */ - void setEdge(Edge edge) { - activeEdge = edge; - } - - /** - * Increments the active length. - */ - void incrementLength() { - activeLength++; - resetActivePointToTerminal(); - } - - /** - * Decrements the active length. - */ - void decrementLength() { - if (activeLength > 0) - activeLength--; - resetActivePointToTerminal(); - } - - /** - * - * @return True if the active point is the root node. False if not. - */ - boolean isRootNode() { - return activeNode.equals(root) && activeEdge == null - && activeLength == 0; - } - - /** - * - * @return True if active point is on a node. False if not. - */ - boolean isNode() { - return activeEdge == null && activeLength == 0; - } - - /** - * Retrieves the active node. - * - * @return The active node. - */ - Node getNode() { - return activeNode; - } - - /** - * - * @return True if the active point is on an edge. False if not. - */ - boolean isEdge() { - return activeEdge != null; - } - - /** - * Retrieves the current active edge. - * - * @return The active edge. - */ - Edge getEdge() { - return activeEdge; - } - - /** - * Retrieves the current active length. - * - * @return The active length. - */ - int getLength() { - return activeLength; - } - - /** - * Resets the active point after an insert. - * - * @param suffix - * The remaining suffix to be inserted. 
- */ - public void updateAfterInsert(Suffix suffix) { - if (activeNode == root && suffix.isEmpty()) { - activeNode = root; - activeEdge = null; - activeLength = 0; - } else if (activeNode == root) { - Object item = suffix.getStart(); - activeEdge = root.getEdgeStarting(item); - decrementLength(); - fixActiveEdgeAfterSuffixLink(suffix); - if (activeLength == 0) - activeEdge = null; - } else if (activeNode.hasSuffixLink()) { - activeNode = activeNode.getSuffixLink(); - findTrueActiveEdge(); - fixActiveEdgeAfterSuffixLink(suffix); - if (activeLength == 0) - activeEdge = null; - } else{ - activeNode = root; - findTrueActiveEdge(); - fixActiveEdgeAfterSuffixLink(suffix); - if (activeLength == 0) - activeEdge = null; - } - } - - /** - * Deal with the case when we follow a suffix link but the active length is - * greater than the new active edge length. In this situation we must walk - * down the tree updating the entire active point. - */ - private void fixActiveEdgeAfterSuffixLink(Suffix suffix) { - while (activeEdge != null && activeLength > activeEdge.getLength()) { - activeLength = activeLength - activeEdge.getLength(); - activeNode = activeEdge.getTerminal(); - Object item = suffix.getItemXFromEnd(activeLength + 1); - activeEdge = activeNode.getEdgeStarting(item); - } - resetActivePointToTerminal(); - } - - /** - * Finds the edge instance who's start item matches the current active edge - * start item but comes from the current active node. - */ - private void findTrueActiveEdge() { - if (activeEdge != null) { - Object item = activeEdge.getStartItem(); - activeEdge = activeNode.getEdgeStarting(item); - } - } - - /** - * Resizes the active length in the case where we are sitting on a terminal. - * - * @return true if reset occurs false otherwise. 
- */ - private boolean resetActivePointToTerminal() { - if (activeEdge != null && activeEdge.getLength() == activeLength - && activeEdge.isTerminating()) { - activeNode = activeEdge.getTerminal(); - activeEdge = null; - activeLength = 0; - return true; - } else { - return false; - } - } - - @Override - public String toString() { - return "{" + activeNode.toString() + ", " + activeEdge + ", " - + activeLength + "}"; - } +class ActivePoint> { + + private Node activeNode; + private Edge activeEdge; + private int activeLength; + private final Node root; + + /** + * Initialize the active point to the root of a suffix tree. This sets the + * active point to {root,null,0} + * + * @param root + */ + ActivePoint(Node root) { + activeNode = root; + activeEdge = null; + activeLength = 0; + this.root = root; + } + + /** + * Sets the active point to a new node, edge, length tripple. + * + * @param node + * @param edge + * @param length + */ + void setPosition(Node node, Edge edge, int length) { + activeNode = node; + activeEdge = edge; + activeLength = length; + } + + /** + * Sets the active edge. + * + * @param edge The edge to which we set the active edge. + */ + void setEdge(Edge edge) { + activeEdge = edge; + } + + /** + * Increments the active length. + */ + void incrementLength() { + activeLength++; + resetActivePointToTerminal(); + } + + /** + * Decrements the active length. + */ + void decrementLength() { + if (activeLength > 0) + activeLength--; + resetActivePointToTerminal(); + } + + /** + * @return True if the active point is the root node. False if not. + */ + boolean isRootNode() { + return activeNode.equals(root) && activeEdge == null + && activeLength == 0; + } + + /** + * @return True if active point is on a node. False if not. + */ + boolean isNode() { + return activeEdge == null && activeLength == 0; + } + + /** + * Retrieves the active node. + * + * @return The active node. 
+ */ + Node getNode() { + return activeNode; + } + + /** + * @return True if the active point is on an edge. False if not. + */ + boolean isEdge() { + return activeEdge != null; + } + + /** + * Retrieves the current active edge. + * + * @return The active edge. + */ + Edge getEdge() { + return activeEdge; + } + + /** + * Retrieves the current active length. + * + * @return The active length. + */ + int getLength() { + return activeLength; + } + + /** + * Resets the active point after an insert. + * + * @param suffix The remaining suffix to be inserted. + */ + public void updateAfterInsert(Suffix suffix) { + if (activeNode == root && suffix.isEmpty()) { + activeNode = root; + activeEdge = null; + activeLength = 0; + } else if (activeNode == root) { + Object item = suffix.getStart(); + activeEdge = root.getEdgeStarting(item); + decrementLength(); + fixActiveEdgeAfterSuffixLink(suffix); + if (activeLength == 0) + activeEdge = null; + } else if (activeNode.hasSuffixLink()) { + activeNode = activeNode.getSuffixLink(); + findTrueActiveEdge(); + fixActiveEdgeAfterSuffixLink(suffix); + if (activeLength == 0) + activeEdge = null; + } else { + activeNode = root; + findTrueActiveEdge(); + fixActiveEdgeAfterSuffixLink(suffix); + if (activeLength == 0) + activeEdge = null; + } + } + + /** + * Deal with the case when we follow a suffix link but the active length is + * greater than the new active edge length. In this situation we must walk + * down the tree updating the entire active point. 
+ */ + private void fixActiveEdgeAfterSuffixLink(Suffix suffix) { + while (activeEdge != null && activeLength > activeEdge.getLength()) { + activeLength = activeLength - activeEdge.getLength(); + activeNode = activeEdge.getTerminal(); + Object item = suffix.getItemXFromEnd(activeLength + 1); + activeEdge = activeNode.getEdgeStarting(item); + } + resetActivePointToTerminal(); + } + + /** + * Finds the edge instance who's start item matches the current active edge + * start item but comes from the current active node. + */ + private void findTrueActiveEdge() { + if (activeEdge != null) { + Object item = activeEdge.getStartItem(); + activeEdge = activeNode.getEdgeStarting(item); + } + } + + /** + * Resizes the active length in the case where we are sitting on a terminal. + * + * @return true if reset occurs false otherwise. + */ + private boolean resetActivePointToTerminal() { + if (activeEdge != null && activeEdge.getLength() == activeLength + && activeEdge.isTerminating()) { + activeNode = activeEdge.getTerminal(); + activeEdge = null; + activeLength = 0; + return true; + } else { + return false; + } + } + + @Override + public String toString() { + return "{" + activeNode.toString() + ", " + activeEdge + ", " + + activeLength + "}"; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Cursor.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Cursor.java index 339ebef32..cffc929a8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Cursor.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Cursor.java @@ -5,100 +5,98 @@ import java.util.HashSet; /** - * * @param * @param - * * @author Max Garfinkel */ -public class Cursor> { +public class Cursor> { + + private final SuffixTree tree; + private Node node; + private Edge edge; + private int length; + + + Cursor(SuffixTree tree) { + this.tree = tree; + node = tree.getRoot(); + edge = null; + length = 0; + } + + boolean 
proceedTo(T item) { + if (edge == null) { + Edge tmpEdge = node.getEdgeStarting(item); + if (tmpEdge != null) { + edge = tmpEdge; + length = 1; + return true; + } else { + return false; + } + } else if (edge.getLength() > length) { + T nextItem = edge.getItemAt(length); + if (nextItem != null && item.equals(nextItem)) { + length++; + return true; + } else { + return false; + } + } else { + Node terminal = edge.getTerminal(); + if (terminal == null) + return false; + else { + Edge tmpEdge = terminal.getEdgeStarting(item); + if (tmpEdge != null) { + edge = tmpEdge; + length = 1; + node = terminal; + return true; + } else { + return false; + } + } + } + } + + Collection> getSequenceTerminals() { + if (edge == null) { + return node.getSuffixTerminals(); + } else { + if ((edge.getLength() - 1 == length && !edge.isTerminating()) + || (edge.getItemAt(length).getClass().equals(SequenceTerminal.class)) && !edge.isTerminating()) { + Object seqTerminal = edge.getItemAt(length); + @SuppressWarnings("unchecked") + SequenceTerminal term = (SequenceTerminal) seqTerminal; + Collection> collection = new HashSet>(); + collection.add(term); + return collection; + } else { + Node terminal = edge.getTerminal(); + if (terminal == null) + return Collections.emptySet(); + else { + Collection> edges = terminal.getEdges(); + Collection> returnCollection = new HashSet>(); + for (Edge edge : edges) { + Object o = edge.getStartItem(); + if (o.getClass().equals(SequenceTerminal.class)) { + @SuppressWarnings("unchecked") + SequenceTerminal returnTerminal = (SequenceTerminal) o; + returnCollection.add(returnTerminal); + } + } + return returnCollection; + } + } + } + } + + void returnToRoot() { + node = tree.getRoot(); + edge = null; + length = 0; + } - private final SuffixTree tree; - private Node node; - private Edge edge; - private int length; - - - Cursor(SuffixTree tree){ - this.tree = tree; - node = tree.getRoot(); - edge = null; - length = 0; - } - - boolean proceedTo(T item){ - if(edge == 
null){ - Edge tmpEdge = node.getEdgeStarting(item); - if(tmpEdge != null){ - edge = tmpEdge; - length = 1; - return true; - }else{ - return false; - } - }else if(edge.getLength() > length){ - T nextItem = edge.getItemAt(length); - if(nextItem != null && item.equals(nextItem)){ - length++; - return true; - }else{ - return false; - } - }else{ - Node terminal = edge.getTerminal(); - if(terminal == null) - return false; - else{ - Edge tmpEdge = terminal.getEdgeStarting(item); - if(tmpEdge != null){ - edge = tmpEdge; - length = 1; - node = terminal; - return true; - }else{ - return false; - } - } - } - } - - Collection> getSequenceTerminals(){ - if(edge == null){ - return node.getSuffixTerminals(); - }else{ - if((edge.getLength()-1 == length && !edge.isTerminating()) - || (edge.getItemAt(length).getClass().equals(SequenceTerminal.class)) && !edge.isTerminating()){ - Object seqTerminal = edge.getItemAt(length); - @SuppressWarnings("unchecked") - SequenceTerminal term = (SequenceTerminal)seqTerminal; - Collection> collection = new HashSet>(); - collection.add(term); - return collection; - }else{ - Node terminal = edge.getTerminal(); - if(terminal == null) - return Collections.emptySet(); - else{ - Collection> edges = terminal.getEdges(); - Collection> returnCollection = new HashSet>(); - for(Edge edge : edges){ - Object o = edge.getStartItem(); - if(o.getClass().equals(SequenceTerminal.class)){ - @SuppressWarnings("unchecked") - SequenceTerminal returnTerminal = (SequenceTerminal)o; - returnCollection.add(returnTerminal); - } - } - return returnCollection; - } - } - } - } - - void returnToRoot(){ - node = tree.getRoot(); - edge = null; - length = 0; - } - } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Edge.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Edge.java index cad27f976..bbb5ba9c9 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Edge.java +++ 
b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Edge.java @@ -3,211 +3,198 @@ import java.util.Iterator; /** - * * @param * @param - * * @author Max Garfinkel */ class Edge> implements Iterable { - private final int start; - private int end = -1; - private final Node parentNode; - private final Sequence sequence; - - private Node terminal = null; - private SuffixTree tree = null; - - /** - * Create a new Edge object. - * - * @param start - * The position in the master sequence of the first item in this - * suffix. - * @param parent - * The parent {@link Node} - * @param sequence - * The master sequence which the {@link SuffixTree} indexes. - * @param tree - * The master {@link SuffixTree} containing the root element - * which this edge is a child of. - */ - Edge(int start, Node parent, Sequence sequence, SuffixTree tree) { - this.start = start; - this.parentNode = parent; - this.sequence = sequence; - this.tree = tree; - } - - /** - * Checks to see if the edge starts with the given item. - * - * @param item - * The possible start item. - * @return True if this edge starts with item. False if not. - */ - boolean isStarting(Object item) { - return sequence.getItem(start).equals(item); - } - - /** - * Insert the given suffix at the supplied active point. - * - * @param suffix - * The suffix to insert. - * @param activePoint - * The active point to insert it at. - * @return - */ - void insert(Suffix suffix, ActivePoint activePoint) { - Object item = suffix.getEndItem(); - Object nextItem = getItemAt(activePoint.getLength()); - if (item.equals(nextItem)) { - activePoint.incrementLength(); - } else { - split(suffix, activePoint); - suffix.decrement(); - activePoint.updateAfterInsert(suffix); - - if (suffix.isEmpty()) - return; - else - tree.insert(suffix); - } - } - - /** - * Splits the edge to enable the insertion of supplied suffix at the - * supplied active point. - * - * @param suffix - * The suffix to insert. 
- * @param activePoint - * The active point to insert it at. - */ - private void split(Suffix suffix, ActivePoint activePoint) { - Node breakNode = new Node(this, sequence, tree); - Edge newEdge = new Edge(suffix.getEndPosition()-1, breakNode, - sequence, tree); - breakNode.insert(newEdge); - Edge oldEdge = new Edge(start + activePoint.getLength(), - breakNode, sequence, tree); - oldEdge.end = end; - oldEdge.terminal = this.terminal; - breakNode.insert(oldEdge); - this.terminal = breakNode; - end = start + activePoint.getLength(); - tree.setSuffixLink(breakNode); - tree.incrementInsertCount(); - } - - /** - * Gets the index of the true end of the edge. - * - * @return The index of the end item, of this edge, in the original - * sequence. - */ - int getEnd() { - tree.getCurrentEnd(); - return end != -1 ? end : tree.getCurrentEnd(); - } - - /** - * Tests if this edge is terminates at a node. - * - * @return True if this edge ends at a node. False if not. - */ - boolean isTerminating() { - return terminal != null; - } - - /** - * Retrieves the length of this edge. - * - * @return - */ - int getLength() { - int realEnd = getEnd(); - return realEnd - start; - } - - /** - * Retrieves the terminating node of this edge if it has any, null if not. - * - * @return The terminating node if any exists, null otherwise. - */ - Node getTerminal() { - return terminal; - } - - /** - * Retrieves the item at given position within the current edge. - * - * @param position - * The index of the item to retrieve relative to the start of - * edge. - * @return The item at position. - * @throws IllegalArgumentException - * when the position exceeds the length of the current edge. 
- */ - @SuppressWarnings("unchecked") - T getItemAt(int position) { - if (position > getLength()) - throw new IllegalArgumentException("Index " + position - + " is greater than " + getLength() - + " - the length of this edge."); - return (T) sequence.getItem(start + position); - } - - /** - * Retrieves the starting item of this edge. - * - * @return The item at index 0 of this edge. - */ - @SuppressWarnings("unchecked") - T getStartItem() { - return (T) sequence.getItem(start); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - for (int i = start; i < getEnd(); i++) { - sb.append(sequence.getItem(i).toString()).append(", "); - if(sequence.getItem(i).getClass().equals(SequenceTerminal.class)) - break; - } - return sb.toString(); - } - - /** - * Retrieves an iterator that steps over the items in this edge. - * - * @return An iterator that walks this edge up to the end or terminating - * node. - */ - public Iterator iterator() { - return new Iterator() { - private int currentPosition = start; - private boolean hasNext = true; - - public boolean hasNext() { - return hasNext; - } - - @SuppressWarnings("unchecked") - public T next() { - if(end == -1) - hasNext = !sequence.getItem(currentPosition).getClass().equals(SequenceTerminal.class); - else - hasNext = currentPosition < getEnd()-1; - return (T) sequence.getItem(currentPosition++); - } - - public void remove() { - throw new UnsupportedOperationException( - "The remove method is not supported."); - } - }; - } + private final int start; + private int end = -1; + private final Node parentNode; + private final Sequence sequence; + + private Node terminal = null; + private SuffixTree tree = null; + + /** + * Create a new Edge object. + * + * @param start The position in the master sequence of the first item in this + * suffix. + * @param parent The parent {@link Node} + * @param sequence The master sequence which the {@link SuffixTree} indexes. 
+ * @param tree The master {@link SuffixTree} containing the root element + * which this edge is a child of. + */ + Edge(int start, Node parent, Sequence sequence, SuffixTree tree) { + this.start = start; + this.parentNode = parent; + this.sequence = sequence; + this.tree = tree; + } + + /** + * Checks to see if the edge starts with the given item. + * + * @param item The possible start item. + * @return True if this edge starts with item. False if not. + */ + boolean isStarting(Object item) { + return sequence.getItem(start).equals(item); + } + + /** + * Insert the given suffix at the supplied active point. + * + * @param suffix The suffix to insert. + * @param activePoint The active point to insert it at. + * @return + */ + void insert(Suffix suffix, ActivePoint activePoint) { + Object item = suffix.getEndItem(); + Object nextItem = getItemAt(activePoint.getLength()); + if (item.equals(nextItem)) { + activePoint.incrementLength(); + } else { + split(suffix, activePoint); + suffix.decrement(); + activePoint.updateAfterInsert(suffix); + + if (suffix.isEmpty()) + return; + else + tree.insert(suffix); + } + } + + /** + * Splits the edge to enable the insertion of supplied suffix at the + * supplied active point. + * + * @param suffix The suffix to insert. + * @param activePoint The active point to insert it at. + */ + private void split(Suffix suffix, ActivePoint activePoint) { + Node breakNode = new Node(this, sequence, tree); + Edge newEdge = new Edge(suffix.getEndPosition() - 1, breakNode, + sequence, tree); + breakNode.insert(newEdge); + Edge oldEdge = new Edge(start + activePoint.getLength(), + breakNode, sequence, tree); + oldEdge.end = end; + oldEdge.terminal = this.terminal; + breakNode.insert(oldEdge); + this.terminal = breakNode; + end = start + activePoint.getLength(); + tree.setSuffixLink(breakNode); + tree.incrementInsertCount(); + } + + /** + * Gets the index of the true end of the edge. 
+ * + * @return The index of the end item, of this edge, in the original + * sequence. + */ + int getEnd() { + tree.getCurrentEnd(); + return end != -1 ? end : tree.getCurrentEnd(); + } + + /** + * Tests if this edge is terminates at a node. + * + * @return True if this edge ends at a node. False if not. + */ + boolean isTerminating() { + return terminal != null; + } + + /** + * Retrieves the length of this edge. + * + * @return + */ + int getLength() { + int realEnd = getEnd(); + return realEnd - start; + } + + /** + * Retrieves the terminating node of this edge if it has any, null if not. + * + * @return The terminating node if any exists, null otherwise. + */ + Node getTerminal() { + return terminal; + } + + /** + * Retrieves the item at given position within the current edge. + * + * @param position The index of the item to retrieve relative to the start of + * edge. + * @return The item at position. + * @throws IllegalArgumentException when the position exceeds the length of the current edge. + */ + @SuppressWarnings("unchecked") + T getItemAt(int position) { + if (position > getLength()) + throw new IllegalArgumentException("Index " + position + + " is greater than " + getLength() + + " - the length of this edge."); + return (T) sequence.getItem(start + position); + } + + /** + * Retrieves the starting item of this edge. + * + * @return The item at index 0 of this edge. + */ + @SuppressWarnings("unchecked") + T getStartItem() { + return (T) sequence.getItem(start); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + for (int i = start; i < getEnd(); i++) { + sb.append(sequence.getItem(i).toString()).append(", "); + if (sequence.getItem(i).getClass().equals(SequenceTerminal.class)) + break; + } + return sb.toString(); + } + + /** + * Retrieves an iterator that steps over the items in this edge. + * + * @return An iterator that walks this edge up to the end or terminating + * node. 
+ */ + public Iterator iterator() { + return new Iterator() { + private int currentPosition = start; + private boolean hasNext = true; + + public boolean hasNext() { + return hasNext; + } + + @SuppressWarnings("unchecked") + public T next() { + if (end == -1) + hasNext = !sequence.getItem(currentPosition).getClass().equals(SequenceTerminal.class); + else + hasNext = currentPosition < getEnd() - 1; + return (T) sequence.getItem(currentPosition++); + } + + public void remove() { + throw new UnsupportedOperationException( + "The remove method is not supported."); + } + }; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Node.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Node.java index f39409a86..6618a0acb 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Node.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Node.java @@ -8,165 +8,154 @@ import java.util.Set; /** - * * @param * @param - * * @author Max Garfinkel */ -class Node> implements Iterable> { - private final Map> edges = new HashMap>(); - private final Edge incomingEdge; - private Set> sequenceTerminals = new HashSet>(); - private final Sequence sequence; - private final SuffixTree tree; - private Node link = null; - - /** - * Create a new node, for the supplied tree and sequence. - * - * @param incomingEdge - * The parent edge, unless this is a root node. - * @param sequence - * The sequence this tree is indexing. - * @param tree - * The tree to which this node belongs. - */ - Node(Edge incomingEdge, Sequence sequence, SuffixTree tree) { - this.incomingEdge = incomingEdge; - this.sequence = sequence; - this.tree = tree; - } - - /** - * Inserts the suffix at the given active point. - * - * @param suffix - * The suffix to insert. - * @param activePoint - * The active point to insert it at. 
- */ - @SuppressWarnings("unchecked") - void insert(Suffix suffix, ActivePoint activePoint) { - Object item = suffix.getEndItem(); - - if (edges.containsKey(item)) { - if (tree.isNotFirstInsert() && activePoint.getNode() != tree.getRoot()) - tree.setSuffixLink(activePoint.getNode()); - activePoint.setEdge(edges.get(item)); - activePoint.incrementLength(); - } else { - saveSequenceTerminal(item); - Edge newEdge = new Edge(suffix.getEndPosition()-1, this, - sequence, tree); - edges.put((T) suffix.getEndItem(), newEdge); - suffix.decrement(); - activePoint.updateAfterInsert(suffix); - - if(tree.isNotFirstInsert() && !this.equals(tree.getRoot())){ - tree.getLastNodeInserted().setSuffixLink(this); - } - if (suffix.isEmpty()) - return; - else - tree.insert(suffix); - } - } - - private void saveSequenceTerminal(Object item) { - if(item.getClass().equals(SequenceTerminal.class)){ - @SuppressWarnings("unchecked") - SequenceTerminal terminal = (SequenceTerminal) item; - sequenceTerminals.add(terminal); - } - } - - /** - * Inserts the given edge as a child of this node. The edge must not already - * exist as child or an IllegalArgumentException will be thrown. - * - * @param edge - * The edge to be inserted. - * @throws IllegalArgumentException - * This is thrown when the edge already exists as an out bound - * edge of this node. - */ - void insert(Edge edge) { - if (edges.containsKey(edge.getStartItem())) - throw new IllegalArgumentException("Item " + edge.getStartItem() - + " already exists in node " + toString()); - edges.put(edge.getStartItem(), edge); - } - - /** - * Retrieves the edge starting with item or null if none exists. - * - * @param item - * @return The edge extending from this node starting with item. - */ - Edge getEdgeStarting(Object item) { - return edges.get(item); - } - - /** - * True if the node has a suffix link extending from it. - * - * @return True if node has suffix link. False if not. 
- */ - boolean hasSuffixLink() { - return link != null; - } - - /** - * Gets the number of edges extending from this node. - * - * @return The count of the number edges extending from this node. - */ - int getEdgeCount() { - return edges.size(); - } - - /** - * @return An iterator which iterates over the child edges. No order is - * guaranteed. - */ - public Iterator> iterator() { - return edges.values().iterator(); - } - - /** - * - * @return The node that this nodes suffix link points to if it has one. - * Null if not. - */ - Node getSuffixLink() { - return link; - } - - /** - * Sets the suffix link of this node to point to the supplied node. - * - * @param node - * The node this suffix link should point to. - */ - void setSuffixLink(Node node) { - link = node; - } - - @Override - public String toString() { - if (incomingEdge == null) - return "root"; - else { - return "end of edge [" + incomingEdge.toString() + "]"; - } - } - - public Collection> getSuffixTerminals() { - return sequenceTerminals; - } - - public Collection> getEdges(){ - return edges.values(); - } +class Node> implements Iterable> { + private final Map> edges = new HashMap>(); + private final Edge incomingEdge; + private Set> sequenceTerminals = new HashSet>(); + private final Sequence sequence; + private final SuffixTree tree; + private Node link = null; + + /** + * Create a new node, for the supplied tree and sequence. + * + * @param incomingEdge The parent edge, unless this is a root node. + * @param sequence The sequence this tree is indexing. + * @param tree The tree to which this node belongs. + */ + Node(Edge incomingEdge, Sequence sequence, SuffixTree tree) { + this.incomingEdge = incomingEdge; + this.sequence = sequence; + this.tree = tree; + } + + /** + * Inserts the suffix at the given active point. + * + * @param suffix The suffix to insert. + * @param activePoint The active point to insert it at. 
+ */ + @SuppressWarnings("unchecked") + void insert(Suffix suffix, ActivePoint activePoint) { + Object item = suffix.getEndItem(); + + if (edges.containsKey(item)) { + if (tree.isNotFirstInsert() && activePoint.getNode() != tree.getRoot()) + tree.setSuffixLink(activePoint.getNode()); + activePoint.setEdge(edges.get(item)); + activePoint.incrementLength(); + } else { + saveSequenceTerminal(item); + Edge newEdge = new Edge(suffix.getEndPosition() - 1, this, + sequence, tree); + edges.put((T) suffix.getEndItem(), newEdge); + suffix.decrement(); + activePoint.updateAfterInsert(suffix); + + if (tree.isNotFirstInsert() && !this.equals(tree.getRoot())) { + tree.getLastNodeInserted().setSuffixLink(this); + } + if (suffix.isEmpty()) + return; + else + tree.insert(suffix); + } + } + + private void saveSequenceTerminal(Object item) { + if (item.getClass().equals(SequenceTerminal.class)) { + @SuppressWarnings("unchecked") + SequenceTerminal terminal = (SequenceTerminal) item; + sequenceTerminals.add(terminal); + } + } + + /** + * Inserts the given edge as a child of this node. The edge must not already + * exist as child or an IllegalArgumentException will be thrown. + * + * @param edge The edge to be inserted. + * @throws IllegalArgumentException This is thrown when the edge already exists as an out bound + * edge of this node. + */ + void insert(Edge edge) { + if (edges.containsKey(edge.getStartItem())) + throw new IllegalArgumentException("Item " + edge.getStartItem() + + " already exists in node " + toString()); + edges.put(edge.getStartItem(), edge); + } + + /** + * Retrieves the edge starting with item or null if none exists. + * + * @param item + * @return The edge extending from this node starting with item. + */ + Edge getEdgeStarting(Object item) { + return edges.get(item); + } + + /** + * True if the node has a suffix link extending from it. + * + * @return True if node has suffix link. False if not. 
+ */ + boolean hasSuffixLink() { + return link != null; + } + + /** + * Gets the number of edges extending from this node. + * + * @return The count of the number edges extending from this node. + */ + int getEdgeCount() { + return edges.size(); + } + + /** + * @return An iterator which iterates over the child edges. No order is + * guaranteed. + */ + public Iterator> iterator() { + return edges.values().iterator(); + } + + /** + * @return The node that this nodes suffix link points to if it has one. + * Null if not. + */ + Node getSuffixLink() { + return link; + } + + /** + * Sets the suffix link of this node to point to the supplied node. + * + * @param node The node this suffix link should point to. + */ + void setSuffixLink(Node node) { + link = node; + } + + @Override + public String toString() { + if (incomingEdge == null) + return "root"; + else { + return "end of edge [" + incomingEdge.toString() + "]"; + } + } + + public Collection> getSuffixTerminals() { + return sequenceTerminals; + } + + public Collection> getEdges() { + return edges.values(); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Sequence.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Sequence.java index f464c54d4..32a853196 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Sequence.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Sequence.java @@ -10,92 +10,92 @@ * generic suffix tree implementation. This object automatically appends a * terminating item to the end of the instance which is included in all * operations. - * - * @author Max Garfinkel - * + * * @param + * @author Max Garfinkel */ public class Sequence> implements Iterable { - private List masterSequence = new ArrayList(); - - Sequence(){ - } - - /** - * Initialize the sequence. 
- * - * @param sequence - */ - Sequence(S sequence) { - for(Object item : sequence) - masterSequence.add(item); - SequenceTerminal sequenceTerminal = new SequenceTerminal(sequence); - masterSequence.add(sequenceTerminal); - } - - /** - * Retrieve the item at the position specified by index. - * - * @param index - * @return - */ - Object getItem(int index) { - return masterSequence.get(index); - } - - /** - * Adds a Sequence to the suffix tree. - * @param sequence - */ - void add(S sequence){ - for(I item : sequence){ - masterSequence.add(item); - } - SequenceTerminal terminal = new SequenceTerminal(sequence); - masterSequence.add(terminal); - } - - /** - * Retrieves an iterator for the sequence. - */ - public Iterator iterator() { - return new Iterator() { - - int currentPosition = 0; - - public boolean hasNext() { - return masterSequence.size() > currentPosition; - } - - public Object next() { - if (currentPosition <= masterSequence.size()) - return masterSequence.get(currentPosition++); - else { - return null; - } - } - - public void remove() { - throw new UnsupportedOperationException( - "Remove is not supported."); - - } - - }; - } - - int getLength(){ - return masterSequence.size(); - } - - public String toString(){ - StringBuilder sb = new StringBuilder("Sequence = ["); - for(Object i : masterSequence){ - sb.append(i).append(", "); - } - sb.append("]"); - return sb.toString(); - } - + private List masterSequence = new ArrayList(); + + Sequence() { + } + + /** + * Initialize the sequence. + * + * @param sequence + */ + Sequence(S sequence) { + for (Object item : sequence) + masterSequence.add(item); + SequenceTerminal sequenceTerminal = new SequenceTerminal(sequence); + masterSequence.add(sequenceTerminal); + } + + /** + * Retrieve the item at the position specified by index. + * + * @param index + * @return + */ + Object getItem(int index) { + return masterSequence.get(index); + } + + /** + * Adds a Sequence to the suffix tree. 
+ * + * @param sequence + */ + void add(S sequence) { + for (I item : sequence) { + masterSequence.add(item); + } + SequenceTerminal terminal = new SequenceTerminal(sequence); + masterSequence.add(terminal); + } + + /** + * Retrieves an iterator for the sequence. + */ + public Iterator iterator() { + return new Iterator() { + + int currentPosition = 0; + + public boolean hasNext() { + return masterSequence.size() > currentPosition; + } + + public Object next() { + if (currentPosition <= masterSequence.size()) + return masterSequence.get(currentPosition++); + else { + return null; + } + } + + public void remove() { + throw new UnsupportedOperationException( + "Remove is not supported."); + + } + + }; + } + + int getLength() { + return masterSequence.size(); + } + + public String toString() { + StringBuilder sb = new StringBuilder("Sequence = ["); + for (Object i : masterSequence) { + sb.append(i).append(", "); + } + sb.append("]"); + return sb.toString(); + } + } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SequenceTerminal.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SequenceTerminal.java index 69856a42b..1cd37536a 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SequenceTerminal.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SequenceTerminal.java @@ -2,37 +2,36 @@ /** * Represents the terminating item of a sequence. 
- * + * * @author Max Garfinkel - * */ class SequenceTerminal { - private final S sequence; - - SequenceTerminal(S sequence){ - this.sequence = sequence; - } - - @SuppressWarnings("unchecked") - @Override - public boolean equals(Object o) { - if(o == null || o.getClass() != this.getClass()) - return false; - return ((SequenceTerminal)o).sequence.equals(this.sequence); - } - - public int hashCode(){ - return sequence.hashCode(); - } - - @Override - public String toString() { - return "$"+sequence.toString()+"$"; - } - - public S getSequence(){ - return sequence; - } + private final S sequence; + + SequenceTerminal(S sequence) { + this.sequence = sequence; + } + + @SuppressWarnings("unchecked") + @Override + public boolean equals(Object o) { + if (o == null || o.getClass() != this.getClass()) + return false; + return ((SequenceTerminal) o).sequence.equals(this.sequence); + } + + public int hashCode() { + return sequence.hashCode(); + } + + @Override + public String toString() { + return "$" + sequence.toString() + "$"; + } + + public S getSequence() { + return sequence; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Suffix.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Suffix.java index 4da0441e3..f80a2f0e6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Suffix.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Suffix.java @@ -2,156 +2,153 @@ /** * Represents the remaining suffix to be inserted during suffix tree - * construction. This is essentially a start and end pointer into the + * construction. This is essentially a start and end pointer into the * underlying sequence. This is like a kind of sliding window where the head * can never fall behind the tail, and the tail can never fall behind the head. 
- * - * @author Max Garfinkel - * + * * @param + * @author Max Garfinkel */ -class Suffix> { - private int start; - private int end; - private Sequence sequence; +class Suffix> { + private int start; + private int end; + private Sequence sequence; + + /** + * Construct a subsequence of sequence. The subsequence will be a suffix of + * the sequence UP TO the point in the sequence we have reached whilst + * running Ukonnen's algorithm. In this sense it is not a true suffix of the + * sequence but only a suffix of the portion of the sequence we have so far + * parsed. + * + * @param start The start position of the suffix within the sequence + * @param end The end position of the suffix within the sequence + * @param sequence The main sequence + */ + public Suffix(int start, int end, Sequence sequence) { + testStartAndEndValues(start, end); + testStartEndAgainstSequenceLength(start, end, sequence.getLength()); + this.start = start; + this.end = end; + this.sequence = sequence; + } + + private void testStartEndAgainstSequenceLength(int start, int end, int sequenceLength) { + if (start > sequenceLength || end > sequenceLength) + throw new IllegalArgumentException("Suffix start and end must be less than or equal to sequence length"); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("[("); + sb.append(start).append(", ").append(end).append(")"); + int end = getEndPosition(); + for (int i = start; i < end; i++) { + sb.append(sequence.getItem(i)).append(","); + } + sb.append("]"); + return sb.toString(); + } + + /** + * @return The position in the master sequence of the end item in this + * suffix. This value is inclusive, thus and end of 0 implies the + * suffix contains only the item at sequence[0] + */ + int getEndPosition() { + return end; + } - /** - * Construct a subsequence of sequence. The subsequence will be a suffix of - * the sequence UP TO the point in the sequence we have reached whilst - * running Ukonnen's algorithm. 
In this sense it is not a true suffix of the - * sequence but only a suffix of the portion of the sequence we have so far - * parsed. - * @param start The start position of the suffix within the sequence - * @param end The end position of the suffix within the sequence - * @param sequence The main sequence - */ - public Suffix(int start, int end, Sequence sequence) { - testStartAndEndValues(start, end); - testStartEndAgainstSequenceLength(start, end, sequence.getLength()); - this.start = start; - this.end = end; - this.sequence = sequence; - } - - private void testStartEndAgainstSequenceLength(int start, int end, int sequenceLength){ - if(start > sequenceLength || end > sequenceLength) - throw new IllegalArgumentException("Suffix start and end must be less than or equal to sequence length"); - } + /** + * Get the end item of this suffix. + * + * @return The end item of sequence + */ + Object getEndItem() { + if (isEmpty()) + return null; + return sequence.getItem(end - 1); + } - @Override - public String toString() { - StringBuilder sb = new StringBuilder("[("); - sb.append(start).append(", ").append(end).append(")"); - int end = getEndPosition(); - for (int i = start; i < end; i++) { - sb.append(sequence.getItem(i)).append(","); - } - sb.append("]"); - return sb.toString(); - } + /** + * Get the start of this suffix. + * + * @return + */ + Object getStart() { + if (isEmpty()) + return null; + return sequence.getItem(start); + } - /** - * - * @return The position in the master sequence of the end item in this - * suffix. This value is inclusive, thus and end of 0 implies the - * suffix contains only the item at sequence[0] - */ - int getEndPosition() { - return end; - } + /** + * Decrement the length of this suffix. This is done by incrementing the + * start position. This is reducing its length from the back. + */ + void decrement() { + if (start == end) + increment(); + start++; + } - /** - * Get the end item of this suffix. 
- * - * @return The end item of sequence - */ - Object getEndItem() { - if(isEmpty()) - return null; - return sequence.getItem(end-1); - } + /** + * Increments the length of the suffix by incrementing the end position. The + * effectivly moves the suffix forward, along the master sequence. + */ + void increment() { + end++; + if (end > sequence.getLength()) + throw new IndexOutOfBoundsException("Incremented suffix beyond end of sequence"); - /** - * Get the start of this suffix. - * - * @return - */ - Object getStart() { - if(isEmpty()) - return null; - return sequence.getItem(start); - } + } - /** - * Decrement the length of this suffix. This is done by incrementing the - * start position. This is reducing its length from the back. - */ - void decrement() { - if(start==end) - increment(); - start++; - } + /** + * Indicates if the suffix is empty. + * + * @return + */ + boolean isEmpty() { + return start >= end || end > sequence.getLength(); + } - /** - * Increments the length of the suffix by incrementing the end position. The - * effectivly moves the suffix forward, along the master sequence. - */ - void increment() { - end++; - if(end > sequence.getLength()) - throw new IndexOutOfBoundsException("Incremented suffix beyond end of sequence"); - - } + /** + * Retrieves the count of remaining items in the suffix. + * + * @return The number of items in the suffix. + */ + int getRemaining() { + if (isEmpty()) + return 0; + else + return (end - start); + } - /** - * Indicates if the suffix is empty. - * - * @return - */ - boolean isEmpty() { - return start >= end || end > sequence.getLength(); - } + /** + * Retrieves the item the given distance from the end of the suffix. + * + * @param distanceFromEnd The distance from the end. + * @return The item the given distance from the end. + * @throws IllegalArgumentException if the distance from end is greater than the length of the + * suffix. 
+ */ + public Object getItemXFromEnd(int distanceFromEnd) { + if ((end - (distanceFromEnd)) < start) { + throw new IllegalArgumentException(distanceFromEnd + + " extends before the start of this suffix: "); + } + return sequence.getItem(end - distanceFromEnd); + } - /** - * Retrieves the count of remaining items in the suffix. - * - * @return The number of items in the suffix. - */ - int getRemaining() { - if(isEmpty()) - return 0; - else - return (end - start); - } + void reset(int start, int end) { + testStartAndEndValues(start, end); + this.start = start; + this.end = end; + } - /** - * Retrieves the item the given distance from the end of the suffix. - * - * @param distanceFromEnd - * The distance from the end. - * @return The item the given distance from the end. - * @throws IllegalArgumentException - * if the distance from end is greater than the length of the - * suffix. - */ - public Object getItemXFromEnd(int distanceFromEnd) { - if ((end - (distanceFromEnd)) < start){ - throw new IllegalArgumentException(distanceFromEnd - + " extends before the start of this suffix: "); - } - return sequence.getItem(end - distanceFromEnd); - } - - void reset(int start, int end){ - testStartAndEndValues(start, end); - this.start = start; - this.end = end; - } - - private void testStartAndEndValues(int start, int end){ - if(start < 0 || end < 0) - throw new IllegalArgumentException("You cannot set a suffix start or end to less than zero."); - if(end < start) - throw new IllegalArgumentException("A suffix end position cannot be less than its start position."); - } + private void testStartAndEndValues(int start, int end) { + if (start < 0 || end < 0) + throw new IllegalArgumentException("You cannot set a suffix start or end to less than zero."); + if (end < start) + throw new IllegalArgumentException("A suffix end position cannot be less than its start position."); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java 
b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java index 1c12fea7b..bd3c451a5 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/SuffixTree.java @@ -2,170 +2,164 @@ /** * A suffix tree implementation using Ukkonen's algorithm capable of generating a generialised suffix tree. - * - * The type of both character and the word can be specified, and we call these items + *

        + * The type of both character and the word can be specified, and we call these items * and sequences respectively. - * + * + * @param The type of the item within the sequence. + * @param The sequence type, which must iterate over items of type I * @author Max Garfinkel - * - * @param - * The type of the item within the sequence. - * @param - * The sequence type, which must iterate over items of type I */ -public class SuffixTree> { - - private final Node root; - private final Sequence sequence; - - private Suffix suffix; - private final ActivePoint activePoint; - private int currentEnd = 0; - private int insertsThisStep = 0; - private Node lastNodeInserted = null; - - /** - * Constructs an empty suffix tree. - */ - public SuffixTree(){ - sequence = new Sequence<>(); - root = new Node<>(null, this.sequence, this); - activePoint = new ActivePoint<>(root); - } - - /** - * Construct and represent a suffix tree representation of the given - * sequence using Ukkonen's algorithm. - * - * @param sequenceArray - * the array of items for which we are going to generate a suffix - * tree. - */ - public SuffixTree(S sequenceArray) { - sequence = new Sequence<>(sequenceArray); - root = new Node<>(null, this.sequence, this); - activePoint = new ActivePoint<>(root); - suffix = new Suffix<>(0, 0, this.sequence); - extendTree(0,sequence.getLength()); - } - - /** - * Add a sequence to the suffix tree. It is immediately processed - * and added to the tree. - * @param sequence A sequence to be added. - */ - public void add(S sequence){ - int start = currentEnd; - this.sequence.add(sequence); - suffix = new Suffix<>(currentEnd,currentEnd,this.sequence); - activePoint.setPosition(root, null, 0); - extendTree(start, this.sequence.getLength()); - } - - private void extendTree(int from, int to) { - for (int i = from; i < to; i++){ - suffix.increment(); - insertsThisStep = 0; - insert(suffix); - currentEnd++; - } - } - - - /** - * Inserts the given suffix into this tree. 
- * - * @param suffix - * The suffix to insert. - */ - void insert(Suffix suffix) { - if (activePoint.isNode()) { - Node node = activePoint.getNode(); - node.insert(suffix, activePoint); - } else if (activePoint.isEdge()) { - Edge edge = activePoint.getEdge(); - edge.insert(suffix, activePoint); - } - } - - /** - * Retrieves the point in the sequence for which all proceeding item have - * been inserted into the tree. - * - * @return The index of the current end point of tree. - */ - int getCurrentEnd() { - return currentEnd; - } - - /** - * Retrieves the root node for this tree. - * - * @return The root node of the tree. - */ - Node getRoot() { - return root; - } - - /** - * Increments the inserts counter for this step. - */ - void incrementInsertCount() { - insertsThisStep++; - } - - /** - * Indecates if there have been inserts during the current step. - * - * @return - */ - boolean isNotFirstInsert() { - return insertsThisStep > 0; - } - - /** - * Retrieves the last node to be inserted, null if none has. - * - * @return The last node inserted or null. - */ - Node getLastNodeInserted() { - return lastNodeInserted; - } - - /** - * Sets the last node inserted to the supplied node. - * - * @param node - * The node representing the last node inserted. - */ - void setLastNodeInserted(Node node) { - lastNodeInserted = node; - } - - /** - * Sets the suffix link of the last inserted node to point to the supplied - * node. This method checks the state of the step and only applies the - * suffix link if there is a previous node inserted during this step. This - * method also set the last node inserted to the supplied node after - * applying any suffix linking. - * - * @param node - * The node to which the last node inserted's suffix link should - * point to. 
- */ - void setSuffixLink(Node node) { - if (isNotFirstInsert()) { - lastNodeInserted.setSuffixLink(node); - } - lastNodeInserted = node; - } - - @Override - public String toString() { - return Utils.printTreeForGraphViz(this); - } - - Sequence getSequence(){ - return sequence; - } +public class SuffixTree> { + + private final Node root; + private final Sequence sequence; + + private Suffix suffix; + private final ActivePoint activePoint; + private int currentEnd = 0; + private int insertsThisStep = 0; + private Node lastNodeInserted = null; + + /** + * Constructs an empty suffix tree. + */ + public SuffixTree() { + sequence = new Sequence<>(); + root = new Node<>(null, this.sequence, this); + activePoint = new ActivePoint<>(root); + } + + /** + * Construct and represent a suffix tree representation of the given + * sequence using Ukkonen's algorithm. + * + * @param sequenceArray the array of items for which we are going to generate a suffix + * tree. + */ + public SuffixTree(S sequenceArray) { + sequence = new Sequence<>(sequenceArray); + root = new Node<>(null, this.sequence, this); + activePoint = new ActivePoint<>(root); + suffix = new Suffix<>(0, 0, this.sequence); + extendTree(0, sequence.getLength()); + } + + /** + * Add a sequence to the suffix tree. It is immediately processed + * and added to the tree. + * + * @param sequence A sequence to be added. + */ + public void add(S sequence) { + int start = currentEnd; + this.sequence.add(sequence); + suffix = new Suffix<>(currentEnd, currentEnd, this.sequence); + activePoint.setPosition(root, null, 0); + extendTree(start, this.sequence.getLength()); + } + + private void extendTree(int from, int to) { + for (int i = from; i < to; i++) { + suffix.increment(); + insertsThisStep = 0; + insert(suffix); + currentEnd++; + } + } + + + /** + * Inserts the given suffix into this tree. + * + * @param suffix The suffix to insert. 
+ */ + void insert(Suffix suffix) { + if (activePoint.isNode()) { + Node node = activePoint.getNode(); + node.insert(suffix, activePoint); + } else if (activePoint.isEdge()) { + Edge edge = activePoint.getEdge(); + edge.insert(suffix, activePoint); + } + } + + /** + * Retrieves the point in the sequence for which all proceeding item have + * been inserted into the tree. + * + * @return The index of the current end point of tree. + */ + int getCurrentEnd() { + return currentEnd; + } + + /** + * Retrieves the root node for this tree. + * + * @return The root node of the tree. + */ + Node getRoot() { + return root; + } + + /** + * Increments the inserts counter for this step. + */ + void incrementInsertCount() { + insertsThisStep++; + } + + /** + * Indecates if there have been inserts during the current step. + * + * @return + */ + boolean isNotFirstInsert() { + return insertsThisStep > 0; + } + + /** + * Retrieves the last node to be inserted, null if none has. + * + * @return The last node inserted or null. + */ + Node getLastNodeInserted() { + return lastNodeInserted; + } + + /** + * Sets the last node inserted to the supplied node. + * + * @param node The node representing the last node inserted. + */ + void setLastNodeInserted(Node node) { + lastNodeInserted = node; + } + + /** + * Sets the suffix link of the last inserted node to point to the supplied + * node. This method checks the state of the step and only applies the + * suffix link if there is a previous node inserted during this step. This + * method also set the last node inserted to the supplied node after + * applying any suffix linking. + * + * @param node The node to which the last node inserted's suffix link should + * point to. 
+ */ + void setSuffixLink(Node node) { + if (isNotFirstInsert()) { + lastNodeInserted.setSuffixLink(node); + } + lastNodeInserted = node; + } + + @Override + public String toString() { + return Utils.printTreeForGraphViz(this); + } + + Sequence getSequence() { + return sequence; + } } \ No newline at end of file diff --git a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java index 168343a91..1ad26c7d3 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/suffixtree/Utils.java @@ -10,94 +10,91 @@ */ public class Utils { - /** - * Appends a SequenceTerminal element to a supplied array. - * - * @param sequence - * The sequence to which we are applying the terminating object. - * @param terminatingObject - * The instance of the terminating object. - * @return A new sequence with an extra element at the end containing the - * terminating object. - */ - static > Object[] addTerminalToSequence(S sequence, - SequenceTerminal terminatingObject) { - - ArrayList list = new ArrayList(); - for(I item : sequence) - list.add(item); - - Object[] newSequence = new Object[list.size() + 1]; - - int i = 0; - for (; i < list.size(); i++) - newSequence[i] = list.get(i); - newSequence[i] = terminatingObject; - return newSequence; - } + /** + * Appends a SequenceTerminal element to a supplied array. + * + * @param sequence The sequence to which we are applying the terminating object. + * @param terminatingObject The instance of the terminating object. + * @return A new sequence with an extra element at the end containing the + * terminating object. 
+ */ + static > Object[] addTerminalToSequence(S sequence, + SequenceTerminal terminatingObject) { - static > String printTreeForGraphViz(SuffixTree tree) { - return printTreeForGraphViz(tree, true); - } - - /** - * Generates a .dot format string for visualizing a suffix tree. - * - * @param tree - * The tree for which we are generating a dot file. - * @return A string containing the contents of a .dot representation of the - * tree. - */ - static > String printTreeForGraphViz(SuffixTree tree, boolean printSuffixLinks) { - LinkedList> stack = new LinkedList<>(); - stack.add(tree.getRoot()); - Map, Integer> nodeMap = new HashMap<>(); - nodeMap.put(tree.getRoot(), 0); - int nodeId = 1; + ArrayList list = new ArrayList(); + for (I item : sequence) + list.add(item); - StringBuilder sb = new StringBuilder( - "\ndigraph suffixTree{\n node [shape=circle, label=\"\", fixedsize=true, width=0.1, height=0.1]\n"); + Object[] newSequence = new Object[list.size() + 1]; - while (stack.size() > 0) { - LinkedList> childNodes = new LinkedList<>(); - for (Node node : stack) { + int i = 0; + for (; i < list.size(); i++) + newSequence[i] = list.get(i); + newSequence[i] = terminatingObject; + return newSequence; + } - // List edges = node.getEdges(); - for (Edge edge : node) { - int id = nodeId++; - if (edge.isTerminating()) { - childNodes.push(edge.getTerminal()); - nodeMap.put(edge.getTerminal(), id); - } + static > String printTreeForGraphViz(SuffixTree tree) { + return printTreeForGraphViz(tree, true); + } - sb.append(nodeMap.get(node)).append(" -> ").append(id) - .append(" [label=\""); - - for (T item : edge) { - //if(item != null) - sb.append(item.toString()); - } - sb.append("\"];\n"); - } - } - stack = childNodes; - } - if(printSuffixLinks){ - // loop again to find all suffix links. 
- sb.append("edge [color=red]\n"); - for (Map.Entry, Integer> entry : nodeMap.entrySet()) { - Node n1 = entry.getKey(); - int id1 = entry.getValue(); - - if (n1.hasSuffixLink()) { - Node n2 = n1.getSuffixLink(); - Integer id2 = nodeMap.get(n2); - // if(id2 != null) - sb.append(id1).append(" -> ").append(id2).append(" ;\n"); - } - } - } - sb.append("}"); - return (sb.toString()); - } + /** + * Generates a .dot format string for visualizing a suffix tree. + * + * @param tree The tree for which we are generating a dot file. + * @return A string containing the contents of a .dot representation of the + * tree. + */ + static > String printTreeForGraphViz(SuffixTree tree, boolean printSuffixLinks) { + LinkedList> stack = new LinkedList<>(); + stack.add(tree.getRoot()); + Map, Integer> nodeMap = new HashMap<>(); + nodeMap.put(tree.getRoot(), 0); + int nodeId = 1; + + StringBuilder sb = new StringBuilder( + "\ndigraph suffixTree{\n node [shape=circle, label=\"\", fixedsize=true, width=0.1, height=0.1]\n"); + + while (stack.size() > 0) { + LinkedList> childNodes = new LinkedList<>(); + for (Node node : stack) { + + // List edges = node.getEdges(); + for (Edge edge : node) { + int id = nodeId++; + if (edge.isTerminating()) { + childNodes.push(edge.getTerminal()); + nodeMap.put(edge.getTerminal(), id); + } + + sb.append(nodeMap.get(node)).append(" -> ").append(id) + .append(" [label=\""); + + for (T item : edge) { + //if(item != null) + sb.append(item.toString()); + } + sb.append("\"];\n"); + } + } + stack = childNodes; + } + if (printSuffixLinks) { + // loop again to find all suffix links. 
+ sb.append("edge [color=red]\n"); + for (Map.Entry, Integer> entry : nodeMap.entrySet()) { + Node n1 = entry.getKey(); + int id1 = entry.getValue(); + + if (n1.hasSuffixLink()) { + Node n2 = n1.getSuffixLink(); + Integer id2 = nodeMap.get(n2); + // if(id2 != null) + sb.append(id1).append(" -> ").append(id2).append(" ;\n"); + } + } + } + sb.append("}"); + return (sb.toString()); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java index d822afb0a..5d7f450d3 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java @@ -38,7 +38,7 @@ /** * Greedy String Tiling. - *

        + *

        * Implements the Greedy String Tiling algorithm as proposed by Michael J. Wise in his paper: * "String Similarity via Greedy String Tiling and Running Karp-Rabin Matching" * @@ -48,143 +48,143 @@ */ public class GreedyStringTilingAlgorithm extends CollationAlgorithm.Base { - private final Comparator comparator; - private final int minimumTileLength; + private final Comparator comparator; + private final int minimumTileLength; - private final Equality equality = new Equality() { + private final Equality equality = new Equality() { - @Override - public boolean isEqual(VariantGraph.Vertex[] a, Token b) { - for (VariantGraph.Vertex vertex : a) { - final Set tokens = vertex.tokens(); - if (!tokens.isEmpty() && comparator.compare(tokens.stream().findFirst().get(), b) == 0) { - return true; + @Override + public boolean isEqual(VariantGraph.Vertex[] a, Token b) { + for (VariantGraph.Vertex vertex : a) { + final Set tokens = vertex.tokens(); + if (!tokens.isEmpty() && comparator.compare(tokens.stream().findFirst().get(), b) == 0) { + return true; + } + } + return false; } - } - return false; - } - }; - - public GreedyStringTilingAlgorithm(Comparator comparator, int minimumTileLength) { - this.comparator = comparator; - this.minimumTileLength = minimumTileLength; - } - - @Override - public void collate(VariantGraph graph, Iterable witness) { - final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray(); - final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); - - final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator()); - for (Match match : match(vertices, tokens, equality, minimumTileLength)) { - final SortedSet phrase = new TreeSet<>(); - for (int mc = 0, ml = match.length; mc < ml; mc++) { - final int rank = match.left + mc; - phrase.add(new VertexMatch.WithTokenIndex(vertices[rank][0], rank, match.right + mc)); - } - matches.add(phrase); + }; + + public 
GreedyStringTilingAlgorithm(Comparator comparator, int minimumTileLength) { + this.comparator = comparator; + this.minimumTileLength = minimumTileLength; } - merge(graph, vertices, tokens, matches); - } - - public static SortedSet match(A[] left, B[] right, Equality equality, int minimumTileLength) { - final boolean[] markedLeft = new boolean[left.length]; - final boolean[] markedRight = new boolean[right.length]; - - Arrays.fill(markedLeft, false); - Arrays.fill(markedRight, false); - - final SortedSet matches = new TreeSet<>(); - final Map> matchesByLength = new HashMap<>(); - - int maxMatchLength; - do { - maxMatchLength = minimumTileLength; - for (int rc = 0; rc < right.length; rc++) { - for (int lc = 0; lc < left.length; lc++) { - int matchLength = 0; - for (int tc = 0; - (tc + lc) < left.length && (tc + rc) < right.length && - !markedLeft[lc + tc] && !markedRight[rc + tc] && - equality.isEqual(left[lc + tc], right[rc + tc]); - tc++) { - matchLength++; - } - - if (matchLength >= maxMatchLength) { - List theMatches = matchesByLength.get(matchLength); - if (theMatches == null) { - matchesByLength.put(matchLength, theMatches = new ArrayList<>()); + @Override + public void collate(VariantGraph graph, Iterable witness) { + final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray(); + final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new); + + final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator()); + for (Match match : match(vertices, tokens, equality, minimumTileLength)) { + final SortedSet phrase = new TreeSet<>(); + for (int mc = 0, ml = match.length; mc < ml; mc++) { + final int rank = match.left + mc; + phrase.add(new VertexMatch.WithTokenIndex(vertices[rank][0], rank, match.right + mc)); } - theMatches.add(new Match(lc, rc)); - } - - if (matchLength > maxMatchLength) { - maxMatchLength = matchLength; - } + matches.add(phrase); } - } - for (Match match : 
matchesByLength.getOrDefault(maxMatchLength, Collections.emptyList())) { - boolean occluded = false; + merge(graph, vertices, tokens, matches); + } - for (int tc = 0; tc < maxMatchLength; tc++) { - if (markedLeft[match.left + tc] || markedRight[match.right + tc]) { - occluded = true; - break; - } - } + public static SortedSet match(A[] left, B[] right, Equality equality, int minimumTileLength) { + final boolean[] markedLeft = new boolean[left.length]; + final boolean[] markedRight = new boolean[right.length]; + + Arrays.fill(markedLeft, false); + Arrays.fill(markedRight, false); + + final SortedSet matches = new TreeSet<>(); + final Map> matchesByLength = new HashMap<>(); + + int maxMatchLength; + do { + maxMatchLength = minimumTileLength; + for (int rc = 0; rc < right.length; rc++) { + for (int lc = 0; lc < left.length; lc++) { + int matchLength = 0; + for (int tc = 0; + (tc + lc) < left.length && (tc + rc) < right.length && + !markedLeft[lc + tc] && !markedRight[rc + tc] && + equality.isEqual(left[lc + tc], right[rc + tc]); + tc++) { + matchLength++; + } + + if (matchLength >= maxMatchLength) { + List theMatches = matchesByLength.get(matchLength); + if (theMatches == null) { + matchesByLength.put(matchLength, theMatches = new ArrayList<>()); + } + theMatches.add(new Match(lc, rc)); + } + + if (matchLength > maxMatchLength) { + maxMatchLength = matchLength; + } + } + } - if (!occluded) { - for (int tc = 0; tc < maxMatchLength; tc++) { - markedLeft[match.left + tc] = true; - markedRight[match.right + tc] = true; - } - matches.add(new Match(match.left, match.right, maxMatchLength)); - } - } + for (Match match : matchesByLength.getOrDefault(maxMatchLength, Collections.emptyList())) { + boolean occluded = false; + + for (int tc = 0; tc < maxMatchLength; tc++) { + if (markedLeft[match.left + tc] || markedRight[match.right + tc]) { + occluded = true; + break; + } + } + + if (!occluded) { + for (int tc = 0; tc < maxMatchLength; tc++) { + markedLeft[match.left + tc] = 
true; + markedRight[match.right + tc] = true; + } + matches.add(new Match(match.left, match.right, maxMatchLength)); + } + } - } while (maxMatchLength > minimumTileLength); + } while (maxMatchLength > minimumTileLength); - return matches; - } + return matches; + } - public static interface Equality { - boolean isEqual(A a, B b); - } + public static interface Equality { + boolean isEqual(A a, B b); + } - public static class Match implements Comparable { - public final int left; - public final int right; - public final int length; + public static class Match implements Comparable { + public final int left; + public final int right; + public final int length; - public Match(int left, int right, int length) { - this.left = left; - this.right = right; - this.length = length; - } + public Match(int left, int right, int length) { + this.left = left; + this.right = right; + this.length = length; + } - public Match(int left, int right) { - this(left, right, 0); - } + public Match(int left, int right) { + this(left, right, 0); + } - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof Match) { - return (left == ((Match) obj).left); - } - return super.equals(obj); - } + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof Match) { + return (left == ((Match) obj).left); + } + return super.equals(obj); + } - @Override - public int hashCode() { - return left; - } + @Override + public int hashCode() { + return left; + } - @Override - public int compareTo(Match o) { - return left - o.left; + @Override + public int compareTo(Match o) { + return left - o.left; + } } - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java index 62a7492f6..a49078354 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java +++ 
b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java @@ -39,61 +39,61 @@ */ public class ParallelSegmentationApparatus { - public interface GeneratorCallback { + public interface GeneratorCallback { - void start(); + void start(); - void segment(SortedMap> contents); + void segment(SortedMap> contents); - void end(); - } - - public static void generate(VariantGraphRanking ranking, GeneratorCallback callback) { - - callback.start(); - - final Set allWitnesses = ranking.witnesses(); - for (Iterator>> rowIt = ranking.getByRank().entrySet().iterator(); rowIt.hasNext(); ) { - final Map.Entry> row = rowIt.next(); - final int rank = row.getKey(); - final Collection verticesOfRank = row.getValue(); - - - if (verticesOfRank.size() == 1 && verticesOfRank.stream().findFirst().map(VariantGraph.Vertex::tokens).map(Set::isEmpty).orElse(false)) { - // skip start and end vertex - continue; - } - - // spreading vertices with same rank according to their registered transpositions - final SortedMap> verticesByTranspositionRank = new TreeMap<>(); - for (VariantGraph.Vertex v : verticesOfRank) { - int transpositionRank = 0; - for (Set transposition : v.transpositions()) { - for (VariantGraph.Vertex tv : transposition) { - transpositionRank += (ranking.apply(tv).intValue() - rank); - } - } - verticesByTranspositionRank.computeIfAbsent(transpositionRank, r -> new LinkedList<>()).add(v); - } - - // render segments - verticesByTranspositionRank.values().forEach(vertices -> { - final Map> tokensByWitness = new HashMap<>(); - for (VariantGraph.Vertex v : vertices) { - for (Token token : v.tokens()) { - tokensByWitness.computeIfAbsent(token.getWitness(), w -> new LinkedList<>()).add(token); - } - } + void end(); + } - final SortedMap> cellContents = new TreeMap<>(Witness.SIGIL_COMPARATOR); - for (Witness witness : allWitnesses) { - cellContents.put(witness, Collections.unmodifiableCollection(tokensByWitness.getOrDefault(witness, 
Collections.emptyList()))); + public static void generate(VariantGraphRanking ranking, GeneratorCallback callback) { + + callback.start(); + + final Set allWitnesses = ranking.witnesses(); + for (Iterator>> rowIt = ranking.getByRank().entrySet().iterator(); rowIt.hasNext(); ) { + final Map.Entry> row = rowIt.next(); + final int rank = row.getKey(); + final Collection verticesOfRank = row.getValue(); + + + if (verticesOfRank.size() == 1 && verticesOfRank.stream().findFirst().map(VariantGraph.Vertex::tokens).map(Set::isEmpty).orElse(false)) { + // skip start and end vertex + continue; + } + + // spreading vertices with same rank according to their registered transpositions + final SortedMap> verticesByTranspositionRank = new TreeMap<>(); + for (VariantGraph.Vertex v : verticesOfRank) { + int transpositionRank = 0; + for (Set transposition : v.transpositions()) { + for (VariantGraph.Vertex tv : transposition) { + transpositionRank += (ranking.apply(tv).intValue() - rank); + } + } + verticesByTranspositionRank.computeIfAbsent(transpositionRank, r -> new LinkedList<>()).add(v); + } + + // render segments + verticesByTranspositionRank.values().forEach(vertices -> { + final Map> tokensByWitness = new HashMap<>(); + for (VariantGraph.Vertex v : vertices) { + for (Token token : v.tokens()) { + tokensByWitness.computeIfAbsent(token.getWitness(), w -> new LinkedList<>()).add(token); + } + } + + final SortedMap> cellContents = new TreeMap<>(Witness.SIGIL_COMPARATOR); + for (Witness witness : allWitnesses) { + cellContents.put(witness, Collections.unmodifiableCollection(tokensByWitness.getOrDefault(witness, Collections.emptyList()))); + } + + callback.segment(cellContents); + }); } - callback.segment(cellContents); - }); + callback.end(); } - - callback.end(); - } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java index 75425870f..bc04ac1af 100644 
--- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java @@ -41,90 +41,90 @@ * @author Gregor Middell * @author Ronald Haentjens Dekker */ -public class VariantGraphRanking implements Iterable>, Function { - - private final Map byVertex = new HashMap<>(); - private final SortedMap> byRank = new TreeMap<>(); - private final VariantGraph graph; - - VariantGraphRanking(VariantGraph graph) { - this.graph = graph; - } - - public static VariantGraphRanking of(VariantGraph graph) { - final VariantGraphRanking ranking = new VariantGraphRanking(graph); - for (VariantGraph.Vertex v : graph.vertices()) { - int rank = -1; - for (VariantGraph.Vertex incoming : v.incoming().keySet()) { - rank = Math.max(rank, ranking.byVertex.get(incoming)); - } - rank++; - ranking.byVertex.put(v, rank); - ranking.byRank.computeIfAbsent(rank, r -> new HashSet<>()).add(v); +public class VariantGraphRanking implements Iterable>, Function { + + private final Map byVertex = new HashMap<>(); + private final SortedMap> byRank = new TreeMap<>(); + private final VariantGraph graph; + + VariantGraphRanking(VariantGraph graph) { + this.graph = graph; } - return ranking; - } - - public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set vertices) { - final VariantGraphRanking ranking = new VariantGraphRanking(graph); - for (VariantGraph.Vertex v : graph.vertices()) { - int rank = -1; - for (VariantGraph.Vertex incoming : v.incoming().keySet()) { - rank = Math.max(rank, ranking.byVertex.get(incoming)); - } - if (vertices.contains(v)) { - rank++; - } - ranking.byVertex.put(v, rank); - ranking.byRank.computeIfAbsent(rank, r -> new HashSet<>()).add(v); + + public static VariantGraphRanking of(VariantGraph graph) { + final VariantGraphRanking ranking = new VariantGraphRanking(graph); + for (VariantGraph.Vertex v : graph.vertices()) { + int rank = -1; + for 
(VariantGraph.Vertex incoming : v.incoming().keySet()) { + rank = Math.max(rank, ranking.byVertex.get(incoming)); + } + rank++; + ranking.byVertex.put(v, rank); + ranking.byRank.computeIfAbsent(rank, r -> new HashSet<>()).add(v); + } + return ranking; } - return ranking; - } - public Set witnesses() { - return graph.witnesses(); - } + public static VariantGraphRanking ofOnlyCertainVertices(VariantGraph graph, Set vertices) { + final VariantGraphRanking ranking = new VariantGraphRanking(graph); + for (VariantGraph.Vertex v : graph.vertices()) { + int rank = -1; + for (VariantGraph.Vertex incoming : v.incoming().keySet()) { + rank = Math.max(rank, ranking.byVertex.get(incoming)); + } + if (vertices.contains(v)) { + rank++; + } + ranking.byVertex.put(v, rank); + ranking.byRank.computeIfAbsent(rank, r -> new HashSet<>()).add(v); + } + return ranking; + } - public Map getByVertex() { - return Collections.unmodifiableMap(byVertex); - } + public Set witnesses() { + return graph.witnesses(); + } - public Map> getByRank() { - return Collections.unmodifiableMap(byRank); - } + public Map getByVertex() { + return Collections.unmodifiableMap(byVertex); + } - public int size() { - return byRank.keySet().size(); - } + public Map> getByRank() { + return Collections.unmodifiableMap(byRank); + } - @Override - public Iterator> iterator() { - return byRank.values().iterator(); - } + public int size() { + return byRank.keySet().size(); + } - public List>> asTable() { - return byRank.values().stream() + @Override + public Iterator> iterator() { + return byRank.values().iterator(); + } + + public List>> asTable() { + return byRank.values().stream() .filter(rank -> rank.stream().anyMatch(v -> !v.tokens().isEmpty())) .map(vertices -> { - final SortedMap> row = new TreeMap<>(Witness.SIGIL_COMPARATOR); - vertices.stream().flatMap(v -> v.tokens().stream()).forEach(token -> row.computeIfAbsent(token.getWitness(), w -> new HashSet<>()).add(token)); - return row; + final SortedMap> row = new 
TreeMap<>(Witness.SIGIL_COMPARATOR); + vertices.stream().flatMap(v -> v.tokens().stream()).forEach(token -> row.computeIfAbsent(token.getWitness(), w -> new HashSet<>()).add(token)); + return row; }) .collect(Collectors.toList()); - } - - public VariantGraph.Vertex[][] asArray() { - final VariantGraph.Vertex[][] arr = new VariantGraph.Vertex[byRank.size()][]; - byRank.forEach((rank, vertices) -> arr[rank] = vertices.toArray(new Vertex[vertices.size()])); - return arr; - } - - @Override - public Integer apply(VariantGraph.Vertex vertex) { - return byVertex.get(vertex); - } - - public Comparator comparator() { - return Comparator.comparingInt(byVertex::get); - } + } + + public VariantGraph.Vertex[][] asArray() { + final VariantGraph.Vertex[][] arr = new VariantGraph.Vertex[byRank.size()][]; + byRank.forEach((rank, vertices) -> arr[rank] = vertices.toArray(new Vertex[vertices.size()])); + return arr; + } + + @Override + public Integer apply(VariantGraph.Vertex vertex) { + return byVertex.get(vertex); + } + + public Comparator comparator() { + return Comparator.comparingInt(byVertex::get); + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java index e6051605d..2e48c37d5 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java @@ -34,58 +34,58 @@ * @author Gregor Middell */ public class VariantGraphTraversal implements Iterable { - private final VariantGraph graph; - private final Set witnesses; + private final VariantGraph graph; + private final Set witnesses; - private VariantGraphTraversal(VariantGraph graph, Set witnesses) { - this.graph = graph; - this.witnesses = witnesses; - } + private VariantGraphTraversal(VariantGraph graph, Set witnesses) { + this.graph = graph; + this.witnesses = witnesses; + } 
- public static VariantGraphTraversal of(VariantGraph graph, Set witnesses) { - return new VariantGraphTraversal(graph, witnesses); - } + public static VariantGraphTraversal of(VariantGraph graph, Set witnesses) { + return new VariantGraphTraversal(graph, witnesses); + } - public static VariantGraphTraversal of(VariantGraph graph) { - return new VariantGraphTraversal(graph, null); - } + public static VariantGraphTraversal of(VariantGraph graph) { + return new VariantGraphTraversal(graph, null); + } - @Override - public Iterator iterator() { - return new Iterator() { + @Override + public Iterator iterator() { + return new Iterator() { - private final Map encountered = new HashMap<>(); - private final Queue queue = new ArrayDeque<>(); - private Optional next = Optional.of(graph.getStart()); + private final Map encountered = new HashMap<>(); + private final Queue queue = new ArrayDeque<>(); + private Optional next = Optional.of(graph.getStart()); - @Override - public boolean hasNext() { - return next.isPresent(); - } + @Override + public boolean hasNext() { + return next.isPresent(); + } - @Override - public VariantGraph.Vertex next() { - final VariantGraph.Vertex next = this.next.get(); - for (Map.Entry> edge : next.outgoing().entrySet()) { - if (witnesses != null && !edge.getValue().stream().anyMatch(witnesses::contains)) { - continue; - } - final VariantGraph.Vertex end = edge.getKey(); + @Override + public VariantGraph.Vertex next() { + final VariantGraph.Vertex next = this.next.get(); + for (Map.Entry> edge : next.outgoing().entrySet()) { + if (witnesses != null && !edge.getValue().stream().anyMatch(witnesses::contains)) { + continue; + } + final VariantGraph.Vertex end = edge.getKey(); - final long endEncountered = Optional.ofNullable(encountered.get(end)).orElse(0L); - final long endIncoming = end.incoming().entrySet().stream().filter(e -> witnesses == null || e.getValue().stream().anyMatch(witnesses::contains)).count(); + final long endEncountered = 
Optional.ofNullable(encountered.get(end)).orElse(0L); + final long endIncoming = end.incoming().entrySet().stream().filter(e -> witnesses == null || e.getValue().stream().anyMatch(witnesses::contains)).count(); - if (endIncoming == endEncountered) { - throw new IllegalStateException(String.format("Encountered cycle traversing %s to %s", edge, end)); - } else if ((endIncoming - endEncountered) == 1) { - queue.add(end); - } + if (endIncoming == endEncountered) { + throw new IllegalStateException(String.format("Encountered cycle traversing %s to %s", edge, end)); + } else if ((endIncoming - endEncountered) == 1) { + queue.add(end); + } - encountered.put(end, endEncountered + 1); - } - this.next = Optional.ofNullable(queue.poll()); - return next; - } - }; - } + encountered.put(end, endEncountered + 1); + } + this.next = Optional.ofNullable(queue.poll()); + return next; + } + }; + } } diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java index 7d6fa7760..8a4759800 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java @@ -29,80 +29,80 @@ import java.util.function.Predicate; /** -* @author Gregor Middell -*/ + * @author Gregor Middell + */ public abstract class VertexMatch implements Comparable { - public final VariantGraph.Vertex vertex; - public final int vertexRank; - - VertexMatch(VariantGraph.Vertex vertex, int vertexRank) { - this.vertex = vertex; - this.vertexRank = vertexRank; - } - - @Override - public int compareTo(VertexMatch o) { - return (vertexRank - o.vertexRank); - } - - @Override - public boolean equals(Object obj) { - if (obj != null && obj instanceof VertexMatch) { - return vertexRank == ((VertexMatch)obj).vertexRank; - } - return super.equals(obj); - } - - @Override - public int hashCode() { - return vertexRank; - } + public final 
VariantGraph.Vertex vertex; + public final int vertexRank; - public static Comparator> setComparator() { - return (o1, o2) -> o1.first().compareTo(o2.first()); - } - - /** - * @author Gregor Middell - */ - public static class WithToken extends VertexMatch { + VertexMatch(VariantGraph.Vertex vertex, int vertexRank) { + this.vertex = vertex; + this.vertexRank = vertexRank; + } - public final Token token; + @Override + public int compareTo(VertexMatch o) { + return (vertexRank - o.vertexRank); + } - public WithToken(VariantGraph.Vertex vertex, int vertexRank, Token token) { - super(vertex, vertexRank); - this.token = token; + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof VertexMatch) { + return vertexRank == ((VertexMatch) obj).vertexRank; + } + return super.equals(obj); } @Override - public String toString() { - return "{" + vertex + " -> " + token + "}"; + public int hashCode() { + return vertexRank; } - } - /** - * @author Gregor Middell - */ - public static class WithTokenIndex extends VertexMatch { + public static Comparator> setComparator() { + return (o1, o2) -> o1.first().compareTo(o2.first()); + } - public final int token; + /** + * @author Gregor Middell + */ + public static class WithToken extends VertexMatch { - public WithTokenIndex(VariantGraph.Vertex vertex, int vertexRank, int token) { - super(vertex, vertexRank); - this.token = token; + public final Token token; + + public WithToken(VariantGraph.Vertex vertex, int vertexRank, Token token) { + super(vertex, vertexRank); + this.token = token; + } + + @Override + public String toString() { + return "{" + vertex + " -> " + token + "}"; + } } - @Override - public String toString() { - return "{" + vertex + " -> " + token + "}"; + /** + * @author Gregor Middell + */ + public static class WithTokenIndex extends VertexMatch { + + public final int token; + + public WithTokenIndex(VariantGraph.Vertex vertex, int vertexRank, int token) { + super(vertex, vertexRank); + 
this.token = token; + } + + @Override + public String toString() { + return "{" + vertex + " -> " + token + "}"; + } } - } - public static Function tokenResolver(final Token[] tokens) { - return input -> new WithToken(input.vertex, input.vertexRank, tokens[input.token]); - } + public static Function tokenResolver(final Token[] tokens) { + return input -> new WithToken(input.vertex, input.vertexRank, tokens[input.token]); + } - public static Predicate> filter(final BitSet rankFilter, final BitSet tokenFilter) { - return input -> input.stream().anyMatch(match -> tokenFilter.get(match.token) || rankFilter.get(match.vertexRank)); - } + public static Predicate> filter(final BitSet rankFilter, final BitSet tokenFilter) { + return input -> input.stream().anyMatch(match -> tokenFilter.get(match.token) || rankFilter.get(match.vertexRank)); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java index 3500f93ff..3652b3094 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/AbstractTest.java @@ -52,150 +52,151 @@ * @author Gregor Middell */ public abstract class AbstractTest { - public static final char[] SIGLA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); + public static final char[] SIGLA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); - protected final Logger LOG = Logger.getLogger(getClass().getName()); + protected final Logger LOG = Logger.getLogger(getClass().getName()); - protected CollationAlgorithm collationAlgorithm; + protected CollationAlgorithm collationAlgorithm; - @Before - public void initAlgorithm() { - collationAlgorithm = CollationAlgorithmFactory.dekkerMatchMatrix(new EqualityTokenComparator(), 2); - } + @Before + public void initAlgorithm() { + collationAlgorithm = CollationAlgorithmFactory.dekkerMatchMatrix(new EqualityTokenComparator(), 2); + } - protected 
SimpleWitness[] createWitnesses(String... contents) { - Assert.assertTrue("Not enough sigla", contents.length <= SIGLA.length); - final SimpleWitness[] witnesses = new SimpleWitness[contents.length]; - for (int wc = 0; wc < contents.length; wc++) { - witnesses[wc] = new SimpleWitness(Character.toString(SIGLA[wc]), contents[wc]); + protected SimpleWitness[] createWitnesses(String... contents) { + Assert.assertTrue("Not enough sigla", contents.length <= SIGLA.length); + final SimpleWitness[] witnesses = new SimpleWitness[contents.length]; + for (int wc = 0; wc < contents.length; wc++) { + witnesses[wc] = new SimpleWitness(Character.toString(SIGLA[wc]), contents[wc]); + } + return witnesses; } - return witnesses; - } - protected VariantGraph collate(SimpleWitness... witnesses) { - final VariantGraph graph = new VariantGraph(); - collate(graph, witnesses); - return graph; - } + protected VariantGraph collate(SimpleWitness... witnesses) { + final VariantGraph graph = new VariantGraph(); + collate(graph, witnesses); + return graph; + } - protected void collate(VariantGraph graph, SimpleWitness... witnesses) { - collationAlgorithm.collate(graph, witnesses); - } + protected void collate(VariantGraph graph, SimpleWitness... witnesses) { + collationAlgorithm.collate(graph, witnesses); + } - protected VariantGraph collate(String... witnesses) { - return collate(createWitnesses(witnesses)); - } + protected VariantGraph collate(String... 
witnesses) { + return collate(createWitnesses(witnesses)); + } - protected static List>> table(VariantGraph graph) { - return VariantGraphRanking.of(graph).asTable(); - } + protected static List>> table(VariantGraph graph) { + return VariantGraphRanking.of(graph).asTable(); + } - protected static SortedSet extractPhrases(SortedSet phrases, VariantGraph graph, Witness witness) { - for (VariantGraph.Vertex v : VariantGraphTraversal.of(graph, Collections.singleton(witness))) { - phrases.add(toString(v, witness)); + protected static SortedSet extractPhrases(SortedSet phrases, VariantGraph graph, Witness witness) { + for (VariantGraph.Vertex v : VariantGraphTraversal.of(graph, Collections.singleton(witness))) { + phrases.add(toString(v, witness)); + } + return phrases; } - return phrases; - } - protected static String toString(VariantGraph.Vertex vertex, Witness... witnesses) { - final Set witnessSet = new HashSet<>(Arrays.asList(witnesses)); - return vertex.tokens().stream() + protected static String toString(VariantGraph.Vertex vertex, Witness... 
witnesses) { + final Set witnessSet = new HashSet<>(Arrays.asList(witnesses)); + return vertex.tokens().stream() .filter(t -> witnessSet.contains(t.getWitness())) .collect(Collectors.groupingBy(Token::getWitness)).entrySet().stream() .sorted(Comparator.comparing(e -> e.getKey().getSigil())) .map(Map.Entry::getValue) .flatMap(tokens -> tokens.stream() - .filter(t -> t instanceof SimpleToken) - .map(t -> (SimpleToken) t) - .sorted() - .map(SimpleToken::getNormalized) + .filter(t -> t instanceof SimpleToken) + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getNormalized) ) .collect(Collectors.joining(" ")); - } - - protected static void assertGraphVertices(VariantGraph graph, int vertices) { - assertEquals(vertices, StreamSupport.stream(graph.vertices().spliterator(), false).count()); - } - - protected static void assertGraphEdges(VariantGraph graph, int edges) { - assertEquals(edges, StreamSupport.stream(graph.vertices().spliterator(), false).map(VariantGraph.Vertex::outgoing).map(Map::keySet).flatMap(Set::stream).count()); - } - protected static void assetGraphSize(VariantGraph graph, int vertices, int edges) { - assertGraphVertices(graph, vertices); - assertGraphEdges(graph, edges); - } - - protected static void assertHasWitnesses(Set edge, Witness... 
witnesses) { - assertEquals(new HashSet<>(Arrays.asList(witnesses)), edge); - } - - protected static Set edgeBetween(VariantGraph.Vertex start, VariantGraph.Vertex end) { - final Optional> edge = Optional.ofNullable(start.outgoing().get(end)); - Assert.assertTrue(String.format("No edge between %s and %s", start, end), edge.isPresent()); - return edge.get(); - } - - protected static void assertVertexEquals(String expected, VariantGraph.Vertex vertex) { - assertEquals(expected, vertex.tokens().stream().findFirst().map(t -> (SimpleToken) t).map(SimpleToken::getNormalized).get()); - } - - protected static void assertTokenEquals(String expected, Token token) { - assertEquals(expected, ((SimpleToken) token).getContent()); - } - - protected static void assertVertexHasContent(VariantGraph.Vertex vertex, String content, Witness in) { - Assert.assertEquals(String.format("%s does not has expected content for %s", vertex, in), content, toString(vertex, in)); - } - - protected static VariantGraph.Vertex vertexWith(VariantGraph graph, String content, Witness in) { - for (VariantGraph.Vertex v : VariantGraphTraversal.of(graph, Collections.singleton(in))) { - if (content.equals(toString(v, in))) { - return v; - } - } - fail(String.format("No vertex with content '%s' in witness %s", content, in)); - return null; - } - - protected static Stream witnesses(List>> table) { - return table.stream() + } + + protected static void assertGraphVertices(VariantGraph graph, int vertices) { + assertEquals(vertices, StreamSupport.stream(graph.vertices().spliterator(), false).count()); + } + + protected static void assertGraphEdges(VariantGraph graph, int edges) { + assertEquals(edges, StreamSupport.stream(graph.vertices().spliterator(), false).map(VariantGraph.Vertex::outgoing).map(Map::keySet).flatMap(Set::stream).count()); + } + + protected static void assetGraphSize(VariantGraph graph, int vertices, int edges) { + assertGraphVertices(graph, vertices); + assertGraphEdges(graph, edges); + } + + 
protected static void assertHasWitnesses(Set edge, Witness... witnesses) { + assertEquals(new HashSet<>(Arrays.asList(witnesses)), edge); + } + + protected static Set edgeBetween(VariantGraph.Vertex start, VariantGraph.Vertex end) { + final Optional> edge = Optional.ofNullable(start.outgoing().get(end)); + Assert.assertTrue(String.format("No edge between %s and %s", start, end), edge.isPresent()); + return edge.get(); + } + + protected static void assertVertexEquals(String expected, VariantGraph.Vertex vertex) { + assertEquals(expected, vertex.tokens().stream().findFirst().map(t -> (SimpleToken) t).map(SimpleToken::getNormalized).get()); + } + + protected static void assertTokenEquals(String expected, Token token) { + assertEquals(expected, ((SimpleToken) token).getContent()); + } + + protected static void assertVertexHasContent(VariantGraph.Vertex vertex, String content, Witness in) { + Assert.assertEquals(String.format("%s does not has expected content for %s", vertex, in), content, toString(vertex, in)); + } + + protected static VariantGraph.Vertex vertexWith(VariantGraph graph, String content, Witness in) { + for (VariantGraph.Vertex v : VariantGraphTraversal.of(graph, Collections.singleton(in))) { + if (content.equals(toString(v, in))) { + return v; + } + } + fail(String.format("No vertex with content '%s' in witness %s", content, in)); + return null; + } + + protected static Stream witnesses(List>> table) { + return table.stream() .map(SortedMap::keySet) .flatMap(Set::stream) .distinct(); - } - - protected static String toString(List>> table) { - return witnesses(table) + } + + protected static String toString(List>> table) { + return witnesses(table) .sorted(Witness.SIGIL_COMPARATOR) .map(witness -> String.format("%s: %s\n", witness.getSigil(), toString(table, witness))) .collect(Collectors.joining()); - } + } - protected static String toString(List>> table, Witness witness) { - return String.format("|%s|", table.stream() + protected static String 
toString(List>> table, Witness witness) { + return String.format("|%s|", table.stream() .map(r -> r.getOrDefault(witness, Collections.emptySet())) .map(tokens -> tokens.stream() - .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) - .map(t -> (SimpleToken) t) - .sorted() - .map(SimpleToken::getNormalized) - .collect(Collectors.joining(" ")) + .filter(t -> SimpleToken.class.isAssignableFrom(t.getClass())) + .map(t -> (SimpleToken) t) + .sorted() + .map(SimpleToken::getNormalized) + .collect(Collectors.joining(" ")) ) .map(cell -> cell.isEmpty() ? " " : cell) .collect(Collectors.joining("|"))); - } + } - protected void assertPhraseMatches(String... expectedPhrases) { - List> phraseMatches = ((DekkerAlgorithm) collationAlgorithm).getPhraseMatches(); - int i = 0; - for (List phraseMatch : phraseMatches) { - Assert.assertEquals(expectedPhrases[i], SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatch))); - i++; + protected void assertPhraseMatches(String... expectedPhrases) { + List> phraseMatches = ((DekkerAlgorithm) collationAlgorithm).getPhraseMatches(); + int i = 0; + for (List phraseMatch : phraseMatches) { + Assert.assertEquals(expectedPhrases[i], SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatch))); + i++; + } } - } - protected void setCollationAlgorithm(CollationAlgorithm collationAlgorithm) { - this.collationAlgorithm = collationAlgorithm; - } + protected void setCollationAlgorithm(CollationAlgorithm collationAlgorithm) { + this.collationAlgorithm = collationAlgorithm; + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java b/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java index d121ac212..5fdea0a37 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/ScriptEngineTest.java @@ -36,24 +36,24 @@ */ public class ScriptEngineTest extends AbstractTest { - @Test - public void 
functions() throws ScriptException, NoSuchMethodException { - final ScriptEngineManager scriptEngineManager = new ScriptEngineManager(); - for (ScriptEngineFactory scriptEngineFactory : scriptEngineManager.getEngineFactories()) { - LOG.fine(() -> Stream.of( - scriptEngineFactory.getEngineName(), - scriptEngineFactory.getEngineVersion(), - scriptEngineFactory.getLanguageName(), - scriptEngineFactory.getLanguageVersion(), - scriptEngineFactory.getExtensions().toString() - ).collect(Collectors.joining("; "))); + @Test + public void functions() throws ScriptException, NoSuchMethodException { + final ScriptEngineManager scriptEngineManager = new ScriptEngineManager(); + for (ScriptEngineFactory scriptEngineFactory : scriptEngineManager.getEngineFactories()) { + LOG.fine(() -> Stream.of( + scriptEngineFactory.getEngineName(), + scriptEngineFactory.getEngineVersion(), + scriptEngineFactory.getLanguageName(), + scriptEngineFactory.getLanguageVersion(), + scriptEngineFactory.getExtensions().toString() + ).collect(Collectors.joining("; "))); + } + + final Compilable compiler = (Compilable) Objects.requireNonNull(scriptEngineManager.getEngineByExtension("js")); + final CompiledScript script = compiler.compile("function compare(a, b) { return a == b }\nfunction cost(a) { return 1; }"); + + script.eval(); + + System.out.println(((Invocable) script.getEngine()).invokeFunction("compare", "1", "0")); } - - final Compilable compiler = (Compilable) Objects.requireNonNull(scriptEngineManager.getEngineByExtension("js")); - final CompiledScript script = compiler.compile("function compare(a, b) { return a == b }\nfunction cost(a) { return 1; }"); - - script.eval(); - - System.out.println(((Invocable) script.getEngine()).invokeFunction("compare", "1", "0")); - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java index b1b736a26..452bf8f2f 100644 --- 
a/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/VariantGraphTest.java @@ -35,166 +35,166 @@ public class VariantGraphTest extends AbstractTest { - @Test - public void emptyGraph() { - final VariantGraph graph = collate(createWitnesses()); - assertEquals(0, graph.witnesses().size()); - assetGraphSize(graph, 2, 1); - } - - @Test - public void getTokens() { - final SimpleWitness[] w = createWitnesses("a b c d"); - final VariantGraph graph = collate(w); - final List vertices = StreamSupport.stream(VariantGraphTraversal.of(graph).spliterator(), false).collect(Collectors.toList()); - assertEquals(6, vertices.size()); - assertEquals(graph.getStart(), vertices.get(0)); - assertVertexEquals("a", vertices.get(1)); - assertVertexEquals("b", vertices.get(2)); - assertVertexEquals("c", vertices.get(3)); - assertVertexEquals("d", vertices.get(4)); - assertEquals(graph.getEnd(), vertices.get(5)); - } - - @Test - public void oneWitness() { - final SimpleWitness[] w = createWitnesses("only one witness"); - final VariantGraph graph = collate(w); - - assetGraphSize(graph, 5, 4); - - final VariantGraph.Vertex firstVertex = vertexWith(graph, "only", w[0]); - final VariantGraph.Vertex secondVertex = vertexWith(graph, "one", w[0]); - final VariantGraph.Vertex thirdVertex = vertexWith(graph, "witness", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), firstVertex), w[0]); - assertHasWitnesses(edgeBetween(firstVertex, secondVertex), w[0]); - assertHasWitnesses(edgeBetween(secondVertex, thirdVertex), w[0]); - assertHasWitnesses(edgeBetween(thirdVertex, graph.getEnd()), w[0]); - } - - @Test - public void getPathForWitness() { - final SimpleWitness[] w = createWitnesses("a b c d e f ", "x y z d e", "a b x y z"); - final VariantGraph graph = collate(w); - final List path = StreamSupport.stream(VariantGraphTraversal.of(graph, Collections.singleton(w[0])).spliterator(), 
false).collect(Collectors.toList()); - - assertEquals(8, path.size()); - assertEquals(graph.getStart(), path.get(0)); - assertVertexEquals("a", path.get(1)); - assertVertexEquals("b", path.get(2)); - assertVertexEquals("c", path.get(3)); - assertVertexEquals("d", path.get(4)); - assertVertexEquals("e", path.get(5)); - assertVertexEquals("f", path.get(6)); - assertEquals(graph.getEnd(), path.get(7)); - } - - @Test - public void transpositions1() { - final VariantGraph graph = collate("the nice black and white cat", "the friendly white and black cat"); - assertGraphEdges(graph, 12); - } - - @Test - public void transpositions2() { - final SimpleWitness[] w = createWitnesses("The black dog chases a red cat.", "A red cat chases the black dog.", "A red cat chases the yellow dog"); - final VariantGraph graph = collate(w); - - // There should be two vertices for cat in the graph - assertHasWitnesses(edgeBetween(vertexWith(graph, "red", w[0]), vertexWith(graph, "cat", w[0])), w[0]); - assertHasWitnesses(edgeBetween(vertexWith(graph, "red", w[1]), vertexWith(graph, "cat", w[1])), w[1], w[2]); - - assetGraphSize(graph, 17, 20); - } - - @Test - public void joinTwoIdenticalWitnesses() { - final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); - final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); - - assetGraphSize(graph, 3, 2); - - final VariantGraph.Vertex joinedVertex = vertexWith(graph, "the black cat", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), joinedVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(joinedVertex, graph.getEnd()), w[0], w[1]); - } - - @Test - public void joinTwoDifferentWitnesses() { - final SimpleWitness[] w = createWitnesses("the nice black cat shared his food", "the bad white cat spilled his food again"); - final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); - - final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); - final VariantGraph.Vertex niceBlackVertex = 
vertexWith(graph, "nice black", w[0]); - final VariantGraph.Vertex badWhiteVertex = vertexWith(graph, "bad white", w[1]); - final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); - final VariantGraph.Vertex sharedVertex = vertexWith(graph, "shared", w[0]); - final VariantGraph.Vertex spilledVertex = vertexWith(graph, "spilled", w[1]); - final VariantGraph.Vertex hisFoodVertex = vertexWith(graph, "his food", w[0]); - final VariantGraph.Vertex againVertex = vertexWith(graph, "again", w[1]); - - assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(theVertex, niceBlackVertex), w[0]); - assertHasWitnesses(edgeBetween(niceBlackVertex, catVertex), w[0]); - assertHasWitnesses(edgeBetween(theVertex, badWhiteVertex), w[1]); - assertHasWitnesses(edgeBetween(badWhiteVertex, catVertex), w[1]); - assertHasWitnesses(edgeBetween(catVertex, sharedVertex), w[0]); - assertHasWitnesses(edgeBetween(sharedVertex, hisFoodVertex), w[0]); - assertHasWitnesses(edgeBetween(catVertex, spilledVertex), w[1]); - assertHasWitnesses(edgeBetween(spilledVertex, hisFoodVertex), w[1]); - assertHasWitnesses(edgeBetween(hisFoodVertex, againVertex), w[1]); - } - - @Test - public void joinTwoDifferentWitnesses2() { - final SimpleWitness[] w = createWitnesses("Blackie, the black cat", "Whitney, the white cat"); - final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); - - final VariantGraph.Vertex blackieVertex = vertexWith(graph, "blackie", w[0]); - final VariantGraph.Vertex whitneyVertex = vertexWith(graph, "whitney", w[1]); - final VariantGraph.Vertex theVertex = vertexWith(graph, ", the", w[0]); - final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); - final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); - final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), blackieVertex), w[0]); - 
assertHasWitnesses(edgeBetween(blackieVertex, theVertex), w[0]); - assertHasWitnesses(edgeBetween(graph.getStart(), whitneyVertex), w[1]); - assertHasWitnesses(edgeBetween(whitneyVertex, theVertex), w[1]); - assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0]); - assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0]); - assertHasWitnesses(edgeBetween(theVertex, whiteVertex), w[1]); - assertHasWitnesses(edgeBetween(whiteVertex, catVertex), w[1]); - } - - @Test - public void joinTwoDifferentWitnessesWithTranspositions() { - final SimpleWitness[] w = createWitnesses("voor Zo nu en dan zin2 na voor", "voor zin2 Nu en dan voor"); - final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); - final StringWriter writer = new StringWriter(); - new SimpleVariantGraphSerializer(graph).toDot(writer); - LOG.log(Level.FINE, "dot={0}", writer.toString()); - - final VariantGraph.Vertex voorVertex1 = vertexWith(graph, "voor", w[0]); - final VariantGraph.Vertex zoVertex = vertexWith(graph, "zo", w[0]); - final VariantGraph.Vertex nuendanVertex = vertexWith(graph, "nu en dan", w[0]); - // final VariantGraphVertex zin2AVertex = vertexWith(graph, "zin2", w[0]); - final VariantGraph.Vertex zin2BVertex = vertexWith(graph, "zin2", w[1]); - // final VariantGraphVertex naVertex = vertexWith(graph, "na", w[0]); - // final VariantGraphVertex voorVertex2 = vertexWith(graph, "voor", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), voorVertex1), w[0], w[1]); - assertHasWitnesses(edgeBetween(voorVertex1, zoVertex), w[0]); - assertHasWitnesses(edgeBetween(zoVertex, nuendanVertex), w[0]); - // assertHasWitnesses(edgeBetween(nuendanVertex, zin2AVertex), w[0]); - // assertHasWitnesses(edgeBetween(zin2AVertex, naVertex), w[0]); - // assertHasWitnesses(edgeBetween(naVertex, voorVertex2), w[0]); - // assertHasWitnesses(edgeBetween(voorVertex2, graph.getEnd()), w[0], w[1]); - - assertHasWitnesses(edgeBetween(voorVertex1, zin2BVertex), w[1]); - 
assertHasWitnesses(edgeBetween(zin2BVertex, nuendanVertex), w[1]); - // assertHasWitnesses(edgeBetween(nuendanVertex, voorVertex2), w[1]); - } + @Test + public void emptyGraph() { + final VariantGraph graph = collate(createWitnesses()); + assertEquals(0, graph.witnesses().size()); + assetGraphSize(graph, 2, 1); + } + + @Test + public void getTokens() { + final SimpleWitness[] w = createWitnesses("a b c d"); + final VariantGraph graph = collate(w); + final List vertices = StreamSupport.stream(VariantGraphTraversal.of(graph).spliterator(), false).collect(Collectors.toList()); + assertEquals(6, vertices.size()); + assertEquals(graph.getStart(), vertices.get(0)); + assertVertexEquals("a", vertices.get(1)); + assertVertexEquals("b", vertices.get(2)); + assertVertexEquals("c", vertices.get(3)); + assertVertexEquals("d", vertices.get(4)); + assertEquals(graph.getEnd(), vertices.get(5)); + } + + @Test + public void oneWitness() { + final SimpleWitness[] w = createWitnesses("only one witness"); + final VariantGraph graph = collate(w); + + assetGraphSize(graph, 5, 4); + + final VariantGraph.Vertex firstVertex = vertexWith(graph, "only", w[0]); + final VariantGraph.Vertex secondVertex = vertexWith(graph, "one", w[0]); + final VariantGraph.Vertex thirdVertex = vertexWith(graph, "witness", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), firstVertex), w[0]); + assertHasWitnesses(edgeBetween(firstVertex, secondVertex), w[0]); + assertHasWitnesses(edgeBetween(secondVertex, thirdVertex), w[0]); + assertHasWitnesses(edgeBetween(thirdVertex, graph.getEnd()), w[0]); + } + + @Test + public void getPathForWitness() { + final SimpleWitness[] w = createWitnesses("a b c d e f ", "x y z d e", "a b x y z"); + final VariantGraph graph = collate(w); + final List path = StreamSupport.stream(VariantGraphTraversal.of(graph, Collections.singleton(w[0])).spliterator(), false).collect(Collectors.toList()); + + assertEquals(8, path.size()); + assertEquals(graph.getStart(), path.get(0)); + 
assertVertexEquals("a", path.get(1)); + assertVertexEquals("b", path.get(2)); + assertVertexEquals("c", path.get(3)); + assertVertexEquals("d", path.get(4)); + assertVertexEquals("e", path.get(5)); + assertVertexEquals("f", path.get(6)); + assertEquals(graph.getEnd(), path.get(7)); + } + + @Test + public void transpositions1() { + final VariantGraph graph = collate("the nice black and white cat", "the friendly white and black cat"); + assertGraphEdges(graph, 12); + } + + @Test + public void transpositions2() { + final SimpleWitness[] w = createWitnesses("The black dog chases a red cat.", "A red cat chases the black dog.", "A red cat chases the yellow dog"); + final VariantGraph graph = collate(w); + + // There should be two vertices for cat in the graph + assertHasWitnesses(edgeBetween(vertexWith(graph, "red", w[0]), vertexWith(graph, "cat", w[0])), w[0]); + assertHasWitnesses(edgeBetween(vertexWith(graph, "red", w[1]), vertexWith(graph, "cat", w[1])), w[1], w[2]); + + assetGraphSize(graph, 17, 20); + } + + @Test + public void joinTwoIdenticalWitnesses() { + final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); + final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); + + assetGraphSize(graph, 3, 2); + + final VariantGraph.Vertex joinedVertex = vertexWith(graph, "the black cat", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), joinedVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(joinedVertex, graph.getEnd()), w[0], w[1]); + } + + @Test + public void joinTwoDifferentWitnesses() { + final SimpleWitness[] w = createWitnesses("the nice black cat shared his food", "the bad white cat spilled his food again"); + final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); + + final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); + final VariantGraph.Vertex niceBlackVertex = vertexWith(graph, "nice black", w[0]); + final VariantGraph.Vertex badWhiteVertex = vertexWith(graph, "bad white", w[1]); + final 
VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); + final VariantGraph.Vertex sharedVertex = vertexWith(graph, "shared", w[0]); + final VariantGraph.Vertex spilledVertex = vertexWith(graph, "spilled", w[1]); + final VariantGraph.Vertex hisFoodVertex = vertexWith(graph, "his food", w[0]); + final VariantGraph.Vertex againVertex = vertexWith(graph, "again", w[1]); + + assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(theVertex, niceBlackVertex), w[0]); + assertHasWitnesses(edgeBetween(niceBlackVertex, catVertex), w[0]); + assertHasWitnesses(edgeBetween(theVertex, badWhiteVertex), w[1]); + assertHasWitnesses(edgeBetween(badWhiteVertex, catVertex), w[1]); + assertHasWitnesses(edgeBetween(catVertex, sharedVertex), w[0]); + assertHasWitnesses(edgeBetween(sharedVertex, hisFoodVertex), w[0]); + assertHasWitnesses(edgeBetween(catVertex, spilledVertex), w[1]); + assertHasWitnesses(edgeBetween(spilledVertex, hisFoodVertex), w[1]); + assertHasWitnesses(edgeBetween(hisFoodVertex, againVertex), w[1]); + } + + @Test + public void joinTwoDifferentWitnesses2() { + final SimpleWitness[] w = createWitnesses("Blackie, the black cat", "Whitney, the white cat"); + final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); + + final VariantGraph.Vertex blackieVertex = vertexWith(graph, "blackie", w[0]); + final VariantGraph.Vertex whitneyVertex = vertexWith(graph, "whitney", w[1]); + final VariantGraph.Vertex theVertex = vertexWith(graph, ", the", w[0]); + final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); + final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); + final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), blackieVertex), w[0]); + assertHasWitnesses(edgeBetween(blackieVertex, theVertex), w[0]); + assertHasWitnesses(edgeBetween(graph.getStart(), whitneyVertex), w[1]); + 
assertHasWitnesses(edgeBetween(whitneyVertex, theVertex), w[1]); + assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0]); + assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0]); + assertHasWitnesses(edgeBetween(theVertex, whiteVertex), w[1]); + assertHasWitnesses(edgeBetween(whiteVertex, catVertex), w[1]); + } + + @Test + public void joinTwoDifferentWitnessesWithTranspositions() { + final SimpleWitness[] w = createWitnesses("voor Zo nu en dan zin2 na voor", "voor zin2 Nu en dan voor"); + final VariantGraph graph = VariantGraph.JOIN.apply(collate(w)); + final StringWriter writer = new StringWriter(); + new SimpleVariantGraphSerializer(graph).toDot(writer); + LOG.log(Level.FINE, "dot={0}", writer.toString()); + + final VariantGraph.Vertex voorVertex1 = vertexWith(graph, "voor", w[0]); + final VariantGraph.Vertex zoVertex = vertexWith(graph, "zo", w[0]); + final VariantGraph.Vertex nuendanVertex = vertexWith(graph, "nu en dan", w[0]); + // final VariantGraphVertex zin2AVertex = vertexWith(graph, "zin2", w[0]); + final VariantGraph.Vertex zin2BVertex = vertexWith(graph, "zin2", w[1]); + // final VariantGraphVertex naVertex = vertexWith(graph, "na", w[0]); + // final VariantGraphVertex voorVertex2 = vertexWith(graph, "voor", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), voorVertex1), w[0], w[1]); + assertHasWitnesses(edgeBetween(voorVertex1, zoVertex), w[0]); + assertHasWitnesses(edgeBetween(zoVertex, nuendanVertex), w[0]); + // assertHasWitnesses(edgeBetween(nuendanVertex, zin2AVertex), w[0]); + // assertHasWitnesses(edgeBetween(zin2AVertex, naVertex), w[0]); + // assertHasWitnesses(edgeBetween(naVertex, voorVertex2), w[0]); + // assertHasWitnesses(edgeBetween(voorVertex2, graph.getEnd()), w[0], w[1]); + + assertHasWitnesses(edgeBetween(voorVertex1, zin2BVertex), w[1]); + assertHasWitnesses(edgeBetween(zin2BVertex, nuendanVertex), w[1]); + // assertHasWitnesses(edgeBetween(nuendanVertex, voorVertex2), w[1]); + } } diff --git 
a/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java index 75d5fd17e..ecc1aca13 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/AlignmentTest.java @@ -1,159 +1,158 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ -package eu.interedition.collatex.dekker; - -import eu.interedition.collatex.AbstractTest; -import eu.interedition.collatex.CollationAlgorithmFactory; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.matching.EqualityTokenComparator; -import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; -import eu.interedition.collatex.simple.SimpleWitness; -import org.junit.Test; - -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.SortedMap; - -import static org.junit.Assert.assertEquals; - -/** - * - * @author Ronald Haentjens Dekker - * - * This test class tests the PhraseMatchDetector and - * the TranspositionDetector - */ -public class AlignmentTest extends AbstractTest { - - @Test - public void doubleTransposition1() { - final SimpleWitness[] w = createWitnesses("the cat is black", "black is the cat"); - final List>> t = table(collate(w)); - assertEquals("|the|cat|is|black| |", toString(t, w[0])); - assertEquals("|black| |is|the|cat|", toString(t, w[1])); - } - - @Test - public void doubleTransposition2() { - final SimpleWitness[] w = createWitnesses("a b", "b a"); - final List>> t = table(collate(w)); - assertEquals("| |a|b|", toString(t, w[0])); - assertEquals("|b|a| |", toString(t, w[1])); - } - - @Test - public void doubleTransposition3() { - final SimpleWitness[] w = createWitnesses("a b c", "b a c"); - final List>> t = table(collate(w)); - assertEquals("| |a|b|c|", toString(t, w[0])); - assertEquals("|b|a| |c|", toString(t, w[1])); - } - - @Test - public void additionInCombinationWithTransposition() { - final SimpleWitness[] w = createWitnesses(// - "the cat is very happy",// - "very happy is the cat",// - "very delitied and happy is the 
cat"); - final List>> t = table(collate(w)); - assertEquals("|the|cat| | |is|very|happy|", toString(t, w[0])); - assertEquals("|very| | |happy|is|the|cat|", toString(t, w[1])); - assertEquals("|very|delitied|and|happy|is|the|cat|", toString(t, w[2])); - } - - @Test - public void simpleTransposition() { - final SimpleWitness[] w = createWitnesses(// - "A black cat in a white basket",// - "A white cat in a black basket"); - final List>> t = table(collate(w)); - assertEquals("|a|black|cat|in|a|white|basket|", toString(t, w[0])); - assertEquals("|a|white|cat|in|a|black|basket|", toString(t, w[1])); - } - - @Test - public void transposeInOnePair() { - final SimpleWitness[] w = createWitnesses("y", "x y z", "z y"); - final List>> t = table(collate(w)); - assertEquals("| |y| |", toString(t, w[0])); - assertEquals("|x|y|z|", toString(t, w[1])); - assertEquals("|z|y| |", toString(t, w[2])); - } - - @Test - public void transposeInTwoPairs() { - final SimpleWitness[] w = createWitnesses("y x", "x y z", "z y"); - final List>> t = table(collate(w)); - assertEquals("| |y|x|", toString(t, w[0])); - assertEquals("|x|y|z|", toString(t, w[1])); - assertEquals("|z|y| |", toString(t, w[2])); - } - - @Test - public void testOrderIndependence() { - final SimpleWitness[] w = createWitnesses("Hello cruel world", "Hello nice world", "Hello nice cruel world"); - collate(w[0], w[1], w[2]); - assertPhraseMatches("Hello","nice","cruel","world"); - List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); - assertEquals(0, transpositions.size()); - } - - @Test - public void testPhraseMatchingShouldNotIgnoreDeletions() { - final SimpleWitness[] w = createWitnesses("Hello cruel world", "Hello world"); - collate(w); - assertPhraseMatches("Hello", "world"); - List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); - assertEquals(0, transpositions.size()); - } - - @Test - public void testPhraseMatchingShouldNotIgnoreAdditions() { - final 
SimpleWitness[] w = createWitnesses("Hello world", "Hello cruel world"); - collate(w); - assertPhraseMatches("Hello", "world"); - List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); - assertEquals(0, transpositions.size()); - } - - @Test - public void testOrderIndependenceTroy() throws XMLStreamException { - final List witnesses = new ArrayList<>(); - witnesses.add(new SimpleWitness("w1", "X A Z ")); - witnesses.add(new SimpleWitness("w2", "Y B Z ")); - witnesses.add(new SimpleWitness("w3", "Y A X ")); - - setCollationAlgorithm(CollationAlgorithmFactory.dekker(new EqualityTokenComparator())); - VariantGraph graph = new VariantGraph(); - collationAlgorithm.collate(graph, witnesses.toArray(new SimpleWitness[0])); - - StringWriter writer = new StringWriter(); - XMLStreamWriter swriter = XMLOutputFactory.newInstance().createXMLStreamWriter(writer); - swriter.writeStartDocument(); - new SimpleVariantGraphSerializer(graph).toGraphML(swriter); - swriter.writeEndDocument(); - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . 
+ */ +package eu.interedition.collatex.dekker; + +import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.CollationAlgorithmFactory; +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.Witness; +import eu.interedition.collatex.matching.EqualityTokenComparator; +import eu.interedition.collatex.simple.SimpleVariantGraphSerializer; +import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; + +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; + +import static org.junit.Assert.assertEquals; + +/** + * @author Ronald Haentjens Dekker + *

        + * This test class tests the PhraseMatchDetector and + * the TranspositionDetector + */ +public class AlignmentTest extends AbstractTest { + + @Test + public void doubleTransposition1() { + final SimpleWitness[] w = createWitnesses("the cat is black", "black is the cat"); + final List>> t = table(collate(w)); + assertEquals("|the|cat|is|black| |", toString(t, w[0])); + assertEquals("|black| |is|the|cat|", toString(t, w[1])); + } + + @Test + public void doubleTransposition2() { + final SimpleWitness[] w = createWitnesses("a b", "b a"); + final List>> t = table(collate(w)); + assertEquals("| |a|b|", toString(t, w[0])); + assertEquals("|b|a| |", toString(t, w[1])); + } + + @Test + public void doubleTransposition3() { + final SimpleWitness[] w = createWitnesses("a b c", "b a c"); + final List>> t = table(collate(w)); + assertEquals("| |a|b|c|", toString(t, w[0])); + assertEquals("|b|a| |c|", toString(t, w[1])); + } + + @Test + public void additionInCombinationWithTransposition() { + final SimpleWitness[] w = createWitnesses(// + "the cat is very happy",// + "very happy is the cat",// + "very delitied and happy is the cat"); + final List>> t = table(collate(w)); + assertEquals("|the|cat| | |is|very|happy|", toString(t, w[0])); + assertEquals("|very| | |happy|is|the|cat|", toString(t, w[1])); + assertEquals("|very|delitied|and|happy|is|the|cat|", toString(t, w[2])); + } + + @Test + public void simpleTransposition() { + final SimpleWitness[] w = createWitnesses(// + "A black cat in a white basket",// + "A white cat in a black basket"); + final List>> t = table(collate(w)); + assertEquals("|a|black|cat|in|a|white|basket|", toString(t, w[0])); + assertEquals("|a|white|cat|in|a|black|basket|", toString(t, w[1])); + } + + @Test + public void transposeInOnePair() { + final SimpleWitness[] w = createWitnesses("y", "x y z", "z y"); + final List>> t = table(collate(w)); + assertEquals("| |y| |", toString(t, w[0])); + assertEquals("|x|y|z|", toString(t, w[1])); + 
assertEquals("|z|y| |", toString(t, w[2])); + } + + @Test + public void transposeInTwoPairs() { + final SimpleWitness[] w = createWitnesses("y x", "x y z", "z y"); + final List>> t = table(collate(w)); + assertEquals("| |y|x|", toString(t, w[0])); + assertEquals("|x|y|z|", toString(t, w[1])); + assertEquals("|z|y| |", toString(t, w[2])); + } + + @Test + public void testOrderIndependence() { + final SimpleWitness[] w = createWitnesses("Hello cruel world", "Hello nice world", "Hello nice cruel world"); + collate(w[0], w[1], w[2]); + assertPhraseMatches("Hello", "nice", "cruel", "world"); + List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); + assertEquals(0, transpositions.size()); + } + + @Test + public void testPhraseMatchingShouldNotIgnoreDeletions() { + final SimpleWitness[] w = createWitnesses("Hello cruel world", "Hello world"); + collate(w); + assertPhraseMatches("Hello", "world"); + List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); + assertEquals(0, transpositions.size()); + } + + @Test + public void testPhraseMatchingShouldNotIgnoreAdditions() { + final SimpleWitness[] w = createWitnesses("Hello world", "Hello cruel world"); + collate(w); + assertPhraseMatches("Hello", "world"); + List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); + assertEquals(0, transpositions.size()); + } + + @Test + public void testOrderIndependenceTroy() throws XMLStreamException { + final List witnesses = new ArrayList<>(); + witnesses.add(new SimpleWitness("w1", "X A Z ")); + witnesses.add(new SimpleWitness("w2", "Y B Z ")); + witnesses.add(new SimpleWitness("w3", "Y A X ")); + + setCollationAlgorithm(CollationAlgorithmFactory.dekker(new EqualityTokenComparator())); + VariantGraph graph = new VariantGraph(); + collationAlgorithm.collate(graph, witnesses.toArray(new SimpleWitness[0])); + + StringWriter writer = new StringWriter(); + XMLStreamWriter swriter = 
XMLOutputFactory.newInstance().createXMLStreamWriter(writer); + swriter.writeStartDocument(); + new SimpleVariantGraphSerializer(graph).toGraphML(swriter); + swriter.writeEndDocument(); + } +} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java index 2558dd8d1..2e681a295 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/BeckettTest.java @@ -1,233 +1,233 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . 
- */ - -package eu.interedition.collatex.dekker; - -import eu.interedition.collatex.AbstractTest; -import eu.interedition.collatex.CollationAlgorithmFactory; -import eu.interedition.collatex.Token; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.Witness; -import eu.interedition.collatex.matching.EqualityTokenComparator; -import eu.interedition.collatex.matching.Matches; -import eu.interedition.collatex.simple.SimpleToken; -import eu.interedition.collatex.simple.SimpleWitness; -import org.junit.Assert; -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - -import static eu.interedition.collatex.dekker.Match.PHRASE_MATCH_TO_TOKENS; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -public class BeckettTest extends AbstractTest { - - /** - * The ranking of vertices in the transposition detector should only - * rank matched vertices!!! - */ - @Test - public void testBeckettStrangeTransposition() { - SimpleWitness[] w = createWitnesses("People with things, people without things, things without people, what does it matter. 
I'm confident I can soon scatter them.", "People with things, people without things, things without people, what does it matter, it will not take me long to scatter them.", "People with things, people without things, things without people, what does it matter, I flatter myself it will not take me long to scatter them, whenever I choose, to the winds."); - final VariantGraph graph = collate(w[0], w[1]); - DekkerAlgorithm algo = new DekkerAlgorithm(new EqualityTokenComparator()); - algo.collate(graph, w[2]); -// List> phraseMatches = algo.getPhraseMatches(); -// for (List phraseMatch: phraseMatches) { -// System.out.println(SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatch))); -// } - - assertEquals(0, algo.getTranspositions().size()); - } - - - @Test - public void dirkVincent() { - final SimpleWitness[] w = createWitnesses(// - "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// - "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); - final VariantGraph graph = collate(w[0]); - final Map> matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).allMatches; - - assertVertexHasContent(matches.get(w[1].getTokens().get(0)).get(0), "its", w[0]); - assertEquals(2, matches.get(w[1].getTokens().get(3)).size()); // 2 matches for 'light' - } - - @Test - public void dirkVincentWithMatchMatrixLinker() { - setCollationAlgorithm(CollationAlgorithmFactory.dekkerMatchMatrix(new EqualityTokenComparator(), 1)); - final SimpleWitness[] w = createWitnesses(// - "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// - "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); - final 
VariantGraph graph = collate(w[0]); - final Map> matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).allMatches; - - assertVertexHasContent(matches.get(w[1].getTokens().get(0)).get(0), "its", w[0]); - assertEquals(2, matches.get(w[1].getTokens().get(3)).size()); // 2 matches for 'light' - } - - @Test - public void dirkVincent5() { - final SimpleWitness[] w = createWitnesses("Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa."); - final VariantGraph graph = collate(w); - - vertexWith(graph, "its", w[0]); - vertexWith(graph, "soft", w[0]); - vertexWith(graph, "light", w[0]); - vertexWith(graph, "neither", w[0]); - vertexWith(graph, "daylight", w[0]); - } - - @Test - public void dirkVincent6() { - final SimpleWitness[] w = createWitnesses("Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// - "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); - final VariantGraph graph = collate(w); - - final VariantGraph.Vertex itsVertex = vertexWith(graph, "its", w[0]); - final VariantGraph.Vertex softVertex = vertexWith(graph, "soft", w[0]); - final VariantGraph.Vertex changelessVertex = vertexWith(graph, "changeless", w[1]); - final VariantGraph.Vertex lightVertex = vertexWith(graph, "light", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), itsVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(itsVertex, softVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(softVertex, lightVertex), w[0]); - assertHasWitnesses(edgeBetween(softVertex, changelessVertex), w[1]); - assertHasWitnesses(edgeBetween(changelessVertex, lightVertex), w[1]); - } - - @Test - public void testDirkVincent7() { - final SimpleWitness[] w = createWitnesses(// - "Its soft light 
neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.", "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); - collate(w); - assertPhraseMatches("Its soft","light", "any light he could remember from the days", "nights when day followed", "night", "vice versa."); - assertTrue(((DekkerAlgorithm) collationAlgorithm).getTranspositions().isEmpty()); - } - - @Test - public void dirkVincent8() { - final SimpleWitness[] w = createWitnesses(// - "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// - "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa.",// - "Its faint unchanging light unlike any light he could remember from the days & nights when day followed on night & night on day."); - final VariantGraph graph = collate(w[0], w[1]); - final Matches matches = Matches.between(graph.vertices(), w[2].getTokens(), new EqualityTokenComparator()); - - final Set unmatchedTokens = matches.unmatchedInWitness; - final Set unsureTokens = matches.ambiguousInWitness; - final List w2Tokens = w[2].getTokens(); - - assertTrue(unmatchedTokens.contains(w2Tokens.get(1))); - assertTrue(unmatchedTokens.contains(w2Tokens.get(2))); - assertTrue(unsureTokens.contains(w2Tokens.get(3))); - assertTrue(unsureTokens.contains(w2Tokens.get(6))); - assertTrue(unsureTokens.contains(w2Tokens.get(13))); // & - assertTrue(unsureTokens.contains(w2Tokens.get(16))); // day - } - - @Test - public void dirkVincent10() { - final SimpleWitness[] w = createWitnesses("Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// - "Its soft changeless light unlike any light 
he could remember from the days and nights when day followed hard on night and vice versa.",// - "Its faint unchanging light unlike any light he could remember from the days & nights when day followed on night & night on day."); - final VariantGraph graph = collate(w); - - vertexWith(graph, "its", w[0]); - vertexWith(graph, "soft", w[0]); - vertexWith(graph, "changeless", w[1]); - vertexWith(graph, "faint", w[2]); - vertexWith(graph, "unchanging", w[2]); - vertexWith(graph, "light", w[0]); - vertexWith(graph, "neither", w[0]); - vertexWith(graph, "daylight", w[0]); - vertexWith(graph, "nor", w[0]); - vertexWith(graph, "moonlight", w[0]); - vertexWith(graph, "starlight", w[0]); - - // FIXME: test this! - /* - assertEquals("its", iterator.get.getNormalized()); - assertEquals("soft", iterator.next().getNormalized()); - assertEquals("changeless", iterator.next().getNormalized()); - assertEquals("faint", iterator.next().getNormalized()); - assertEquals("unchanging", iterator.next().getNormalized()); - assertEquals("light", iterator.next().getNormalized()); - assertEquals("neither", iterator.next().getNormalized()); - assertEquals("daylight", iterator.next().getNormalized()); - assertEquals("nor", iterator.next().getNormalized()); - assertEquals("moonlight", iterator.next().getNormalized()); - assertEquals("nor", iterator.next().getNormalized()); - assertEquals("starlight", iterator.next().getNormalized()); - assertEquals("nor", iterator.next().getNormalized()); - assertEquals("unlike", iterator.next().getNormalized()); - assertEquals("any", iterator.next().getNormalized()); - assertEquals("light", iterator.next().getNormalized()); - assertEquals("he", iterator.next().getNormalized()); - assertEquals("could", iterator.next().getNormalized()); - */ - } - - @Test - public void sentence42Transposition() { - // punctuation should be treated as separate tokens for this test to succeed - final SimpleWitness[] w = createWitnesses(// - "The same clock as when for example Magee 
once died.",// - "The same as when for example Magee once died.",// - "The same as when for example McKee once died .",// - "The same as when among others Darly once died & left him.",// - "The same as when Darly among others once died and left him."); - - final VariantGraph graph = collate(w[0], w[1]); - assertGraphContains(graph, "the", "same", "clock", "as", "when", "for", "example", "magee", "once", "died"); - - collate(graph, w[2]); - assertGraphContains(graph, "the", "same", "clock", "as", "when", "for", "example", "magee", "mckee", "once", "died"); - - collate(graph, w[3]); - assertGraphContains(graph, "the", "same", "clock", "as", "when", "for", "example", "magee", "mckee", "among", "others", "darly", "once", "died", "left", "him"); - - // transpositions should be handled correctly for this test to succeed - collate(graph, w[4]); - final List> phraseMatches = ((DekkerAlgorithm) collationAlgorithm).getPhraseMatches(); - final List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); - assertEquals("The same", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(0)))); - assertEquals("as when", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(1)))); - assertEquals("Darly", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(2)))); - assertEquals("among others", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(3)))); - assertEquals("once died", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(4)))); - assertEquals("left him", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(5)))); - assertEquals(1, transpositions.size()); - assertEquals("Darly", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(transpositions.get(0)))); - } - - private static void assertGraphContains(VariantGraph graph, String... 
expected) { - SortedSet contents = new TreeSet<>(); - for (Witness witness : graph.witnesses()) { - extractPhrases(contents, graph, witness); - } - Assert.assertTrue(contents.containsAll(Arrays.asList(expected))); - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + +package eu.interedition.collatex.dekker; + +import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.CollationAlgorithmFactory; +import eu.interedition.collatex.Token; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.Witness; +import eu.interedition.collatex.matching.EqualityTokenComparator; +import eu.interedition.collatex.matching.Matches; +import eu.interedition.collatex.simple.SimpleToken; +import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import static eu.interedition.collatex.dekker.Match.PHRASE_MATCH_TO_TOKENS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class BeckettTest extends AbstractTest { + + /** + * The ranking of vertices in the transposition detector should only + * rank matched vertices!!! 
+ */ + @Test + public void testBeckettStrangeTransposition() { + SimpleWitness[] w = createWitnesses("People with things, people without things, things without people, what does it matter. I'm confident I can soon scatter them.", "People with things, people without things, things without people, what does it matter, it will not take me long to scatter them.", "People with things, people without things, things without people, what does it matter, I flatter myself it will not take me long to scatter them, whenever I choose, to the winds."); + final VariantGraph graph = collate(w[0], w[1]); + DekkerAlgorithm algo = new DekkerAlgorithm(new EqualityTokenComparator()); + algo.collate(graph, w[2]); +// List> phraseMatches = algo.getPhraseMatches(); +// for (List phraseMatch: phraseMatches) { +// System.out.println(SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatch))); +// } + + assertEquals(0, algo.getTranspositions().size()); + } + + + @Test + public void dirkVincent() { + final SimpleWitness[] w = createWitnesses(// + "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// + "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); + final VariantGraph graph = collate(w[0]); + final Map> matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).allMatches; + + assertVertexHasContent(matches.get(w[1].getTokens().get(0)).get(0), "its", w[0]); + assertEquals(2, matches.get(w[1].getTokens().get(3)).size()); // 2 matches for 'light' + } + + @Test + public void dirkVincentWithMatchMatrixLinker() { + setCollationAlgorithm(CollationAlgorithmFactory.dekkerMatchMatrix(new EqualityTokenComparator(), 1)); + final SimpleWitness[] w = createWitnesses(// + "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights 
when day followed night & vice versa.",// + "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); + final VariantGraph graph = collate(w[0]); + final Map> matches = Matches.between(graph.vertices(), w[1], new EqualityTokenComparator()).allMatches; + + assertVertexHasContent(matches.get(w[1].getTokens().get(0)).get(0), "its", w[0]); + assertEquals(2, matches.get(w[1].getTokens().get(3)).size()); // 2 matches for 'light' + } + + @Test + public void dirkVincent5() { + final SimpleWitness[] w = createWitnesses("Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa."); + final VariantGraph graph = collate(w); + + vertexWith(graph, "its", w[0]); + vertexWith(graph, "soft", w[0]); + vertexWith(graph, "light", w[0]); + vertexWith(graph, "neither", w[0]); + vertexWith(graph, "daylight", w[0]); + } + + @Test + public void dirkVincent6() { + final SimpleWitness[] w = createWitnesses("Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// + "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); + final VariantGraph graph = collate(w); + + final VariantGraph.Vertex itsVertex = vertexWith(graph, "its", w[0]); + final VariantGraph.Vertex softVertex = vertexWith(graph, "soft", w[0]); + final VariantGraph.Vertex changelessVertex = vertexWith(graph, "changeless", w[1]); + final VariantGraph.Vertex lightVertex = vertexWith(graph, "light", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), itsVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(itsVertex, softVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(softVertex, lightVertex), w[0]); + assertHasWitnesses(edgeBetween(softVertex, changelessVertex), 
w[1]); + assertHasWitnesses(edgeBetween(changelessVertex, lightVertex), w[1]); + } + + @Test + public void testDirkVincent7() { + final SimpleWitness[] w = createWitnesses(// + "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.", "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa."); + collate(w); + assertPhraseMatches("Its soft", "light", "any light he could remember from the days", "nights when day followed", "night", "vice versa."); + assertTrue(((DekkerAlgorithm) collationAlgorithm).getTranspositions().isEmpty()); + } + + @Test + public void dirkVincent8() { + final SimpleWitness[] w = createWitnesses(// + "Its soft light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// + "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa.",// + "Its faint unchanging light unlike any light he could remember from the days & nights when day followed on night & night on day."); + final VariantGraph graph = collate(w[0], w[1]); + final Matches matches = Matches.between(graph.vertices(), w[2].getTokens(), new EqualityTokenComparator()); + + final Set unmatchedTokens = matches.unmatchedInWitness; + final Set unsureTokens = matches.ambiguousInWitness; + final List w2Tokens = w[2].getTokens(); + + assertTrue(unmatchedTokens.contains(w2Tokens.get(1))); + assertTrue(unmatchedTokens.contains(w2Tokens.get(2))); + assertTrue(unsureTokens.contains(w2Tokens.get(3))); + assertTrue(unsureTokens.contains(w2Tokens.get(6))); + assertTrue(unsureTokens.contains(w2Tokens.get(13))); // & + assertTrue(unsureTokens.contains(w2Tokens.get(16))); // day + } + + @Test + public void dirkVincent10() { + final SimpleWitness[] w = createWitnesses("Its soft 
light neither daylight nor moonlight nor starlight nor any light he could remember from the days & nights when day followed night & vice versa.",// + "Its soft changeless light unlike any light he could remember from the days and nights when day followed hard on night and vice versa.",// + "Its faint unchanging light unlike any light he could remember from the days & nights when day followed on night & night on day."); + final VariantGraph graph = collate(w); + + vertexWith(graph, "its", w[0]); + vertexWith(graph, "soft", w[0]); + vertexWith(graph, "changeless", w[1]); + vertexWith(graph, "faint", w[2]); + vertexWith(graph, "unchanging", w[2]); + vertexWith(graph, "light", w[0]); + vertexWith(graph, "neither", w[0]); + vertexWith(graph, "daylight", w[0]); + vertexWith(graph, "nor", w[0]); + vertexWith(graph, "moonlight", w[0]); + vertexWith(graph, "starlight", w[0]); + + // FIXME: test this! + /* + assertEquals("its", iterator.get.getNormalized()); + assertEquals("soft", iterator.next().getNormalized()); + assertEquals("changeless", iterator.next().getNormalized()); + assertEquals("faint", iterator.next().getNormalized()); + assertEquals("unchanging", iterator.next().getNormalized()); + assertEquals("light", iterator.next().getNormalized()); + assertEquals("neither", iterator.next().getNormalized()); + assertEquals("daylight", iterator.next().getNormalized()); + assertEquals("nor", iterator.next().getNormalized()); + assertEquals("moonlight", iterator.next().getNormalized()); + assertEquals("nor", iterator.next().getNormalized()); + assertEquals("starlight", iterator.next().getNormalized()); + assertEquals("nor", iterator.next().getNormalized()); + assertEquals("unlike", iterator.next().getNormalized()); + assertEquals("any", iterator.next().getNormalized()); + assertEquals("light", iterator.next().getNormalized()); + assertEquals("he", iterator.next().getNormalized()); + assertEquals("could", iterator.next().getNormalized()); + */ + } + + @Test + public void 
sentence42Transposition() { + // punctuation should be treated as separate tokens for this test to succeed + final SimpleWitness[] w = createWitnesses(// + "The same clock as when for example Magee once died.",// + "The same as when for example Magee once died.",// + "The same as when for example McKee once died .",// + "The same as when among others Darly once died & left him.",// + "The same as when Darly among others once died and left him."); + + final VariantGraph graph = collate(w[0], w[1]); + assertGraphContains(graph, "the", "same", "clock", "as", "when", "for", "example", "magee", "once", "died"); + + collate(graph, w[2]); + assertGraphContains(graph, "the", "same", "clock", "as", "when", "for", "example", "magee", "mckee", "once", "died"); + + collate(graph, w[3]); + assertGraphContains(graph, "the", "same", "clock", "as", "when", "for", "example", "magee", "mckee", "among", "others", "darly", "once", "died", "left", "him"); + + // transpositions should be handled correctly for this test to succeed + collate(graph, w[4]); + final List> phraseMatches = ((DekkerAlgorithm) collationAlgorithm).getPhraseMatches(); + final List> transpositions = ((DekkerAlgorithm) collationAlgorithm).getTranspositions(); + assertEquals("The same", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(0)))); + assertEquals("as when", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(1)))); + assertEquals("Darly", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(2)))); + assertEquals("among others", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(3)))); + assertEquals("once died", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(4)))); + assertEquals("left him", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(phraseMatches.get(5)))); + assertEquals(1, transpositions.size()); + assertEquals("Darly", SimpleToken.toString(PHRASE_MATCH_TO_TOKENS.apply(transpositions.get(0)))); + } + + private 
static void assertGraphContains(VariantGraph graph, String... expected) { + SortedSet contents = new TreeSet<>(); + for (Witness witness : graph.witnesses()) { + extractPhrases(contents, graph, witness); + } + Assert.assertTrue(contents.containsAll(Arrays.asList(expected))); + } +} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java index 178cfd3ab..bb330feeb 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/DarwinTest.java @@ -29,31 +29,33 @@ * @author Gregor Middell */ public class DarwinTest extends AbstractTest { - - @Before - public void switchCollationAlgorithm() { - //collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(new EqualityTokenComparator()); - } - @Test - public void cyclicJoin() { - final VariantGraph graph = collate(// + @Before + public void switchCollationAlgorithm() { + //collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(new EqualityTokenComparator()); + } + + @Test + public void cyclicJoin() { + final VariantGraph graph = collate(// "It has been disputed at what period of life the causes of variability, whatever they may be, generally act; whether during the early or late period of development of the embryo, or at the instant of conception. Geoffroy St. Hilaire's experiments show that unnatural treatment of the embryo causes monstrosities; and monstrosities cannot be separated by any clear line of distinction from mere variations. But I am strongly inclined to suspect that the most frequent cause of variability may be attributed to the male and female reproductive elements having been affected prior to the act of conception. 
Several reasons make me believe in this; but the chief one is the remarkable effect which confinement or cultivation has on the functions of the reproductive system; this system appearing to be far more susceptible than any other part of the organisation, to the action of any change in the conditions of life. Nothing is more easy than to tame an animal, and few things more difficult than to get it to breed freely under confinement, even in the many cases when the male and female unite. How many animals there are which will not breed, though living long under not very close confinement in their native country! This is generally attributed to vitiated instincts; but how many cultivated plants display the utmost vigour, and yet rarely or never seed! In some few such cases it has been found out that very trifling changes, such as a little more or less water at some particular period of growth, will determine whether or not the plant sets a seed. I cannot here enter on the copious details which I have collected on this curious subject; but to show how singular the laws are which determine the reproduction of animals under confinement, I may just mention that carnivorous animals, even from the tropics, breed in this country pretty freely under confinement, with the exception of the plantigrades or bear family; whereas, carnivorous birds, with the rarest exceptions, hardly ever lay fertile eggs. Many exotic plants have pollen utterly worthless, in the same exact condition as in the most sterile hybrids. 
When, on the one hand, we see domesticated animals and plants, though often weak and sickly, yet breeding quite freely under confinement; and when, on the other hand, we see individuals, though taken young from a state of nature, perfectly tamed, long-lived, and healthy (of which I could give numerous instances), yet having their reproductive system so seriously affected by unperceived causes as to fail in acting, we need not be surprised at this system, when it does act under confinement, acting not quite regularly, and producing offspring not perfectly like their parents or variable.",// "With respect to what I have called the indirect action of changed conditions, namely, through the reproductive system being affected, we may infer that variability is thus induced, partly from the fact of this system being extremely sensitive to any change in the conditions, and partly from the similarity, as Kölreuter and others have remarked, between the variability which follows from the crossing of distinct species, and that which may be observed with all plants and animals when reared under new or unnatural conditions. Many facts clearly show how eminently susceptible the reproductive system is to very slight changes in the surrounding conditions. Nothing is more easy than to tame an animal, and few things more difficult than to get it to breed freely under confinement, even when the male and female unite. How many animals there are which will not breed, though kept in an almost free state in their native country! This is generally, but erroneously, attributed to vitiated instincts. Many cultivated plants display the utmost vigour, and yet rarely or never seed! In some few cases it has been discovered that a very trifling change, such as a little more or less water at some particular period of growth, will determine whether or not a plant will produce seeds. 
I cannot here give the details which I have collected and elsewhere published on this curious subject; but to show how singular the laws are which determine the reproduction of animals under confinement, I may mention that carnivorous animals, even from the tropics, breed in this country pretty freely under confinement, with the exception of the plantigrades or bear family, which seldom produce young; whereas carnivorous birds, with the rarest exceptions, hardly ever lay fertile eggs. Many exotic plants have pollen utterly worthless, in the same condition as in the most sterile hybrids. When, on the one hand, we see domesticated animals and plants, though often weak and sickly, yet breeding freely under confinement; and when, on the other hand, we see individuals, though taken young from a state of nature, perfectly tamed, long-lived, and healthy (of which I could give numerous instances), yet having their reproductive system so seriously affected by unperceived causes as to fail to act, we need not be surprised at this system, when it does act under confinement, acting irregularly, and producing offspring somewhat unlike their parents. 
I may add, that as some organisms breed freely under the most unnatural conditions (for instance, rabbits and ferrets kept in hutches), showing that their reproductive organs are not affected; so will some animals and plants withstand domestication or cultivation, and vary very slightly — perhaps hardly more than in a state of nature."); - graph.vertices().forEach(v -> {}); // does implicit cycle detection - VariantGraph.JOIN.apply(graph).vertices().forEach(v -> {}); // does implicit cycle detection - } + graph.vertices().forEach(v -> { + }); // does implicit cycle detection + VariantGraph.JOIN.apply(graph).vertices().forEach(v -> { + }); // does implicit cycle detection + } - @Test - public void incomplete() { - final VariantGraph graph = collate(// + @Test + public void incomplete() { + final VariantGraph graph = collate(// "Habit also has a decided influence, as in the period of flowering with plants when transported from one climate to another. In animals it has a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and I presume that this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parent. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is another instance of the effect of use. Not a single domestic animal can be named which has not in some country drooping ears; and the view suggested by some authors, that the drooping is due to the disuse of the muscles of the ear, from the animals not being much alarmed by danger, seems probable.", "Habit also has a decided influence, as in the period of flowering with plants when transported from one climate to another. 
In animals it has a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and I presume that this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parent. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is another instance of the effect of use. Not a single domestic animal can be named which has not in some country drooping ears; and the view suggested by some authors, that the drooping is due to the disuse of the muscles of the ear, from the animals not being much alarmed by danger, seems probable.", "Habit also has a decided influence, as in the period of flowering with plants when transported from one climate to another. In animals it has a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and I presume that this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parent. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is another instance of the effect of use. Not a single domestic animal can be named which has not in some country drooping ears; and the view suggested by some authors, that the drooping is due to the disuse of the muscles of the ear, from the animals not being much alarmed by danger, seems probable.", "Effects of Habit; Correlation of Growth; Inheritance. Habit also has a decided influence, as in the period of flowering with plants when transported from one climate to another. 
In animals it has a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and I presume that this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parent. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is probably another instance of the effects of use. Not a single domestic animal can be named which has not in some country drooping ears; and the view which has been suggested that the drooping is due to the disuse of the muscles of the ear, from the animals being seldom alarmed by danger, seems probable.", "Habits are inherited and have a decided influence; as in the period of the flowering of plants when transported from one climate to another. In animals they have a more marked effect; for instance, I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild-duck; and this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parents. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with the state of these organs in other countries, is probably another instance of the effects of use. Not one of our domestic animals can be named which has not in some country drooping ears; and the view which has been suggested that the drooping is due to the disuse of the muscles of the ear, from the animals being seldom alarmed by danger, seems probable.", "Effects of Habit and of the Use or Disuse of Parts; Correlated Variation; Inheritance. 
Changed habits produce an inherited effect, as in the period of the flowering of plants when transported from one climate to another. With animals the increased use or disuse of parts has had a more marked influence; thus I find in the domestic duck that the bones of the wing weigh less and the bones of the leg more, in proportion to the whole skeleton, than do the same bones in the wild duck; and this change may be safely attributed to the domestic duck flying much less, and walking more, than its wild parents. The great and inherited development of the udders in cows and goats in countries where they are habitually milked, in comparison with these organs in other countries, is probably another instance of the effects of use. Not one of our domestic animals can be named which has not in some country drooping ears; and the view which has been suggested that the drooping is due to the disuse of the muscles of the ear, from the animals being seldom much alarmed, seems probable."); - Assert.assertEquals(6, witnesses(table(graph)).count()); - } + Assert.assertEquals(6, witnesses(table(graph)).count()); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java index 98c79dd29..5d9c967bc 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/SpencerHoweTest.java @@ -45,35 +45,35 @@ */ public class SpencerHoweTest extends AbstractTest { - @Test - public void alignmentTable() { - final SimpleWitness[] w = createWitnesses("a b c d e f", "x y z d e", "a b x y z"); - final List>> table = VariantGraphRanking.of(collate(w)).asTable(); + @Test + public void alignmentTable() { + final SimpleWitness[] w = createWitnesses("a b c d e f", "x y z d e", "a b x y z"); + final List>> table = VariantGraphRanking.of(collate(w)).asTable(); - assertEquals(3, 
table.stream().flatMap(r -> r.keySet().stream()).distinct().count()); - //NOTE: Currently the AT visualization aligns variation to the left of the table: see the 'C' element - assertEquals("|a|b|c| | |d|e|f|", toString(table, w[0])); - assertEquals("| | |x|y|z|d|e| |", toString(table, w[1])); - assertEquals("|a|b|x|y|z| | | |", toString(table, w[2])); - } + assertEquals(3, table.stream().flatMap(r -> r.keySet().stream()).distinct().count()); + //NOTE: Currently the AT visualization aligns variation to the left of the table: see the 'C' element + assertEquals("|a|b|c| | |d|e|f|", toString(table, w[0])); + assertEquals("| | |x|y|z|d|e| |", toString(table, w[1])); + assertEquals("|a|b|x|y|z| | | |", toString(table, w[2])); + } - @Test - public void graph() { - final SimpleWitness[] w = createWitnesses("a", "b", "a b"); - final VariantGraph graph = collate(w); - - assetGraphSize(graph, 4, 5); + @Test + public void graph() { + final SimpleWitness[] w = createWitnesses("a", "b", "a b"); + final VariantGraph graph = collate(w); - final VariantGraph.Vertex startVertex = graph.getStart(); - final VariantGraph.Vertex aVertex = vertexWith(graph, "a", w[0]); - final VariantGraph.Vertex bVertex = vertexWith(graph, "b", w[1]); - final VariantGraph.Vertex endVertex = graph.getEnd(); + assetGraphSize(graph, 4, 5); - assertHasWitnesses(edgeBetween(startVertex, aVertex), w[0], w[2]); - assertHasWitnesses(edgeBetween(aVertex, endVertex), w[0]); - assertHasWitnesses(edgeBetween(startVertex, bVertex), w[1]); - assertHasWitnesses(edgeBetween(bVertex, endVertex), w[1], w[2]); - assertHasWitnesses(edgeBetween(aVertex, bVertex), w[2]); - } + final VariantGraph.Vertex startVertex = graph.getStart(); + final VariantGraph.Vertex aVertex = vertexWith(graph, "a", w[0]); + final VariantGraph.Vertex bVertex = vertexWith(graph, "b", w[1]); + final VariantGraph.Vertex endVertex = graph.getEnd(); + + assertHasWitnesses(edgeBetween(startVertex, aVertex), w[0], w[2]); + 
assertHasWitnesses(edgeBetween(aVertex, endVertex), w[0]); + assertHasWitnesses(edgeBetween(startVertex, bVertex), w[1]); + assertHasWitnesses(edgeBetween(bVertex, endVertex), w[1], w[2]); + assertHasWitnesses(edgeBetween(aVertex, bVertex), w[2]); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java index b0193573c..0fa175083 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionGraphTest.java @@ -16,71 +16,71 @@ public class TranspositionGraphTest extends AbstractTest { - @Before - public void setup() { - collationAlgorithm = new DekkerAlgorithm(new EqualityTokenComparator()); - ((DekkerAlgorithm)collationAlgorithm).setMergeTranspositions(true); - } - - @Test - public void transpositions() { - final SimpleWitness[] w = createWitnesses("the black and white cat", "the white and black cat", "the black and black cat"); - final VariantGraph graph = collate(w[0], w[1]); - assertEquals(2, graph.transpositions().size()); - collate(graph, w[2]); - final Set> transposed = graph.transpositions(); - assertEquals(2, transposed.size()); - } + @Before + public void setup() { + collationAlgorithm = new DekkerAlgorithm(new EqualityTokenComparator()); + ((DekkerAlgorithm) collationAlgorithm).setMergeTranspositions(true); + } + + @Test + public void transpositions() { + final SimpleWitness[] w = createWitnesses("the black and white cat", "the white and black cat", "the black and black cat"); + final VariantGraph graph = collate(w[0], w[1]); + assertEquals(2, graph.transpositions().size()); + collate(graph, w[2]); + final Set> transposed = graph.transpositions(); + assertEquals(2, transposed.size()); + } + + @Test + public void noTransposition() { + assertEquals(0, collate("no transposition", "no 
transposition").transpositions().size()); + assertEquals(0, collate("a b", "c a").transpositions().size()); + } - @Test - public void noTransposition() { - assertEquals(0, collate("no transposition", "no transposition").transpositions().size()); - assertEquals(0, collate("a b", "c a").transpositions().size()); - } + @Test + public void oneTransposition() { + assertEquals(1, collate("a b", "b a").transpositions().size()); + } - @Test - public void oneTransposition() { - assertEquals(1, collate("a b", "b a").transpositions().size()); - } + @Test + public void multipleTranspositions() { + assertEquals(1, collate("a b c", "b c a").transpositions().size()); + } - @Test - public void multipleTranspositions() { - assertEquals(1, collate("a b c", "b c a").transpositions().size()); - } + @Test + public void testTranspositionLimiter1() { + final SimpleWitness a = new SimpleWitness("A", "X a b"); + final SimpleWitness b = new SimpleWitness("B", "a b X"); + VariantGraph graph = collate(a, b); + assertEquals(1, graph.transpositions().size()); + } + + //test case supplied by Troy + @Test + public void testGreekTwoWitnesses() { + SimpleWitness[] w = createWitnesses( + "και αποκριθεισ ειπεν αυτω ου βλεπεισ ταυτασ μεγαλασ οικοδομασ αμην λεγω σοι ο(υ μη α)φεθη ωδε λιθοσ επι λιθω (οσ ου) μη καταλυθη", // + "και αποκριθεισ ο ι̅σ̅ ειπεν αυτω βλεπεισ Ταυτασ τασ μεγαλασ οικοδομασ λεγω υμιν ου μη αφεθη λιθοσ επι λιθου οσ ου μη καταλυθη"); + VariantGraph graph = collate(w[0], w[1]); + Set> transpositions = graph.transpositions(); + assertTrue(transpositions.isEmpty()); + } - @Test - public void testTranspositionLimiter1() { - final SimpleWitness a = new SimpleWitness("A","X a b"); - final SimpleWitness b = new SimpleWitness("B","a b X"); - VariantGraph graph = collate(a,b); - assertEquals(1, graph.transpositions().size()); - } - - //test case supplied by Troy - @Test - public void testGreekTwoWitnesses() { - SimpleWitness[] w = createWitnesses( - "και αποκριθεισ ειπεν αυτω ου βλεπεισ 
ταυτασ μεγαλασ οικοδομασ αμην λεγω σοι ο(υ μη α)φεθη ωδε λιθοσ επι λιθω (οσ ου) μη καταλυθη", // - "και αποκριθεισ ο ι̅σ̅ ειπεν αυτω βλεπεισ Ταυτασ τασ μεγαλασ οικοδομασ λεγω υμιν ου μη αφεθη λιθοσ επι λιθου οσ ου μη καταλυθη"); - VariantGraph graph = collate(w[0], w[1]); - Set> transpositions = graph.transpositions(); - assertTrue(transpositions.isEmpty()); - } - - //test case supplied by Troy - @Test - public void testGreekThreeWitnesses() { - SimpleWitness[] w = createWitnesses("και αποκριθεισ ειπεν αυτω ου βλεπεισ ταυτασ μεγαλασ οικοδομασ αμην λεγω σοι ο(υ μη α)φεθη ωδε λιθοσ επι λιθω (οσ ου) μη καταλυθη", "και αποκριθεισ ο ι̅σ̅ ειπεν αυτω βλεπεισ Ταυτασ τασ μεγαλασ οικοδομασ λεγω υμιν ου μη αφεθη λιθοσ επι λιθου οσ ου μη καταλυθη", "και ο ι̅σ̅ αποκριθεισ ειπεν αυτω βλεπεισ ταυτασ τασ μεγαλασ οικοδομασ ου μη αφεθη λιθοσ επι λιθον οσ ου μη καταλυθη"); - VariantGraph graph = collate(w[0], w[1], w[2]); - Set> transpositions = graph.transpositions(); - assertEquals(1, transpositions.size()); - Set transposition = transpositions.iterator().next(); - Set transposedVertices = new HashSet<>(); - for (Vertex transposedVertex : transposition) { - transposedVertices.add(transposedVertex.toString()); + //test case supplied by Troy + @Test + public void testGreekThreeWitnesses() { + SimpleWitness[] w = createWitnesses("και αποκριθεισ ειπεν αυτω ου βλεπεισ ταυτασ μεγαλασ οικοδομασ αμην λεγω σοι ο(υ μη α)φεθη ωδε λιθοσ επι λιθω (οσ ου) μη καταλυθη", "και αποκριθεισ ο ι̅σ̅ ειπεν αυτω βλεπεισ Ταυτασ τασ μεγαλασ οικοδομασ λεγω υμιν ου μη αφεθη λιθοσ επι λιθου οσ ου μη καταλυθη", "και ο ι̅σ̅ αποκριθεισ ειπεν αυτω βλεπεισ ταυτασ τασ μεγαλασ οικοδομασ ου μη αφεθη λιθοσ επι λιθον οσ ου μη καταλυθη"); + VariantGraph graph = collate(w[0], w[1], w[2]); + Set> transpositions = graph.transpositions(); + assertEquals(1, transpositions.size()); + Set transposition = transpositions.iterator().next(); + Set transposedVertices = new HashSet<>(); + for (Vertex transposedVertex : transposition) 
{ + transposedVertices.add(transposedVertex.toString()); + } + assertTrue(transposedVertices.contains("[B:2:'ο']")); + assertTrue(transposedVertices.contains("[C:2:'ι̅σ̅']")); } - assertTrue(transposedVertices.contains("[B:2:'ο']")); - assertTrue(transposedVertices.contains("[C:2:'ι̅σ̅']")); - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java index e8459b158..0fbe3d354 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/TranspositionRenderingTest.java @@ -36,91 +36,91 @@ * @author Gregor Middell */ public class TranspositionRenderingTest extends AbstractTest { - @Test - public void transposition1() { - final SimpleWitness[] w = createWitnesses(// + @Test + public void transposition1() { + final SimpleWitness[] w = createWitnesses(// "the white and black cat", "The black cat",// "the black and white cat", "the black and green cat"); - final List>> table = table(collate(w)); - - assertEquals("|the|white|and|black|cat|", toString(table, w[0])); - assertEquals("|the| | |black|cat|", toString(table, w[1])); - assertEquals("|the|black|and|white|cat|", toString(table, w[2])); - assertEquals("|the|black|and|green|cat|", toString(table, w[3])); - } - - @Test - public void transposition2() { - final SimpleWitness[] w = createWitnesses("He was agast, so", "He was agast", "So he was agast"); - final List>> table = table(collate(w)); - - assertEquals("| |he|was|agast|,|so|", toString(table, w[0])); - assertEquals("| |he|was|agast| | |", toString(table, w[1])); - assertEquals("|so|he|was|agast| | |", toString(table, w[2])); - } - - @Test - public void transposition2Reordered() { - final SimpleWitness[] w = createWitnesses("So he was agast", "He was agast", "He was agast, so"); - final List>> table = 
table(collate(w)); - - assertEquals("|so|he|was|agast| | |", toString(table, w[0])); - assertEquals("| |he|was|agast| | |", toString(table, w[1])); - assertEquals("| |he|was|agast|,|so|", toString(table, w[2])); - } - - @Test - public void testTranspositionLimiter1() { - final SimpleWitness a = new SimpleWitness("A","X a b"); - final SimpleWitness b = new SimpleWitness("B","a b X"); - VariantGraph graph = collate(a,b); - final List>> table = table(graph); - assertEquals("|x|a|b| |", toString(table, a)); - assertEquals("| |a|b|x|", toString(table, b)); - } - - @Test - public void testTranspositionLimiter2() { - final SimpleWitness a = new SimpleWitness("A","a b c ."); - final SimpleWitness b = new SimpleWitness("B","a b c d e f g h i j k l m n o p q r s t u v w ."); - final List>> table = table(collate(a, b)); - assertEquals("|a|b|c| | | | | | | | | | | | | | | | | | | | |.|", toString(table, a)); - assertEquals("|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|.|", toString(table, b)); - } - - @Test - public void testTranspositionLimiter3() { - final SimpleWitness a = new SimpleWitness("A","X a b c d e f g h i j k l m n o p"); - final SimpleWitness b = new SimpleWitness("B","a b c d e f g h i j k l m n o p X"); - VariantGraph graph = collate(a,b); - assertEquals(0, graph.transpositions().size()); - final List>> table = table(graph); - assertEquals("|x|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| |", toString(table, a)); - assertEquals("| |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|x|", toString(table, b)); - } - - @Test - public void testTranspositionLimiter4() { - final SimpleWitness a = new SimpleWitness("A","a b c d e f g h i j k l m n o p X"); - final SimpleWitness b = new SimpleWitness("B","X a b c d e f g h i j k l m n o p"); - VariantGraph graph = collate(a,b); - assertEquals(0, graph.transpositions().size()); - final List>> table = table(graph); - assertEquals("| |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|x|", toString(table, a)); - assertEquals("|x|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| |", 
toString(table, b)); - } - - @Test - public void additionInCombinationWithTransposition2() { - final SimpleWitness[] w = createWitnesses(// - "the cat is black",// - "black is the cat",// - "black and white is the cat"); - final List>> t = table(collate(w[0], w[1], w[2])); - assertEquals("|the|cat| |is|black| |", toString(t, w[0])); - assertEquals("|black| | |is|the|cat|", toString(t, w[1])); - assertEquals("|black|and|white|is|the|cat|", toString(t, w[2])); - } + final List>> table = table(collate(w)); + + assertEquals("|the|white|and|black|cat|", toString(table, w[0])); + assertEquals("|the| | |black|cat|", toString(table, w[1])); + assertEquals("|the|black|and|white|cat|", toString(table, w[2])); + assertEquals("|the|black|and|green|cat|", toString(table, w[3])); + } + + @Test + public void transposition2() { + final SimpleWitness[] w = createWitnesses("He was agast, so", "He was agast", "So he was agast"); + final List>> table = table(collate(w)); + + assertEquals("| |he|was|agast|,|so|", toString(table, w[0])); + assertEquals("| |he|was|agast| | |", toString(table, w[1])); + assertEquals("|so|he|was|agast| | |", toString(table, w[2])); + } + + @Test + public void transposition2Reordered() { + final SimpleWitness[] w = createWitnesses("So he was agast", "He was agast", "He was agast, so"); + final List>> table = table(collate(w)); + + assertEquals("|so|he|was|agast| | |", toString(table, w[0])); + assertEquals("| |he|was|agast| | |", toString(table, w[1])); + assertEquals("| |he|was|agast|,|so|", toString(table, w[2])); + } + + @Test + public void testTranspositionLimiter1() { + final SimpleWitness a = new SimpleWitness("A", "X a b"); + final SimpleWitness b = new SimpleWitness("B", "a b X"); + VariantGraph graph = collate(a, b); + final List>> table = table(graph); + assertEquals("|x|a|b| |", toString(table, a)); + assertEquals("| |a|b|x|", toString(table, b)); + } + + @Test + public void testTranspositionLimiter2() { + final SimpleWitness a = new 
SimpleWitness("A", "a b c ."); + final SimpleWitness b = new SimpleWitness("B", "a b c d e f g h i j k l m n o p q r s t u v w ."); + final List>> table = table(collate(a, b)); + assertEquals("|a|b|c| | | | | | | | | | | | | | | | | | | | |.|", toString(table, a)); + assertEquals("|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|.|", toString(table, b)); + } + + @Test + public void testTranspositionLimiter3() { + final SimpleWitness a = new SimpleWitness("A", "X a b c d e f g h i j k l m n o p"); + final SimpleWitness b = new SimpleWitness("B", "a b c d e f g h i j k l m n o p X"); + VariantGraph graph = collate(a, b); + assertEquals(0, graph.transpositions().size()); + final List>> table = table(graph); + assertEquals("|x|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| |", toString(table, a)); + assertEquals("| |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|x|", toString(table, b)); + } + + @Test + public void testTranspositionLimiter4() { + final SimpleWitness a = new SimpleWitness("A", "a b c d e f g h i j k l m n o p X"); + final SimpleWitness b = new SimpleWitness("B", "X a b c d e f g h i j k l m n o p"); + VariantGraph graph = collate(a, b); + assertEquals(0, graph.transpositions().size()); + final List>> table = table(graph); + assertEquals("| |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|x|", toString(table, a)); + assertEquals("|x|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| |", toString(table, b)); + } + + @Test + public void additionInCombinationWithTransposition2() { + final SimpleWitness[] w = createWitnesses(// + "the cat is black",// + "black is the cat",// + "black and white is the cat"); + final List>> t = table(collate(w[0], w[1], w[2])); + assertEquals("|the|cat| |is|black| |", toString(t, w[0])); + assertEquals("|black| | |is|the|cat|", toString(t, w[1])); + assertEquals("|black|and|white|is|the|cat|", toString(t, w[2])); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java 
b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java index 1d7a80027..c23c17512 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphRankerTest.java @@ -32,45 +32,45 @@ public class VariantGraphRankerTest extends AbstractTest { - @Test - public void ranking() { - final VariantGraph graph = collate("The black cat", "The black and white cat", "The black and green cat"); - final VariantGraphRanking ranking = VariantGraphRanking.of(graph); - final List vertices = StreamSupport.stream(graph.vertices().spliterator(), false).collect(Collectors.toList()); + @Test + public void ranking() { + final VariantGraph graph = collate("The black cat", "The black and white cat", "The black and green cat"); + final VariantGraphRanking ranking = VariantGraphRanking.of(graph); + final List vertices = StreamSupport.stream(graph.vertices().spliterator(), false).collect(Collectors.toList()); - assertVertexEquals("the", vertices.get(1)); - assertEquals(1, (long) ranking.apply(vertices.get(1))); + assertVertexEquals("the", vertices.get(1)); + assertEquals(1, (long) ranking.apply(vertices.get(1))); - assertVertexEquals("black", vertices.get(2)); - assertEquals(2, (long) ranking.apply(vertices.get(2))); + assertVertexEquals("black", vertices.get(2)); + assertEquals(2, (long) ranking.apply(vertices.get(2))); - assertVertexEquals("and", vertices.get(3)); - assertEquals(3, (long) ranking.apply(vertices.get(3))); + assertVertexEquals("and", vertices.get(3)); + assertEquals(3, (long) ranking.apply(vertices.get(3))); - assertEquals(4, (long) ranking.apply(vertices.get(4))); // green or white - assertEquals(4, (long) ranking.apply(vertices.get(5))); // green or white + assertEquals(4, (long) ranking.apply(vertices.get(4))); // green or white + assertEquals(4, (long) ranking.apply(vertices.get(5))); // green or white - 
assertVertexEquals("cat", vertices.get(6)); - assertEquals(5, (long) ranking.apply(vertices.get(6))); - } + assertVertexEquals("cat", vertices.get(6)); + assertEquals(5, (long) ranking.apply(vertices.get(6))); + } - @Test - public void agastTranspositionHandling() { - final VariantGraph graph = collate("He was agast, so", "He was agast", "So he was agast"); - final VariantGraphRanking ranking = VariantGraphRanking.of(graph); - final List vertices = StreamSupport.stream(graph.vertices().spliterator(), false).collect(Collectors.toList()); + @Test + public void agastTranspositionHandling() { + final VariantGraph graph = collate("He was agast, so", "He was agast", "So he was agast"); + final VariantGraphRanking ranking = VariantGraphRanking.of(graph); + final List vertices = StreamSupport.stream(graph.vertices().spliterator(), false).collect(Collectors.toList()); - assertVertexEquals("so", vertices.get(1)); - assertEquals(1,(long) ranking.apply(vertices.get(1))); - assertVertexEquals("he", vertices.get(2)); - assertEquals(2, (long) ranking.apply(vertices.get(2))); - assertVertexEquals("was", vertices.get(3)); - assertEquals(3, (long) ranking.apply(vertices.get(3))); - assertVertexEquals("agast", vertices.get(4)); - assertEquals(4, (long) ranking.apply(vertices.get(4))); - assertVertexEquals(",", vertices.get(5)); - assertEquals(5, (long) ranking.apply(vertices.get(5))); - assertVertexEquals("so", vertices.get(6)); - assertEquals(6, (long) ranking.apply(vertices.get(6))); - } + assertVertexEquals("so", vertices.get(1)); + assertEquals(1, (long) ranking.apply(vertices.get(1))); + assertVertexEquals("he", vertices.get(2)); + assertEquals(2, (long) ranking.apply(vertices.get(2))); + assertVertexEquals("was", vertices.get(3)); + assertEquals(3, (long) ranking.apply(vertices.get(3))); + assertVertexEquals("agast", vertices.get(4)); + assertEquals(4, (long) ranking.apply(vertices.get(4))); + assertVertexEquals(",", vertices.get(5)); + assertEquals(5, (long) 
ranking.apply(vertices.get(5))); + assertVertexEquals("so", vertices.get(6)); + assertEquals(6, (long) ranking.apply(vertices.get(6))); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java index dec853d7f..bc0069afb 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/VariantGraphTest.java @@ -29,93 +29,93 @@ */ public class VariantGraphTest extends AbstractTest { - @Test - public void twoWitnesses() { - final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); - final VariantGraph graph = collate(w); - - assetGraphSize(graph, 5, 4); - - final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); - final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); - final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(catVertex, graph.getEnd()), w[0], w[1]); - } - - @Test - public void addition1() { - final SimpleWitness[] w = createWitnesses("the black cat", "the white and black cat"); - final VariantGraph graph = collate(w); - - assetGraphSize(graph, 7, 7); - - final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); - final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); - final VariantGraph.Vertex andVertex = vertexWith(graph, "and", w[1]); - final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); - final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1]); - 
assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0]); - assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0], w[1]); - assertHasWitnesses(edgeBetween(catVertex, graph.getEnd()), w[0], w[1]); - assertHasWitnesses(edgeBetween(theVertex, whiteVertex), w[1]); - assertHasWitnesses(edgeBetween(whiteVertex, andVertex), w[1]); - assertHasWitnesses(edgeBetween(andVertex, blackVertex), w[1]); - } - - @Test - public void variant() { - final SimpleWitness[] w = createWitnesses("the black cat", "the white cat", "the green cat", "the red cat", "the yellow cat"); - final VariantGraph graph = collate(w); - - assetGraphSize(graph, 9, 12); - - final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); - final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); - final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); - final VariantGraph.Vertex greenVertex = vertexWith(graph, "green", w[2]); - final VariantGraph.Vertex redVertex = vertexWith(graph, "red", w[3]); - final VariantGraph.Vertex yellowVertex = vertexWith(graph, "yellow", w[4]); - final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); - - assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1], w[2], w[3], w[4]); - assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0]); - assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0]); - assertHasWitnesses(edgeBetween(catVertex, graph.getEnd()), w[0], w[1], w[2], w[3], w[4]); - assertHasWitnesses(edgeBetween(theVertex, whiteVertex), w[1]); - assertHasWitnesses(edgeBetween(whiteVertex, catVertex), w[1]); - assertHasWitnesses(edgeBetween(theVertex, greenVertex), w[2]); - assertHasWitnesses(edgeBetween(greenVertex, catVertex), w[2]); - assertHasWitnesses(edgeBetween(theVertex, redVertex), w[3]); - assertHasWitnesses(edgeBetween(redVertex, catVertex), w[3]); - assertHasWitnesses(edgeBetween(theVertex, yellowVertex), w[4]); - assertHasWitnesses(edgeBetween(yellowVertex, catVertex), 
w[4]); - } - - @Test - public void doubleTransposition2() { - final SimpleWitness[] w = createWitnesses("a b", "b a"); - final VariantGraph graph = collate(w); - - assertGraphVertices(graph, 5); - - assertHasWitnesses(edgeBetween(vertexWith(graph, "b", w[1]), vertexWith(graph, "a", w[1])), w[1]); - assertHasWitnesses(edgeBetween(vertexWith(graph, "a", w[0]), vertexWith(graph, "b", w[0])), w[0]); - } - - @Test - public void mirroredTranspositionsWithMatchInBetween() { - final SimpleWitness[] w = createWitnesses("the black and white cat", "the white and black cat"); - final VariantGraph graph = collate(w); - - assertGraphVertices(graph, 9); - - // FIXME: find out, how to test this without stable topological order + @Test + public void twoWitnesses() { + final SimpleWitness[] w = createWitnesses("the black cat", "the black cat"); + final VariantGraph graph = collate(w); + + assetGraphSize(graph, 5, 4); + + final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); + final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); + final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(catVertex, graph.getEnd()), w[0], w[1]); + } + + @Test + public void addition1() { + final SimpleWitness[] w = createWitnesses("the black cat", "the white and black cat"); + final VariantGraph graph = collate(w); + + assetGraphSize(graph, 7, 7); + + final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); + final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); + final VariantGraph.Vertex andVertex = vertexWith(graph, "and", w[1]); + final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); + final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", 
w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0]); + assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0], w[1]); + assertHasWitnesses(edgeBetween(catVertex, graph.getEnd()), w[0], w[1]); + assertHasWitnesses(edgeBetween(theVertex, whiteVertex), w[1]); + assertHasWitnesses(edgeBetween(whiteVertex, andVertex), w[1]); + assertHasWitnesses(edgeBetween(andVertex, blackVertex), w[1]); + } + + @Test + public void variant() { + final SimpleWitness[] w = createWitnesses("the black cat", "the white cat", "the green cat", "the red cat", "the yellow cat"); + final VariantGraph graph = collate(w); + + assetGraphSize(graph, 9, 12); + + final VariantGraph.Vertex theVertex = vertexWith(graph, "the", w[0]); + final VariantGraph.Vertex blackVertex = vertexWith(graph, "black", w[0]); + final VariantGraph.Vertex whiteVertex = vertexWith(graph, "white", w[1]); + final VariantGraph.Vertex greenVertex = vertexWith(graph, "green", w[2]); + final VariantGraph.Vertex redVertex = vertexWith(graph, "red", w[3]); + final VariantGraph.Vertex yellowVertex = vertexWith(graph, "yellow", w[4]); + final VariantGraph.Vertex catVertex = vertexWith(graph, "cat", w[0]); + + assertHasWitnesses(edgeBetween(graph.getStart(), theVertex), w[0], w[1], w[2], w[3], w[4]); + assertHasWitnesses(edgeBetween(theVertex, blackVertex), w[0]); + assertHasWitnesses(edgeBetween(blackVertex, catVertex), w[0]); + assertHasWitnesses(edgeBetween(catVertex, graph.getEnd()), w[0], w[1], w[2], w[3], w[4]); + assertHasWitnesses(edgeBetween(theVertex, whiteVertex), w[1]); + assertHasWitnesses(edgeBetween(whiteVertex, catVertex), w[1]); + assertHasWitnesses(edgeBetween(theVertex, greenVertex), w[2]); + assertHasWitnesses(edgeBetween(greenVertex, catVertex), w[2]); + assertHasWitnesses(edgeBetween(theVertex, redVertex), w[3]); + assertHasWitnesses(edgeBetween(redVertex, catVertex), w[3]); + 
assertHasWitnesses(edgeBetween(theVertex, yellowVertex), w[4]); + assertHasWitnesses(edgeBetween(yellowVertex, catVertex), w[4]); + } + + @Test + public void doubleTransposition2() { + final SimpleWitness[] w = createWitnesses("a b", "b a"); + final VariantGraph graph = collate(w); + + assertGraphVertices(graph, 5); + + assertHasWitnesses(edgeBetween(vertexWith(graph, "b", w[1]), vertexWith(graph, "a", w[1])), w[1]); + assertHasWitnesses(edgeBetween(vertexWith(graph, "a", w[0]), vertexWith(graph, "b", w[0])), w[0]); + } + + @Test + public void mirroredTranspositionsWithMatchInBetween() { + final SimpleWitness[] w = createWitnesses("the black and white cat", "the white and black cat"); + final VariantGraph graph = collate(w); + + assertGraphVertices(graph, 9); + + // FIXME: find out, how to test this without stable topological order /* final Iterator iterator = graph.iterator(); @@ -128,24 +128,24 @@ public void mirroredTranspositionsWithMatchInBetween() { assertEquals("black", iterator.next().getNormalized()); assertEquals("cat", iterator.next().getNormalized()); */ - } - - // @Test - // public void transpositionJoin() { - // final SimpleWitness[] w = createWitnesses("pre a b c d post", "pre c d a b post"); - // final VariantGraph graph = collate(w).join(); - // Iterable vertices = graph.vertices(); - // // assertEquals(2, Iterables.size(vertices)); - // Iterator iterator = vertices.iterator(); - // for (VariantGraphVertex variantGraphVertex : vertices) { - // LOG.debug("{}", variantGraphVertex); - // } - // assertEquals("[]", iterator.next().toString()); - // assertEquals("[A:0:'pre', B:0:'pre']", iterator.next().toString()); - // assertEquals("[B:1:'b']", iterator.next().toString()); - // assertEquals("[A:1:'a', B:2:'a']", iterator.next().toString()); - // assertEquals("[A:1:'b']", iterator.next().toString()); - // assertEquals("[A:0:'post', B:0:'post']", iterator.next().toString()); - // assertEquals("[]", iterator.next().toString()); - // } + } + + // @Test + 
// public void transpositionJoin() { + // final SimpleWitness[] w = createWitnesses("pre a b c d post", "pre c d a b post"); + // final VariantGraph graph = collate(w).join(); + // Iterable vertices = graph.vertices(); + // // assertEquals(2, Iterables.size(vertices)); + // Iterator iterator = vertices.iterator(); + // for (VariantGraphVertex variantGraphVertex : vertices) { + // LOG.debug("{}", variantGraphVertex); + // } + // assertEquals("[]", iterator.next().toString()); + // assertEquals("[A:0:'pre', B:0:'pre']", iterator.next().toString()); + // assertEquals("[B:1:'b']", iterator.next().toString()); + // assertEquals("[A:1:'a', B:2:'a']", iterator.next().toString()); + // assertEquals("[A:1:'b']", iterator.next().toString()); + // assertEquals("[A:0:'post', B:0:'post']", iterator.next().toString()); + // assertEquals("[]", iterator.next().toString()); + // } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java index 4fba32f25..822e5b147 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/HermansTest.java @@ -49,314 +49,314 @@ public class HermansTest extends AbstractTest { - @Before - public void setUp() { - setCollationAlgorithm(CollationAlgorithmFactory.dekkerMatchMatrix(new StrictEqualityTokenComparator(), 1)); - } - - @Test - public void testHermansText1() { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn."; - String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        "; - SimpleWitness[] sw = createWitnesses(textD1, textD9); - VariantGraph vg = collate(sw[0]); - MatchTable buildMatrix = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - LOG.fine(new MatchTableSerializer(buildMatrix).toHtml()); - } - - @Test - public void testHermansText2() { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; - String textD9 = "Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; - SimpleWitness[] sw = createWitnesses(textD1, textD9); - VariantGraph vg = collate(sw[0]); - MatchTable matchTable = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - // System.out.println(buildMatrix.toHtml()); - IslandConflictResolver archipelago = new IslandConflictResolver(matchTable); + @Before + public void setUp() { + setCollationAlgorithm(CollationAlgorithmFactory.dekkerMatchMatrix(new StrictEqualityTokenComparator(), 1)); + } + + @Test + public void testHermansText1() { + String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn."; + String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        "; + SimpleWitness[] sw = createWitnesses(textD1, textD9); + VariantGraph vg = collate(sw[0]); + MatchTable buildMatrix = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + LOG.fine(new MatchTableSerializer(buildMatrix).toHtml()); + } + + @Test + public void testHermansText2() { + String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; + String textD9 = "Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; + SimpleWitness[] sw = createWitnesses(textD1, textD9); + VariantGraph vg = collate(sw[0]); + MatchTable matchTable = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + // System.out.println(buildMatrix.toHtml()); + IslandConflictResolver archipelago = new IslandConflictResolver(matchTable); // LOG.fine("archipelago: " + archipelago); // LOG.fine("archipelago.size(): " + archipelago.size()); // assertEquals(42, archipelago.size()); // assertEquals(98, archipelago.numOfConflicts()); - // assertTrue(false); - // archipelago.createNonConflictingVersions(); - // assertEquals(603,archipelago.numOfNonConflConstell()); - // assertEquals(500,archipelago.getVersion(0).value()); - // assertEquals(497,archipelago.getVersion(4).value()); - - MatchTableSelection firstVersion = archipelago.createNonConflictingVersion(); - for (Island isl : firstVersion.getIslands()) { - LOG.fine(" " + isl.size()); + // 
assertTrue(false); + // archipelago.createNonConflictingVersions(); + // assertEquals(603,archipelago.numOfNonConflConstell()); + // assertEquals(500,archipelago.getVersion(0).value()); + // assertEquals(497,archipelago.getVersion(4).value()); + + MatchTableSelection firstVersion = archipelago.createNonConflictingVersion(); + for (Island isl : firstVersion.getIslands()) { + LOG.fine(" " + isl.size()); + } + // for(int i=0; i<10; i++) { + // try { + // String file_name = "result_"+i+".html"; + // File logFile = new File(File.separator + + // "C:\\Documents and Settings\\meindert\\Mijn Documenten\\Project Hermans productielijn\\Materiaal input collateX\\output_collatex_exp\\"+file_name); + // PrintWriter logging = new PrintWriter(new FileOutputStream(logFile)); + // logging.println(buildMatrix.toHtml(archipelago.getVersion(i))); + // logging.close(); + // } catch (FileNotFoundException e) { + // e.printStackTrace(); + // } + // } + } - // for(int i=0; i<10; i++) { - // try { - // String file_name = "result_"+i+".html"; - // File logFile = new File(File.separator + - // "C:\\Documents and Settings\\meindert\\Mijn Documenten\\Project Hermans productielijn\\Materiaal input collateX\\output_collatex_exp\\"+file_name); - // PrintWriter logging = new PrintWriter(new FileOutputStream(logFile)); - // logging.println(buildMatrix.toHtml(archipelago.getVersion(i))); - // logging.close(); - // } catch (FileNotFoundException e) { - // e.printStackTrace(); - // } - // } - - } - - @Ignore("Archipelago size changed?") - @Test - public void testHermansText3() { - String textMZ_DJ233 = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \n" + "J. W. Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van litterair criticus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \n" + "Zo nu en dan koopt Elsevier een artikel van een echte professor wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n" + "Edouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \n" + "De politiek van dit blad wordt geschreven door een der leeuwen uit het Nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        \n"; - String textD4F = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \n" + "J. W.Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van literair kritikus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \n" + "Edouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \n" + "Nu en dan koopt Elsevier een artikel van een echte professor, wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n" + "\n" + "De politiek van dit blad wordt geschreven door een der leeuwen uit het nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        "; - SimpleWitness[] sw = createWitnesses(textMZ_DJ233, textD4F); - VariantGraph vg = collate(sw[0]); - MatchTable matchTable = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - // try { - // FileWriter fw = new - // FileWriter("C:\\Documents and Settings\\meindert\\Mijn Documenten\\Project Hermans productielijn\\Materiaal input collateX\\Hulp1.html"); - // fw.write(buildMatrix.toHtml()); - // } catch (IOException e) { - // e.printStackTrace(); - // } - // System.out.println(buildMatrix.toHtml()); - IslandConflictResolver archipelago = new IslandConflictResolver(matchTable); + + @Ignore("Archipelago size changed?") + @Test + public void testHermansText3() { + String textMZ_DJ233 = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \n" + "J. W. Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van litterair criticus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \n" + "Zo nu en dan koopt Elsevier een artikel van een echte professor wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n" + "Edouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \n" + "De politiek van dit blad wordt geschreven door een der leeuwen uit het Nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        \n"; + String textD4F = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \n" + "J. W.Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van literair kritikus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \n" + "Edouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \n" + "Nu en dan koopt Elsevier een artikel van een echte professor, wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n" + "\n" + "De politiek van dit blad wordt geschreven door een der leeuwen uit het nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        "; + SimpleWitness[] sw = createWitnesses(textMZ_DJ233, textD4F); + VariantGraph vg = collate(sw[0]); + MatchTable matchTable = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + // try { + // FileWriter fw = new + // FileWriter("C:\\Documents and Settings\\meindert\\Mijn Documenten\\Project Hermans productielijn\\Materiaal input collateX\\Hulp1.html"); + // fw.write(buildMatrix.toHtml()); + // } catch (IOException e) { + // e.printStackTrace(); + // } + // System.out.println(buildMatrix.toHtml()); + IslandConflictResolver archipelago = new IslandConflictResolver(matchTable); // LOG.fine("archipelago: " + archipelago); // LOG.fine("archipelago.size(): " + archipelago.size()); - MatchTableSelection firstVersion = archipelago.createNonConflictingVersion(); - for (Island isl : firstVersion.getIslands()) { - LOG.fine(" " + isl.size()); + MatchTableSelection firstVersion = archipelago.createNonConflictingVersion(); + for (Island isl : firstVersion.getIslands()) { + LOG.fine(" " + isl.size()); + } + // assertEquals(4877, firstVersion.value()); + // assertTrue(false); + + // archipelago.createNonConflictingVersions(); + // assertEquals(603,archipelago.numOfNonConflConstell()); + // assertEquals(500,archipelago.getVersion(0).value()); + // assertEquals(497,archipelago.getVersion(4).value()); + // for(int i=0; i<10; i++) { + // try { + // String file_name = "result_2_"+i+".html"; + // File logFile = new File(File.separator + + // "C:\\Documents and Settings\\meindert\\Mijn Documenten\\Project Hermans productielijn\\Materiaal input collateX\\output_collatex_exp\\"+file_name); + // PrintWriter logging = new PrintWriter(new FileOutputStream(logFile)); + // logging.println(buildMatrix.toHtml(archipelago.getVersion(i))); + // logging.close(); + // } catch (FileNotFoundException e) { + // e.printStackTrace(); + // } + // } } - // assertEquals(4877, firstVersion.value()); - // assertTrue(false); - - // archipelago.createNonConflictingVersions(); - // 
assertEquals(603,archipelago.numOfNonConflConstell()); - // assertEquals(500,archipelago.getVersion(0).value()); - // assertEquals(497,archipelago.getVersion(4).value()); - // for(int i=0; i<10; i++) { - // try { - // String file_name = "result_2_"+i+".html"; - // File logFile = new File(File.separator + - // "C:\\Documents and Settings\\meindert\\Mijn Documenten\\Project Hermans productielijn\\Materiaal input collateX\\output_collatex_exp\\"+file_name); - // PrintWriter logging = new PrintWriter(new FileOutputStream(logFile)); - // logging.println(buildMatrix.toHtml(archipelago.getVersion(i))); - // logging.close(); - // } catch (FileNotFoundException e) { - // e.printStackTrace(); - // } - // } - } - - @Test - public void testHermansTextWithOutliers() throws XMLStreamException { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn. Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; - String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; - SimpleWitness[] witnesses = createWitnesses(textD1, textD9); - - testWitnessCollation(witnesses); - } - - @Test - public void testHermansText2b() throws XMLStreamException { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn. Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; - String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; - String textDMD1 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        \nOp sommige dekken van de stomer lagen mensen in de zon, op andere dekken werd getennist, op nog andere liepen de passagiers heen en weer en praatten. Wie over de reling hing en recht naar beneden keek, kon vaststellen dat het schip vorderde; of draaide alleen de aarde er onderdoor?

        \nOp de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon.

        "; - SimpleWitness[] witnesses = createWitnesses(textD1, textD9, textDMD1); - - testWitnessCollation(witnesses); - } - - @Test - public void testHermansText2c() throws XMLStreamException { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer."; - String textD9 = "Over de Atlantische Oceaan voer een grote stomer."; - String textDMD1 = "Over de Atlantische Oceaan voer een vreselijk grote stomer."; - SimpleWitness[] witnesses = createWitnesses(textD1, textD9, textDMD1); - - testWitnessCollation(witnesses); - } - - - private void testWitnessCollation(SimpleWitness[] sw) throws XMLStreamException, FactoryConfigurationError { - VariantGraph vg = collate(sw); - // List v = Lists.newArrayList(vg.vertices()); - String teiMM = generateTEI(vg); - assertNotNull(teiMM); - LOG.fine(teiMM); - - // setCollationAlgorithm(CollationAlgorithmFactory.dekker(new EqualityTokenComparator())); - // vg = collate(sw); - // String teiD = generateTEI(vg); - // LOG.debug(teiD); - // assertNotNull(teiD); - // assertFalse(teiD.equals(teiMM)); - } - - @Ignore("The b-transposition is one token short") - @Test - public void test4JoinedTranspositions2witnesses() throws XMLStreamException { - String a = "a1 a2 a3 b1 b2 b3 c1 c2 c3 d1 d2 d3"; - String b = "d1 d2 d3 a1 a2 a3 c1 c2 c3 b1 b2 b3"; - SimpleWitness[] sw = createWitnesses(a, b); - // testWitnessCollation(sw); - VariantGraph vg = collate(sw); - Set> transpositions0 = vg.transpositions(); - for (Set t : transpositions0) { - LOG.log(Level.FINE, "transposition {0}", t.toString()); + + @Test + public void testHermansTextWithOutliers() throws XMLStreamException { + String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn. Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. 
De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; + String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; + SimpleWitness[] witnesses = createWitnesses(textD1, textD9); + + testWitnessCollation(witnesses); } - Iterable vertices = vg.vertices(); - for (VariantGraph.Vertex v : vertices) { - LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[] { v, v.transpositions().toString() }); + @Test + public void testHermansText2b() throws XMLStreamException { + String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer, de lucht was helder blauw, het water rimpelend satijn. Op den Atlantischen Oceaan voer een groote stoomer. Onder de velen aan boojrd bevond zich een bruine, korte dikke man. JSg werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ontzaggelijk breed; hij omsloot den buik van den kleinen man als een soort balcon."; + String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; + String textDMD1 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        \nOp sommige dekken van de stomer lagen mensen in de zon, op andere dekken werd getennist, op nog andere liepen de passagiers heen en weer en praatten. Wie over de reling hing en recht naar beneden keek, kon vaststellen dat het schip vorderde; of draaide alleen de aarde er onderdoor?

        \nOp de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon.

        "; + SimpleWitness[] witnesses = createWitnesses(textD1, textD9, textDMD1); + + testWitnessCollation(witnesses); } - vg = VariantGraph.JOIN.apply(vg); - LOG.fine(toString(table(vg))); - Set> transpositions = vg.transpositions(); - LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); - for (Set t : transpositions) { - LOG.log(Level.FINE, "transposition {0}", t.toString()); - // all joined vertices should be size 3 - for (VariantGraph.Vertex vertex : t) { - assertEquals(t.toString(), 3, vertex.tokens().size()); - } + + @Test + public void testHermansText2c() throws XMLStreamException { + String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer."; + String textD9 = "Over de Atlantische Oceaan voer een grote stomer."; + String textDMD1 = "Over de Atlantische Oceaan voer een vreselijk grote stomer."; + SimpleWitness[] witnesses = createWitnesses(textD1, textD9, textDMD1); + + testWitnessCollation(witnesses); } - assertEquals(3, transpositions.size()); - } - - @Ignore("Not all transpositions detected anymore?") - @Test - public void test4JoinedTranspositions3witnesses() throws XMLStreamException { - String a = "a1 a2 a3 b1 b2 b3 c1 c2 c3 d1 d2 d3"; - String b = "d1 d2 d3 a1 a2 a3 c1 c2 c3 b1 b2 b3"; - String c = "c1 c2 c3 b1 b2 b3 d1 d2 d3 a1 a2 a3"; - SimpleWitness[] sw = createWitnesses(a, b, c); - // testWitnessCollation(sw); - VariantGraph vg = collate(sw); - Iterable vertices = vg.vertices(); - for (VariantGraph.Vertex v : vertices) { - LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[]{v, v.transpositions()}); + + private void testWitnessCollation(SimpleWitness[] sw) throws XMLStreamException, FactoryConfigurationError { + VariantGraph vg = collate(sw); + // List v = Lists.newArrayList(vg.vertices()); + String teiMM = generateTEI(vg); + assertNotNull(teiMM); + LOG.fine(teiMM); + + // setCollationAlgorithm(CollationAlgorithmFactory.dekker(new EqualityTokenComparator())); + // vg = collate(sw); + // String teiD = 
generateTEI(vg); + // LOG.debug(teiD); + // assertNotNull(teiD); + // assertFalse(teiD.equals(teiMM)); } - final StringWriter writer = new StringWriter(); - new SimpleVariantGraphSerializer(vg).toDot(writer); - LOG.fine(writer.toString()); - - vg = VariantGraph.JOIN.apply(vg); - Set> transpositions = vg.transpositions(); - LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); - for (Set t : transpositions) { - String showTransposition = t.toString(); - LOG.log(Level.FINE, "transposition {0}", showTransposition); - boolean transpositionOfA = showTransposition.contains("a"); - boolean transpositionOfB = showTransposition.contains("b"); - boolean transpositionOfC = showTransposition.contains("c"); - boolean transpositionOfD = showTransposition.contains("d"); - // there should be only a, b, c or d in the transpositions - assertTrue(transpositionOfA ^ transpositionOfB ^ transpositionOfC ^ transpositionOfD); - // assertEquals(showTransposition(t), 3, t.from().tokens().size()); - // assertEquals(showTransposition(t), 3, t.to().tokens().size()); + @Ignore("The b-transposition is one token short") + @Test + public void test4JoinedTranspositions2witnesses() throws XMLStreamException { + String a = "a1 a2 a3 b1 b2 b3 c1 c2 c3 d1 d2 d3"; + String b = "d1 d2 d3 a1 a2 a3 c1 c2 c3 b1 b2 b3"; + SimpleWitness[] sw = createWitnesses(a, b); + // testWitnessCollation(sw); + VariantGraph vg = collate(sw); + Set> transpositions0 = vg.transpositions(); + for (Set t : transpositions0) { + LOG.log(Level.FINE, "transposition {0}", t.toString()); + } + + Iterable vertices = vg.vertices(); + for (VariantGraph.Vertex v : vertices) { + LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[]{v, v.transpositions().toString()}); + } + vg = VariantGraph.JOIN.apply(vg); + LOG.fine(toString(table(vg))); + Set> transpositions = vg.transpositions(); + LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); + for (Set t : transpositions) { + LOG.log(Level.FINE, 
"transposition {0}", t.toString()); + // all joined vertices should be size 3 + for (VariantGraph.Vertex vertex : t) { + assertEquals(t.toString(), 3, vertex.tokens().size()); + } + } + assertEquals(3, transpositions.size()); } - assertEquals(7, transpositions.size()); - } - - @Test - public void testHermansText3aJoinedTranspositions() throws XMLStreamException { - String textMZ_DJ233 = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W. Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van litterair criticus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nZo nu en dan koopt Elsevier een artikel van een echte professor wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nDe politiek van dit blad wordt geschreven door een der leeuwen uit het Nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        \nAldus de artikelen van Werumeus Buning"; - String textD4F = "Werumeus Buning maakt machtigmooie artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W.Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van literair kritikus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nNu en dan koopt Elsevier een artikel van een echte professor, wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n\nDe politiek van dit blad wordt geschreven door een der leeuwen uit het nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        Volgens de stukjes van Werumeus Buning"; - SimpleWitness[] sw = createWitnesses(textMZ_DJ233, textD4F); - testWitnessCollation(sw); - } - - // @Test - public void testHermansText3aJoinedTranspositions2() throws XMLStreamException { - String textD1 = "Over hem waakten de @Dochters Zions# naar Micha 4:13 of ook genaamd de @Zonen van Dan (Gen. 49:17)"; - String textD9 = "Over hem waakte een garde, genaamd de @Dochter Zions# naar Micha 4 13, of ook de ,,/onen van Dan# Gen 49 17"; - String textDMD1 = "Over hem waakte een garde, genaamd de ,,Dochter Zions# naar Micha 4 : 13, of ook de @zonen van Dan# Gen. 49 : 17."; - String textDMD5 = "Over hem waakte een garde, genaamd de @Dochter Zions# naar Micha 4 . 13, of ook de @zonen van Dan# Gen 49 17"; - - SimpleWitness[] sw = createWitnesses(textD1, textD9, textDMD1, textDMD5); - testWitnessCollation(sw); - - VariantGraph vg = VariantGraph.JOIN.apply(collate(sw)); - Set> transpositions = vg.transpositions(); - assertEquals(5, transpositions.size()); - Set transposition = transpositions.iterator().next(); - // assertEquals("genaamd de", transposition.from().toString()); - } - - @Test - public void testTEI() throws XMLStreamException, FactoryConfigurationError { - String text1 = "voor Zo nu en dan zin2 na voor"; - String text2 = "voor zin2 Nu en dan voor"; - SimpleWitness[] sw = createWitnesses(text1, text2); - testWitnessCollation(sw); - } - - @Test - public void testHermansAllesIsBetrekkelijk() throws XMLStreamException { - String textD1 = "Zij had gelijk; natuurlijk is alles betrekkelijk en het hangt er van af hoe men het gewend is."; - String textD9 = "Zij had gelijk. Natuurlijk, alles is&KOP+betrekkelijk en het hangt er alleen van af\n |P 46|\nhoe men het gewend is."; - String textDmd1 = "Zij had gelijk. Natuurlijk, alles is betrekkelijk en het hangt er alleen van af\n&WR+\n46

        \nhoe men het gewend is."; - String textDmd9 = "Zij had gelijk. Natuurlijk, alles is&KOP+betrekkelijk en het hangt er alleen van af\n |P 46|\nhoe men het gewend is."; - SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1, textDmd9); - testWitnessCollation(sw); - } - - @Test - public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { - String textD1 = "natuurlijk is alles betrekkelijk"; - String textD9 = "Natuurlijk, alles mag relatief zijn"; - String textDmd1 = "Natuurlijk, alles is betrekkelijk"; - SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1); - testWitnessCollation(sw); - } - - /* TODO: Find solution for rendering transposition correctly - * with more than two witnesses. - */ - @Ignore - @Test - public void testNoLoops() throws XMLStreamException { - String w1 = "a b c d"; - String w2 = "e c f g"; - String w3 = "e c b d"; - SimpleWitness[] sw = createWitnesses(w1, w2, w3); - VariantGraph vg = collate(sw); - Set> transpositions = vg.transpositions(); - assertEquals(1, transpositions.size()); - Set t = transpositions.iterator().next(); - for (VariantGraph.Vertex vertex : t) { - for (SimpleToken token : vertex.tokens().stream().map(tk -> (SimpleToken)tk).toArray(SimpleToken[]::new)) { - assertEquals(token.toString(), token.getNormalized(), "c"); - } + + @Ignore("Not all transpositions detected anymore?") + @Test + public void test4JoinedTranspositions3witnesses() throws XMLStreamException { + String a = "a1 a2 a3 b1 b2 b3 c1 c2 c3 d1 d2 d3"; + String b = "d1 d2 d3 a1 a2 a3 c1 c2 c3 b1 b2 b3"; + String c = "c1 c2 c3 b1 b2 b3 d1 d2 d3 a1 a2 a3"; + SimpleWitness[] sw = createWitnesses(a, b, c); + // testWitnessCollation(sw); + VariantGraph vg = collate(sw); + + Iterable vertices = vg.vertices(); + for (VariantGraph.Vertex v : vertices) { + LOG.log(Level.FINE, "vertex:{0}, transpositions:{1}", new Object[]{v, v.transpositions()}); + } + + final StringWriter writer = new StringWriter(); + new 
SimpleVariantGraphSerializer(vg).toDot(writer); + LOG.fine(writer.toString()); + + vg = VariantGraph.JOIN.apply(vg); + Set> transpositions = vg.transpositions(); + LOG.log(Level.FINE, "{0} transpositions", transpositions.size()); + for (Set t : transpositions) { + String showTransposition = t.toString(); + LOG.log(Level.FINE, "transposition {0}", showTransposition); + boolean transpositionOfA = showTransposition.contains("a"); + boolean transpositionOfB = showTransposition.contains("b"); + boolean transpositionOfC = showTransposition.contains("c"); + boolean transpositionOfD = showTransposition.contains("d"); + // there should be only a, b, c or d in the transpositions + assertTrue(transpositionOfA ^ transpositionOfB ^ transpositionOfC ^ transpositionOfD); + // assertEquals(showTransposition(t), 3, t.from().tokens().size()); + // assertEquals(showTransposition(t), 3, t.to().tokens().size()); + } + assertEquals(7, transpositions.size()); + } + + @Test + public void testHermansText3aJoinedTranspositions() throws XMLStreamException { + String textMZ_DJ233 = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W. Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van litterair criticus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nZo nu en dan koopt Elsevier een artikel van een echte professor wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nDe politiek van dit blad wordt geschreven door een der leeuwen uit het Nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        \nAldus de artikelen van Werumeus Buning"; + String textD4F = "Werumeus Buning maakt machtigmooie artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W.Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van literair kritikus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nNu en dan koopt Elsevier een artikel van een echte professor, wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n\nDe politiek van dit blad wordt geschreven door een der leeuwen uit het nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        Volgens de stukjes van Werumeus Buning"; + SimpleWitness[] sw = createWitnesses(textMZ_DJ233, textD4F); + testWitnessCollation(sw); + } + + // @Test + public void testHermansText3aJoinedTranspositions2() throws XMLStreamException { + String textD1 = "Over hem waakten de @Dochters Zions# naar Micha 4:13 of ook genaamd de @Zonen van Dan (Gen. 49:17)"; + String textD9 = "Over hem waakte een garde, genaamd de @Dochter Zions# naar Micha 4 13, of ook de ,,/onen van Dan# Gen 49 17"; + String textDMD1 = "Over hem waakte een garde, genaamd de ,,Dochter Zions# naar Micha 4 : 13, of ook de @zonen van Dan# Gen. 49 : 17."; + String textDMD5 = "Over hem waakte een garde, genaamd de @Dochter Zions# naar Micha 4 . 13, of ook de @zonen van Dan# Gen 49 17"; + + SimpleWitness[] sw = createWitnesses(textD1, textD9, textDMD1, textDMD5); + testWitnessCollation(sw); + + VariantGraph vg = VariantGraph.JOIN.apply(collate(sw)); + Set> transpositions = vg.transpositions(); + assertEquals(5, transpositions.size()); + Set transposition = transpositions.iterator().next(); + // assertEquals("genaamd de", transposition.from().toString()); + } + + @Test + public void testTEI() throws XMLStreamException, FactoryConfigurationError { + String text1 = "voor Zo nu en dan zin2 na voor"; + String text2 = "voor zin2 Nu en dan voor"; + SimpleWitness[] sw = createWitnesses(text1, text2); + testWitnessCollation(sw); } - final Set witnessesInTransposition = new HashSet<>(); - for (VariantGraph.Vertex vertex : t) { - for (Token token : vertex.tokens()) { - witnessesInTransposition.add(token.getWitness()); - } + + @Test + public void testHermansAllesIsBetrekkelijk() throws XMLStreamException { + String textD1 = "Zij had gelijk; natuurlijk is alles betrekkelijk en het hangt er van af hoe men het gewend is."; + String textD9 = "Zij had gelijk. Natuurlijk, alles is&KOP+betrekkelijk en het hangt er alleen van af\n |P 46|\nhoe men het gewend is."; + String textDmd1 = "Zij had gelijk. 
Natuurlijk, alles is betrekkelijk en het hangt er alleen van af\n&WR+\n46

        \nhoe men het gewend is."; + String textDmd9 = "Zij had gelijk. Natuurlijk, alles is&KOP+betrekkelijk en het hangt er alleen van af\n |P 46|\nhoe men het gewend is."; + SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1, textDmd9); + testWitnessCollation(sw); + } + + @Test + public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { + String textD1 = "natuurlijk is alles betrekkelijk"; + String textD9 = "Natuurlijk, alles mag relatief zijn"; + String textDmd1 = "Natuurlijk, alles is betrekkelijk"; + SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1); + testWitnessCollation(sw); + } + + /* TODO: Find solution for rendering transposition correctly + * with more than two witnesses. + */ + @Ignore + @Test + public void testNoLoops() throws XMLStreamException { + String w1 = "a b c d"; + String w2 = "e c f g"; + String w3 = "e c b d"; + SimpleWitness[] sw = createWitnesses(w1, w2, w3); + VariantGraph vg = collate(sw); + Set> transpositions = vg.transpositions(); + assertEquals(1, transpositions.size()); + Set t = transpositions.iterator().next(); + for (VariantGraph.Vertex vertex : t) { + for (SimpleToken token : vertex.tokens().stream().map(tk -> (SimpleToken) tk).toArray(SimpleToken[]::new)) { + assertEquals(token.toString(), token.getNormalized(), "c"); + } + } + final Set witnessesInTransposition = new HashSet<>(); + for (VariantGraph.Vertex vertex : t) { + for (Token token : vertex.tokens()) { + witnessesInTransposition.add(token.getWitness()); + } + } + assertEquals(new HashSet<>(Arrays.asList(sw)), witnessesInTransposition); + } + + // @Test + // public void testHermansText4() throws XMLStreamException { + // String textMZ_DJ233 = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W. Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van litterair criticus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nZo nu en dan koopt Elsevier een artikel van een echte professor wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nDe politiek van dit blad wordt geschreven door een der leeuwen uit het Nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        \nAldus de artikelen van Werumeus Buning"; + // String textD4F = "Werumeus Buning maakt machtigmooie artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W.Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van literair kritikus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nNu en dan koopt Elsevier een artikel van een echte professor, wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n\nDe politiek van dit blad wordt geschreven door een der leeuwen uit het nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        Volgens de stukjes van Werumeus Buning"; + // SimpleWitness[] sw = createWitnesses(textMZ_DJ233, textD4F); + // testWitnessCollation(sw); + // } + + private String generateTEI(VariantGraph vg) throws XMLStreamException, FactoryConfigurationError { + SimpleVariantGraphSerializer s = new SimpleVariantGraphSerializer(VariantGraph.JOIN.apply(vg)); + StringWriter writer = new StringWriter(); + s.toDot(writer); + LOG.fine(writer.toString()); + XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(writer); + xml.writeStartDocument(); + s.toTEI(xml); + xml.writeEndDocument(); + return writer.toString(); } - assertEquals(new HashSet<>(Arrays.asList(sw)), witnessesInTransposition); - } - - // @Test - // public void testHermansText4() throws XMLStreamException { - // String textMZ_DJ233 = "Werumeus Buning maakt artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W. Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van litterair criticus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nZo nu en dan koopt Elsevier een artikel van een echte professor wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nDe politiek van dit blad wordt geschreven door een der leeuwen uit het Nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        \nAldus de artikelen van Werumeus Buning"; - // String textD4F = "Werumeus Buning maakt machtigmooie artikelen van vijf pagina&APO+s over de geologie van de diepzee, die hij uit Engelse boeken overschrijft, wat hij pas in de laatste regel vermeldt, omdat hij zo goed kan koken.

        \nJ. W.Hofstra kan niet lezen en nauwelijks stotteren, laat staan schrijven. Hij oefent het ambt van literair kritikus uit omdat hij uiterlijk veel weg heeft van een Duitse filmacteur (Adolf Wohlbrock).

        \nEdouard Bouquin is het olijke culturele geweten. Bouquin betekent: 1) oud boek van geringe waarde, 2) oude bok, 3) mannetjeskonijn. Ik kan het ook niet helpen, het staat in Larousse.

        \nNu en dan koopt Elsevier een artikel van een echte professor, wiens naam en titels zu vet worden afgedrukt, dat zij allicht de andere copie ook iets professoraals geven, in het oog van de speksnijders.

        \n\nDe politiek van dit blad wordt geschreven door een der leeuwen uit het nederlandse wapen (ik geloof de rechtse) op een krakerige gerechtszaaltoon in zeer korte zinnetjes, omdat hij tot zijn spijt de syntaxis onvoldoende beheerst.

        Volgens de stukjes van Werumeus Buning"; - // SimpleWitness[] sw = createWitnesses(textMZ_DJ233, textD4F); - // testWitnessCollation(sw); - // } - - private String generateTEI(VariantGraph vg) throws XMLStreamException, FactoryConfigurationError { - SimpleVariantGraphSerializer s = new SimpleVariantGraphSerializer(VariantGraph.JOIN.apply(vg)); - StringWriter writer = new StringWriter(); - s.toDot(writer); - LOG.fine(writer.toString()); - XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(writer); - xml.writeStartDocument(); - s.toTEI(xml); - xml.writeEndDocument(); - return writer.toString(); - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java index 13f596edb..4499bef5c 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolverTest.java @@ -12,26 +12,26 @@ import static org.junit.Assert.assertEquals; public class IslandConflictResolverTest extends AbstractTest { - - // 3 islands of 2, 1 island of size 1 - // the 3 islands of size 2 overlap partly - //TODO: add new IslandCompetitionType: party overlapping! 
- @Test - public void testPartlyOverlappingIslands() { - // create two witnesses - SimpleWitness[] w = createWitnesses("The cat and the dog", "the dog and the cat"); - // create graph from the first witness - VariantGraph graph = collate(w[0]); - // create table from the graph and the second witness - MatchTable table = MatchTable.create(graph, w[1]); - List possibleIslands = new ArrayList<>(); - for (Island island : table.getIslands()) { - if (island.size()==2) { - possibleIslands.add(island); - } + + // 3 islands of 2, 1 island of size 1 + // the 3 islands of size 2 overlap partly + //TODO: add new IslandCompetitionType: party overlapping! + @Test + public void testPartlyOverlappingIslands() { + // create two witnesses + SimpleWitness[] w = createWitnesses("The cat and the dog", "the dog and the cat"); + // create graph from the first witness + VariantGraph graph = collate(w[0]); + // create table from the graph and the second witness + MatchTable table = MatchTable.create(graph, w[1]); + List possibleIslands = new ArrayList<>(); + for (Island island : table.getIslands()) { + if (island.size() == 2) { + possibleIslands.add(island); + } + } + IslandConflictResolver resolver = new IslandConflictResolver(table); + Map> competition = resolver.analyzeConflictsBetweenPossibleIslands(possibleIslands); + assertEquals(3, competition.get(IslandCompetition.CompetingIsland).size()); } - IslandConflictResolver resolver = new IslandConflictResolver(table); - Map> competition = resolver.analyzeConflictsBetweenPossibleIslands(possibleIslands); - assertEquals(3, competition.get(IslandCompetition.CompetingIsland).size()); - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java index 97f80fd7a..f4a520806 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java +++ 
b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/IslandTest.java @@ -1,149 +1,149 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.dekker.matrix; - -import eu.interedition.collatex.AbstractTest; -import eu.interedition.collatex.VariantGraph; -import eu.interedition.collatex.matching.EqualityTokenComparator; -import eu.interedition.collatex.simple.SimpleWitness; -import org.junit.Test; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -public class IslandTest extends AbstractTest { - - String newLine = System.getProperty("line.separator"); - - @Test - public void testCoordinates() { - Coordinate a = new Coordinate(0, 0); - Coordinate b = new Coordinate(0, 0); - Coordinate c = new Coordinate(1, 1); - assertEquals(new Coordinate(0, 0), a); - assertEquals(b, a); - assertFalse(a.equals(c)); - } - - @Test - public void testBorders() { - Coordinate a = new Coordinate(0, 0); - Coordinate b = new Coordinate(1, 1); - Coordinate c = new Coordinate(1, 2); - assertTrue(a.bordersOn(b)); - assertFalse(a.bordersOn(c)); - assertFalse(b.bordersOn(c)); - } - - @Test - public void testUndirectedIsland() { - Island isl = new Island(); - isl.add(new Coordinate(0, 0)); - 
assertEquals(1, isl.size()); - isl.add(new Coordinate(0, 0)); - assertEquals(1, isl.size()); - isl.add(new Coordinate(1, 0)); - assertEquals(1, isl.size()); - isl.add(new Coordinate(2, 2)); - assertEquals(1, isl.size()); - assertTrue(isl.neighbour(new Coordinate(1, 1))); - isl.add(new Coordinate(1, 1)); - assertEquals(2, isl.size()); - } - - @Test - public void testDirectedIsland() { - Island isl = new Island(); - isl.add(new Coordinate(0, 0)); - assertEquals(1, isl.size()); - assertEquals(0, isl.direction()); - isl.add(new Coordinate(1, 1)); - assertEquals(2, isl.size()); - assertEquals(1, isl.direction()); - isl.add(new Coordinate(2, 2)); - assertEquals(3, isl.size()); - assertEquals(1, isl.direction()); - } - - @Test - public void testArchipelago() { - Archipelago arch = new Archipelago(); - Island isl_1 = new Island(); - isl_1.add(new Coordinate(0, 0)); - isl_1.add(new Coordinate(1, 1)); - arch.add(isl_1); - Island isl_2 = new Island(); - isl_2.add(new Coordinate(2, 2)); - isl_2.add(new Coordinate(3, 3)); - arch.add(isl_2); - assertEquals(2, arch.size()); - assertTrue(isl_1.overlap(isl_2)); - } - - @Test - public void testArchipelagoRivalIslands() { - SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); - VariantGraph vg = collate(sw[0]); - MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - Archipelago archipelago = new Archipelago(); - for (Island isl : table.getIslands()) { - archipelago.add(isl); - } - assertEquals(3, archipelago.size()); - } - - @Test - public void testRemovePoints() { - Island di_1 = new Island(); - di_1.add(new Coordinate(1, 1)); - di_1.add(new Coordinate(2, 2)); - Island di_2 = new Island(); - di_2.add(new Coordinate(2, 2)); - Island di_3 = di_1.removePoints(di_2); - assertEquals(2, di_1.size()); - assertEquals(1, di_3.size()); - } - - @Test - public void testFindCoorOnRowOrCol() { - Island isl_1 = new Island(); - isl_1.add(new Coordinate(0, 0)); - isl_1.add(new Coordinate(1, 1)); - 
assertEquals(new Coordinate(0, 0), isl_1.getCoorOnRow(0)); - assertEquals(new Coordinate(1, 1), isl_1.getCoorOnCol(1)); - assertEquals(null, isl_1.getCoorOnCol(4)); - } - - @Test - public void testIslandValue() { - Island isl_1 = new Island(); - isl_1.add(new Coordinate(1, 1)); - assertEquals(1, isl_1.value()); - isl_1.add(new Coordinate(2, 2)); - assertEquals(5, isl_1.value()); - isl_1.add(new Coordinate(3, 3)); - assertEquals(10, isl_1.value()); - Island isl_2 = new Island(); - isl_2.add(new Coordinate(2, 2)); - isl_2.add(new Coordinate(1, 3)); - assertEquals(3, isl_2.value()); - } -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . 
+ */ + +package eu.interedition.collatex.dekker.matrix; + +import eu.interedition.collatex.AbstractTest; +import eu.interedition.collatex.VariantGraph; +import eu.interedition.collatex.matching.EqualityTokenComparator; +import eu.interedition.collatex.simple.SimpleWitness; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class IslandTest extends AbstractTest { + + String newLine = System.getProperty("line.separator"); + + @Test + public void testCoordinates() { + Coordinate a = new Coordinate(0, 0); + Coordinate b = new Coordinate(0, 0); + Coordinate c = new Coordinate(1, 1); + assertEquals(new Coordinate(0, 0), a); + assertEquals(b, a); + assertFalse(a.equals(c)); + } + + @Test + public void testBorders() { + Coordinate a = new Coordinate(0, 0); + Coordinate b = new Coordinate(1, 1); + Coordinate c = new Coordinate(1, 2); + assertTrue(a.bordersOn(b)); + assertFalse(a.bordersOn(c)); + assertFalse(b.bordersOn(c)); + } + + @Test + public void testUndirectedIsland() { + Island isl = new Island(); + isl.add(new Coordinate(0, 0)); + assertEquals(1, isl.size()); + isl.add(new Coordinate(0, 0)); + assertEquals(1, isl.size()); + isl.add(new Coordinate(1, 0)); + assertEquals(1, isl.size()); + isl.add(new Coordinate(2, 2)); + assertEquals(1, isl.size()); + assertTrue(isl.neighbour(new Coordinate(1, 1))); + isl.add(new Coordinate(1, 1)); + assertEquals(2, isl.size()); + } + + @Test + public void testDirectedIsland() { + Island isl = new Island(); + isl.add(new Coordinate(0, 0)); + assertEquals(1, isl.size()); + assertEquals(0, isl.direction()); + isl.add(new Coordinate(1, 1)); + assertEquals(2, isl.size()); + assertEquals(1, isl.direction()); + isl.add(new Coordinate(2, 2)); + assertEquals(3, isl.size()); + assertEquals(1, isl.direction()); + } + + @Test + public void testArchipelago() { + Archipelago arch = new Archipelago(); + Island isl_1 = new 
Island(); + isl_1.add(new Coordinate(0, 0)); + isl_1.add(new Coordinate(1, 1)); + arch.add(isl_1); + Island isl_2 = new Island(); + isl_2.add(new Coordinate(2, 2)); + isl_2.add(new Coordinate(3, 3)); + arch.add(isl_2); + assertEquals(2, arch.size()); + assertTrue(isl_1.overlap(isl_2)); + } + + @Test + public void testArchipelagoRivalIslands() { + SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); + VariantGraph vg = collate(sw[0]); + MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + Archipelago archipelago = new Archipelago(); + for (Island isl : table.getIslands()) { + archipelago.add(isl); + } + assertEquals(3, archipelago.size()); + } + + @Test + public void testRemovePoints() { + Island di_1 = new Island(); + di_1.add(new Coordinate(1, 1)); + di_1.add(new Coordinate(2, 2)); + Island di_2 = new Island(); + di_2.add(new Coordinate(2, 2)); + Island di_3 = di_1.removePoints(di_2); + assertEquals(2, di_1.size()); + assertEquals(1, di_3.size()); + } + + @Test + public void testFindCoorOnRowOrCol() { + Island isl_1 = new Island(); + isl_1.add(new Coordinate(0, 0)); + isl_1.add(new Coordinate(1, 1)); + assertEquals(new Coordinate(0, 0), isl_1.getCoorOnRow(0)); + assertEquals(new Coordinate(1, 1), isl_1.getCoorOnCol(1)); + assertEquals(null, isl_1.getCoorOnCol(4)); + } + + @Test + public void testIslandValue() { + Island isl_1 = new Island(); + isl_1.add(new Coordinate(1, 1)); + assertEquals(1, isl_1.value()); + isl_1.add(new Coordinate(2, 2)); + assertEquals(5, isl_1.value()); + isl_1.add(new Coordinate(3, 3)); + assertEquals(10, isl_1.value()); + Island isl_2 = new Island(); + isl_2.add(new Coordinate(2, 2)); + isl_2.add(new Coordinate(1, 3)); + assertEquals(3, isl_2.value()); + } +} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java index 4155142e2..15c1d1874 100644 --- 
a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableLinkerTest.java @@ -46,211 +46,211 @@ public class MatchTableLinkerTest extends AbstractTest { - @Test - public void additionInCombinationWithTransposition2() { - final SimpleWitness[] w = createWitnesses(// - "the cat is black",// - "black is the cat",// - "black and white is the cat"); - final VariantGraph graph = collate(w[0], w[1]); - MatchTableLinker linker = new MatchTableLinker(); - Map link = linker.link(graph, w[2], new EqualityTokenComparator()); - Set tokens = link.keySet(); - Map tokensAsString = new HashMap<>(); - for (Token token : tokens) { - tokensAsString.put(token.toString(), link.get(token).toString()); + @Test + public void additionInCombinationWithTransposition2() { + final SimpleWitness[] w = createWitnesses(// + "the cat is black",// + "black is the cat",// + "black and white is the cat"); + final VariantGraph graph = collate(w[0], w[1]); + MatchTableLinker linker = new MatchTableLinker(); + Map link = linker.link(graph, w[2], new EqualityTokenComparator()); + Set tokens = link.keySet(); + Map tokensAsString = new HashMap<>(); + for (Token token : tokens) { + tokensAsString.put(token.toString(), link.get(token).toString()); + } + assertEquals("[B:0:'black']", tokensAsString.get("C:0:'black'")); } - assertEquals("[B:0:'black']", tokensAsString.get("C:0:'black'")); - } - - @Test - public void testUsecase1() { - final SimpleWitness[] w = createWitnesses("The black cat", "The black and white cat"); - final VariantGraph graph = collate(w[0]); - MatchTableLinker linker = new MatchTableLinker(); - Map link = linker.link(graph, w[1], new EqualityTokenComparator()); - assertEquals(3, link.size()); - } - @Test - public void testGapsEverythingEqual() { - // All the witness are equal - // There are choices to be made however, since there is duplication of tokens - // Optimal 
alignment has no gaps - final SimpleWitness[] w = createWitnesses("The red cat and the black cat", "The red cat and the black cat"); - final VariantGraph graph = collate(w[0]); - MatchTableLinker linker = new MatchTableLinker(); - Map link = linker.link(graph, w[1], new EqualityTokenComparator()); - PhraseMatchDetector detector = new PhraseMatchDetector(); - List> phraseMatches = detector.detect(link, graph, w[1]); - assertEquals(1, phraseMatches.size()); - } + @Test + public void testUsecase1() { + final SimpleWitness[] w = createWitnesses("The black cat", "The black and white cat"); + final VariantGraph graph = collate(w[0]); + MatchTableLinker linker = new MatchTableLinker(); + Map link = linker.link(graph, w[1], new EqualityTokenComparator()); + assertEquals(3, link.size()); + } + + @Test + public void testGapsEverythingEqual() { + // All the witness are equal + // There are choices to be made however, since there is duplication of tokens + // Optimal alignment has no gaps + final SimpleWitness[] w = createWitnesses("The red cat and the black cat", "The red cat and the black cat"); + final VariantGraph graph = collate(w[0]); + MatchTableLinker linker = new MatchTableLinker(); + Map link = linker.link(graph, w[1], new EqualityTokenComparator()); + PhraseMatchDetector detector = new PhraseMatchDetector(); + List> phraseMatches = detector.detect(link, graph, w[1]); + assertEquals(1, phraseMatches.size()); + } - @Test - public void testGapsOmission() { - // There is an omission - // Optimal alignment has 1 gap - // Note: there are two paths here that contain 1 gap - final SimpleWitness[] w = createWitnesses("The red cat and the black cat", "the black cat"); - final VariantGraph graph = collate(w[0]); - MatchTableLinker linker = new MatchTableLinker(); - Map link = linker.link(graph, w[1], new EqualityTokenComparator()); - PhraseMatchDetector detector = new PhraseMatchDetector(); - List> phraseMatches = detector.detect(link, graph, w[1]); - assertEquals(1, 
phraseMatches.size()); - } + @Test + public void testGapsOmission() { + // There is an omission + // Optimal alignment has 1 gap + // Note: there are two paths here that contain 1 gap + final SimpleWitness[] w = createWitnesses("The red cat and the black cat", "the black cat"); + final VariantGraph graph = collate(w[0]); + MatchTableLinker linker = new MatchTableLinker(); + Map link = linker.link(graph, w[1], new EqualityTokenComparator()); + PhraseMatchDetector detector = new PhraseMatchDetector(); + List> phraseMatches = detector.detect(link, graph, w[1]); + assertEquals(1, phraseMatches.size()); + } - @Test - //Note: test taken from HermansTest - public void testHermansText2c() throws XMLStreamException { - String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer."; - String textD9 = "Over de Atlantische Oceaan voer een grote stomer."; - String textDMD1 = "Over de Atlantische Oceaan voer een vreselijk grote stomer."; - SimpleWitness[] witnesses = createWitnesses(textD1, textD9, textDMD1); + @Test + //Note: test taken from HermansTest + public void testHermansText2c() throws XMLStreamException { + String textD1 = "Op den Atlantischen Oceaan voer een groote stoomer."; + String textD9 = "Over de Atlantische Oceaan voer een grote stomer."; + String textDMD1 = "Over de Atlantische Oceaan voer een vreselijk grote stomer."; + SimpleWitness[] witnesses = createWitnesses(textD1, textD9, textDMD1); - VariantGraph graph = collate(witnesses[0], witnesses[1]); + VariantGraph graph = collate(witnesses[0], witnesses[1]); - MatchTableLinker linker = new MatchTableLinker(); - Map linkedTokens = linker.link(graph, witnesses[2], new EqualityTokenComparator()); + MatchTableLinker linker = new MatchTableLinker(); + Map linkedTokens = linker.link(graph, witnesses[2], new EqualityTokenComparator()); - Set tokens = linkedTokens.keySet(); - Set tokensAsString = new LinkedHashSet<>(); - for (Token token : tokens) { - tokensAsString.add(token.toString()); + Set tokens = 
linkedTokens.keySet(); + Set tokensAsString = new LinkedHashSet<>(); + for (Token token : tokens) { + tokensAsString.add(token.toString()); + } + LOG.fine(tokensAsString::toString); + assertTrue(tokensAsString.contains("C:0:'over'")); + assertTrue(tokensAsString.contains("C:1:'de'")); + assertTrue(tokensAsString.contains("C:2:'atlantische'")); + assertTrue(tokensAsString.contains("C:3:'oceaan'")); + assertTrue(tokensAsString.contains("C:4:'voer'")); + assertTrue(tokensAsString.contains("C:5:'een'")); + assertTrue(tokensAsString.contains("C:7:'grote'")); + assertTrue(tokensAsString.contains("C:8:'stomer'")); } - LOG.fine(tokensAsString::toString); - assertTrue(tokensAsString.contains("C:0:'over'")); - assertTrue(tokensAsString.contains("C:1:'de'")); - assertTrue(tokensAsString.contains("C:2:'atlantische'")); - assertTrue(tokensAsString.contains("C:3:'oceaan'")); - assertTrue(tokensAsString.contains("C:4:'voer'")); - assertTrue(tokensAsString.contains("C:5:'een'")); - assertTrue(tokensAsString.contains("C:7:'grote'")); - assertTrue(tokensAsString.contains("C:8:'stomer'")); - } - // String newLine = System.getProperty("line.separator"); + // String newLine = System.getProperty("line.separator"); - @Test - public void test1() { - SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); - VariantGraph vg = collate(sw[0]); - MatchTableLinker linker = new MatchTableLinker(); - Map linkedTokens = linker.link(vg, sw[1], new EqualityTokenComparator()); + @Test + public void test1() { + SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); + VariantGraph vg = collate(sw[0]); + MatchTableLinker linker = new MatchTableLinker(); + Map linkedTokens = linker.link(vg, sw[1], new EqualityTokenComparator()); - Set tokens = linkedTokens.keySet(); - Set tokensAsString = new LinkedHashSet<>(); - for (Token token : tokens) { - tokensAsString.add(token.toString()); + Set tokens = linkedTokens.keySet(); + Set tokensAsString = new LinkedHashSet<>(); + for (Token token : 
tokens) { + tokensAsString.add(token.toString()); + } + assertTrue(tokensAsString.contains("B:0:'a'")); + assertTrue(tokensAsString.contains("B:1:'b'")); + assertTrue(tokensAsString.contains("B:2:'c'")); + assertTrue(tokensAsString.contains("B:3:'a'")); + assertTrue(tokensAsString.contains("B:4:'b'")); } - assertTrue(tokensAsString.contains("B:0:'a'")); - assertTrue(tokensAsString.contains("B:1:'b'")); - assertTrue(tokensAsString.contains("B:2:'c'")); - assertTrue(tokensAsString.contains("B:3:'a'")); - assertTrue(tokensAsString.contains("B:4:'b'")); - } - @Test - public void testOverDeAtlantischeOceaan() { - int outlierTranspositionsSizeLimit = 1; - collationAlgorithm = CollationAlgorithmFactory.dekkerMatchMatrix(new StrictEqualityTokenComparator(), outlierTranspositionsSizeLimit); - String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; - String textDMD1 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        \nOp sommige dekken van de stomer lagen mensen in de zon, op andere dekken werd getennist, op nog andere liepen de passagiers heen en weer en praatten. Wie over de reling hing en recht naar beneden keek, kon vaststellen dat het schip vorderde; of draaide alleen de aarde er onderdoor?

        \nOp de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon.

        "; - SimpleWitness[] sw = createWitnesses(textD9, textDMD1); - VariantGraph vg = collate(sw[0]); - Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); + @Test + public void testOverDeAtlantischeOceaan() { + int outlierTranspositionsSizeLimit = 1; + collationAlgorithm = CollationAlgorithmFactory.dekkerMatchMatrix(new StrictEqualityTokenComparator(), outlierTranspositionsSizeLimit); + String textD9 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        Op de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon."; + String textDMD1 = "Over de Atlantische Oceaan voer een grote stomer. De lucht was helder blauw, het water rimpelend satijn.

        \nOp sommige dekken van de stomer lagen mensen in de zon, op andere dekken werd getennist, op nog andere liepen de passagiers heen en weer en praatten. Wie over de reling hing en recht naar beneden keek, kon vaststellen dat het schip vorderde; of draaide alleen de aarde er onderdoor?

        \nOp de Atlantische Oceaan voer een ontzaggelijk zeekasteel. Onder de vele passagiers aan boord, bevond zich een bruine, korte dikke man. Hij werd nooit zonder sigaar gezien. Zijn pantalon had lijnrechte vouwen in de pijpen, maar zat toch altijd vol rimpels. De pantalon werd naar boven toe breed, ongelofelijk breed: hij omsloot de buik van de kleine man als een soort balkon.

        "; + SimpleWitness[] sw = createWitnesses(textD9, textDMD1); + VariantGraph vg = collate(sw[0]); + Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); - Set tokens = linkedTokens.keySet(); - Set tokensAsString = new LinkedHashSet<>(); - for (Token token : tokens) { - tokensAsString.add(token.toString()); + Set tokens = linkedTokens.keySet(); + Set tokensAsString = new LinkedHashSet<>(); + for (Token token : tokens) { + tokensAsString.add(token.toString()); + } + List l = new ArrayList<>(tokensAsString); + Collections.sort(l); + LOG.log(Level.FINE, () -> l.stream().collect(Collectors.joining("\n"))); + assertTrue(tokensAsString.contains("B:87:'onder'")); + assertTrue(tokensAsString.contains("B:0:'over'")); + assertTrue(tokensAsString.contains("B:1:'de'")); + assertTrue(tokensAsString.contains("B:2:'atlantische'")); + assertTrue(tokensAsString.contains("B:3:'oceaan'")); + assertTrue(tokensAsString.contains("B:4:'voer'")); } - List l = new ArrayList<>(tokensAsString); - Collections.sort(l); - LOG.log(Level.FINE, () -> l.stream().collect(Collectors.joining("\n"))); - assertTrue(tokensAsString.contains("B:87:'onder'")); - assertTrue(tokensAsString.contains("B:0:'over'")); - assertTrue(tokensAsString.contains("B:1:'de'")); - assertTrue(tokensAsString.contains("B:2:'atlantische'")); - assertTrue(tokensAsString.contains("B:3:'oceaan'")); - assertTrue(tokensAsString.contains("B:4:'voer'")); - } - @Test - public void testHermansAllesIsBetrekkelijk1() throws XMLStreamException { - String textD1 = "natuurlijk is alles betrekkelijk"; - String textD9 = "Natuurlijk, alles mag relatief zijn"; - String textDmd1 = "Natuurlijk, alles is betrekkelijk"; - SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1); - VariantGraph vg = collate(sw[0], sw[1]); - Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); + @Test + public void testHermansAllesIsBetrekkelijk1() throws 
XMLStreamException { + String textD1 = "natuurlijk is alles betrekkelijk"; + String textD9 = "Natuurlijk, alles mag relatief zijn"; + String textDmd1 = "Natuurlijk, alles is betrekkelijk"; + SimpleWitness[] sw = createWitnesses(textD1, textD9, textDmd1); + VariantGraph vg = collate(sw[0], sw[1]); + Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); - Set tokens = linkedTokens.keySet(); - Set tokensAsString = new LinkedHashSet<>(); - for (Token token : tokens) { - tokensAsString.add(token.toString()); + Set tokens = linkedTokens.keySet(); + Set tokensAsString = new LinkedHashSet<>(); + for (Token token : tokens) { + tokensAsString.add(token.toString()); + } + List l = new ArrayList<>(tokensAsString); + Collections.sort(l); + LOG.log(Level.FINE, "tokensAsString={0}", l); + // assertTrue(tokensAsString.contains("B:75:'onder'")); + // assertTrue(tokensAsString.contains("B:0:'over'")); + // assertTrue(tokensAsString.contains("B:1:'de'")); + // assertTrue(tokensAsString.contains("B:2:'atlantische'")); + // assertTrue(tokensAsString.contains("B:3:'oceaan'")); + // assertTrue(tokensAsString.contains("B:4:'voer'")); } - List l = new ArrayList<>(tokensAsString); - Collections.sort(l); - LOG.log(Level.FINE, "tokensAsString={0}", l); - // assertTrue(tokensAsString.contains("B:75:'onder'")); - // assertTrue(tokensAsString.contains("B:0:'over'")); - // assertTrue(tokensAsString.contains("B:1:'de'")); - // assertTrue(tokensAsString.contains("B:2:'atlantische'")); - // assertTrue(tokensAsString.contains("B:3:'oceaan'")); - // assertTrue(tokensAsString.contains("B:4:'voer'")); - } - @Test - public void testSuscepto() throws XMLStreamException { - String a = "Et sumpno suscepto tribus diebus morte morietur et deinde ab inferis regressus ad lucem veniet."; - String b = "Et mortem sortis finiet post tridui somnum et morte morietur tribus diebus somno suscepto et tunc ab inferis regressus ad lucem veniet."; - String c = "Et sortem mortis 
tribus diebus sompno suscepto et tunc ab inferis regressus ad lucem veniet."; - SimpleWitness[] sw = createWitnesses(a, b, c); - VariantGraph vg = collate(sw[0], sw[1]); - Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); + @Test + public void testSuscepto() throws XMLStreamException { + String a = "Et sumpno suscepto tribus diebus morte morietur et deinde ab inferis regressus ad lucem veniet."; + String b = "Et mortem sortis finiet post tridui somnum et morte morietur tribus diebus somno suscepto et tunc ab inferis regressus ad lucem veniet."; + String c = "Et sortem mortis tribus diebus sompno suscepto et tunc ab inferis regressus ad lucem veniet."; + SimpleWitness[] sw = createWitnesses(a, b, c); + VariantGraph vg = collate(sw[0], sw[1]); + Map linkedTokens = new MatchTableLinker().link(vg, sw[2], new StrictEqualityTokenComparator()); - Set tokens = linkedTokens.keySet(); - Set tokensAsString = new LinkedHashSet<>(); - for (Token token : tokens) { - tokensAsString.add(token.toString()); + Set tokens = linkedTokens.keySet(); + Set tokensAsString = new LinkedHashSet<>(); + for (Token token : tokens) { + tokensAsString.add(token.toString()); + } + List l = new ArrayList<>(tokensAsString); + Collections.sort(l); + LOG.log(Level.FINE, "tokensAsString={0}", l); + assertTrue(tokensAsString.contains("C:6:'suscepto'")); } - List l = new ArrayList<>(tokensAsString); - Collections.sort(l); - LOG.log(Level.FINE, "tokensAsString={0}", l); - assertTrue(tokensAsString.contains("C:6:'suscepto'")); - } - @Test - public void testOutlierTranspositionLimitAndPunctuation() { - String w1 = "a b c ."; - String w2 = "a b c Natuurlijk, alles mag relatief zijn."; - SimpleWitness[] sw = createWitnesses(w1, w2); - - // assert that punctuation are separate tokens - List tokensA = sw[0].getTokens(); - assertEquals("A:0:'a'", tokensA.get(0).toString()); - assertEquals("A:1:'b'", tokensA.get(1).toString()); - assertEquals("A:2:'c'", 
tokensA.get(2).toString()); - assertEquals("A:3:'.'", tokensA.get(3).toString()); - assertEquals(4, tokensA.size()); - - VariantGraph vg = collate(sw[0]); - Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); - - // assert linked tokens; helper method - Set tokens = linkedTokens.keySet(); - Set tokensAsString = new LinkedHashSet<>(); - for (Token token : tokens) { - tokensAsString.add(token.toString()); + @Test + public void testOutlierTranspositionLimitAndPunctuation() { + String w1 = "a b c ."; + String w2 = "a b c Natuurlijk, alles mag relatief zijn."; + SimpleWitness[] sw = createWitnesses(w1, w2); + + // assert that punctuation are separate tokens + List tokensA = sw[0].getTokens(); + assertEquals("A:0:'a'", tokensA.get(0).toString()); + assertEquals("A:1:'b'", tokensA.get(1).toString()); + assertEquals("A:2:'c'", tokensA.get(2).toString()); + assertEquals("A:3:'.'", tokensA.get(3).toString()); + assertEquals(4, tokensA.size()); + + VariantGraph vg = collate(sw[0]); + Map linkedTokens = new MatchTableLinker().link(vg, sw[1], new StrictEqualityTokenComparator()); + + // assert linked tokens; helper method + Set tokens = linkedTokens.keySet(); + Set tokensAsString = new LinkedHashSet<>(); + for (Token token : tokens) { + tokensAsString.add(token.toString()); + } + List l = new ArrayList<>(tokensAsString); + Collections.sort(l); + + assertTrue(l.contains("B:0:'a'")); + assertTrue(l.contains("B:1:'b'")); + assertTrue(l.contains("B:2:'c'")); + assertTrue(l.contains("B:9:'.'")); + assertEquals(4, l.size()); } - List l = new ArrayList<>(tokensAsString); - Collections.sort(l); - - assertTrue(l.contains("B:0:'a'")); - assertTrue(l.contains("B:1:'b'")); - assertTrue(l.contains("B:2:'c'")); - assertTrue(l.contains("B:9:'.'")); - assertEquals(4, l.size()); - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java 
b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java index d936f49d0..2e35a4b23 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/dekker/matrix/MatchTableTest.java @@ -36,171 +36,171 @@ public class MatchTableTest extends AbstractTest { - // helper method - private void assertIslandEquals(int leftRow, int leftColumn, int rightRow, int rightColumn, Island island) { - Coordinate leftEnd = island.getLeftEnd(); - assertEquals(leftRow, leftEnd.getRow()); - assertEquals(leftColumn, leftEnd.getColumn()); - Coordinate rightEnd = island.getRightEnd(); - assertEquals(rightRow, rightEnd.getRow()); - assertEquals(rightColumn, rightEnd.getColumn()); - } - - // helper method - // note: x = x of start coordinate - // note: y = y of start coordinate - //TODO: replace Island by a real Vector class - private void assertVectorEquals(int x, int y, int length, Island island) { - Coordinate leftEnd = island.getLeftEnd(); - assertEquals(x, leftEnd.getRow()); - assertEquals(y, leftEnd.getColumn()); - assertEquals(length, island.size()); - } - - @Test - public void testTableCreationEmptyGraph() { - final VariantGraph graph = new VariantGraph(); - SimpleWitness[] witnesses = createWitnesses("a b"); - MatchTable table = MatchTable.create(graph, witnesses[0]); - assertEquals(0, table.columnList().size()); - } - - @Test - public void testTableCreationVariationDoesNotCauseExtraColumns() { - SimpleWitness[] witnesses = createWitnesses("a", "b", "c", "d"); - VariantGraph graph = collate(witnesses[0], witnesses[1], witnesses[2]); - MatchTable table = MatchTable.create(graph, witnesses[3]); - assertEquals(1, table.columnList().size()); - } - - @Test - public void testTableCreationAbAcAbc() { - SimpleWitness[] witnesses = createWitnesses("a b", "a c", "a b c"); - VariantGraph graph = collate(witnesses[0], witnesses[1]); - MatchTable table = 
MatchTable.create(graph, witnesses[2]); - assertVertexEquals("a", table.vertexAt(0, 0)); - assertVertexEquals("b", table.vertexAt(1, 1)); - assertVertexEquals("c", table.vertexAt(2, 1)); - } - - @Test - public void testTableCreationAbcabCab() { - SimpleWitness[] witnesses = createWitnesses("a b c a b", "c a b"); - VariantGraph graph = collate(witnesses[0]); - MatchTable table = MatchTable.create(graph, witnesses[1]); - assertVertexEquals("a", table.vertexAt(1, 0)); - assertVertexEquals("b", table.vertexAt(2, 1)); - assertVertexEquals("c", table.vertexAt(0, 2)); - assertVertexEquals("a", table.vertexAt(1, 3)); - assertVertexEquals("b", table.vertexAt(2, 4)); - } - - @Test - public void testTableCreationAbcabAbcab() { - SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); - VariantGraph vg = collate(sw[0]); - MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - assertEquals(5, table.columnList().size()); - assertEquals(5, table.rowList().size()); - assertVertexEquals("a", table.vertexAt(0, 0)); - assertVertexEquals("a", table.vertexAt(0, 3)); - assertVertexEquals("b", table.vertexAt(1, 1)); - assertVertexEquals("b", table.vertexAt(1, 4)); - assertVertexEquals("c", table.vertexAt(2, 2)); - assertVertexEquals("a", table.vertexAt(3, 0)); - assertVertexEquals("a", table.vertexAt(3, 3)); - assertVertexEquals("b", table.vertexAt(4, 1)); - assertVertexEquals("b", table.vertexAt(4, 4)); - } - - @Test - public void testTableCreationAsymmatricMatrix() { - SimpleWitness[] sw = createWitnesses("A B A B C", "A B C A B"); - VariantGraph vg = collate(sw[0]); - MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - assertVertexEquals("a", table.vertexAt(0, 0)); - assertVertexEquals("a", table.vertexAt(0, 2)); - assertVertexEquals("b", table.vertexAt(1, 1)); - assertVertexEquals("b", table.vertexAt(1, 3)); - assertVertexEquals("c", table.vertexAt(2, 4)); - assertVertexEquals("a", table.vertexAt(3, 0)); - 
assertVertexEquals("a", table.vertexAt(3, 2)); - assertVertexEquals("b", table.vertexAt(4, 1)); - assertVertexEquals("b", table.vertexAt(4, 3)); - } - - @Test - public void testRowLabels() { - String textD1 = "de het een"; - String textD9 = "het een de"; - SimpleWitness[] sw = createWitnesses(textD1, textD9); - VariantGraph vg = collate(sw[0]); - MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - List labels = table.rowList(); - assertTokenEquals("het ", labels.get(0)); - assertTokenEquals("een ", labels.get(1)); - assertTokenEquals("de", labels.get(2)); - } - - @Test - public void testColumnLabels() { - String textD1 = "de het een"; - String textD9 = "het een de"; - SimpleWitness[] sw = createWitnesses(textD1, textD9); - VariantGraph vg = collate(sw[0]); - MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - List labels = table.columnList(); - assertEquals((Integer) 0, labels.get(0)); - assertEquals((Integer) 1, labels.get(1)); - assertEquals((Integer) 2, labels.get(2)); - } - - @Test - public void testGetAllMatches() { - SimpleWitness[] sw = createWitnesses("A B A B C", "A B C A B"); - VariantGraph vg = collate(sw[0]); - MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); - List allTrue = table.allMatches(); - assertEquals(9, allTrue.size()); - assertTrue(allTrue.contains(new Coordinate(0, 0))); - assertFalse(allTrue.contains(new Coordinate(0, 1))); - } - - @Test - public void testIslandDetectionAbcabCab() { - SimpleWitness[] witnesses = createWitnesses("a b c a b", "c a b"); - VariantGraph graph = collate(witnesses[0]); - MatchTable table = MatchTable.create(graph, witnesses[1]); - List islands = new ArrayList<>(table.getIslands()); - assertEquals(2, islands.size()); - Collections.sort(islands); - Island island = islands.get(1); - assertIslandEquals(0, 2, 2, 4, island); - } - - @Test - public void testIslandDetectionXabcabXcab() { - SimpleWitness[] witnesses = 
createWitnesses("x a b c a b", "x c a b"); - VariantGraph graph = collate(witnesses[0]); - MatchTable table = MatchTable.create(graph, witnesses[1]); - List islands = new ArrayList<>(table.getIslands()); - assertEquals(3, islands.size()); - Collections.sort(islands); - Island island = islands.get(0); - assertIslandEquals(0, 0, 0, 0, island); - } - - @Test - public void testIslandDetectionPartlyOverlappingIslandsUsecase() { - SimpleWitness[] w = createWitnesses("The cat and the dog", "the dog and the cat"); - VariantGraph graph = collate(w[0]); - MatchTable table = MatchTable.create(graph, w[1], new EqualityTokenComparator()); - List islands = new ArrayList<>(table.getIslands()); - Collections.sort(islands); - assertEquals(4, islands.size()); - assertVectorEquals(0, 0, 1, islands.get(0)); - assertVectorEquals(3, 0, 2, islands.get(1)); - assertVectorEquals(2, 2, 2, islands.get(2)); - assertVectorEquals(0, 3, 2, islands.get(3)); - } -} + // helper method + private void assertIslandEquals(int leftRow, int leftColumn, int rightRow, int rightColumn, Island island) { + Coordinate leftEnd = island.getLeftEnd(); + assertEquals(leftRow, leftEnd.getRow()); + assertEquals(leftColumn, leftEnd.getColumn()); + Coordinate rightEnd = island.getRightEnd(); + assertEquals(rightRow, rightEnd.getRow()); + assertEquals(rightColumn, rightEnd.getColumn()); + } + + // helper method + // note: x = x of start coordinate + // note: y = y of start coordinate + //TODO: replace Island by a real Vector class + private void assertVectorEquals(int x, int y, int length, Island island) { + Coordinate leftEnd = island.getLeftEnd(); + assertEquals(x, leftEnd.getRow()); + assertEquals(y, leftEnd.getColumn()); + assertEquals(length, island.size()); + } + + @Test + public void testTableCreationEmptyGraph() { + final VariantGraph graph = new VariantGraph(); + SimpleWitness[] witnesses = createWitnesses("a b"); + MatchTable table = MatchTable.create(graph, witnesses[0]); + assertEquals(0, 
table.columnList().size()); + } + + @Test + public void testTableCreationVariationDoesNotCauseExtraColumns() { + SimpleWitness[] witnesses = createWitnesses("a", "b", "c", "d"); + VariantGraph graph = collate(witnesses[0], witnesses[1], witnesses[2]); + MatchTable table = MatchTable.create(graph, witnesses[3]); + assertEquals(1, table.columnList().size()); + } + + @Test + public void testTableCreationAbAcAbc() { + SimpleWitness[] witnesses = createWitnesses("a b", "a c", "a b c"); + VariantGraph graph = collate(witnesses[0], witnesses[1]); + MatchTable table = MatchTable.create(graph, witnesses[2]); + assertVertexEquals("a", table.vertexAt(0, 0)); + assertVertexEquals("b", table.vertexAt(1, 1)); + assertVertexEquals("c", table.vertexAt(2, 1)); + } + + @Test + public void testTableCreationAbcabCab() { + SimpleWitness[] witnesses = createWitnesses("a b c a b", "c a b"); + VariantGraph graph = collate(witnesses[0]); + MatchTable table = MatchTable.create(graph, witnesses[1]); + assertVertexEquals("a", table.vertexAt(1, 0)); + assertVertexEquals("b", table.vertexAt(2, 1)); + assertVertexEquals("c", table.vertexAt(0, 2)); + assertVertexEquals("a", table.vertexAt(1, 3)); + assertVertexEquals("b", table.vertexAt(2, 4)); + } + + @Test + public void testTableCreationAbcabAbcab() { + SimpleWitness[] sw = createWitnesses("A B C A B", "A B C A B"); + VariantGraph vg = collate(sw[0]); + MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + assertEquals(5, table.columnList().size()); + assertEquals(5, table.rowList().size()); + assertVertexEquals("a", table.vertexAt(0, 0)); + assertVertexEquals("a", table.vertexAt(0, 3)); + assertVertexEquals("b", table.vertexAt(1, 1)); + assertVertexEquals("b", table.vertexAt(1, 4)); + assertVertexEquals("c", table.vertexAt(2, 2)); + assertVertexEquals("a", table.vertexAt(3, 0)); + assertVertexEquals("a", table.vertexAt(3, 3)); + assertVertexEquals("b", table.vertexAt(4, 1)); + assertVertexEquals("b", 
table.vertexAt(4, 4)); + } + + @Test + public void testTableCreationAsymmatricMatrix() { + SimpleWitness[] sw = createWitnesses("A B A B C", "A B C A B"); + VariantGraph vg = collate(sw[0]); + MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + assertVertexEquals("a", table.vertexAt(0, 0)); + assertVertexEquals("a", table.vertexAt(0, 2)); + assertVertexEquals("b", table.vertexAt(1, 1)); + assertVertexEquals("b", table.vertexAt(1, 3)); + assertVertexEquals("c", table.vertexAt(2, 4)); + assertVertexEquals("a", table.vertexAt(3, 0)); + assertVertexEquals("a", table.vertexAt(3, 2)); + assertVertexEquals("b", table.vertexAt(4, 1)); + assertVertexEquals("b", table.vertexAt(4, 3)); + } + + @Test + public void testRowLabels() { + String textD1 = "de het een"; + String textD9 = "het een de"; + SimpleWitness[] sw = createWitnesses(textD1, textD9); + VariantGraph vg = collate(sw[0]); + MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + List labels = table.rowList(); + assertTokenEquals("het ", labels.get(0)); + assertTokenEquals("een ", labels.get(1)); + assertTokenEquals("de", labels.get(2)); + } + + @Test + public void testColumnLabels() { + String textD1 = "de het een"; + String textD9 = "het een de"; + SimpleWitness[] sw = createWitnesses(textD1, textD9); + VariantGraph vg = collate(sw[0]); + MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + List labels = table.columnList(); + assertEquals((Integer) 0, labels.get(0)); + assertEquals((Integer) 1, labels.get(1)); + assertEquals((Integer) 2, labels.get(2)); + } + + @Test + public void testGetAllMatches() { + SimpleWitness[] sw = createWitnesses("A B A B C", "A B C A B"); + VariantGraph vg = collate(sw[0]); + MatchTable table = MatchTable.create(vg, sw[1], new EqualityTokenComparator()); + List allTrue = table.allMatches(); + assertEquals(9, allTrue.size()); + assertTrue(allTrue.contains(new Coordinate(0, 0))); + 
assertFalse(allTrue.contains(new Coordinate(0, 1))); + } + + @Test + public void testIslandDetectionAbcabCab() { + SimpleWitness[] witnesses = createWitnesses("a b c a b", "c a b"); + VariantGraph graph = collate(witnesses[0]); + MatchTable table = MatchTable.create(graph, witnesses[1]); + List islands = new ArrayList<>(table.getIslands()); + assertEquals(2, islands.size()); + Collections.sort(islands); + Island island = islands.get(1); + assertIslandEquals(0, 2, 2, 4, island); + } + + @Test + public void testIslandDetectionXabcabXcab() { + SimpleWitness[] witnesses = createWitnesses("x a b c a b", "x c a b"); + VariantGraph graph = collate(witnesses[0]); + MatchTable table = MatchTable.create(graph, witnesses[1]); + List islands = new ArrayList<>(table.getIslands()); + assertEquals(3, islands.size()); + Collections.sort(islands); + Island island = islands.get(0); + assertIslandEquals(0, 0, 0, 0, island); + } + + @Test + public void testIslandDetectionPartlyOverlappingIslandsUsecase() { + SimpleWitness[] w = createWitnesses("The cat and the dog", "the dog and the cat"); + VariantGraph graph = collate(w[0]); + MatchTable table = MatchTable.create(graph, w[1], new EqualityTokenComparator()); + List islands = new ArrayList<>(table.getIslands()); + Collections.sort(islands); + assertEquals(4, islands.size()); + assertVectorEquals(0, 0, 1, islands.get(0)); + assertVectorEquals(3, 0, 2, islands.get(1)); + assertVectorEquals(2, 2, 2, islands.get(2)); + assertVectorEquals(0, 3, 2, islands.get(3)); + } +} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java index 51b5c5292..5e5bdb40d 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/CollateXLaboratory.java @@ -57,209 +57,209 @@ */ @SuppressWarnings("serial") public class CollateXLaboratory extends 
JFrame { - private static final Logger LOG = Logger.getLogger(CollateXLaboratory.class.getName()); - public static final BasicStroke DASHED_STROKE = new BasicStroke(1.0f, BasicStroke.CAP_SQUARE, BasicStroke.JOIN_MITER, 10.0f, new float[] { 5.0f }, 0.0f); - public static final BasicStroke SOLID_STROKE = new BasicStroke(1.5f); - - private final WitnessPanel witnessPanel = new WitnessPanel(); - - private final JTable matchMatrixTable = new JTable(); - - private final JComboBox algorithm; - private final JTabbedPane tabbedPane; - - public CollateXLaboratory() { - super("CollateX Laboratory"); - - this.algorithm = new JComboBox<>(new String[] { "Dekker", "Needleman-Wunsch", "Greedy String Tiling", "MEDITE" }); - this.algorithm.setEditable(false); - this.algorithm.setFocusable(false); - this.algorithm.setMaximumSize(new Dimension(200, this.algorithm.getMaximumSize().height)); - - this.tabbedPane = new JTabbedPane(); - this.tabbedPane.addTab("Match Table", new JScrollPane(matchMatrixTable)); - matchMatrixTable.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); - matchMatrixTable.setShowGrid(true); - matchMatrixTable.setGridColor(new Color(0, 0, 0, 32)); - matchMatrixTable.setColumnSelectionAllowed(true); - - final JSplitPane splitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT); - splitPane.setContinuousLayout(true); - splitPane.setLeftComponent(witnessPanel); - splitPane.setRightComponent(tabbedPane); - add(splitPane, BorderLayout.CENTER); - - final JToolBar toolBar = new JToolBar(); - toolBar.setBorderPainted(true); - toolBar.add(algorithm); - toolBar.addSeparator(); - toolBar.add(new AddWitnessAction()); - toolBar.add(new RemoveWitnessesAction()); - toolBar.add(new CollateAction()); - toolBar.add(new MatchMatrixAction()); - add(toolBar, BorderLayout.NORTH); - - setDefaultCloseOperation(EXIT_ON_CLOSE); - - final Dimension screenSize = getToolkit().getScreenSize(); - setSize(Math.max(800, screenSize.width - 200), Math.max(600, screenSize.height - 100)); - - final Dimension 
frameSize = getSize(); - setLocation((screenSize.width - frameSize.width) / 2, (screenSize.height - frameSize.height) / 2); - - splitPane.setDividerLocation(0.3f); - } - - public static void main(String[] args) { - try { - UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); - } catch (Exception e) { - e.printStackTrace(); - } - new CollateXLaboratory().setVisible(true); - } + private static final Logger LOG = Logger.getLogger(CollateXLaboratory.class.getName()); + public static final BasicStroke DASHED_STROKE = new BasicStroke(1.0f, BasicStroke.CAP_SQUARE, BasicStroke.JOIN_MITER, 10.0f, new float[]{5.0f}, 0.0f); + public static final BasicStroke SOLID_STROKE = new BasicStroke(1.5f); + + private final WitnessPanel witnessPanel = new WitnessPanel(); + + private final JTable matchMatrixTable = new JTable(); + + private final JComboBox algorithm; + private final JTabbedPane tabbedPane; + + public CollateXLaboratory() { + super("CollateX Laboratory"); + + this.algorithm = new JComboBox<>(new String[]{"Dekker", "Needleman-Wunsch", "Greedy String Tiling", "MEDITE"}); + this.algorithm.setEditable(false); + this.algorithm.setFocusable(false); + this.algorithm.setMaximumSize(new Dimension(200, this.algorithm.getMaximumSize().height)); + + this.tabbedPane = new JTabbedPane(); + this.tabbedPane.addTab("Match Table", new JScrollPane(matchMatrixTable)); + matchMatrixTable.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); + matchMatrixTable.setShowGrid(true); + matchMatrixTable.setGridColor(new Color(0, 0, 0, 32)); + matchMatrixTable.setColumnSelectionAllowed(true); - private class AddWitnessAction extends AbstractAction { + final JSplitPane splitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT); + splitPane.setContinuousLayout(true); + splitPane.setLeftComponent(witnessPanel); + splitPane.setRightComponent(tabbedPane); + add(splitPane, BorderLayout.CENTER); - private AddWitnessAction() { - super("Add"); + final JToolBar toolBar = new JToolBar(); + 
toolBar.setBorderPainted(true); + toolBar.add(algorithm); + toolBar.addSeparator(); + toolBar.add(new AddWitnessAction()); + toolBar.add(new RemoveWitnessesAction()); + toolBar.add(new CollateAction()); + toolBar.add(new MatchMatrixAction()); + add(toolBar, BorderLayout.NORTH); + + setDefaultCloseOperation(EXIT_ON_CLOSE); + + final Dimension screenSize = getToolkit().getScreenSize(); + setSize(Math.max(800, screenSize.width - 200), Math.max(600, screenSize.height - 100)); + + final Dimension frameSize = getSize(); + setLocation((screenSize.width - frameSize.width) / 2, (screenSize.height - frameSize.height) / 2); + + splitPane.setDividerLocation(0.3f); } - @Override - public void actionPerformed(ActionEvent e) { - witnessPanel.newWitness(); + public static void main(String[] args) { + try { + UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); + } catch (Exception e) { + e.printStackTrace(); + } + new CollateXLaboratory().setVisible(true); } - } - private class RemoveWitnessesAction extends AbstractAction { + private class AddWitnessAction extends AbstractAction { - private RemoveWitnessesAction() { - super("Remove"); - } + private AddWitnessAction() { + super("Add"); + } - @Override - public void actionPerformed(ActionEvent e) { - witnessPanel.removeEmptyWitnesses(); + @Override + public void actionPerformed(ActionEvent e) { + witnessPanel.newWitness(); + } } - } - private class CollateAction extends AbstractAction { + private class RemoveWitnessesAction extends AbstractAction { + + private RemoveWitnessesAction() { + super("Remove"); + } - private CollateAction() { - super("Collate"); + @Override + public void actionPerformed(ActionEvent e) { + witnessPanel.removeEmptyWitnesses(); + } } - @Override - public void actionPerformed(ActionEvent e) { - final List w = witnessPanel.getWitnesses(); + private class CollateAction extends AbstractAction { - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Collating {0}", w.toString()); - } + private 
CollateAction() { + super("Collate"); + } + @Override + public void actionPerformed(ActionEvent e) { + final List w = witnessPanel.getWitnesses(); - final EqualityTokenComparator comparator = new EqualityTokenComparator(); - final VariantGraph variantGraph = new VariantGraph(); + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Collating {0}", w.toString()); + } - final CollationAlgorithm collator; - if ("Dekker".equals(algorithm.getSelectedItem())) { - collator = CollationAlgorithmFactory.dekker(comparator); - } else if ("Needleman-Wunsch".equals(algorithm.getSelectedItem())) { - collator = CollationAlgorithmFactory.needlemanWunsch(comparator); - } else if ("Greedy String Tiling".equals(algorithm.getSelectedItem())) { - collator = CollationAlgorithmFactory.greedyStringTiling(comparator, 2); - } else { - collator = CollationAlgorithmFactory.medite(comparator, SimpleToken.TOKEN_MATCH_EVALUATOR); - } - collator.collate(variantGraph, w); + final EqualityTokenComparator comparator = new EqualityTokenComparator(); + final VariantGraph variantGraph = new VariantGraph(); - VariantGraph.JOIN.apply(variantGraph); + final CollationAlgorithm collator; + if ("Dekker".equals(algorithm.getSelectedItem())) { + collator = CollationAlgorithmFactory.dekker(comparator); + } else if ("Needleman-Wunsch".equals(algorithm.getSelectedItem())) { + collator = CollationAlgorithmFactory.needlemanWunsch(comparator); + } else if ("Greedy String Tiling".equals(algorithm.getSelectedItem())) { + collator = CollationAlgorithmFactory.greedyStringTiling(comparator, 2); + } else { + collator = CollationAlgorithmFactory.medite(comparator, SimpleToken.TOKEN_MATCH_EVALUATOR); + } - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Collated {0}", w.toString()); - } - } - } + collator.collate(variantGraph, w); - private class MatchMatrixAction extends AbstractAction { + VariantGraph.JOIN.apply(variantGraph); - private MatchMatrixAction() { - super("Match Table"); + if 
(LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Collated {0}", w.toString()); + } + } } - @Override - public void actionPerformed(ActionEvent e) { - final List w = witnessPanel.getWitnesses(); - - if (w.size() < 2) { - return; - } + private class MatchMatrixAction extends AbstractAction { - final StrictEqualityTokenComparator comparator = new StrictEqualityTokenComparator(); - final VariantGraph vg = new VariantGraph(); + private MatchMatrixAction() { + super("Match Table"); + } - int outlierTranspositionsSizeLimit = 3; - for (int i = 0; i <= w.size() - 2; i++) { - SimpleWitness witness = w.get(i); - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Collating: {0}", witness.getSigil()); + @Override + public void actionPerformed(ActionEvent e) { + final List w = witnessPanel.getWitnesses(); + + if (w.size() < 2) { + return; + } + + final StrictEqualityTokenComparator comparator = new StrictEqualityTokenComparator(); + final VariantGraph vg = new VariantGraph(); + + int outlierTranspositionsSizeLimit = 3; + for (int i = 0; i <= w.size() - 2; i++) { + SimpleWitness witness = w.get(i); + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Collating: {0}", witness.getSigil()); + } + CollationAlgorithmFactory.dekkerMatchMatrix(comparator, outlierTranspositionsSizeLimit).collate(vg, witness); + } + + SimpleWitness lastWitness = w.get(w.size() - 1); + if (LOG.isLoggable(Level.FINE)) { + LOG.log(Level.FINE, "Creating MatchTable for: {0}", lastWitness.getSigil()); + } + matchMatrixTable.setModel(new MatchMatrixTableModel(MatchTable.create(vg, lastWitness, comparator))); + + final TableColumnModel columnModel = matchMatrixTable.getColumnModel(); + columnModel.getColumn(0).setCellRenderer(matchMatrixTable.getTableHeader().getDefaultRenderer()); + for (int col = 1; col < matchMatrixTable.getColumnCount(); col++) { + columnModel.getColumn(col).setCellRenderer(MATCH_MATRIX_CELL_RENDERER); + } + + tabbedPane.setSelectedIndex(0); } - 
CollationAlgorithmFactory.dekkerMatchMatrix(comparator, outlierTranspositionsSizeLimit).collate(vg, witness); - } - - SimpleWitness lastWitness = w.get(w.size() - 1); - if (LOG.isLoggable(Level.FINE)) { - LOG.log(Level.FINE, "Creating MatchTable for: {0}", lastWitness.getSigil()); - } - matchMatrixTable.setModel(new MatchMatrixTableModel(MatchTable.create(vg, lastWitness, comparator))); - - final TableColumnModel columnModel = matchMatrixTable.getColumnModel(); - columnModel.getColumn(0).setCellRenderer(matchMatrixTable.getTableHeader().getDefaultRenderer()); - for (int col = 1; col < matchMatrixTable.getColumnCount(); col++) { - columnModel.getColumn(col).setCellRenderer(MATCH_MATRIX_CELL_RENDERER); - } - - tabbedPane.setSelectedIndex(0); } - } - - private static final TableCellRenderer MATCH_MATRIX_CELL_RENDERER = new TableCellRenderer() { - private JLabel label; - - @Override - public Component getTableCellRendererComponent(JTable table, Object value, boolean isSelected, boolean hasFocus, int row, int column) { - if (label == null) { - label = new JLabel(); - label.setOpaque(true); - label.getInsets().set(5, 5, 5, 5); - } - MatchTableCell cell = (MatchTableCell) value; - MatchMatrixCellStatus status = cell.getStatus(); - - switch (status) { - case PREFERRED_MATCH: - label.setBackground(isSelected ? Color.GREEN : Color.GREEN.darker()); - label.setText(cell.getText()); - break; - - case OPTIONAL_MATCH: - label.setBackground(isSelected ? Color.YELLOW : Color.YELLOW.darker()); - label.setText(cell.getText()); - break; - - case EMPTY: - label.setBackground(isSelected ? Color.LIGHT_GRAY : Color.WHITE); - label.setText(""); - break; - - default: - label.setBackground(isSelected ? 
Color.LIGHT_GRAY : Color.WHITE); - break; - } - - return label; - } - }; + + private static final TableCellRenderer MATCH_MATRIX_CELL_RENDERER = new TableCellRenderer() { + private JLabel label; + + @Override + public Component getTableCellRendererComponent(JTable table, Object value, boolean isSelected, boolean hasFocus, int row, int column) { + if (label == null) { + label = new JLabel(); + label.setOpaque(true); + label.getInsets().set(5, 5, 5, 5); + } + MatchTableCell cell = (MatchTableCell) value; + MatchMatrixCellStatus status = cell.getStatus(); + + switch (status) { + case PREFERRED_MATCH: + label.setBackground(isSelected ? Color.GREEN : Color.GREEN.darker()); + label.setText(cell.getText()); + break; + + case OPTIONAL_MATCH: + label.setBackground(isSelected ? Color.YELLOW : Color.YELLOW.darker()); + label.setText(cell.getText()); + break; + + case EMPTY: + label.setBackground(isSelected ? Color.LIGHT_GRAY : Color.WHITE); + label.setText(""); + break; + + default: + label.setBackground(isSelected ? Color.LIGHT_GRAY : Color.WHITE); + break; + } + + return label; + } + }; } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixCellStatus.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixCellStatus.java index dc1ed7894..f2f15e494 100755 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixCellStatus.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixCellStatus.java @@ -1,24 +1,24 @@ -/* - * Copyright (c) 2013 The Interedition Development Group. - * - * This file is part of CollateX. - * - * CollateX is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * CollateX is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CollateX. If not, see . - */ - -package eu.interedition.collatex.lab; - -public enum MatchMatrixCellStatus { - PREFERRED_MATCH, OPTIONAL_MATCH, EMPTY -} +/* + * Copyright (c) 2013 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . 
+ */ + +package eu.interedition.collatex.lab; + +public enum MatchMatrixCellStatus { + PREFERRED_MATCH, OPTIONAL_MATCH, EMPTY +} diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java index 53fa13095..e15636d50 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchMatrixTableModel.java @@ -37,80 +37,80 @@ @SuppressWarnings("serial") public class MatchMatrixTableModel extends AbstractTableModel { - private final String[] rowNames; - private final String[] columnNames; - private final MatchTableCell[][] data; + private final String[] rowNames; + private final String[] columnNames; + private final MatchTableCell[][] data; - public MatchMatrixTableModel(MatchTable matchTable) { - List rowList = matchTable.rowList(); - List columnList = matchTable.columnList(); + public MatchMatrixTableModel(MatchTable matchTable) { + List rowList = matchTable.rowList(); + List columnList = matchTable.columnList(); - final int rowNum = rowList.size(); - final int colNum = columnList.size(); + final int rowNum = rowList.size(); + final int colNum = columnList.size(); - // set the row labels - rowNames = new String[rowNum]; - for (int row = 0; row < rowNum; row++) { - rowNames[row] = ((SimpleToken) rowList.get(row)).getContent(); - } - - // set the column labels - columnNames = new String[colNum]; - for (int col = 0; col < colNum; col++) { - columnNames[col] = Integer.toString(columnList.get(col) + 1); - } + // set the row labels + rowNames = new String[rowNum]; + for (int row = 0; row < rowNum; row++) { + rowNames[row] = ((SimpleToken) rowList.get(row)).getContent(); + } - // fill the cells with colors - MatchTableSelection preferred = preferred(matchTable); - //LOG.debug(matchMatrix.toHtml(preferred)); - data = new MatchTableCell[rowNum][colNum]; - for 
(int row = 0; row < rowNum; row++) { - for (int col = 0; col < colNum; col++) { - VariantGraph.Vertex at = matchTable.vertexAt(row, col); - MatchMatrixCellStatus status; - if (at != null) { - status = preferred.containsCoordinate(row, col) ? MatchMatrixCellStatus.PREFERRED_MATCH : MatchMatrixCellStatus.OPTIONAL_MATCH; - } else { - status = MatchMatrixCellStatus.EMPTY; + // set the column labels + columnNames = new String[colNum]; + for (int col = 0; col < colNum; col++) { + columnNames[col] = Integer.toString(columnList.get(col) + 1); } - String text; - if (at != null) { - text = ((SimpleToken) at.tokens().iterator().next()).getContent(); - } else { - text = null; + + // fill the cells with colors + MatchTableSelection preferred = preferred(matchTable); + //LOG.debug(matchMatrix.toHtml(preferred)); + data = new MatchTableCell[rowNum][colNum]; + for (int row = 0; row < rowNum; row++) { + for (int col = 0; col < colNum; col++) { + VariantGraph.Vertex at = matchTable.vertexAt(row, col); + MatchMatrixCellStatus status; + if (at != null) { + status = preferred.containsCoordinate(row, col) ? 
MatchMatrixCellStatus.PREFERRED_MATCH : MatchMatrixCellStatus.OPTIONAL_MATCH; + } else { + status = MatchMatrixCellStatus.EMPTY; + } + String text; + if (at != null) { + text = ((SimpleToken) at.tokens().iterator().next()).getContent(); + } else { + text = null; + } + data[row][col] = new MatchTableCell(status, text); + } } - data[row][col] = new MatchTableCell(status, text); - } } - } - private MatchTableSelection preferred(MatchTable matchTable) { - // prepare - IslandConflictResolver resolver = new IslandConflictResolver(matchTable); - // find preferred islands - return resolver.createNonConflictingVersion(); - } + private MatchTableSelection preferred(MatchTable matchTable) { + // prepare + IslandConflictResolver resolver = new IslandConflictResolver(matchTable); + // find preferred islands + return resolver.createNonConflictingVersion(); + } - @Override - public String getColumnName(int column) { - return (column == 0 ? "" : columnNames[column - 1]); - } + @Override + public String getColumnName(int column) { + return (column == 0 ? "" : columnNames[column - 1]); + } - @Override - public int getRowCount() { - return data.length; - } + @Override + public int getRowCount() { + return data.length; + } - @Override - public int getColumnCount() { - return (data.length == 0 ? 0 : data[0].length + 1); - } + @Override + public int getColumnCount() { + return (data.length == 0 ? 
0 : data[0].length + 1); + } - @Override - public Object getValueAt(int rowIndex, int columnIndex) { - if (columnIndex == 0) { - return rowNames[rowIndex]; + @Override + public Object getValueAt(int rowIndex, int columnIndex) { + if (columnIndex == 0) { + return rowNames[rowIndex]; + } + return data[rowIndex][columnIndex - 1]; } - return data[rowIndex][columnIndex - 1]; - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchTableCell.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchTableCell.java index 5f97c3589..304ce293c 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchTableCell.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/MatchTableCell.java @@ -24,19 +24,19 @@ */ public class MatchTableCell { - private final MatchMatrixCellStatus status; - private final String text; - - public MatchTableCell(MatchMatrixCellStatus status, String text) { - this.status = status; - this.text = text; - } + private final MatchMatrixCellStatus status; + private final String text; - public MatchMatrixCellStatus getStatus() { - return status; - } + public MatchTableCell(MatchMatrixCellStatus status, String text) { + this.status = status; + this.text = text; + } - public String getText() { - return text; - } + public MatchMatrixCellStatus getStatus() { + return status; + } + + public String getText() { + return text; + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java b/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java index fd491e984..891166c47 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/lab/WitnessPanel.java @@ -44,130 +44,130 @@ * @author Gregor Middell */ public class WitnessPanel extends JPanel { - private static final char[] SIGLA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); - - private List witnesses = new 
ArrayList<>(SIGLA.length); - - public WitnessPanel() { - super(); - setLayout(new BoxLayout(this, BoxLayout.X_AXIS)); - setMinimumSize(new Dimension(400, 200)); - createWitness(); - createWitness(); - } - - public void newWitness() { - WitnessTextArea newWitness = null; - for (WitnessTextArea witness : witnesses) { - if (witness.getTextContent().isEmpty()) { - newWitness = witness; - break; - } - } - if (newWitness == null && (witnesses.size() < SIGLA.length)) { - newWitness = createWitness(); - } - if (newWitness != null) { - newWitness.requestFocusInWindow(); - } - } - - public List getWitnesses() { - List witnesses = new ArrayList<>(this.witnesses.size()); - for (WitnessTextArea textArea : this.witnesses) { - final String textContent = textArea.getTextContent(); - if (!textContent.isEmpty()) { - witnesses.add(new SimpleWitness(textArea.getSigil(), textContent)); - } - } - return witnesses; - } - - public void removeEmptyWitnesses() { - for (int wc = witnesses.size() - 1; wc >= 0 && witnesses.size() > 2; wc--) { - final WitnessTextArea textArea = witnesses.get(wc); - if (textArea.getTextContent().isEmpty()) { - remove(SwingUtilities.getAncestorOfClass(JScrollPane.class, textArea)); - witnesses.remove(wc); - } + private static final char[] SIGLA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); + + private List witnesses = new ArrayList<>(SIGLA.length); + + public WitnessPanel() { + super(); + setLayout(new BoxLayout(this, BoxLayout.X_AXIS)); + setMinimumSize(new Dimension(400, 200)); + createWitness(); + createWitness(); } - revalidate(); - } - - protected WitnessTextArea createWitness() { - final WitnessTextArea newTextArea = new WitnessTextArea(); - witnesses.add(newTextArea); - add(new JScrollPane(newTextArea)); - revalidate(); - return newTextArea; - } - - class WitnessTextArea extends JTextArea { - - private WitnessTextArea() { - super(""); - setLineWrap(true); - setWrapStyleWord(true); - setOpaque(false); - setMinimumSize(new Dimension(100, 100)); - 
setPreferredSize(new Dimension(200, 100)); - setBorder(BorderFactory.createEmptyBorder(5, 5, 5, 5)); - addKeyListener(new KeyAdapter() { - @Override - public void keyPressed(KeyEvent e) { - if (e.getKeyCode() != KeyEvent.VK_TAB) { - return; - } - e.consume(); - - int nextIndex = witnesses.indexOf(WitnessTextArea.this) + (e.isShiftDown() ? -1 : 1); - if (nextIndex < 0) { - nextIndex = witnesses.size() - 1; - } else if (nextIndex >= witnesses.size()) { - nextIndex = 0; - } - witnesses.get(nextIndex).requestFocusInWindow(); + public void newWitness() { + WitnessTextArea newWitness = null; + for (WitnessTextArea witness : witnesses) { + if (witness.getTextContent().isEmpty()) { + newWitness = witness; + break; + } } - }); - addFocusListener(new FocusAdapter() { - @Override - public void focusGained(FocusEvent e) { - selectAll(); + if (newWitness == null && (witnesses.size() < SIGLA.length)) { + newWitness = createWitness(); } + if (newWitness != null) { + newWitness.requestFocusInWindow(); + } + } - @Override - public void focusLost(FocusEvent e) { - select(0, 0); + public List getWitnesses() { + List witnesses = new ArrayList<>(this.witnesses.size()); + for (WitnessTextArea textArea : this.witnesses) { + final String textContent = textArea.getTextContent(); + if (!textContent.isEmpty()) { + witnesses.add(new SimpleWitness(textArea.getSigil(), textContent)); + } } - }); + return witnesses; } - public String getTextContent() { - return getText().trim(); + public void removeEmptyWitnesses() { + for (int wc = witnesses.size() - 1; wc >= 0 && witnesses.size() > 2; wc--) { + final WitnessTextArea textArea = witnesses.get(wc); + if (textArea.getTextContent().isEmpty()) { + remove(SwingUtilities.getAncestorOfClass(JScrollPane.class, textArea)); + witnesses.remove(wc); + } + } + revalidate(); } - public String getSigil() { - return Character.toString(SIGLA[witnesses.indexOf(this)]); + protected WitnessTextArea createWitness() { + final WitnessTextArea newTextArea = new 
WitnessTextArea(); + witnesses.add(newTextArea); + add(new JScrollPane(newTextArea)); + revalidate(); + return newTextArea; } - @Override - protected void paintComponent(Graphics g) { - Graphics2D g2 = (Graphics2D) g; + class WitnessTextArea extends JTextArea { + + private WitnessTextArea() { + super(""); + setLineWrap(true); + setWrapStyleWord(true); + setOpaque(false); + setMinimumSize(new Dimension(100, 100)); + setPreferredSize(new Dimension(200, 100)); + setBorder(BorderFactory.createEmptyBorder(5, 5, 5, 5)); + addKeyListener(new KeyAdapter() { + + @Override + public void keyPressed(KeyEvent e) { + if (e.getKeyCode() != KeyEvent.VK_TAB) { + return; + } + e.consume(); + + int nextIndex = witnesses.indexOf(WitnessTextArea.this) + (e.isShiftDown() ? -1 : 1); + if (nextIndex < 0) { + nextIndex = witnesses.size() - 1; + } else if (nextIndex >= witnesses.size()) { + nextIndex = 0; + } + witnesses.get(nextIndex).requestFocusInWindow(); + } + }); + addFocusListener(new FocusAdapter() { + @Override + public void focusGained(FocusEvent e) { + selectAll(); + } + + @Override + public void focusLost(FocusEvent e) { + select(0, 0); + } + }); + } + + public String getTextContent() { + return getText().trim(); + } + + public String getSigil() { + return Character.toString(SIGLA[witnesses.indexOf(this)]); + } - g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + @Override + protected void paintComponent(Graphics g) { + Graphics2D g2 = (Graphics2D) g; - g2.setColor(Color.WHITE); - g2.fillRect(0, 0, getWidth(), getHeight()); + g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); - g2.setColor(new Color(196, 196, 255)); - g2.setFont(g2.getFont().deriveFont(60.0f)); + g2.setColor(Color.WHITE); + g2.fillRect(0, 0, getWidth(), getHeight()); - final String sigil = getSigil(); - final Rectangle bounds = g2.getFontMetrics().getStringBounds(sigil, g2).getBounds(); - g2.drawString(sigil, (getWidth() - 
bounds.width) / 2, (int) bounds.getHeight()); + g2.setColor(new Color(196, 196, 255)); + g2.setFont(g2.getFont().deriveFont(60.0f)); - super.paintComponent(g); + final String sigil = getSigil(); + final Rectangle bounds = g2.getFontMetrics().getStringBounds(sigil, g2).getBounds(); + g2.drawString(sigil, (getWidth() - bounds.width) / 2, (int) bounds.getHeight()); + + super.paintComponent(g); + } } - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java b/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java index fdc49bab7..91d01ba79 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/matching/MatchesTest.java @@ -34,59 +34,59 @@ public class MatchesTest extends AbstractTest { - @Test - public void test1() { - final SimpleWitness[] w = createWitnesses("john and paul and george and ringo", "john and paul and george and ringo"); - final VariantGraph graph = collate(w[0]); - final Matches matches = Matches.between(graph.vertices(), w[1].getTokens(), new EqualityTokenComparator()); - - int expected_unmatched = 0; - int expected_unique = 4; // john paul george ringo - int expected_ambiguous = 3; // 3 ands in 2nd witness - assertMatches(matches, expected_unmatched, expected_unique, expected_ambiguous); - } - - @Test - public void test2() { - final SimpleWitness[] w = createWitnesses("the white cat", "the black cat"); - final VariantGraph graph = collate(w[0]); - final Matches matches = Matches.between(graph.vertices(), w[1].getTokens(), new EqualityTokenComparator()); - - int expected_unmatched = 1; // black - int expected_unique = 2; // the & cat - int expected_ambiguous = 0; - assertMatches(matches, expected_unmatched, expected_unique, expected_ambiguous); - } - - // This test tests overlapping islands - @Test - public void test3OverlappingIslands() { - String witnessA = "the cat and the dog"; - String 
witnessB = "the dog and the cat"; - SimpleWitness[] sw = createWitnesses(witnessA, witnessB); - VariantGraph vg = collate(sw[0]); - final Matches matches = Matches.between(vg.vertices(), sw[1].getTokens(), new EqualityTokenComparator()); - assertMatches(matches, 0, 3, 2); - assertEquals(7, matches.allMatches.values().stream().flatMap(List::stream).count()); - } - - - private void assertMatches(final Matches matches, int expected_unmatched, int expected_unique, int expected_ambiguous) { - Set unmatched = matches.unmatchedInWitness; - LOG.log(Level.FINE, "unmatched: {0}", unmatched); - - Set unique = matches.uniqueInWitness; - LOG.log(Level.FINE, "unique: {0}", unique); - - Set ambiguous = matches.ambiguousInWitness; - LOG.log(Level.FINE, "ambiguous: {0}", ambiguous); - - Map> all = matches.allMatches; - LOG.log(Level.FINE, "all: {0}", all); - - assertEquals(expected_unmatched, unmatched.size()); - assertEquals(expected_unique, unique.size()); - assertEquals(expected_ambiguous, ambiguous.size()); - // assertEquals(expected_unique + expected_ambiguous, all.size()); - } + @Test + public void test1() { + final SimpleWitness[] w = createWitnesses("john and paul and george and ringo", "john and paul and george and ringo"); + final VariantGraph graph = collate(w[0]); + final Matches matches = Matches.between(graph.vertices(), w[1].getTokens(), new EqualityTokenComparator()); + + int expected_unmatched = 0; + int expected_unique = 4; // john paul george ringo + int expected_ambiguous = 3; // 3 ands in 2nd witness + assertMatches(matches, expected_unmatched, expected_unique, expected_ambiguous); + } + + @Test + public void test2() { + final SimpleWitness[] w = createWitnesses("the white cat", "the black cat"); + final VariantGraph graph = collate(w[0]); + final Matches matches = Matches.between(graph.vertices(), w[1].getTokens(), new EqualityTokenComparator()); + + int expected_unmatched = 1; // black + int expected_unique = 2; // the & cat + int expected_ambiguous = 0; + 
assertMatches(matches, expected_unmatched, expected_unique, expected_ambiguous); + } + + // This test tests overlapping islands + @Test + public void test3OverlappingIslands() { + String witnessA = "the cat and the dog"; + String witnessB = "the dog and the cat"; + SimpleWitness[] sw = createWitnesses(witnessA, witnessB); + VariantGraph vg = collate(sw[0]); + final Matches matches = Matches.between(vg.vertices(), sw[1].getTokens(), new EqualityTokenComparator()); + assertMatches(matches, 0, 3, 2); + assertEquals(7, matches.allMatches.values().stream().flatMap(List::stream).count()); + } + + + private void assertMatches(final Matches matches, int expected_unmatched, int expected_unique, int expected_ambiguous) { + Set unmatched = matches.unmatchedInWitness; + LOG.log(Level.FINE, "unmatched: {0}", unmatched); + + Set unique = matches.uniqueInWitness; + LOG.log(Level.FINE, "unique: {0}", unique); + + Set ambiguous = matches.ambiguousInWitness; + LOG.log(Level.FINE, "ambiguous: {0}", ambiguous); + + Map> all = matches.allMatches; + LOG.log(Level.FINE, "all: {0}", all); + + assertEquals(expected_unmatched, unmatched.size()); + assertEquals(expected_unique, unique.size()); + assertEquals(expected_ambiguous, ambiguous.size()); + // assertEquals(expected_unique + expected_ambiguous, all.size()); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java b/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java index f6840fdfd..b790589c0 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/matching/NearMatcherTest.java @@ -31,15 +31,15 @@ import static org.junit.Assert.assertEquals; public class NearMatcherTest extends AbstractTest { - - @Test - public void nearTokenMatching() { - final SimpleWitness[] w = createWitnesses("near matching yeah", "nar matching"); - final VariantGraph graph = collate(w[0]); - 
final Map> matches = Matches.between(graph.vertices(), w[1].getTokens(), new EditDistanceTokenComparator()).allMatches; - assertEquals(2, matches.values().stream().flatMap(List::stream).count()); - assertEquals(w[0].getTokens().get(0), matches.get(w[1].getTokens().get(0)).get(0).tokens().stream().findFirst().get()); - assertEquals(w[0].getTokens().get(1), matches.get(w[1].getTokens().get(1)).get(0).tokens().stream().findFirst().get()); - } + @Test + public void nearTokenMatching() { + final SimpleWitness[] w = createWitnesses("near matching yeah", "nar matching"); + final VariantGraph graph = collate(w[0]); + final Map> matches = Matches.between(graph.vertices(), w[1].getTokens(), new EditDistanceTokenComparator()).allMatches; + + assertEquals(2, matches.values().stream().flatMap(List::stream).count()); + assertEquals(w[0].getTokens().get(0), matches.get(w[1].getTokens().get(0)).get(0).tokens().stream().findFirst().get()); + assertEquals(w[0].getTokens().get(1), matches.get(w[1].getTokens().get(1)).get(0).tokens().stream().findFirst().get()); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/medite/MediteTest.java b/collatex-core/src/test/java/eu/interedition/collatex/medite/MediteTest.java index 829600ff0..22577a08e 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/medite/MediteTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/medite/MediteTest.java @@ -31,14 +31,14 @@ */ public class MediteTest extends AbstractTest { - @Override - public void initAlgorithm() { - collationAlgorithm = CollationAlgorithmFactory.medite(new EqualityTokenComparator(), SimpleToken.TOKEN_MATCH_EVALUATOR); - } + @Override + public void initAlgorithm() { + collationAlgorithm = CollationAlgorithmFactory.medite(new EqualityTokenComparator(), SimpleToken.TOKEN_MATCH_EVALUATOR); + } - @Test - public void medite() { - LOG.fine(toString(VariantGraphRanking.of(collate( + @Test + public void medite() { + 
LOG.fine(toString(VariantGraphRanking.of(collate( "This Carpenter hadde wedded newe a wyf", "This Carpenter hadde wedded a newe wyf", "This Carpenter hadde newe wedded a wyf", @@ -46,6 +46,6 @@ public void medite() { "This Carpenter hadde E wedded newe a wyf", "This Carpenter hadde newli wedded a wyf", "This Carpenter hadde wedded a wyf" - )).asTable())); - } + )).asTable())); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java b/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java index c129b7d2d..82967a907 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/medite/SuffixTreeTest.java @@ -32,17 +32,17 @@ */ public class SuffixTreeTest extends AbstractTest { - @Test - public void suffixTree() { - final SuffixTree st = SuffixTree.build(new Comparator() { - @Override - public int compare(String o1, String o2) { - return o1.toLowerCase().compareTo(o2.toLowerCase()); - } - }, "S", "P", "O", "a", "s", "p", "o"); + @Test + public void suffixTree() { + final SuffixTree st = SuffixTree.build(new Comparator() { + @Override + public int compare(String o1, String o2) { + return o1.toLowerCase().compareTo(o2.toLowerCase()); + } + }, "S", "P", "O", "a", "s", "p", "o"); - LOG.fine(() -> st.toString()); - LOG.fine(() -> StreamSupport.stream(st.match(Arrays.asList("s", "p", "o", "a")).spliterator(), false).map(Object::toString).collect(Collectors.joining(", "))); - } + LOG.fine(() -> st.toString()); + LOG.fine(() -> StreamSupport.stream(st.match(Arrays.asList("s", "p", "o", "a")).spliterator(), false).map(Object::toString).collect(Collectors.joining(", "))); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschTest.java b/collatex-core/src/test/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschTest.java index f49353405..ec63d3179 100644 --- 
a/collatex-core/src/test/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschTest.java @@ -29,9 +29,9 @@ */ public class NeedlemanWunschTest extends AbstractTest { - @Test - public void simple() { - setCollationAlgorithm(CollationAlgorithmFactory.needlemanWunsch(new EqualityTokenComparator())); - LOG.fine(toString(table(collate("a b a b a", "a b a")))); - } + @Test + public void simple() { + setCollationAlgorithm(CollationAlgorithmFactory.needlemanWunsch(new EqualityTokenComparator())); + LOG.fine(toString(table(collate("a b a b a", "a b a")))); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java b/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java index 3d6dea954..59a2b010e 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/output/AlignmentTableTest.java @@ -35,113 +35,113 @@ public class AlignmentTableTest extends AbstractTest { - @Test - public void emptyTable() { - assertEquals(0, table(collate(createWitnesses())).size()); - } - - @Test - public void firstWitness() { - final SimpleWitness[] w = createWitnesses("the black cat"); - final List>> table = table(collate(w)); - assertEquals(1, witnesses(table).count()); - assertEquals("|the|black|cat|", toString(table, w[0])); - } - - @Test - public void everythingMatches() { - final SimpleWitness[] w = createWitnesses("the black cat", "the black cat", "the black cat"); - final List>> table = table(collate(w)); - assertEquals(3, witnesses(table).count()); - assertEquals("|the|black|cat|", toString(table, w[0])); - assertEquals("|the|black|cat|", toString(table, w[1])); - assertEquals("|the|black|cat|", toString(table, w[2])); - } - - @Test - public void variant() { - final SimpleWitness[] w = createWitnesses("the black cat", 
"the white cat", "the green cat", "the red cat", "the yellow cat"); - final List>> table = table(collate(w)); - assertEquals(5, witnesses(table).count()); - assertEquals("|the|black|cat|", toString(table, w[0])); - assertEquals("|the|white|cat|", toString(table, w[1])); - assertEquals("|the|green|cat|", toString(table, w[2])); - assertEquals("|the|red|cat|", toString(table, w[3])); - assertEquals("|the|yellow|cat|", toString(table, w[4])); - } - - @Test - public void omission() { - final List>> table = table(collate("the black cat", "the cat", "the black cat")); - assertEquals("A: |the|black|cat|\nB: |the| |cat|\nC: |the|black|cat|\n", toString(table)); - } - - @Test - public void addition1() { - final List>> table = table(collate("the black cat", "the white and black cat")); - assertEquals("A: |the| | |black|cat|\nB: |the|white|and|black|cat|\n", toString(table)); - } - - @Test - public void addition2() { - final List>> table = table(collate("the cat", "before the cat", "the black cat", "the cat walks")); - assertEquals("A: | |the| |cat| |\nB: |before|the| |cat| |\nC: | |the|black|cat| |\nD: | |the| |cat|walks|\n", toString(table)); - } - - @Test - public void addition3() { - final List>> t = table(collate("the cat", "before the cat", "the black cat", "just before midnight the cat walks")); - assertEquals("A: | | | |the| |cat| |\nB: | |before| |the| |cat| |\nC: | | | |the|black|cat| |\nD: |just|before|midnight|the| |cat|walks|\n", toString(t)); - } - - @Test - public void transpositionAndReplacement() { - final - List>> t = table(collate("the black dog chases a red cat", "a red cat chases the black dog", "a red cat chases the yellow dog")); - assertEquals("A: |the|black|dog|chases|a|red|cat|\nB: |a|red|cat|chases|the|black|dog|\nC: |a|red|cat|chases|the|yellow|dog|\n", toString(t)); - } - - @Test - @Ignore("By default we align to the left; right alignment would be nicer in this specific case") - public void variation() { - final List>> t = table(collate("the black 
cat", "the black and white cat", "the black very special cat", "the black not very special cat")); - assertEquals("A: |the|black| | | |cat|\nB: |the|black| |and|white|cat|\nC: |the|black| |very|special|cat|\nD: |the|black|not|very|special|cat|\n", toString(t)); - } - - @Test - public void witnessReorder() { - final SimpleWitness[] w = createWitnesses("the black cat", "the black and white cat", "the black not very special cat", "the black very special cat"); - VariantGraph variantgraph = collate(w); - final List>> table = table(variantgraph); - assertEquals("|the|black| | | |cat|", toString(table, w[0])); - assertEquals("|the|black|and|white| |cat|", toString(table, w[1])); - assertEquals("|the|black|not|very|special|cat|", toString(table, w[2])); - assertEquals("|the|black| |very|special|cat|", toString(table, w[3])); - } - - @Test - public void testSimpleSpencerHowe() { - final SimpleWitness[] w = createWitnesses("a", "b", "a b"); - final List>> table = table(collate(w)); - assertEquals(3, witnesses(table).count()); - assertEquals("|a| |", toString(table, w[0])); - assertEquals("| |b|", toString(table, w[1])); - assertEquals("|a|b|", toString(table, w[2])); - } - - @Test - public void stringOutputOneWitness() { - assertEquals("A: |the|black|cat|\n", toString(table(collate("the black cat")))); - } - - @Test - public void stringOutputTwoWitnesses() { - final List>> table = table(collate("the black cat", "the black cat")); - assertEquals("A: |the|black|cat|\nB: |the|black|cat|\n", toString(table)); - } - - @Test - public void stringOutputEmptyCells() { - assertEquals("A: |the|black|cat|\nB: |the| | |\n", toString(table(collate("the black cat", "the")))); - } + @Test + public void emptyTable() { + assertEquals(0, table(collate(createWitnesses())).size()); + } + + @Test + public void firstWitness() { + final SimpleWitness[] w = createWitnesses("the black cat"); + final List>> table = table(collate(w)); + assertEquals(1, witnesses(table).count()); + 
assertEquals("|the|black|cat|", toString(table, w[0])); + } + + @Test + public void everythingMatches() { + final SimpleWitness[] w = createWitnesses("the black cat", "the black cat", "the black cat"); + final List>> table = table(collate(w)); + assertEquals(3, witnesses(table).count()); + assertEquals("|the|black|cat|", toString(table, w[0])); + assertEquals("|the|black|cat|", toString(table, w[1])); + assertEquals("|the|black|cat|", toString(table, w[2])); + } + + @Test + public void variant() { + final SimpleWitness[] w = createWitnesses("the black cat", "the white cat", "the green cat", "the red cat", "the yellow cat"); + final List>> table = table(collate(w)); + assertEquals(5, witnesses(table).count()); + assertEquals("|the|black|cat|", toString(table, w[0])); + assertEquals("|the|white|cat|", toString(table, w[1])); + assertEquals("|the|green|cat|", toString(table, w[2])); + assertEquals("|the|red|cat|", toString(table, w[3])); + assertEquals("|the|yellow|cat|", toString(table, w[4])); + } + + @Test + public void omission() { + final List>> table = table(collate("the black cat", "the cat", "the black cat")); + assertEquals("A: |the|black|cat|\nB: |the| |cat|\nC: |the|black|cat|\n", toString(table)); + } + + @Test + public void addition1() { + final List>> table = table(collate("the black cat", "the white and black cat")); + assertEquals("A: |the| | |black|cat|\nB: |the|white|and|black|cat|\n", toString(table)); + } + + @Test + public void addition2() { + final List>> table = table(collate("the cat", "before the cat", "the black cat", "the cat walks")); + assertEquals("A: | |the| |cat| |\nB: |before|the| |cat| |\nC: | |the|black|cat| |\nD: | |the| |cat|walks|\n", toString(table)); + } + + @Test + public void addition3() { + final List>> t = table(collate("the cat", "before the cat", "the black cat", "just before midnight the cat walks")); + assertEquals("A: | | | |the| |cat| |\nB: | |before| |the| |cat| |\nC: | | | |the|black|cat| |\nD: 
|just|before|midnight|the| |cat|walks|\n", toString(t)); + } + + @Test + public void transpositionAndReplacement() { + final + List>> t = table(collate("the black dog chases a red cat", "a red cat chases the black dog", "a red cat chases the yellow dog")); + assertEquals("A: |the|black|dog|chases|a|red|cat|\nB: |a|red|cat|chases|the|black|dog|\nC: |a|red|cat|chases|the|yellow|dog|\n", toString(t)); + } + + @Test + @Ignore("By default we align to the left; right alignment would be nicer in this specific case") + public void variation() { + final List>> t = table(collate("the black cat", "the black and white cat", "the black very special cat", "the black not very special cat")); + assertEquals("A: |the|black| | | |cat|\nB: |the|black| |and|white|cat|\nC: |the|black| |very|special|cat|\nD: |the|black|not|very|special|cat|\n", toString(t)); + } + + @Test + public void witnessReorder() { + final SimpleWitness[] w = createWitnesses("the black cat", "the black and white cat", "the black not very special cat", "the black very special cat"); + VariantGraph variantgraph = collate(w); + final List>> table = table(variantgraph); + assertEquals("|the|black| | | |cat|", toString(table, w[0])); + assertEquals("|the|black|and|white| |cat|", toString(table, w[1])); + assertEquals("|the|black|not|very|special|cat|", toString(table, w[2])); + assertEquals("|the|black| |very|special|cat|", toString(table, w[3])); + } + + @Test + public void testSimpleSpencerHowe() { + final SimpleWitness[] w = createWitnesses("a", "b", "a b"); + final List>> table = table(collate(w)); + assertEquals(3, witnesses(table).count()); + assertEquals("|a| |", toString(table, w[0])); + assertEquals("| |b|", toString(table, w[1])); + assertEquals("|a|b|", toString(table, w[2])); + } + + @Test + public void stringOutputOneWitness() { + assertEquals("A: |the|black|cat|\n", toString(table(collate("the black cat")))); + } + + @Test + public void stringOutputTwoWitnesses() { + final List>> table = 
table(collate("the black cat", "the black cat")); + assertEquals("A: |the|black|cat|\nB: |the|black|cat|\n", toString(table)); + } + + @Test + public void stringOutputEmptyCells() { + assertEquals("A: |the|black|cat|\nB: |the| | |\n", toString(table(collate("the black cat", "the")))); + } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilderTest.java b/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilderTest.java index 2701f71cd..23fe97be2 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilderTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilderTest.java @@ -19,35 +19,33 @@ package eu.interedition.collatex.simple; -import java.io.IOException; -import java.io.InputStream; - -import javax.xml.stream.XMLStreamException; - +import eu.interedition.collatex.Token; import org.junit.Ignore; import org.junit.Test; -import eu.interedition.collatex.Token; +import javax.xml.stream.XMLStreamException; +import java.io.IOException; +import java.io.InputStream; public class SimpleWitnessTeiBuilderTest { - @Ignore - @Test - public void testTei() throws IOException, XMLStreamException { - InputStream resourceAsStream = getClass().getResourceAsStream("/matenadaran_1731.xml"); - //System.out.println(resourceAsStream.available()); - SimpleWitnessTeiBuilder builder = new SimpleWitnessTeiBuilder(); - SimpleWitness w = builder.read(resourceAsStream); - for (Token t : w) { - System.out.print(((SimpleToken) t).getContent() + " "); - } - System.out.println(); - InputStream stream1767 = getClass().getResourceAsStream("/matenadaran_1767.xml"); - //System.out.println(resourceAsStream.available()); - builder = new SimpleWitnessTeiBuilder(); - SimpleWitness w2 = builder.read(stream1767); - for (Token t : w2) { - System.out.print(((SimpleToken) t).getContent() + " "); + @Ignore + @Test + public void testTei() throws IOException, 
XMLStreamException { + InputStream resourceAsStream = getClass().getResourceAsStream("/matenadaran_1731.xml"); + //System.out.println(resourceAsStream.available()); + SimpleWitnessTeiBuilder builder = new SimpleWitnessTeiBuilder(); + SimpleWitness w = builder.read(resourceAsStream); + for (Token t : w) { + System.out.print(((SimpleToken) t).getContent() + " "); + } + System.out.println(); + InputStream stream1767 = getClass().getResourceAsStream("/matenadaran_1767.xml"); + //System.out.println(resourceAsStream.available()); + builder = new SimpleWitnessTeiBuilder(); + SimpleWitness w2 = builder.read(stream1767); + for (Token t : w2) { + System.out.print(((SimpleToken) t).getContent() + " "); + } } - } } diff --git a/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTest.java b/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTest.java index 56039e82a..1ccb96d1f 100644 --- a/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTest.java +++ b/collatex-core/src/test/java/eu/interedition/collatex/simple/SimpleWitnessTest.java @@ -26,17 +26,17 @@ public class SimpleWitnessTest extends AbstractTest { - @Test - public void normalize() { - assertNormalized("Hello", "hello"); - assertNormalized("Now!", "now"); - assertNormalized("later?", "later"); - assertNormalized("#$@!?", "#$@!?"); - assertNormalized("&", "&"); - } + @Test + public void normalize() { + assertNormalized("Hello", "hello"); + assertNormalized("Now!", "now"); + assertNormalized("later?", "later"); + assertNormalized("#$@!?", "#$@!?"); + assertNormalized("&", "&"); + } - private static void assertNormalized(String content, String expected) { - assertEquals(expected, SimpleWitness.TOKEN_NORMALIZER.apply(content)); - } + private static void assertNormalized(String content, String expected) { + assertEquals(expected, SimpleWitness.TOKEN_NORMALIZER.apply(content)); + } } diff --git a/collatex-tools/pom.xml b/collatex-tools/pom.xml index 
6e93b309a..1d0d92596 100644 --- a/collatex-tools/pom.xml +++ b/collatex-tools/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 eu.interedition @@ -11,25 +12,25 @@ CollateX Tools CollateX Tool Suite including a command line interface and a HTTP service - - eu.interedition - collatex-core - - - org.glassfish - javax.json - 1.0.4 - - - org.glassfish.grizzly - grizzly-http-server - 2.3.17 - - - commons-cli - commons-cli - 1.2 - + + eu.interedition + collatex-core + + + org.glassfish + javax.json + 1.0.4 + + + org.glassfish.grizzly + grizzly-http-server + 2.3.17 + + + commons-cli + commons-cli + 1.2 + @@ -47,10 +48,12 @@ - + eu.interedition.collatex.tools.CollateX - + diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java index e7cb61fbe..c11f73d54 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollateX.java @@ -77,291 +77,291 @@ */ public class CollateX implements AutoCloseable { - Charset inputCharset; - boolean xmlMode; - List inputResources; - List witnesses; - XPathExpression tokenXPath; - - Function> tokenizer; - Function normalizer; - Comparator comparator; - CollationAlgorithm collationAlgorithm; - VariantGraph variantGraph; - boolean joined = false; - - String outputFormat; - Charset outputCharset; - PrintWriter out; - File outFile = null; - PrintWriter log = new PrintWriter(System.err); - boolean errorOccurred = false; - - CollateX configure(CommandLine commandLine) throws XPathExpressionException, ParseException, ScriptException, IOException { - this.inputCharset = Charset.forName(commandLine.getOptionValue("ie", "UTF-8")); - this.xmlMode = commandLine.hasOption("xml"); - this.tokenXPath = XPathFactory.newInstance().newXPath().compile(commandLine.getOptionValue("xp", "//text()")); - - final String script = commandLine.getOptionValue("s"); - try { - final 
PluginScript pluginScript = (script == null - ? PluginScript.read("", new StringReader("")) - : PluginScript.read(argumentToResource(script))); - - this.tokenizer = Optional.ofNullable(pluginScript.tokenizer()).orElse(SimplePatternTokenizer.BY_WS_OR_PUNCT); - this.normalizer = Optional.ofNullable(pluginScript.normalizer()).orElse(SimpleTokenNormalizers.LC_TRIM_WS); - this.comparator = Optional.ofNullable(pluginScript.comparator()).orElse(new EqualityTokenComparator()); - } catch (IOException e) { - throw new ParseException("Failed to read script '" + script + "' - " + e.getMessage()); - } + Charset inputCharset; + boolean xmlMode; + List inputResources; + List witnesses; + XPathExpression tokenXPath; + + Function> tokenizer; + Function normalizer; + Comparator comparator; + CollationAlgorithm collationAlgorithm; + VariantGraph variantGraph; + boolean joined = false; + + String outputFormat; + Charset outputCharset; + PrintWriter out; + File outFile = null; + PrintWriter log = new PrintWriter(System.err); + boolean errorOccurred = false; + + CollateX configure(CommandLine commandLine) throws XPathExpressionException, ParseException, ScriptException, IOException { + this.inputCharset = Charset.forName(commandLine.getOptionValue("ie", "UTF-8")); + this.xmlMode = commandLine.hasOption("xml"); + this.tokenXPath = XPathFactory.newInstance().newXPath().compile(commandLine.getOptionValue("xp", "//text()")); + + final String script = commandLine.getOptionValue("s"); + try { + final PluginScript pluginScript = (script == null + ? 
PluginScript.read("", new StringReader("")) + : PluginScript.read(argumentToResource(script))); + + this.tokenizer = Optional.ofNullable(pluginScript.tokenizer()).orElse(SimplePatternTokenizer.BY_WS_OR_PUNCT); + this.normalizer = Optional.ofNullable(pluginScript.normalizer()).orElse(SimpleTokenNormalizers.LC_TRIM_WS); + this.comparator = Optional.ofNullable(pluginScript.comparator()).orElse(new EqualityTokenComparator()); + } catch (IOException e) { + throw new ParseException("Failed to read script '" + script + "' - " + e.getMessage()); + } - final String algorithm = commandLine.getOptionValue("a", "dekker").toLowerCase(); - switch (algorithm) { - case "needleman-wunsch": - this.collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(this.comparator); - break; - case "medite": - this.collationAlgorithm = CollationAlgorithmFactory.medite(this.comparator, SimpleToken.TOKEN_MATCH_EVALUATOR); - break; - case "gst": - this.collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(comparator, 2); - break; - default: - this.collationAlgorithm = CollationAlgorithmFactory.dekker(this.comparator); - break; - } + final String algorithm = commandLine.getOptionValue("a", "dekker").toLowerCase(); + switch (algorithm) { + case "needleman-wunsch": + this.collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(this.comparator); + break; + case "medite": + this.collationAlgorithm = CollationAlgorithmFactory.medite(this.comparator, SimpleToken.TOKEN_MATCH_EVALUATOR); + break; + case "gst": + this.collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(comparator, 2); + break; + default: + this.collationAlgorithm = CollationAlgorithmFactory.dekker(this.comparator); + break; + } - this.variantGraph = new VariantGraph(); + this.variantGraph = new VariantGraph(); - this.joined = !commandLine.hasOption("t"); + this.joined = !commandLine.hasOption("t"); - this.outputFormat = commandLine.getOptionValue("f", "json").toLowerCase(); + this.outputFormat = 
commandLine.getOptionValue("f", "json").toLowerCase(); - outputCharset = Charset.forName(commandLine.getOptionValue("oe", "UTF-8")); - final String output = commandLine.getOptionValue("o", "-"); - if (!"-".equals(output)) { - try { - this.outFile = new File(output); - this.out = new PrintWriter(Files.newBufferedWriter(this.outFile.toPath(), outputCharset)); - } catch (FileNotFoundException e) { - throw new ParseException("Output file '" + outFile + "' not found"); - } - } else { - this.out = new PrintWriter(new OutputStreamWriter(System.out, outputCharset)); - } + outputCharset = Charset.forName(commandLine.getOptionValue("oe", "UTF-8")); + final String output = commandLine.getOptionValue("o", "-"); + if (!"-".equals(output)) { + try { + this.outFile = new File(output); + this.out = new PrintWriter(Files.newBufferedWriter(this.outFile.toPath(), outputCharset)); + } catch (FileNotFoundException e) { + throw new ParseException("Output file '" + outFile + "' not found"); + } + } else { + this.out = new PrintWriter(new OutputStreamWriter(System.out, outputCharset)); + } - final String[] witnessSpecs = commandLine.getArgs(); - this.inputResources = new ArrayList<>(witnessSpecs.length); - for (String witnessSpec : witnessSpecs) { - inputResources.add(argumentToResource(witnessSpec)); - } - if (inputResources.size() < 1) { - throw new ParseException("No input resource(s) given"); - } + final String[] witnessSpecs = commandLine.getArgs(); + this.inputResources = new ArrayList<>(witnessSpecs.length); + for (String witnessSpec : witnessSpecs) { + inputResources.add(argumentToResource(witnessSpec)); + } + if (inputResources.size() < 1) { + throw new ParseException("No input resource(s) given"); + } - return this; - } - - CollateX read() throws IOException, XPathExpressionException, SAXException { - if (inputResources.size() < 2) { - try (InputStream inputStream = inputResources.get(0).openStream()) { - this.witnesses = JsonProcessor.read(inputStream).getWitnesses(); - } - } 
else { - this.witnesses = new ArrayList<>(inputResources.size()); - //noinspection Convert2streamapi - for (URL witnessURL : inputResources) { - this.witnesses.add(new URLWitness("w" + (witnesses.size() + 1), witnessURL) - .read(tokenizer, normalizer, inputCharset, (xmlMode ? tokenXPath : null))); - } + return this; } - return this; - } - - CollateX collate() { - new SimpleCollation(witnesses, collationAlgorithm, joined).collate(variantGraph); - return this; - } - - void write() throws IOException { - final SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(variantGraph); - if ("csv".equals(outputFormat)) { - serializer.toCsv(out); - } else if ("dot".equals(outputFormat)) { - serializer.toDot(out); - } else if ("graphml".equals(outputFormat) || "tei".equals(outputFormat)) { - XMLStreamWriter xml = null; - try { - xml = XMLOutputFactory.newInstance().createXMLStreamWriter(out); - xml.writeStartDocument(outputCharset.name(), "1.0"); - if ("graphml".equals(outputFormat)) { - serializer.toGraphML(xml); + + CollateX read() throws IOException, XPathExpressionException, SAXException { + if (inputResources.size() < 2) { + try (InputStream inputStream = inputResources.get(0).openStream()) { + this.witnesses = JsonProcessor.read(inputStream).getWitnesses(); + } } else { - serializer.toTEI(xml); + this.witnesses = new ArrayList<>(inputResources.size()); + //noinspection Convert2streamapi + for (URL witnessURL : inputResources) { + this.witnesses.add(new URLWitness("w" + (witnesses.size() + 1), witnessURL) + .read(tokenizer, normalizer, inputCharset, (xmlMode ? 
tokenXPath : null))); + } } - xml.writeEndDocument(); - } catch (XMLStreamException e) { - throw new IOException(e); - } finally { - if (xml != null) { - try { - xml.close(); - } catch (XMLStreamException e) { - // ignored - } + return this; + } + + CollateX collate() { + new SimpleCollation(witnesses, collationAlgorithm, joined).collate(variantGraph); + return this; + } + + void write() throws IOException { + final SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(variantGraph); + if ("csv".equals(outputFormat)) { + serializer.toCsv(out); + } else if ("dot".equals(outputFormat)) { + serializer.toDot(out); + } else if ("graphml".equals(outputFormat) || "tei".equals(outputFormat)) { + XMLStreamWriter xml = null; + try { + xml = XMLOutputFactory.newInstance().createXMLStreamWriter(out); + xml.writeStartDocument(outputCharset.name(), "1.0"); + if ("graphml".equals(outputFormat)) { + serializer.toGraphML(xml); + } else { + serializer.toTEI(xml); + } + xml.writeEndDocument(); + } catch (XMLStreamException e) { + throw new IOException(e); + } finally { + if (xml != null) { + try { + xml.close(); + } catch (XMLStreamException e) { + // ignored + } + } + } + } else { + JsonProcessor.write(variantGraph, out); } - } - } else { - JsonProcessor.write(variantGraph, out); } - } - CollateX serve(CommandLine commandLine) { - final CollatorService collator = new CollatorService( + CollateX serve(CommandLine commandLine) { + final CollatorService collator = new CollatorService( Integer.parseInt(commandLine.getOptionValue("mpc", "2")), Integer.parseInt(commandLine.getOptionValue("mcs", "0")), commandLine.getOptionValue("dot", null) - ); - final String staticPath = System.getProperty("collatex.static.path", ""); - final HttpHandler httpHandler = staticPath.isEmpty() ? 
new CLStaticHttpHandler(CollateX.class.getClassLoader(), "/static/") { - @Override - protected void onMissingResource(Request request, Response response) throws Exception { - collator.service(request, response); - } - } : new StaticHttpHandler(staticPath.replaceAll("/+$", "") + "/") { - @Override - protected void onMissingResource(Request request, Response response) throws Exception { - collator.service(request, response); - } - }; - - final NetworkListener httpListener = new NetworkListener("http", "0.0.0.0", Integer.parseInt(commandLine.getOptionValue("p", "7369"))); - - final CompressionConfig compressionConfig = httpListener.getCompressionConfig(); - compressionConfig.setCompressionMode(CompressionConfig.CompressionMode.ON); - compressionConfig.setCompressionMinSize(860); // http://webmasters.stackexchange.com/questions/31750/what-is-recommended-minimum-object-size-for-gzip-performance-benefits - compressionConfig.setCompressableMimeTypes("application/javascript", "application/json", "application/xml", "text/css", "text/html", "text/javascript", "text/plain", "text/xml"); - - final HttpServer httpServer = new HttpServer(); - httpServer.addListener(httpListener); - httpServer.getServerConfiguration().addHttpHandler(httpHandler, commandLine.getOptionValue("cp", "").replaceAll("/+$", "") + "/*"); - - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - if (LOG.isLoggable(Level.INFO)) { - LOG.info("Stopping HTTP server"); - } - httpServer.shutdown(); - })); - - try { - httpServer.start(); - Thread.sleep(Long.MAX_VALUE); - } catch (IOException | InterruptedException e) { - error(e.getMessage(), e); + ); + final String staticPath = System.getProperty("collatex.static.path", ""); + final HttpHandler httpHandler = staticPath.isEmpty() ? 
new CLStaticHttpHandler(CollateX.class.getClassLoader(), "/static/") { + @Override + protected void onMissingResource(Request request, Response response) throws Exception { + collator.service(request, response); + } + } : new StaticHttpHandler(staticPath.replaceAll("/+$", "") + "/") { + @Override + protected void onMissingResource(Request request, Response response) throws Exception { + collator.service(request, response); + } + }; + + final NetworkListener httpListener = new NetworkListener("http", "0.0.0.0", Integer.parseInt(commandLine.getOptionValue("p", "7369"))); + + final CompressionConfig compressionConfig = httpListener.getCompressionConfig(); + compressionConfig.setCompressionMode(CompressionConfig.CompressionMode.ON); + compressionConfig.setCompressionMinSize(860); // http://webmasters.stackexchange.com/questions/31750/what-is-recommended-minimum-object-size-for-gzip-performance-benefits + compressionConfig.setCompressableMimeTypes("application/javascript", "application/json", "application/xml", "text/css", "text/html", "text/javascript", "text/plain", "text/xml"); + + final HttpServer httpServer = new HttpServer(); + httpServer.addListener(httpListener); + httpServer.getServerConfiguration().addHttpHandler(httpHandler, commandLine.getOptionValue("cp", "").replaceAll("/+$", "") + "/*"); + + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + if (LOG.isLoggable(Level.INFO)) { + LOG.info("Stopping HTTP server"); + } + httpServer.shutdown(); + })); + + try { + httpServer.start(); + Thread.sleep(Long.MAX_VALUE); + } catch (IOException | InterruptedException e) { + error(e.getMessage(), e); + } + return this; } - return this; - } - - CollateX log(String str) { - log.write(str); - return this; - } - - void error(String str, Throwable t) { - errorOccurred = true; - log(str).log("\n").log(t.getMessage()).log("\n"); - } - - void help() { - new HelpFormatter().printHelp(log, 78, "collatex []\n ( | [[] ...])", "", OPTIONS, 2, 4, ""); - } - - URL 
argumentToResource(String arg) throws ParseException { - try { - final File witnessFile = new File(arg); - if (witnessFile.exists()) { - return witnessFile.toURI().normalize().toURL(); - } else { - return new URL(arg); - } - } catch (MalformedURLException urlEx) { - throw new ParseException("Invalid resource: " + arg); + + CollateX log(String str) { + log.write(str); + return this; + } + + void error(String str, Throwable t) { + errorOccurred = true; + log(str).log("\n").log(t.getMessage()).log("\n"); + } + + void help() { + new HelpFormatter().printHelp(log, 78, "collatex []\n ( | [[] ...])", "", OPTIONS, 2, 4, ""); + } + + URL argumentToResource(String arg) throws ParseException { + try { + final File witnessFile = new File(arg); + if (witnessFile.exists()) { + return witnessFile.toURI().normalize().toURL(); + } else { + return new URL(arg); + } + } catch (MalformedURLException urlEx) { + throw new ParseException("Invalid resource: " + arg); + } } - } - - public static void main(String... args) { - final CollateX engine = new CollateX(); - try { - final CommandLine commandLine = new GnuParser().parse(OPTIONS, args); - if (commandLine.hasOption("h")) { - engine.help(); - return; - } - if (commandLine.hasOption("srv")) { - engine.serve(commandLine); - } else { - engine.configure(commandLine).read().collate().write(); - } - } catch (ParseException e) { - engine.error("Error while parsing command line arguments", e); - engine.log("\n").help(); - } catch (IllegalArgumentException e) { - engine.error("Illegal argument", e); - } catch (IOException e) { - engine.error("I/O error", e); - } catch (SAXException e) { - engine.error("XML error", e); - } catch (XPathExpressionException e) { - engine.error("XPath error", e); - } catch (ScriptException | PluginScript.PluginScriptExecutionException e) { - engine.error("Script error", e); - } finally { + + public static void main(String... 
args) { + final CollateX engine = new CollateX(); try { - engine.close(); - } catch (IOException ignored) { - // ignored + final CommandLine commandLine = new GnuParser().parse(OPTIONS, args); + if (commandLine.hasOption("h")) { + engine.help(); + return; + } + if (commandLine.hasOption("srv")) { + engine.serve(commandLine); + } else { + engine.configure(commandLine).read().collate().write(); + } + } catch (ParseException e) { + engine.error("Error while parsing command line arguments", e); + engine.log("\n").help(); + } catch (IllegalArgumentException e) { + engine.error("Illegal argument", e); + } catch (IOException e) { + engine.error("I/O error", e); + } catch (SAXException e) { + engine.error("XML error", e); + } catch (XPathExpressionException e) { + engine.error("XPath error", e); + } catch (ScriptException | PluginScript.PluginScriptExecutionException e) { + engine.error("Script error", e); + } finally { + try { + engine.close(); + } catch (IOException ignored) { + // ignored + } } } - } - - static final Logger LOG = Logger.getLogger(CollateX.class.getName()); - static final Options OPTIONS = new Options(); - - static { - OPTIONS.addOption("h", "help", false, "print usage instructions"); - - OPTIONS.addOption("o", "output", true, "output file; '-' for standard output (default)"); - OPTIONS.addOption("ie", "input-encoding", true, "charset to use for decoding non-XML witnesses; default: UTF-8"); - OPTIONS.addOption("oe", "output-encoding", true, "charset to use for encoding the output; default: UTF-8"); - OPTIONS.addOption("xml", "xml-mode", false, "witnesses are treated as XML documents"); - OPTIONS.addOption("xp", "xpath", true, "XPath 1.0 expression evaluating to tokens of XML witnesses; default: '//text()'"); - OPTIONS.addOption("a", "algorithm", true, "progressive alignment algorithm to use 'dekker' (default), 'medite', 'needleman-wunsch'"); - OPTIONS.addOption("t", "tokenized", false, "consecutive matches of tokens will *not* be joined to segments"); - 
OPTIONS.addOption("f", "format", true, "result/output format: 'json', 'csv', 'dot', 'graphml', 'tei'"); - OPTIONS.addOption("s", "script", true, "ECMA/JavaScript resource with functions to be plugged into the alignment algorithm"); - - OPTIONS.addOption("srv", "server", false, "start RESTful HTTP server"); - OPTIONS.addOption("cp", "context-path", true, "URL base/context path of the service, default: '/'"); - OPTIONS.addOption("dot", "dot-path", true, "path to Graphviz 'dot', auto-detected by default"); - OPTIONS.addOption("p", "port", true, "HTTP port to bind server to, default: 7369"); - OPTIONS.addOption("mpc", "max-parallel-collations", true, "maximum number of collations to perform in parallel, default: 2"); - OPTIONS.addOption("mcs", "max-collation-size", true, "maximum number of characters (counted over all witnesses) to perform collations on, default: unlimited"); - - } - - @Override - public void close() throws IOException { - try { - for (PrintWriter writer : new PrintWriter[] { out, log }) { - writer.close(); - } - } finally { - if (errorOccurred && (outFile != null) && outFile.isFile()) { - //noinspection ResultOfMethodCallIgnored - outFile.delete(); - } + + static final Logger LOG = Logger.getLogger(CollateX.class.getName()); + static final Options OPTIONS = new Options(); + + static { + OPTIONS.addOption("h", "help", false, "print usage instructions"); + + OPTIONS.addOption("o", "output", true, "output file; '-' for standard output (default)"); + OPTIONS.addOption("ie", "input-encoding", true, "charset to use for decoding non-XML witnesses; default: UTF-8"); + OPTIONS.addOption("oe", "output-encoding", true, "charset to use for encoding the output; default: UTF-8"); + OPTIONS.addOption("xml", "xml-mode", false, "witnesses are treated as XML documents"); + OPTIONS.addOption("xp", "xpath", true, "XPath 1.0 expression evaluating to tokens of XML witnesses; default: '//text()'"); + OPTIONS.addOption("a", "algorithm", true, "progressive alignment algorithm 
to use 'dekker' (default), 'medite', 'needleman-wunsch'"); + OPTIONS.addOption("t", "tokenized", false, "consecutive matches of tokens will *not* be joined to segments"); + OPTIONS.addOption("f", "format", true, "result/output format: 'json', 'csv', 'dot', 'graphml', 'tei'"); + OPTIONS.addOption("s", "script", true, "ECMA/JavaScript resource with functions to be plugged into the alignment algorithm"); + + OPTIONS.addOption("srv", "server", false, "start RESTful HTTP server"); + OPTIONS.addOption("cp", "context-path", true, "URL base/context path of the service, default: '/'"); + OPTIONS.addOption("dot", "dot-path", true, "path to Graphviz 'dot', auto-detected by default"); + OPTIONS.addOption("p", "port", true, "HTTP port to bind server to, default: 7369"); + OPTIONS.addOption("mpc", "max-parallel-collations", true, "maximum number of collations to perform in parallel, default: 2"); + OPTIONS.addOption("mcs", "max-collation-size", true, "maximum number of characters (counted over all witnesses) to perform collations on, default: unlimited"); + + } + + @Override + public void close() throws IOException { + try { + for (PrintWriter writer : new PrintWriter[]{out, log}) { + writer.close(); + } + } finally { + if (errorOccurred && (outFile != null) && outFile.isFile()) { + //noinspection ResultOfMethodCallIgnored + outFile.delete(); + } + } } - } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java index c1e42b877..c791221c1 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/CollatorService.java @@ -39,179 +39,179 @@ */ public class CollatorService { - private final int maxCollationSize; - private final String dotPath; - private final ExecutorService collationThreads; - private final ExecutorService processThreads = 
Executors.newCachedThreadPool(); - - public CollatorService(int maxParallelCollations, int maxCollationSize, String dotPath) { - this.collationThreads = Executors.newFixedThreadPool(maxParallelCollations, new ThreadFactory() { - private final AtomicLong counter = new AtomicLong(); - - @Override - public Thread newThread(Runnable r) { - final Thread t = new Thread(r, "collator-" + counter.incrementAndGet()); - t.setDaemon(true); - t.setPriority(Thread.MIN_PRIORITY); - return t; - } - }); - - this.maxCollationSize = maxCollationSize; - this.dotPath = dotPath; - } - - public void service(Request request, Response response) throws Exception { - final Deque path = path(request); - if (path.isEmpty() || !"collate".equals(path.pop())) { - response.sendError(404); - return; + private final int maxCollationSize; + private final String dotPath; + private final ExecutorService collationThreads; + private final ExecutorService processThreads = Executors.newCachedThreadPool(); + + public CollatorService(int maxParallelCollations, int maxCollationSize, String dotPath) { + this.collationThreads = Executors.newFixedThreadPool(maxParallelCollations, new ThreadFactory() { + private final AtomicLong counter = new AtomicLong(); + + @Override + public Thread newThread(Runnable r) { + final Thread t = new Thread(r, "collator-" + counter.incrementAndGet()); + t.setDaemon(true); + t.setPriority(Thread.MIN_PRIORITY); + return t; + } + }); + + this.maxCollationSize = maxCollationSize; + this.dotPath = dotPath; } - final SimpleCollation collation = JsonProcessor.read(request.getInputStream()); - if (maxCollationSize > 0) { - for (SimpleWitness witness : collation.getWitnesses()) { - final int witnessLength = witness.getTokens().stream() - .filter(t -> t instanceof SimpleToken).map(t -> (SimpleToken) t) - .collect(Collectors.summingInt(t -> t.getContent().length())); - if (witnessLength > maxCollationSize) { - response.sendError(413, "Request Entity Too Large"); - return; + public void 
service(Request request, Response response) throws Exception { + final Deque path = path(request); + if (path.isEmpty() || !"collate".equals(path.pop())) { + response.sendError(404); + return; } - } - } - response.suspend(60, TimeUnit.SECONDS, new EmptyCompletionHandler<>()); - collationThreads.submit(() -> { - try { - final VariantGraph graph = new VariantGraph(); - collation.collate(graph); - - // CORS support - response.setHeader("Access-Control-Allow-Origin", Optional.ofNullable(request.getHeader("Origin")).orElse("*")); - response.setHeader("Access-Control-Allow-Methods", Optional.ofNullable(request.getHeader("Access-Control-Request-Method")).orElse("GET, POST, HEAD, OPTIONS")); - response.setHeader("Access-Control-Allow-Headers", Optional.ofNullable(request.getHeader("Access-Control-Request-Headers")).orElse("Content-Type, Accept, X-Requested-With")); - response.setHeader("Access-Control-Max-Age", "86400"); - response.setHeader("Access-Control-Allow-Credentials", "true"); - - final String clientAccepts = Optional.ofNullable(request.getHeader(Header.Accept)).orElse(""); - - if (clientAccepts.contains("text/plain")) { - response.setContentType("text/plain"); - response.setCharacterEncoding("utf-8"); - try (final Writer out = response.getWriter()) { - new SimpleVariantGraphSerializer(graph).toDot(out); - } - response.resume(); - - } else if (clientAccepts.contains("application/tei+xml")) { - XMLStreamWriter xml = null; - try { - response.setContentType("application/tei+xml"); - try (OutputStream responseStream = response.getOutputStream()) { - xml = XMLOutputFactory.newInstance().createXMLStreamWriter(responseStream); - xml.writeStartDocument(); - new SimpleVariantGraphSerializer(graph).toTEI(xml); - xml.writeEndDocument(); - } finally { - if (xml != null) { - xml.close(); - } + final SimpleCollation collation = JsonProcessor.read(request.getInputStream()); + if (maxCollationSize > 0) { + for (SimpleWitness witness : collation.getWitnesses()) { + final int 
witnessLength = witness.getTokens().stream() + .filter(t -> t instanceof SimpleToken).map(t -> (SimpleToken) t) + .collect(Collectors.summingInt(t -> t.getContent().length())); + if (witnessLength > maxCollationSize) { + response.sendError(413, "Request Entity Too Large"); + return; + } } - response.resume(); - } catch (XMLStreamException e) { - e.printStackTrace(); - } - } else if (clientAccepts.contains("application/graphml+xml")) { - XMLStreamWriter xml = null; - try { - response.setContentType("application/graphml+xml"); - try (OutputStream responseStream = response.getOutputStream()) { - xml = XMLOutputFactory.newInstance().createXMLStreamWriter(responseStream); - xml.writeStartDocument(); - new SimpleVariantGraphSerializer(graph).toGraphML(xml); - xml.writeEndDocument(); - } finally { - if (xml != null) { - xml.close(); - } - } - response.resume(); - } catch (XMLStreamException e) { - e.printStackTrace(); - } - } else if (clientAccepts.contains("image/svg+xml")) { - if (dotPath == null) { - response.sendError(204); - response.resume(); - } else { - final StringWriter dot = new StringWriter(); - new SimpleVariantGraphSerializer(graph).toDot(dot); - - final Process dotProc = new ProcessBuilder(dotPath, "-Grankdir=LR", "-Gid=VariantGraph", "-Tsvg").start(); - final StringWriter errors = new StringWriter(); - CompletableFuture.allOf( - CompletableFuture.runAsync(() -> { - final char[] buf = new char[8192]; - try (final Reader errorStream = new InputStreamReader(dotProc.getErrorStream())) { - int len; - while ((len = errorStream.read(buf)) >= 0) { - errors.write(buf, 0, len); - } - } catch (IOException e) { - throw new CompletionException(e); - } - }, processThreads), - CompletableFuture.runAsync(() -> { - try (final Writer dotProcStream = new OutputStreamWriter(dotProc.getOutputStream(), "UTF-8")) { - dotProcStream.write(dot.toString()); - } catch (IOException e) { - throw new CompletionException(e); - } - }, processThreads), - CompletableFuture.runAsync(() -> { 
- response.setContentType("image/svg+xml"); - final byte[] buf = new byte[8192]; - try (final InputStream in = dotProc.getInputStream(); final OutputStream out = response.getOutputStream()) { - int len; - while ((len = in.read(buf)) >= 0) { - out.write(buf, 0, len); + } + + response.suspend(60, TimeUnit.SECONDS, new EmptyCompletionHandler<>()); + collationThreads.submit(() -> { + try { + final VariantGraph graph = new VariantGraph(); + collation.collate(graph); + + // CORS support + response.setHeader("Access-Control-Allow-Origin", Optional.ofNullable(request.getHeader("Origin")).orElse("*")); + response.setHeader("Access-Control-Allow-Methods", Optional.ofNullable(request.getHeader("Access-Control-Request-Method")).orElse("GET, POST, HEAD, OPTIONS")); + response.setHeader("Access-Control-Allow-Headers", Optional.ofNullable(request.getHeader("Access-Control-Request-Headers")).orElse("Content-Type, Accept, X-Requested-With")); + response.setHeader("Access-Control-Max-Age", "86400"); + response.setHeader("Access-Control-Allow-Credentials", "true"); + + final String clientAccepts = Optional.ofNullable(request.getHeader(Header.Accept)).orElse(""); + + if (clientAccepts.contains("text/plain")) { + response.setContentType("text/plain"); + response.setCharacterEncoding("utf-8"); + try (final Writer out = response.getWriter()) { + new SimpleVariantGraphSerializer(graph).toDot(out); + } + response.resume(); + + } else if (clientAccepts.contains("application/tei+xml")) { + XMLStreamWriter xml = null; + try { + response.setContentType("application/tei+xml"); + try (OutputStream responseStream = response.getOutputStream()) { + xml = XMLOutputFactory.newInstance().createXMLStreamWriter(responseStream); + xml.writeStartDocument(); + new SimpleVariantGraphSerializer(graph).toTEI(xml); + xml.writeEndDocument(); + } finally { + if (xml != null) { + xml.close(); + } } - } catch (IOException e) { - throw new CompletionException(e); - } - }, processThreads), - 
CompletableFuture.runAsync(() -> { - try { - if (dotProc.waitFor() != 0) { - throw new CompletionException(new IllegalStateException(errors.toString())); + response.resume(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } + } else if (clientAccepts.contains("application/graphml+xml")) { + XMLStreamWriter xml = null; + try { + response.setContentType("application/graphml+xml"); + try (OutputStream responseStream = response.getOutputStream()) { + xml = XMLOutputFactory.newInstance().createXMLStreamWriter(responseStream); + xml.writeStartDocument(); + new SimpleVariantGraphSerializer(graph).toGraphML(xml); + xml.writeEndDocument(); + } finally { + if (xml != null) { + xml.close(); + } } - } catch (InterruptedException e) { - throw new CompletionException(e); - } - }, processThreads) - ).exceptionally(t -> { - t.printStackTrace(); - return null; - }).thenRunAsync(response::resume, processThreads); - } - } else { - response.setContentType("application/json"); - try (final OutputStream responseStream = response.getOutputStream()) { - JsonProcessor.write(graph, responseStream); - } - response.resume(); - } - } catch (IOException e) { - // FIXME: ignored - } - }); - } - - private static Deque path(Request request) { - return Pattern.compile("/+").splitAsStream(Optional.ofNullable(request.getPathInfo()).orElse("")) + response.resume(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } + } else if (clientAccepts.contains("image/svg+xml")) { + if (dotPath == null) { + response.sendError(204); + response.resume(); + } else { + final StringWriter dot = new StringWriter(); + new SimpleVariantGraphSerializer(graph).toDot(dot); + + final Process dotProc = new ProcessBuilder(dotPath, "-Grankdir=LR", "-Gid=VariantGraph", "-Tsvg").start(); + final StringWriter errors = new StringWriter(); + CompletableFuture.allOf( + CompletableFuture.runAsync(() -> { + final char[] buf = new char[8192]; + try (final Reader errorStream = new 
InputStreamReader(dotProc.getErrorStream())) { + int len; + while ((len = errorStream.read(buf)) >= 0) { + errors.write(buf, 0, len); + } + } catch (IOException e) { + throw new CompletionException(e); + } + }, processThreads), + CompletableFuture.runAsync(() -> { + try (final Writer dotProcStream = new OutputStreamWriter(dotProc.getOutputStream(), "UTF-8")) { + dotProcStream.write(dot.toString()); + } catch (IOException e) { + throw new CompletionException(e); + } + }, processThreads), + CompletableFuture.runAsync(() -> { + response.setContentType("image/svg+xml"); + final byte[] buf = new byte[8192]; + try (final InputStream in = dotProc.getInputStream(); final OutputStream out = response.getOutputStream()) { + int len; + while ((len = in.read(buf)) >= 0) { + out.write(buf, 0, len); + } + } catch (IOException e) { + throw new CompletionException(e); + } + }, processThreads), + CompletableFuture.runAsync(() -> { + try { + if (dotProc.waitFor() != 0) { + throw new CompletionException(new IllegalStateException(errors.toString())); + } + } catch (InterruptedException e) { + throw new CompletionException(e); + } + }, processThreads) + ).exceptionally(t -> { + t.printStackTrace(); + return null; + }).thenRunAsync(response::resume, processThreads); + } + } else { + response.setContentType("application/json"); + try (final OutputStream responseStream = response.getOutputStream()) { + JsonProcessor.write(graph, responseStream); + } + response.resume(); + } + } catch (IOException e) { + // FIXME: ignored + } + }); + } + + private static Deque path(Request request) { + return Pattern.compile("/+").splitAsStream(Optional.ofNullable(request.getPathInfo()).orElse("")) .filter(s -> !s.isEmpty()) .collect(Collectors.toCollection(ArrayDeque::new)); - } + } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java index f087f0c96..571165a94 100644 --- 
a/collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/JsonProcessor.java @@ -40,223 +40,224 @@ */ public class JsonProcessor { - public static SimpleCollation read(InputStream inputStream) throws IOException { - try (JsonReader reader = Json.createReader(inputStream)) { - final JsonStructure collationNode = reader.read(); - if (collationNode.getValueType() != JsonValue.ValueType.OBJECT) { - throw new IOException("Expecting JSON object"); - } - final JsonObject collationObject = (JsonObject) collationNode; - - JsonArray witnessesNode; - try { - witnessesNode = collationObject.getJsonArray("witnesses"); - } catch (ClassCastException e) { - throw new IOException("Expecting 'witnesses' array"); - } - - final List witnesses = new ArrayList<>(witnessesNode.size()); - - for (JsonValue witnessNode : witnessesNode) { - if (witnessNode.getValueType() != JsonValue.ValueType.OBJECT) { - throw new IOException("Expecting witness object"); - } - final JsonObject witnessObject = (JsonObject) witnessNode; + public static SimpleCollation read(InputStream inputStream) throws IOException { + try (JsonReader reader = Json.createReader(inputStream)) { + final JsonStructure collationNode = reader.read(); + if (collationNode.getValueType() != JsonValue.ValueType.OBJECT) { + throw new IOException("Expecting JSON object"); + } + final JsonObject collationObject = (JsonObject) collationNode; - final String witnessId; - try { - witnessId = witnessObject.getString("id").trim(); - } catch (ClassCastException e) { - throw new IOException("Expected textual witness 'id'"); - } - if (witnessId.length() == 0) { - throw new IOException("Empty witness 'id' encountered"); - } + JsonArray witnessesNode; + try { + witnessesNode = collationObject.getJsonArray("witnesses"); + } catch (ClassCastException e) { + throw new IOException("Expecting 'witnesses' array"); + } - final SimpleWitness witness = new 
SimpleWitness(witnessId); - if (witnesses.contains(witness)) { - throw new IOException(String.format("Duplicate sigil for witness '%s", witness)); - } + final List witnesses = new ArrayList<>(witnessesNode.size()); - final JsonValue contentNode = witnessObject.get("content"); - final JsonValue tokensNode = witnessObject.get("tokens"); - if (contentNode == null && tokensNode == null) { - throw new IOException(String.format("Expected either 'tokens' or 'content' field in witness \"%s\"", witness)); - } + for (JsonValue witnessNode : witnessesNode) { + if (witnessNode.getValueType() != JsonValue.ValueType.OBJECT) { + throw new IOException("Expecting witness object"); + } + final JsonObject witnessObject = (JsonObject) witnessNode; + + final String witnessId; + try { + witnessId = witnessObject.getString("id").trim(); + } catch (ClassCastException e) { + throw new IOException("Expected textual witness 'id'"); + } + if (witnessId.length() == 0) { + throw new IOException("Empty witness 'id' encountered"); + } + + final SimpleWitness witness = new SimpleWitness(witnessId); + if (witnesses.contains(witness)) { + throw new IOException(String.format("Duplicate sigil for witness '%s", witness)); + } + + final JsonValue contentNode = witnessObject.get("content"); + final JsonValue tokensNode = witnessObject.get("tokens"); + if (contentNode == null && tokensNode == null) { + throw new IOException(String.format("Expected either 'tokens' or 'content' field in witness \"%s\"", witness)); + } + + if (tokensNode != null) { + if (tokensNode.getValueType() != JsonValue.ValueType.ARRAY) { + throw new IOException(String.format("Expected 'tokens' array in witness \"%s\"", witness)); + } + final JsonArray tokensArray = (JsonArray) tokensNode; + final List tokens = new ArrayList<>(tokensArray.size()); + for (JsonValue tokenNode : tokensArray) { + if (tokenNode.getValueType() != JsonValue.ValueType.OBJECT) { + throw new IOException(String.format("Expected token object in 'tokens' field in 
witness \"%s\"", witness)); + } + final JsonObject tokenObject = (JsonObject) tokenNode; + String tokenContent; + try { + tokenContent = tokenObject.getString("t"); + } catch (ClassCastException | NullPointerException e) { + throw new IOException(String.format("Expected textual token content field 't' in witness \"%s\"", witness)); + } + + String normalizedTokenContent; + if (tokenObject.containsKey("n")) { + try { + normalizedTokenContent = tokenObject.getString("n"); + } catch (ClassCastException e) { + throw new IOException(String.format("Expected textual normalized token content in witness \"%s\"", witness)); + } + } else { + normalizedTokenContent = SimpleWitness.TOKEN_NORMALIZER.apply(tokenContent); + } + + if (normalizedTokenContent == null || normalizedTokenContent.length() == 0) { + throw new IOException(String.format("Empty token encountered in witness \"%s\"", witness)); + } - if (tokensNode != null) { - if (tokensNode.getValueType() != JsonValue.ValueType.ARRAY) { - throw new IOException(String.format("Expected 'tokens' array in witness \"%s\"", witness)); - } - final JsonArray tokensArray = (JsonArray) tokensNode; - final List tokens = new ArrayList<>(tokensArray.size()); - for (JsonValue tokenNode : tokensArray) { - if (tokenNode.getValueType() != JsonValue.ValueType.OBJECT) { - throw new IOException(String.format("Expected token object in 'tokens' field in witness \"%s\"", witness)); + tokens.add(new Token(witness, tokenContent, normalizedTokenContent, tokenObject)); + } + witness.setTokens(tokens); + } else { + if (contentNode.getValueType() != JsonValue.ValueType.STRING) { + throw new IOException(String.format("Expected 'content' text field in witness \"%s\"", witness)); + } + witness.setTokenContents( + SimplePatternTokenizer.BY_WS_OR_PUNCT.apply(((JsonString) contentNode).getString()), + SimpleTokenNormalizers.LC_TRIM_WS + ); + } + witnesses.add(witness); } - final JsonObject tokenObject = (JsonObject) tokenNode; - String tokenContent; - try { - 
tokenContent = tokenObject.getString("t"); - } catch (ClassCastException | NullPointerException e) { - throw new IOException(String.format("Expected textual token content field 't' in witness \"%s\"", witness)); + + if (witnesses.isEmpty()) { + throw new IOException("No witnesses in collation"); } - String normalizedTokenContent; - if (tokenObject.containsKey("n")) { - try { - normalizedTokenContent = tokenObject.getString("n"); - } catch (ClassCastException e) { - throw new IOException(String.format("Expected textual normalized token content in witness \"%s\"", witness)); - } - } else { - normalizedTokenContent = SimpleWitness.TOKEN_NORMALIZER.apply(tokenContent); + Comparator tokenComparator = null; + final JsonValue tokenComparatorNode = collationObject.get("tokenComparator"); + if (tokenComparatorNode.getValueType() == JsonValue.ValueType.OBJECT) { + final JsonObject tokenComparatorObject = (JsonObject) tokenComparatorNode; + try { + if ("levenshtein".equals(tokenComparatorObject.getString("type"))) { + final int configuredDistance = tokenComparatorObject.getInt("distance", 0); + tokenComparator = new EditDistanceTokenComparator(configuredDistance == 0 ? 
1 : configuredDistance); + } + } catch (ClassCastException e) { + // ignored + } + } + if (tokenComparator == null) { + tokenComparator = new EqualityTokenComparator(); } - if (normalizedTokenContent == null || normalizedTokenContent.length() == 0) { - throw new IOException(String.format("Empty token encountered in witness \"%s\"", witness)); + CollationAlgorithm collationAlgorithm = null; + final JsonValue collationAlgorithmNode = collationObject.get("algorithm"); + if (collationAlgorithmNode.getValueType() == JsonValue.ValueType.STRING) { + final String collationAlgorithmValue = ((JsonString) collationAlgorithmNode).getString(); + if ("needleman-wunsch".equalsIgnoreCase(collationAlgorithmValue)) { + collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); + } else if ("gst".equalsIgnoreCase(collationAlgorithmValue)) { + collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); + } else if ("medite".equalsIgnoreCase(collationAlgorithmValue)) { + collationAlgorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); + } + } + if (collationAlgorithm == null) { + collationAlgorithm = CollationAlgorithmFactory.dekker(tokenComparator); } - tokens.add(new Token(witness, tokenContent, normalizedTokenContent, tokenObject)); - } - witness.setTokens(tokens); - } else { - if (contentNode.getValueType() != JsonValue.ValueType.STRING) { - throw new IOException(String.format("Expected 'content' text field in witness \"%s\"", witness)); - } - witness.setTokenContents( - SimplePatternTokenizer.BY_WS_OR_PUNCT.apply(((JsonString) contentNode).getString()), - SimpleTokenNormalizers.LC_TRIM_WS - ); - } - witnesses.add(witness); - } - - if (witnesses.isEmpty()) { - throw new IOException("No witnesses in collation"); - } - - Comparator tokenComparator = null; - final JsonValue tokenComparatorNode = collationObject.get("tokenComparator"); - if (tokenComparatorNode.getValueType() == 
JsonValue.ValueType.OBJECT) { - final JsonObject tokenComparatorObject = (JsonObject) tokenComparatorNode; - try { - if ("levenshtein".equals(tokenComparatorObject.getString("type"))) { - final int configuredDistance = tokenComparatorObject.getInt("distance", 0); - tokenComparator = new EditDistanceTokenComparator(configuredDistance == 0 ? 1 : configuredDistance); - } - } catch (ClassCastException e) { - // ignored - } - } - if (tokenComparator == null) { - tokenComparator = new EqualityTokenComparator(); - } - - CollationAlgorithm collationAlgorithm = null; - final JsonValue collationAlgorithmNode = collationObject.get("algorithm"); - if (collationAlgorithmNode.getValueType() == JsonValue.ValueType.STRING) { - final String collationAlgorithmValue = ((JsonString) collationAlgorithmNode).getString(); - if ("needleman-wunsch".equalsIgnoreCase(collationAlgorithmValue)) { - collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); - } else if ("gst".equalsIgnoreCase(collationAlgorithmValue)) { - collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); - } else if ("medite".equalsIgnoreCase(collationAlgorithmValue)) { - collationAlgorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); - } - } - if (collationAlgorithm == null) { - collationAlgorithm = CollationAlgorithmFactory.dekker(tokenComparator); - } - - boolean joined = true; - try { - joined = collationObject.getBoolean("joined", true); - } catch (ClassCastException e) { - // ignored - } - - if (collationAlgorithm instanceof DekkerAlgorithm) { - boolean mergeTranspositions = true; - try { - mergeTranspositions = collationObject.getBoolean("transpositions", true); - } catch (ClassCastException e) { - // ignored + boolean joined = true; + try { + joined = collationObject.getBoolean("joined", true); + } catch (ClassCastException e) { + // ignored + } + + if (collationAlgorithm instanceof DekkerAlgorithm) { + boolean 
mergeTranspositions = true; + try { + mergeTranspositions = collationObject.getBoolean("transpositions", true); + } catch (ClassCastException e) { + // ignored + } + ((DekkerAlgorithm) collationAlgorithm).setMergeTranspositions(mergeTranspositions); + } + return new SimpleCollation(witnesses, collationAlgorithm, joined); } - ((DekkerAlgorithm) collationAlgorithm).setMergeTranspositions(mergeTranspositions); - } - return new SimpleCollation(witnesses, collationAlgorithm, joined); } - } - public static void write(VariantGraph graph, OutputStream outputStream) throws IOException { - try (final JsonGenerator jgen = Json.createGenerator(outputStream)) { - write(jgen, graph); + public static void write(VariantGraph graph, OutputStream outputStream) throws IOException { + try (final JsonGenerator jgen = Json.createGenerator(outputStream)) { + write(jgen, graph); + } } - } - public static void write(VariantGraph graph, PrintWriter writer) throws IOException { - try (final JsonGenerator jgen = Json.createGenerator(writer)) { - write(jgen, graph); + public static void write(VariantGraph graph, PrintWriter writer) throws IOException { + try (final JsonGenerator jgen = Json.createGenerator(writer)) { + write(jgen, graph); + } } - } - - protected static void write(JsonGenerator jgen, VariantGraph graph) { - ParallelSegmentationApparatus.generate(VariantGraphRanking.of(graph), new ParallelSegmentationApparatus.GeneratorCallback() { - @Override - public void start() { - jgen.writeStartObject(); - - jgen.writeStartArray("witnesses"); - graph.witnesses().stream().sorted(Witness.SIGIL_COMPARATOR).map(Witness::getSigil).forEach(jgen::write); - jgen.writeEnd(); - - - jgen.writeStartArray("table"); - } - - @Override - public void segment(SortedMap> contents) { - jgen.writeStartArray(); - contents.values().stream().forEach(tokens -> { - jgen.writeStartArray(); - StreamSupport.stream(Spliterators.spliteratorUnknownSize(tokens.iterator(), Spliterator.NONNULL | Spliterator.IMMUTABLE), 
false) - .filter(t -> t instanceof SimpleToken) - .map(t -> (SimpleToken) t) - .sorted() - .forEach(t -> { - if (t instanceof Token) { - jgen.write(((Token) t).getJsonNode()); - } else { - jgen.write(t.getContent()); - } - }); - jgen.writeEnd(); + + protected static void write(JsonGenerator jgen, VariantGraph graph) { + ParallelSegmentationApparatus.generate(VariantGraphRanking.of(graph), new ParallelSegmentationApparatus.GeneratorCallback() { + @Override + public void start() { + jgen.writeStartObject(); + + jgen.writeStartArray("witnesses"); + graph.witnesses().stream().sorted(Witness.SIGIL_COMPARATOR).map(Witness::getSigil).forEach(jgen::write); + jgen.writeEnd(); + + + jgen.writeStartArray("table"); + } + + @Override + public void segment(SortedMap> contents) { + jgen.writeStartArray(); + contents.values().stream().forEach(tokens -> { + jgen.writeStartArray(); + StreamSupport.stream(Spliterators.spliteratorUnknownSize(tokens.iterator(), Spliterator.NONNULL | Spliterator.IMMUTABLE), false) + .filter(t -> t instanceof SimpleToken) + .map(t -> (SimpleToken) t) + .sorted() + .forEach(t -> { + if (t instanceof Token) { + jgen.write(((Token) t).getJsonNode()); + } else { + jgen.write(t.getContent()); + } + }); + jgen.writeEnd(); + }); + jgen.writeEnd(); + } + + @Override + public void end() { + jgen.writeEnd(); + jgen.writeEnd(); + } }); - jgen.writeEnd(); - } - - @Override - public void end() { - jgen.writeEnd(); - jgen.writeEnd(); - } - }); - } - public static class Token extends SimpleToken { - - private final JsonObject jsonNode; - - public Token(SimpleWitness witness, String content, String normalized, JsonObject jsonNode) { - super(witness, content, normalized); - this.jsonNode = jsonNode; } - public JsonObject getJsonNode() { - return jsonNode; + public static class Token extends SimpleToken { + + private final JsonObject jsonNode; + + public Token(SimpleWitness witness, String content, String normalized, JsonObject jsonNode) { + super(witness, content, 
normalized); + this.jsonNode = jsonNode; + } + + public JsonObject getJsonNode() { + return jsonNode; + } } - } - private JsonProcessor() { - } + private JsonProcessor() { + } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java index 6f1ffcc19..50b856e15 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/NodeToken.java @@ -27,10 +27,10 @@ * @author Gregor Middell */ public class NodeToken extends SimpleToken { - final Node node; + final Node node; - public NodeToken(SimpleWitness witness, String content, String normalized, Node node) { - super(witness, content, normalized); - this.node = node; - } + public NodeToken(SimpleWitness witness, String content, String normalized, Node node) { + super(witness, content, normalized); + this.node = node; + } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java index a1a5044df..f06c36856 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/PluginScript.java @@ -46,140 +46,140 @@ */ public class PluginScript { - static final Charset SCRIPT_CHARSET = Charset.forName("UTF-8"); + static final Charset SCRIPT_CHARSET = Charset.forName("UTF-8"); - static final String TOKENIZER_FUNCTION = "tokenize"; - static final String NORMALIZER_FUNCTION = "normalize"; - static final String COMPARATOR_FUNCTION = "compare"; + static final String TOKENIZER_FUNCTION = "tokenize"; + static final String NORMALIZER_FUNCTION = "normalize"; + static final String COMPARATOR_FUNCTION = "compare"; - final Invocable script; - final boolean tokenizer; - final boolean normalizer; - final boolean comparator; + final Invocable 
script; + final boolean tokenizer; + final boolean normalizer; + final boolean comparator; - public static PluginScript read(URL source) throws ScriptException, IOException { - try (Reader sourceReader = new BufferedReader(new InputStreamReader(source.openStream(), SCRIPT_CHARSET))) { - return read(source.toString(), sourceReader); - } - } - - public static PluginScript read(String filename, Reader source) throws ScriptException, IOException { - final ScriptEngine scriptEngine = Objects.requireNonNull(new ScriptEngineManager().getEngineByExtension("js")); - scriptEngine.put(ScriptEngine.FILENAME, filename); - - final CompiledScript script = ((Compilable) scriptEngine).compile(source); - script.eval(); - - return new PluginScript((Invocable) script.getEngine()); - } - - PluginScript(Invocable script) throws ScriptException { - this.script = script; - tokenizer = hasFunction(TOKENIZER_FUNCTION, ""); - normalizer = hasFunction(NORMALIZER_FUNCTION, ""); - comparator = hasFunction(COMPARATOR_FUNCTION, "", ""); - } - - Function> tokenizer() { - return (tokenizer ? 
new Function>() { - @Override - public Stream apply(String input) { - final Object result = invoke(TOKENIZER_FUNCTION, input); - if (!(result instanceof Iterable)) { - throw new PluginScriptExecutionException("Wrong result type of " + - TOKENIZER_FUNCTION + "(); expected an iterable type, found " + - result.getClass()); - } - final List tokens = new LinkedList<>(); - for (Object token : (Iterable) result) { - if (token == null) { - throw new PluginScriptExecutionException(TOKENIZER_FUNCTION + "() returned null token"); - } - if (!(token instanceof String)) { - throw new PluginScriptExecutionException("Wrong result type of " + - TOKENIZER_FUNCTION + "(); expected tokens of type string, found " + - token.getClass()); - } - tokens.add((String) token); + public static PluginScript read(URL source) throws ScriptException, IOException { + try (Reader sourceReader = new BufferedReader(new InputStreamReader(source.openStream(), SCRIPT_CHARSET))) { + return read(source.toString(), sourceReader); } + } - return tokens.stream(); - } - } : null); - } - - Function normalizer() { - return (normalizer ? new Function() { - @Override - public String apply(String input) { - final Object result = invoke(NORMALIZER_FUNCTION, input); - if (!(result instanceof String)) { - throw new PluginScriptExecutionException("Wrong result type of " + - NORMALIZER_FUNCTION + "(); expected a string, found " + - result.getClass()); - } - return (String) result; - } - } : null); - } - - Comparator comparator() { - return (comparator ? 
new Comparator() { - @Override - public int compare(Token o1, Token o2) { - if (!(o1 instanceof SimpleToken)) { - throw new PluginScriptExecutionException(COMPARATOR_FUNCTION + "() called with wrong token type '" + o1.getClass()); - } - if (!(o2 instanceof SimpleToken)) { - throw new PluginScriptExecutionException(COMPARATOR_FUNCTION + "() called with wrong token type '" + o2.getClass()); - } + public static PluginScript read(String filename, Reader source) throws ScriptException, IOException { + final ScriptEngine scriptEngine = Objects.requireNonNull(new ScriptEngineManager().getEngineByExtension("js")); + scriptEngine.put(ScriptEngine.FILENAME, filename); - final Object result = invoke(COMPARATOR_FUNCTION, ((SimpleToken) o1).getNormalized(), ((SimpleToken) o2).getNormalized()); - if (result instanceof Number) { - return ((Number) result).intValue(); - } - if (result instanceof Boolean) { - return (((Boolean) result) ? 0 : 1); - } + final CompiledScript script = ((Compilable) scriptEngine).compile(source); + script.eval(); + + return new PluginScript((Invocable) script.getEngine()); + } + + PluginScript(Invocable script) throws ScriptException { + this.script = script; + tokenizer = hasFunction(TOKENIZER_FUNCTION, ""); + normalizer = hasFunction(NORMALIZER_FUNCTION, ""); + comparator = hasFunction(COMPARATOR_FUNCTION, "", ""); + } - throw new PluginScriptExecutionException("Wrong result type of " + - COMPARATOR_FUNCTION + "(); expected a number or boolean type, found " + - result.getClass()); - } - } : null); - } - - boolean hasFunction(String name, Object... testArgs) throws ScriptException { - try { - script.invokeFunction(name, testArgs); - return true; - } catch (NoSuchMethodException e) { - return false; + Function> tokenizer() { + return (tokenizer ? 
new Function>() { + @Override + public Stream apply(String input) { + final Object result = invoke(TOKENIZER_FUNCTION, input); + if (!(result instanceof Iterable)) { + throw new PluginScriptExecutionException("Wrong result type of " + + TOKENIZER_FUNCTION + "(); expected an iterable type, found " + + result.getClass()); + } + final List tokens = new LinkedList<>(); + for (Object token : (Iterable) result) { + if (token == null) { + throw new PluginScriptExecutionException(TOKENIZER_FUNCTION + "() returned null token"); + } + if (!(token instanceof String)) { + throw new PluginScriptExecutionException("Wrong result type of " + + TOKENIZER_FUNCTION + "(); expected tokens of type string, found " + + token.getClass()); + } + tokens.add((String) token); + } + + return tokens.stream(); + } + } : null); } - } - - Object invoke(String function, Object... args) throws PluginScriptExecutionException { - try { - Object result = script.invokeFunction(function, args); - if (result == null) { - throw new PluginScriptExecutionException(function + "() returned null"); - } - return result; - } catch (ScriptException e) { - throw new PluginScriptExecutionException(e); - } catch (NoSuchMethodException e) { - throw new IllegalStateException(e); + + Function normalizer() { + return (normalizer ? new Function() { + @Override + public String apply(String input) { + final Object result = invoke(NORMALIZER_FUNCTION, input); + if (!(result instanceof String)) { + throw new PluginScriptExecutionException("Wrong result type of " + + NORMALIZER_FUNCTION + "(); expected a string, found " + + result.getClass()); + } + return (String) result; + } + } : null); } - } - static class PluginScriptExecutionException extends RuntimeException { + Comparator comparator() { + return (comparator ? 
new Comparator() { + @Override + public int compare(Token o1, Token o2) { + if (!(o1 instanceof SimpleToken)) { + throw new PluginScriptExecutionException(COMPARATOR_FUNCTION + "() called with wrong token type '" + o1.getClass()); + } + if (!(o2 instanceof SimpleToken)) { + throw new PluginScriptExecutionException(COMPARATOR_FUNCTION + "() called with wrong token type '" + o2.getClass()); + } + + final Object result = invoke(COMPARATOR_FUNCTION, ((SimpleToken) o1).getNormalized(), ((SimpleToken) o2).getNormalized()); + if (result instanceof Number) { + return ((Number) result).intValue(); + } + if (result instanceof Boolean) { + return (((Boolean) result) ? 0 : 1); + } + + throw new PluginScriptExecutionException("Wrong result type of " + + COMPARATOR_FUNCTION + "(); expected a number or boolean type, found " + + result.getClass()); + } + } : null); + } - PluginScriptExecutionException(String message) { - super(message); + boolean hasFunction(String name, Object... testArgs) throws ScriptException { + try { + script.invokeFunction(name, testArgs); + return true; + } catch (NoSuchMethodException e) { + return false; + } } - PluginScriptExecutionException(ScriptException scriptException) { - super(scriptException); + Object invoke(String function, Object... 
args) throws PluginScriptExecutionException { + try { + Object result = script.invokeFunction(function, args); + if (result == null) { + throw new PluginScriptExecutionException(function + "() returned null"); + } + return result; + } catch (ScriptException e) { + throw new PluginScriptExecutionException(e); + } catch (NoSuchMethodException e) { + throw new IllegalStateException(e); + } + } + + static class PluginScriptExecutionException extends RuntimeException { + + PluginScriptExecutionException(String message) { + super(message); + } + + PluginScriptExecutionException(ScriptException scriptException) { + super(scriptException); + } } - } } diff --git a/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java index 5a8f17bf2..7c59b50cb 100644 --- a/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java +++ b/collatex-tools/src/main/java/eu/interedition/collatex/tools/URLWitness.java @@ -51,49 +51,49 @@ */ public class URLWitness extends SimpleWitness { - public final URL url; + public final URL url; - public URLWitness(String sigil, URL url) { - super(sigil); - this.url = url; - } + public URLWitness(String sigil, URL url) { + super(sigil); + this.url = url; + } - public URLWitness read( - Function> tokenizer, - Function normalizer, - Charset charset, - XPathExpression tokenXPath) - throws IOException, XPathExpressionException, SAXException { - try (InputStream stream = url.openStream()) { - if (tokenXPath != null) { - final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - final Document document = documentBuilder.parse(stream); - document.normalizeDocument(); + public URLWitness read( + Function> tokenizer, + Function normalizer, + Charset charset, + XPathExpression tokenXPath) + throws IOException, XPathExpressionException, SAXException { + try (InputStream stream = url.openStream()) { + if 
(tokenXPath != null) { + final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + final Document document = documentBuilder.parse(stream); + document.normalizeDocument(); - final NodeList tokenNodes = (NodeList) tokenXPath.evaluate(document, XPathConstants.NODESET); - final List tokens = new ArrayList<>(tokenNodes.getLength()); - for (int nc = 0; nc < tokenNodes.getLength(); nc++) { - final Node tokenNode = tokenNodes.item(nc); - final String tokenText = tokenNode.getTextContent(); - tokens.add(new NodeToken(this, tokenText, normalizer.apply(tokenText), tokenNode)); - } - setTokens(tokens); - } else { + final NodeList tokenNodes = (NodeList) tokenXPath.evaluate(document, XPathConstants.NODESET); + final List tokens = new ArrayList<>(tokenNodes.getLength()); + for (int nc = 0; nc < tokenNodes.getLength(); nc++) { + final Node tokenNode = tokenNodes.item(nc); + final String tokenText = tokenNode.getTextContent(); + tokens.add(new NodeToken(this, tokenText, normalizer.apply(tokenText), tokenNode)); + } + setTokens(tokens); + } else { - final BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charset)); - final StringWriter writer = new StringWriter(); - final char[] buf = new char[1024]; - while (reader.read(buf) != -1) { - writer.write(buf); - } - setTokens(tokenizer.apply(writer.toString()) + final BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charset)); + final StringWriter writer = new StringWriter(); + final char[] buf = new char[1024]; + while (reader.read(buf) != -1) { + writer.write(buf); + } + setTokens(tokenizer.apply(writer.toString()) .map(tokenText -> new SimpleToken(this, tokenText, normalizer.apply(tokenText))) .collect(Collectors.toList()) - ); - } - } catch (ParserConfigurationException e) { - throw new SAXException(e); + ); + } + } catch (ParserConfigurationException e) { + throw new SAXException(e); + } + return this; } - return this; - } } diff --git 
a/pom.xml b/pom.xml index b72167e6f..6b501e789 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 @@ -59,28 +60,28 @@ - - 1.8 - 1.8 - + + 1.8 + 1.8 + - - - junit - junit - 4.10 - test - - + + + junit + junit + 4.10 + test + + + + + + + eu.interedition + collatex-core + ${project.version} + + + - - - - eu.interedition - collatex-core - ${project.version} - - - - From 2540733d0ee7671a691a7cc7de2907a4eafeab53 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Sat, 14 Feb 2015 16:35:48 +0100 Subject: [PATCH 40/52] Update copyright headers --- .../collatex/CollationAlgorithm.java | 2 +- .../collatex/CollationAlgorithmFactory.java | 2 +- .../java/eu/interedition/collatex/Token.java | 2 +- .../interedition/collatex/VariantGraph.java | 2 +- .../eu/interedition/collatex/Witness.java | 2 +- .../collatex/dekker/DekkerAlgorithm.java | 2 +- .../interedition/collatex/dekker/Match.java | 2 +- .../collatex/dekker/PhraseMatchDetector.java | 2 +- .../collatex/dekker/TokenLinker.java | 2 +- .../dekker/TranspositionDetector.java | 2 +- .../interedition/collatex/dekker/Tuple.java | 2 +- .../collatex/dekker/matrix/Archipelago.java | 2 +- .../collatex/dekker/matrix/Coordinate.java | 2 +- .../collatex/dekker/matrix/Island.java | 2 +- .../dekker/matrix/IslandCompetition.java | 19 +++++++++++++++++++ .../dekker/matrix/IslandConflictResolver.java | 2 +- .../collatex/dekker/matrix/MatchTable.java | 2 +- .../dekker/matrix/MatchTableLinker.java | 2 +- .../dekker/matrix/MatchTableSelection.java | 19 +++++++++++++++++++ .../dekker/matrix/MatchTableSerializer.java | 2 +- .../collatex/dekker/matrix/package-info.java | 2 +- .../collatex/dekker/package-info.java | 2 +- .../collatex/matching/EditDistance.java | 2 +- .../matching/EditDistanceTokenComparator.java | 2 +- .../matching/EqualityTokenComparator.java | 2 +- .../collatex/matching/Matches.java | 2 +- .../StrictEqualityTokenComparator.java | 2 +- .../collatex/matching/package-info.java | 2 +- .../medite/AlignmentDecisionGraph.java | 2 
+- .../interedition/collatex/medite/Matches.java | 2 +- .../collatex/medite/MediteAlgorithm.java | 2 +- .../collatex/medite/SuffixTree.java | 2 +- .../collatex/medite/package-info.java | 2 +- .../NeedlemanWunschAlgorithm.java | 2 +- .../NeedlemanWunschScorer.java | 2 +- .../needlemanwunsch/package-info.java | 2 +- .../interedition/collatex/package-info.java | 2 +- .../collatex/simple/SimpleCollation.java | 2 +- .../simple/SimplePatternTokenizer.java | 2 +- .../collatex/simple/SimpleToken.java | 2 +- .../simple/SimpleTokenNormalizers.java | 2 +- .../simple/SimpleVariantGraphSerializer.java | 2 +- .../collatex/simple/SimpleWitness.java | 2 +- .../simple/SimpleWitnessTeiBuilder.java | 2 +- .../collatex/simple/package-info.java | 2 +- .../util/GreedyStringTilingAlgorithm.java | 2 +- .../util/ParallelSegmentationApparatus.java | 2 +- .../collatex/util/VariantGraphRanking.java | 2 +- .../collatex/util/VariantGraphTraversal.java | 2 +- .../collatex/util/VertexMatch.java | 2 +- .../collatex/util/package-info.java | 2 +- collatex-core/src/main/javadoc/overview.html | 2 +- .../interedition/collatex/AbstractTest.java | 2 +- .../collatex/ScriptEngineTest.java | 2 +- .../collatex/VariantGraphTest.java | 2 +- .../collatex/dekker/AlignmentTest.java | 2 +- .../collatex/dekker/BeckettTest.java | 2 +- .../collatex/dekker/DarwinTest.java | 2 +- .../collatex/dekker/SpencerHoweTest.java | 2 +- .../dekker/TranspositionGraphTest.java | 19 +++++++++++++++++++ .../dekker/TranspositionRenderingTest.java | 2 +- .../dekker/VariantGraphRankerTest.java | 2 +- .../collatex/dekker/VariantGraphTest.java | 2 +- .../collatex/dekker/matrix/HermansTest.java | 2 +- .../matrix/IslandConflictResolverTest.java | 19 +++++++++++++++++++ .../collatex/dekker/matrix/IslandTest.java | 2 +- .../dekker/matrix/MatchTableLinkerTest.java | 2 +- .../dekker/matrix/MatchTableTest.java | 2 +- .../collatex/lab/CollateXLaboratory.java | 2 +- .../collatex/lab/MatchMatrixCellStatus.java | 2 +- 
.../collatex/lab/MatchMatrixTableModel.java | 2 +- .../collatex/lab/MatchTableCell.java | 2 +- .../collatex/lab/WitnessPanel.java | 2 +- .../collatex/matching/MatchesTest.java | 2 +- .../collatex/matching/NearMatcherTest.java | 2 +- .../collatex/medite/MediteTest.java | 2 +- .../collatex/medite/SuffixTreeTest.java | 2 +- .../needlemanwunsch/NeedlemanWunschTest.java | 2 +- .../collatex/output/AlignmentTableTest.java | 2 +- .../simple/SimpleWitnessTeiBuilderTest.java | 2 +- .../collatex/simple/SimpleWitnessTest.java | 2 +- .../interedition/collatex/tools/CollateX.java | 2 +- .../collatex/tools/CollatorService.java | 19 +++++++++++++++++++ .../collatex/tools/JsonProcessor.java | 19 +++++++++++++++++++ .../collatex/tools/NodeToken.java | 2 +- .../collatex/tools/PluginScript.java | 2 +- .../collatex/tools/URLWitness.java | 2 +- 87 files changed, 195 insertions(+), 81 deletions(-) diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java index ba974fc6a..3b36b26a5 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java index 7e0a7df1b..56fadd6e4 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/CollationAlgorithmFactory.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Token.java b/collatex-core/src/main/java/eu/interedition/collatex/Token.java index ac0da98ba..e43705a91 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Token.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Token.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java index 735e121de..2c6e3276d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/VariantGraph.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java index 9de6b4c5c..b0fa4c834 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/Witness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/Witness.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java index b40a43163..e2141be8e 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/DekkerAlgorithm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. 
+ * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java index dd4c0cd2d..53e903a8c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Match.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java index e64530349..887b4cfd5 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/PhraseMatchDetector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java index 7c26f836b..137b76d2c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TokenLinker.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java index 6371ec027..75c461c35 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/TranspositionDetector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java index 9b3762c20..f85b564cd 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/Tuple.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java index 6d6b1a87f..636ea8fbb 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Archipelago.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java index be3779104..56da8d35b 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Coordinate.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java index 94cc8b0ea..62b3b248b 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/Island.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java index 805742618..9c18fb9b6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandCompetition.java @@ -1,3 +1,22 @@ +/* + * Copyright (c) 2015 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . + */ + package eu.interedition.collatex.dekker.matrix; public enum IslandCompetition { diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java index cd7e31edd..a18304f0d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/IslandConflictResolver.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java index d674b4fcc..23260e2bb 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTable.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java index c99a0079e..9e8e46243 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableLinker.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java index 1f6101be8..7a9beb0f6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSelection.java @@ -1,3 +1,22 @@ +/* + * Copyright (c) 2015 The Interedition Development Group. + * + * This file is part of CollateX. + * + * CollateX is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CollateX is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CollateX. If not, see . 
+ */ + package eu.interedition.collatex.dekker.matrix; import eu.interedition.collatex.VariantGraph; diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java index b05c4ea04..5e3a2a763 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/MatchTableSerializer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/package-info.java index 38fbfb70d..a1d331bb1 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/matrix/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/dekker/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/dekker/package-info.java index 829bca94e..c367a10e8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/dekker/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/dekker/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java index 56cbafad5..674a50de2 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistance.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java index cd9ef3ba9..35879c766 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/EditDistanceTokenComparator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java index 7ae42e492..fb213fe09 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/EqualityTokenComparator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java index 05b9b6684..184ad2217 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/Matches.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java index 686452acb..b52b9ac43 100755 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/StrictEqualityTokenComparator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/matching/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/matching/package-info.java index 6e3ba47b9..54468559a 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/matching/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/matching/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java index a115a9fa1..76fc172f3 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/AlignmentDecisionGraph.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java index 17b1fa479..4dff218cf 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/Matches.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java index f3d025e89..c956c1279 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/MediteAlgorithm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java index 08f4261d8..61f4442ec 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/SuffixTree.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/medite/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/medite/package-info.java index 12379cd47..9237c7123 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/medite/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/medite/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java index 52af9f155..905f70a23 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschAlgorithm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java index ebfa55f43..725ba32d8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/NeedlemanWunschScorer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/package-info.java index 0f5af1587..a0a2e1cd6 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/needlemanwunsch/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/package-info.java index 6f4ceb757..043a3d988 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java index 3c04cb3e6..ed1da5ebe 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleCollation.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java index d32a5ea83..ffe5d8bf4 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimplePatternTokenizer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java index 48852f1d0..2b9e53a43 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleToken.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java index 9d98627a9..088ae41d8 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleTokenNormalizers.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java index 999856246..ccaab2f9b 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleVariantGraphSerializer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java index 43d35f67f..b2270b8a7 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitness.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java index 9e463d307..ed97e53cb 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/SimpleWitnessTeiBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/simple/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/simple/package-info.java index 10a15f836..7cf33312d 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/simple/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/simple/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java b/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java index 5d7f450d3..bc610e6ed 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/GreedyStringTilingAlgorithm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java index a49078354..81874c51a 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/ParallelSegmentationApparatus.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java index bc04ac1af..980da014c 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphRanking.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java index 2e48c37d5..2733270c5 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VariantGraphTraversal.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. 
* diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java index 8a4759800..6ec7d4eb4 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/VertexMatch.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/java/eu/interedition/collatex/util/package-info.java b/collatex-core/src/main/java/eu/interedition/collatex/util/package-info.java index ea2d0f32b..e3d780285 100644 --- a/collatex-core/src/main/java/eu/interedition/collatex/util/package-info.java +++ b/collatex-core/src/main/java/eu/interedition/collatex/util/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The Interedition Development Group. + * Copyright (c) 2015 The Interedition Development Group. * * This file is part of CollateX. * diff --git a/collatex-core/src/main/javadoc/overview.html b/collatex-core/src/main/javadoc/overview.html index d2c72b872..5179560fd 100644 --- a/collatex-core/src/main/javadoc/overview.html +++ b/collatex-core/src/main/javadoc/overview.html @@ -1,5 +1,5 @@