diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml new file mode 100644 index 00000000..91106d3f --- /dev/null +++ b/.github/workflows/maven.yml @@ -0,0 +1,17 @@ +name: Java CI + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + - name: Set up JDK 1.8 + uses: actions/setup-java@v1 + with: + java-version: 1.8 + - name: Build with Maven + run: mvn -B package --file pom.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b3644e4a --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +epublib-core/bin/ +epublib-core/bin/ +epublib-tools/bin/ +epublib-core/.classpath +epublib-core/.project +epublib-tools/.classpath +epublib-tools/.project +epublib-core/.settings +epublib-tools/.settings/ +!/.idea/ +*.iml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..18f095e2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,9 @@ +language: java + +script: mvn -f epublib-parent/pom.xml clean package + +dist: trusty + +jdk: + - openjdk7 + - openjdk8 \ No newline at end of file diff --git a/CREDITS b/CREDITS new file mode 100644 index 00000000..496b8665 --- /dev/null +++ b/CREDITS @@ -0,0 +1,4 @@ +Some Icons are Copyright Yusuke Kamiyamane. All rights reserved. Licensed under a Creative Commons Attribution 3.0 license. + +Contains the class org.apache.commons.io.XmlStreamReader from the apache commons io class. +See http://commons.apache.org/io/ for more info. diff --git a/README b/README deleted file mode 100644 index f4112a0a..00000000 --- a/README +++ /dev/null @@ -1 +0,0 @@ -Hello, worldl \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..b0cc05c1 --- /dev/null +++ b/README.md @@ -0,0 +1,116 @@ +# epublib +Epublib is a java library for reading/writing/manipulating epub files. + +It consists of 2 parts: a core that reads/writes epub and a collection of tools. 
+The tools contain an epub cleanup tool, a tool to create epubs from html files, a tool to create an epub from an uncompress html file. +It also contains a swing-based epub viewer. +![Epublib viewer](http://www.siegmann.nl/wp-content/uploads/Alice%E2%80%99s-Adventures-in-Wonderland_2011-01-30_18-17-30.png) + +The core runs both on android and a standard java environment. The tools run only on a standard java environment. + +This means that reading/writing epub files works on Android. + +## Build status +* Travis Build Status: [![Build Status](https://travis-ci.org/psiegman/epublib.svg?branch=master)](https://travis-ci.org/psiegman/epublib) + +## Command line examples + +Set the author of an existing epub + java -jar epublib-3.0-SNAPSHOT.one-jar.jar --in input.epub --out result.epub --author Tester,Joe + +Set the cover image of an existing epub + java -jar epublib-3.0-SNAPSHOT.one-jar.jar --in input.epub --out result.epub --cover-image my_cover.jpg + +## Creating an epub programmatically + + package nl.siegmann.epublib.examples; + + import java.io.InputStream; + import java.io.FileOutputStream; + + import nl.siegmann.epublib.domain.Author; + import nl.siegmann.epublib.domain.Book; + import nl.siegmann.epublib.domain.Metadata; + import nl.siegmann.epublib.domain.Resource; + import nl.siegmann.epublib.domain.TOCReference; + + import nl.siegmann.epublib.epub.EpubWriter; + + public class Translator { + private static InputStream getResource( String path ) { + return Translator.class.getResourceAsStream( path ); + } + + private static Resource getResource( String path, String href ) { + return new Resource( getResource( path ), href ); + } + + public static void main(String[] args) { + try { + // Create new Book + Book book = new Book(); + Metadata metadata = book.getMetadata(); + + // Set the title + metadata.addTitle("Epublib test book 1"); + + // Add an Author + metadata.addAuthor(new Author("Joe", "Tester")); + + // Set cover image + book.setCoverImage( + 
getResource("/book1/test_cover.png", "cover.png") ); + + // Add Chapter 1 + book.addSection("Introduction", + getResource("/book1/chapter1.html", "chapter1.html") ); + + // Add css file + book.getResources().add( + getResource("/book1/book1.css", "book1.css") ); + + // Add Chapter 2 + TOCReference chapter2 = book.addSection( "Second Chapter", + getResource("/book1/chapter2.html", "chapter2.html") ); + + // Add image used by Chapter 2 + book.getResources().add( + getResource("/book1/flowers_320x240.jpg", "flowers.jpg")); + + // Add Chapter2, Section 1 + book.addSection(chapter2, "Chapter 2, section 1", + getResource("/book1/chapter2_1.html", "chapter2_1.html")); + + // Add Chapter 3 + book.addSection("Conclusion", + getResource("/book1/chapter3.html", "chapter3.html")); + + // Create EpubWriter + EpubWriter epubWriter = new EpubWriter(); + + // Write the Book as Epub + epubWriter.write(book, new FileOutputStream("test1_book1.epub")); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + +## Usage in Android + +Add the following lines to your `app` module's `build.gradle` file: + + repositories { + maven { + url 'https://github.com/psiegman/mvn-repo/raw/master/releases' + } + } + + dependencies { + implementation('nl.siegmann.epublib:epublib-core:4.0') { + exclude group: 'org.slf4j' + exclude group: 'xmlpull' + } + implementation 'org.slf4j:slf4j-android:1.7.25' + } diff --git a/epublib-core/.gitignore b/epublib-core/.gitignore new file mode 100644 index 00000000..3dfdcd84 --- /dev/null +++ b/epublib-core/.gitignore @@ -0,0 +1,2 @@ +/target +/test1_book1.epub diff --git a/epublib-core/build.sbt b/epublib-core/build.sbt new file mode 100644 index 00000000..b75cb493 --- /dev/null +++ b/epublib-core/build.sbt @@ -0,0 +1,25 @@ +autoScalaLibrary := false + +crossPaths := false + +name := "epublib-core" + +organization := "nl.siegmann.epublib" + +version := "4.0" + +publishMavenStyle := true + +javacOptions in doc += "-Xdoclint:none" + +libraryDependencies += 
"net.sf.kxml" % "kxml2" % "2.3.0" + +libraryDependencies += "xmlpull" % "xmlpull" % "1.1.3.4d_b4_min" + +libraryDependencies += "org.slf4j" % "slf4j-api" % "1.6.1" + +libraryDependencies += "org.slf4j" % "slf4j-simple" % "1.6.1" + +libraryDependencies += "junit" % "junit" % "4.10" + + diff --git a/epublib-core/pom.xml b/epublib-core/pom.xml new file mode 100644 index 00000000..be1dc21c --- /dev/null +++ b/epublib-core/pom.xml @@ -0,0 +1,105 @@ + + + + + 4.0.0 + + nl.siegmann.epublib + epublib-core + epublib-core + A java library for reading/writing/manipulating epub files + http://www.siegmann.nl/epublib + 2009 + + + nl.siegmann.epublib + epublib-parent + 4.0.1-EPUB3-SNAPSHOT + ../epublib-parent/pom.xml + + + + + net.sf.kxml + kxml2 + + + xmlpull + xmlpull + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-simple + + + junit + junit + test + + + org.mockito + mockito-all + test + + + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + true + complete + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + ${source.version} + ${target.version} + + + + + + + + org.apache.maven.plugins + maven-site-plugin + ${maven-site-plugin.version} + + + + + + maven + http://repo1.maven.org/maven2/ + + + jboss + https://repository.jboss.org/nexus/ + + + net.java.repository + Java.net repository + http://download.java.net/maven/2/ + + + diff --git a/epublib-core/src/doc/schema.min.svg b/epublib-core/src/doc/schema.min.svg new file mode 100644 index 00000000..7da89441 --- /dev/null +++ b/epublib-core/src/doc/schema.min.svg @@ -0,0 +1,131 @@ + + + + + + + + + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Spine + + + +Table of Contents + + + +Guide +Chapter 1 +Chapter 1 +Part 2 +Chapter 2 +Chapter 1 +Chapter 2 +Cover 
+Resources +Preface + + + + + + + + + diff --git a/epublib-core/src/doc/schema.svg b/epublib-core/src/doc/schema.svg new file mode 100644 index 00000000..9976234b --- /dev/null +++ b/epublib-core/src/doc/schema.svg @@ -0,0 +1,1070 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Spine + + + + Table of Contents + + + + Guide + Chapter 1 + Chapter 1 + Part 2 + Chapter 2 + Chapter 1 + Chapter 2 + Cover + Resources + Preface + + + + + + + + + diff --git a/epublib-core/src/examples/java/nl/siegmann/epublib/examples/Simple1.java b/epublib-core/src/examples/java/nl/siegmann/epublib/examples/Simple1.java new file mode 100644 index 00000000..8bb47c71 --- /dev/null +++ b/epublib-core/src/examples/java/nl/siegmann/epublib/examples/Simple1.java @@ -0,0 +1,54 @@ +package nl.siegmann.epublib.examples; + +import java.io.FileOutputStream; + +import nl.siegmann.epublib.domain.Author; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.InputStreamResource; +import nl.siegmann.epublib.domain.Section; +import nl.siegmann.epublib.epub.EpubWriter; +import nl.siegmann.epublib.service.MediatypeService; + +public class Simple1 { + public static void main(String[] args) { + try { + // Create new Book + Book book = new Book(); + + // Set the title + book.getMetadata().addTitle("Epublib test book 1"); + + // Add an Author + book.getMetadata().addAuthor(new Author("Joe", "Tester")); + + // Set cover image + book.setCoverImage(new InputStreamResource(Simple1.class.getResourceAsStream("/book1/cover.png"), MediatypeService.PNG)); + + // Add Chapter 1 + book.addResourceAsSection("Introduction", new InputStreamResource(Simple1.class.getResourceAsStream("/book1/chapter1.html"), 
MediatypeService.XHTML)); + + // Add css file + book.getResources().add(new InputStreamResource(Simple1.class.getResourceAsStream("/book1/book1.css"), "book1.css")); + + // Add Chapter 2 + Section chapter2 = book.addResourceAsSection("Second Chapter", new InputStreamResource(Simple1.class.getResourceAsStream("/book1/chapter2.html"), "chapter2.html")); + + // Add image used by Chapter 2 + book.getResources().add(new InputStreamResource(Simple1.class.getResourceAsStream("/book1/flowers_320x240.jpg"), "flowers.jpg")); + + // Add Chapter2, Section 1 + book.addResourceAsSubSection(chapter2, "Chapter 2, section 1", new InputStreamResource(Simple1.class.getResourceAsStream("/book1/chapter2_1.html"), "chapter2_1.html")); + + // Add Chapter 3 + book.addResourceAsSection("Conclusion", new InputStreamResource(Simple1.class.getResourceAsStream("/book1/chapter3.html"), "chapter3.html")); + + // Create EpubWriter + EpubWriter epubWriter = new EpubWriter(); + + // Write the Book as Epub + epubWriter.write(book, new FileOutputStream("test1_book1.epub")); + } catch(Exception e) { + e.printStackTrace(); + } + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/Adler32.java b/epublib-core/src/main/java/net/sf/jazzlib/Adler32.java new file mode 100644 index 00000000..198189a2 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/Adler32.java @@ -0,0 +1,198 @@ +/* Adler32.java - Computes Adler32 data checksum of a data stream + Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/* + * Written using on-line Java Platform 1.2 API Specification, as well + * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998). + * The actual Adler32 algorithm is taken from RFC 1950. + * Status: Believed complete and correct. + */ + +/** + * Computes Adler32 checksum for a stream of data. An Adler32 checksum is not as + * reliable as a CRC32 checksum, but a lot faster to compute. + *

+ * The specification for Adler32 may be found in RFC 1950. (ZLIB Compressed Data + * Format Specification version 3.3) + *

+ *

+ * From that document: + *

+ * "ADLER32 (Adler-32 checksum) This contains a checksum value of the + * uncompressed data (excluding any dictionary data) computed according to + * Adler-32 algorithm. This algorithm is a 32-bit extension and improvement of + * the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard. + *

+ * Adler-32 is composed of two sums accumulated per byte: s1 is the sum of all + * bytes, s2 is the sum of all s1 values. Both sums are done modulo 65521. s1 is + * initialized to 1, s2 to zero. The Adler-32 checksum is stored as s2*65536 + + * s1 in most-significant-byte first (network) order." + *

+ * "8.2. The Adler-32 algorithm + *

+ * The Adler-32 algorithm is much faster than the CRC32 algorithm yet still + * provides an extremely low probability of undetected errors. + *

+ * The modulo on unsigned long accumulators can be delayed for 5552 bytes, so + * the modulo operation time is negligible. If the bytes are a, b, c, the second + * sum is 3a + 2b + c + 3, and so is position and order sensitive, unlike the + * first sum, which is just a checksum. That 65521 is prime is important to + * avoid a possible large class of two-byte errors that leave the check + * unchanged. (The Fletcher checksum uses 255, which is not prime and which also + * makes the Fletcher check insensitive to single byte changes 0 <-> 255.) + *

/**
 * Adler-32 checksum (RFC 1950) over a stream of bytes.
 *
 * The sum s1 is initialized to 1 instead of zero to make the length of the
 * sequence part of s2, so that the length does not have to be checked
 * separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
 *
 * The 32-bit state keeps s2 in the upper 16 bits and s1 in the lower 16 bits.
 *
 * @author John Leuner, Per Bothner
 * @since JDK 1.1
 *
 * @see InflaterInputStream
 * @see DeflaterOutputStream
 */
public class Adler32 implements Checksum {

	/** Largest prime smaller than 65536; both sums are reduced modulo this value. */
	private static final int BASE = 65521;

	/**
	 * Number of bytes folded in between two modulo reductions. With at most
	 * this many bytes per round the second sum stays below 2^31, so plain
	 * int arithmetic is sufficient.
	 */
	private static final int MAX_DEFERRED = 3800;

	// Java has no unsigned integers, so the packed state lives in an int and
	// is only widened (and masked) when handed out through getValue().
	private int checksum;

	/**
	 * Creates a new Adler32 instance whose checksum starts at 1.
	 */
	public Adler32() {
		reset();
	}

	/**
	 * Returns the Adler32 data checksum computed so far, as an unsigned
	 * 32-bit value.
	 */
	@Override
	public long getValue() {
		return checksum & 0xffffffffL;
	}

	/**
	 * Puts the checksum back to its initial value of 1.
	 */
	@Override
	public void reset() {
		checksum = 1;
	}

	/**
	 * Folds a single byte into the checksum.
	 *
	 * @param bval
	 *            the data value to add; only the low 8 bits are used.
	 */
	@Override
	public void update(final int bval) {
		// Handled inline rather than through a one-element array to avoid
		// the allocation.
		final int low = ((checksum & 0xffff) + (bval & 0xFF)) % BASE;
		final int high = (low + (checksum >>> 16)) % BASE;
		checksum = (high << 16) | low;
	}

	/**
	 * Folds every byte of the array into the checksum.
	 *
	 * @param buffer
	 *            an array of bytes
	 */
	public void update(final byte[] buffer) {
		update(buffer, 0, buffer.length);
	}

	/**
	 * Folds a slice of the array into the checksum.
	 *
	 * @param buf
	 *            an array of bytes
	 * @param off
	 *            the start of the data used for this update
	 * @param len
	 *            the number of bytes to use for this update
	 */
	@Override
	public void update(final byte[] buf, final int off, final int len) {
		int low = checksum & 0xffff;
		int high = checksum >>> 16;

		int pos = off;
		int remaining = len;
		while (remaining > 0) {
			// The modulo is postponed to the end of each round of at most
			// MAX_DEFERRED bytes.
			final int round = remaining < MAX_DEFERRED ? remaining : MAX_DEFERRED;
			remaining -= round;
			for (int i = 0; i < round; i++) {
				low += buf[pos++] & 0xFF;
				high += low;
			}
			low %= BASE;
			high %= BASE;
		}

		// Stored only after the whole slice was consumed, so a failed array
		// access leaves the previous checksum untouched.
		checksum = (high << 16) | low;
	}
}
/**
 * CRC32 data checksum of a data stream, using the algorithm described in
 * RFC 1952 (GZIP file format specification version 4.3). Works with the
 * checked input/output streams to obtain the CRC32 of a stream.
 *
 * @see InflaterInputStream
 * @see DeflaterOutputStream
 *
 * @author Per Bothner
 * @date April 1, 1999.
 */
public class CRC32 implements Checksum {
	/** Byte-indexed lookup table, built once when the class is loaded. */
	private static int[] crc_table = make_crc_table();

	/** The checksum accumulated so far. */
	private int crc = 0;

	/** Builds the 256-entry table used for byte-at-a-time CRC computation. */
	private static int[] make_crc_table() {
		final int[] table = new int[256];
		for (int value = 0; value < table.length; value++) {
			int rem = value;
			for (int bit = 0; bit < 8; bit++) {
				final int shifted = rem >>> 1;
				rem = ((rem & 1) != 0) ? (0xedb88320 ^ shifted) : shifted;
			}
			table[value] = rem;
		}
		return table;
	}

	/**
	 * Puts the checksum back to the state before any update.
	 */
	@Override
	public void reset() {
		crc = 0;
	}

	/**
	 * Returns the CRC32 data checksum computed so far, as an unsigned
	 * 32-bit value.
	 */
	@Override
	public long getValue() {
		return crc & 0xffffffffL;
	}

	/**
	 * Folds one byte into the checksum.
	 *
	 * @param bval
	 *            (the byte is taken as the lower 8 bits of bval)
	 */
	@Override
	public void update(final int bval) {
		final int inverted = ~crc;
		crc = ~(crc_table[(inverted ^ bval) & 0xff] ^ (inverted >>> 8));
	}

	/**
	 * Folds the complete byte array into the checksum.
	 */
	public void update(final byte[] buf) {
		update(buf, 0, buf.length);
	}

	/**
	 * Folds a slice of the byte array into the checksum.
	 *
	 * @param buf
	 *            the buffer which contains the data
	 * @param off
	 *            the offset in the buffer where the data starts
	 * @param len
	 *            the length of the data
	 */
	@Override
	public void update(final byte[] buf, final int off, final int len) {
		int running = ~crc;
		int pos = off;
		for (int left = len; left > 0; left--) {
			running = crc_table[(running ^ buf[pos++]) & 0xff] ^ (running >>> 8);
		}
		// Written back only once the whole slice has been processed.
		crc = ~running;
	}
}
+ +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/* Written using on-line Java Platform 1.2 API Specification + * and JCL book. + * Believed complete and correct. + */ + +/** + * InputStream that computes a checksum of the data being read using a supplied + * Checksum object. 
/**
 * InputStream that computes a checksum of the data being read using a
 * supplied Checksum object.
 *
 * @see Checksum
 *
 * @author Tom Tromey
 * @date May 17, 1999
 */
public class CheckedInputStream extends FilterInputStream {
	/** Upper bound for the temporary buffer used by {@link #skip(long)}. */
	private static final int SKIP_BUFFER_SIZE = 1024;

	/** The checksum object. */
	private final Checksum sum;

	/**
	 * Creates a new CheckedInputStream on top of the supplied InputStream
	 * using the supplied Checksum.
	 *
	 * @param in
	 *            the stream to read from
	 * @param sum
	 *            the checksum that is updated with every byte read
	 */
	public CheckedInputStream(final InputStream in, final Checksum sum) {
		super(in);
		this.sum = sum;
	}

	/**
	 * Returns the Checksum object used. To get the data checksum computed so
	 * far call <code>getChecksum().getValue()</code>.
	 */
	public Checksum getChecksum() {
		return sum;
	}

	/**
	 * Reads one byte, updates the checksum and returns the read byte (or -1
	 * when the end of file was reached).
	 */
	@Override
	public int read() throws IOException {
		final int x = in.read();
		if (x != -1) {
			sum.update(x);
		}
		return x;
	}

	/**
	 * Reads at most len bytes into the supplied buffer and updates the
	 * checksum with them. Returns the number of bytes actually read or -1
	 * when the end of file was reached.
	 */
	@Override
	public int read(final byte[] buf, final int off, final int len)
			throws IOException {
		final int r = in.read(buf, off, len);
		if (r != -1) {
			sum.update(buf, off, r);
		}
		return r;
	}

	/**
	 * Skips n bytes by reading them into a temporary buffer and updating the
	 * checksum with that buffer.
	 *
	 * @param n
	 *            the number of bytes to skip; zero or a negative value skips
	 *            nothing
	 * @return the actual number of bytes skipped, which can be less than
	 *         requested when the end of file is reached
	 */
	@Override
	public long skip(final long n) throws IOException {
		// A negative count used to reach "new byte[negative]" and fail with
		// NegativeArraySizeException; treat it like zero instead.
		if (n <= 0) {
			return 0;
		}

		final byte[] buf = new byte[(int) Math.min(n, SKIP_BUFFER_SIZE)];

		long skipped = 0;
		while (skipped < n) {
			final int want = (int) Math.min(n - skipped, buf.length);
			final int r = in.read(buf, 0, want);
			if (r == -1) {
				break;
			}
			sum.update(buf, 0, r);
			skipped += r;
		}

		return skipped;
	}
}
/**
 * OutputStream that computes a checksum of data being written using a
 * supplied Checksum object.
 *
 * @see Checksum
 *
 * @author Tom Tromey
 * @date May 17, 1999
 */
public class CheckedOutputStream extends FilterOutputStream {
	/** The checksum object. */
	private final Checksum sum;

	/**
	 * Creates a new CheckedOutputStream on top of the supplied OutputStream
	 * using the supplied Checksum.
	 *
	 * @param out
	 *            the stream that receives the data
	 * @param cksum
	 *            the checksum that is updated with every byte written
	 */
	public CheckedOutputStream(final OutputStream out, final Checksum cksum) {
		super(out);
		sum = cksum;
	}

	/**
	 * Writes the given slice of the byte array to the OutputStream and then
	 * updates the Checksum with it.
	 */
	@Override
	public void write(final byte[] buf, final int off, final int len)
			throws IOException {
		// Write first: if the underlying stream fails, the checksum must not
		// include data that never went out.
		this.out.write(buf, off, len);
		this.sum.update(buf, off, len);
	}

	/**
	 * Writes one byte to the OutputStream and then updates the Checksum.
	 */
	@Override
	public void write(final int bval) throws IOException {
		this.out.write(bval);
		this.sum.update(bval);
	}

	/**
	 * Returns the Checksum object used. To get the data checksum computed so
	 * far call <code>getChecksum().getValue()</code>.
	 */
	public Checksum getChecksum() {
		return this.sum;
	}
}
/**
 * Contract for a running data checksum, as used by the checked input/output
 * streams. Data is fed in one byte at a time or as a slice of a byte array;
 * the current value can be read after every update through
 * <code>getValue</code>, and <code>reset</code> makes the object reusable
 * for new data.
 *
 * @see CheckedInputStream
 * @see CheckedOutputStream
 *
 * @author Per Bothner
 * @author Jochen Hoenicke
 */
public interface Checksum {
	/**
	 * Adds one byte to the data checksum.
	 *
	 * @param bval
	 *            the data value to add. The high byte of the int is ignored.
	 */
	void update(int bval);

	/**
	 * Adds a slice of the byte array to the data checksum.
	 *
	 * @param buf
	 *            the buffer which contains the data
	 * @param off
	 *            the offset in the buffer where the data starts
	 * @param len
	 *            the length of the data
	 */
	void update(byte[] buf, int off, int len);

	/**
	 * Returns the data checksum computed so far.
	 */
	long getValue();

	/**
	 * Resets the data checksum as if no update was ever called.
	 */
	void reset();
}
+ +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/** + * Exception thrown when compressed data is corrupt. + * + * @author Tom Tromey + * @author John Leuner + * @since 1.1 + * @status updated to 1.4 + */ +public class DataFormatException extends Exception { + /** + * Compatible with JDK 1.1+. 
+ */ + private static final long serialVersionUID = 2219632870893641452L; + + /** + * Create an exception without a message. + */ + public DataFormatException() { + } + + /** + * Create an exception with a message. + * + * @param msg + * the message + */ + public DataFormatException(final String msg) { + super(msg); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/Deflater.java b/epublib-core/src/main/java/net/sf/jazzlib/Deflater.java new file mode 100644 index 00000000..c9af5fe0 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/Deflater.java @@ -0,0 +1,511 @@ +/* Deflater.java - Compress a data stream + Copyright (C) 1999, 2000, 2001, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. 
+ +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/** + * This is the Deflater class. The deflater class compresses input with the + * deflate algorithm described in RFC 1951. It has several compression levels + * and three different strategies described below. + * + * This class is not thread safe. This is inherent in the API, due to the + * split of deflate and setInput. + * + * @author Jochen Hoenicke + * @author Tom Tromey + */ +public class Deflater { + /** + * The best and slowest compression level. This tries to find very long and + * distant string repetitions. + */ + public static final int BEST_COMPRESSION = 9; + /** + * The worst but fastest compression level. + */ + public static final int BEST_SPEED = 1; + /** + * The default compression level. + */ + public static final int DEFAULT_COMPRESSION = -1; + /** + * This level won't compress at all but output uncompressed blocks. + */ + public static final int NO_COMPRESSION = 0; + + /** + * The default strategy. + */ + public static final int DEFAULT_STRATEGY = 0; + /** + * This strategy will only allow longer string repetitions. It is useful for + * random data with a small character set. + */ + public static final int FILTERED = 1; + + /** + * This strategy will not look for string repetitions at all. 
It only + * encodes with Huffman trees (which means, that more common characters get + * a smaller encoding. + */ + public static final int HUFFMAN_ONLY = 2; + + /** + * The compression method. This is the only method supported so far. There + * is no need to use this constant at all. + */ + public static final int DEFLATED = 8; + + /* + * The Deflater can do the following state transitions: + * + * (1) -> INIT_STATE ----> INIT_FINISHING_STATE ---. / | (2) (5) | / v (5) | + * (3)| SETDICT_STATE ---> SETDICT_FINISHING_STATE |(3) \ | (3) | ,-------' + * | | | (3) / v v (5) v v (1) -> BUSY_STATE ----> FINISHING_STATE | (6) v + * FINISHED_STATE \_____________________________________/ | (7) v + * CLOSED_STATE + * + * (1) If we should produce a header we start in INIT_STATE, otherwise we + * start in BUSY_STATE. (2) A dictionary may be set only when we are in + * INIT_STATE, then we change the state as indicated. (3) Whether a + * dictionary is set or not, on the first call of deflate we change to + * BUSY_STATE. (4) -- intentionally left blank -- :) (5) FINISHING_STATE is + * entered, when finish() is called to indicate that there is no more INPUT. + * There are also states indicating, that the header wasn't written yet. (6) + * FINISHED_STATE is entered, when everything has been flushed to the + * internal pending output buffer. (7) At any time end() may be called, + * which changes the state to CLOSED_STATE. + */ + + /* Bit flags that are tested against, or OR-ed into, the state field. */ private static final int IS_SETDICT = 0x01; + private static final int IS_FLUSHING = 0x04; + private static final int IS_FINISHING = 0x08; + + /* The named states; FLUSHING_STATE and FINISHING_STATE are BUSY_STATE with the flag bits above set. */ private static final int INIT_STATE = 0x00; + private static final int SETDICT_STATE = 0x01; + private static final int BUSY_STATE = 0x10; + private static final int FLUSHING_STATE = 0x14; + private static final int FINISHING_STATE = 0x1c; + private static final int FINISHED_STATE = 0x1e; + private static final int CLOSED_STATE = 0x7f; + + /** Compression level. */ + private int level; + + /** should we include a header.
*/ + private final boolean noHeader; + + /** The current state. */ + private int state; + + /** The total bytes of output written. */ + private int totalOut; + + /** The pending output. */ + private DeflaterPending pending; + + /** The deflater engine. */ + private DeflaterEngine engine; + + /** + * Creates a new deflater with default compression level. The deflate + * header and the trailing adler checksum are written (nowrap is false). + */ + public Deflater() { + this(DEFAULT_COMPRESSION, false); + } + + /** + * Creates a new deflater with given compression level. The deflate header + * and the trailing adler checksum are written (nowrap is false). + * + * @param lvl + * the compression level, a value between NO_COMPRESSION and + * BEST_COMPRESSION, or DEFAULT_COMPRESSION. + * @exception IllegalArgumentException + * if lvl is out of range. + */ + public Deflater(final int lvl) { + this(lvl, false); + } + + /** + * Creates a new deflater with given compression level. + * + * @param lvl + * the compression level, a value between NO_COMPRESSION and + * BEST_COMPRESSION, or DEFAULT_COMPRESSION (which selects + * level 6). + * @param nowrap + * true, iff we should suppress the deflate header at the + * beginning and the adler checksum at the end of the output. + * This is useful for the GZIP format. + * @exception IllegalArgumentException + * if lvl is out of range. + */ + public Deflater(int lvl, final boolean nowrap) { + /* Map the default alias to a concrete level and reject anything outside 0..9 before allocating. */ if (lvl == DEFAULT_COMPRESSION) { + lvl = 6; + } else if ((lvl < NO_COMPRESSION) || (lvl > BEST_COMPRESSION)) { + throw new IllegalArgumentException(); + } + + pending = new DeflaterPending(); + engine = new DeflaterEngine(pending); + this.noHeader = nowrap; + setStrategy(DEFAULT_STRATEGY); + setLevel(lvl); + reset(); + } + + /** + * Resets the deflater. The deflater acts afterwards as if it was just + * created with the same compression level and strategy as it had before. + * The state returns to INIT_STATE, or to BUSY_STATE when no header is + * written, and the output byte counter is cleared. + */ + public void reset() { + state = (noHeader ? BUSY_STATE : INIT_STATE); + totalOut = 0; + pending.reset(); + engine.reset(); + } + + /** + * Frees all objects allocated by the compressor. There's no reason to call + * this, since you can just rely on garbage collection.
Exists only for + * compatibility against Sun's JDK, where the compressor allocates native + * memory. If you call any method (even reset) afterwards the behaviour is + * undefined. + * + * @deprecated Just clear all references to deflater instead. + */ + @Deprecated + public void end() { + engine = null; + pending = null; + /* Marks the deflater closed; deflate() checks for this state and throws IllegalStateException. */ state = CLOSED_STATE; + } + + /** + * Gets the current adler checksum of the data that was processed so far. + * The value is read from the engine, which end() sets to null. + */ + public int getAdler() { + return engine.getAdler(); + } + + /** + * Gets the number of input bytes processed so far. The count is kept by + * the engine, which end() sets to null. + */ + public int getTotalIn() { + return engine.getTotalIn(); + } + + /** + * Gets the number of output bytes so far, i.e. the bytes that deflate() + * has copied into caller buffers since construction or the last reset(). + */ + public int getTotalOut() { + return totalOut; + } + + /** + * Finalizes this object. + */ + @Override + protected void finalize() { + /* Exists solely for compatibility. We don't have any native state. */ + } + + /** + * Flushes the current input block. Further calls to deflate() will produce + * enough output to inflate everything in the current input block. This is + * not part of Sun's JDK so I have made it package private. It is used by + * DeflaterOutputStream to implement flush(). This method only sets the + * IS_FLUSHING bit of the state; the output is produced by deflate(). + */ + void flush() { + state |= IS_FLUSHING; + } + + /** + * Finishes the deflater with the current input block. It is an error to + * give more input after this method was called. This method must be called + * to force all bytes to be flushed. It sets the IS_FLUSHING and + * IS_FINISHING bits of the state, after which setInput() throws + * IllegalStateException. + */ + public void finish() { + state |= IS_FLUSHING | IS_FINISHING; + } + + /** + * Returns true iff the stream was finished and no more output bytes are + * available. + * + * @return true when the state is FINISHED_STATE and the pending output + * buffer reports that it is flushed. + */ + public boolean finished() { + return (state == FINISHED_STATE) && pending.isFlushed(); + } + + /** + * Returns true, if the input buffer is empty. You should then call + * setInput().
+ * + * NOTE: This method can also return true when the stream was + * finished. + */ + public boolean needsInput() { + return engine.needsInput(); + } + + /** + * Sets the data which should be compressed next. This should be only called + * when needsInput indicates that more input is needed. If you call setInput + * when needsInput() returns false, the previous input that is still pending + * will be thrown away. The given byte array should not be changed, before + * needsInput() returns true again. This call is equivalent to + * setInput(input, 0, input.length). + * + * @param input + * the buffer containing the input data. + * @exception IllegalStateException + * if the buffer was finished() or ended(). + */ + public void setInput(final byte[] input) { + setInput(input, 0, input.length); + } + + /** + * Sets the data which should be compressed next. This should be only called + * when needsInput indicates that more input is needed. The given byte array + * should not be changed, before needsInput() returns true again. + * + * @param input + * the buffer containing the input data. + * @param off + * the start of the data. + * @param len + * the length of the data. + * @exception IllegalStateException + * if the buffer was finished() or ended() or if previous + * input is still pending. + */ + public void setInput(final byte[] input, final int off, final int len) { + if ((state & IS_FINISHING) != 0) { + throw new IllegalStateException("finish()/end() already called"); + } + engine.setInput(input, off, len); + } + + /** + * Sets the compression level. There is no guarantee of the exact position + * of the change, but if you call this when needsInput is true the change of + * compression level will occur somewhere near before the end of the so far + * given input. + * + * @param lvl + * the new compression level. 
+ */ + public void setLevel(int lvl) { + /* DEFAULT_COMPRESSION is an alias for level 6; anything else must lie in NO_COMPRESSION..BEST_COMPRESSION. */ if (lvl == DEFAULT_COMPRESSION) { + lvl = 6; + } else if ((lvl < NO_COMPRESSION) || (lvl > BEST_COMPRESSION)) { + throw new IllegalArgumentException(); + } + + /* Only forward real changes to the engine. */ if (level != lvl) { + level = lvl; + engine.setLevel(lvl); + } + } + + /** + * Sets the compression strategy. Strategy is one of DEFAULT_STRATEGY, + * HUFFMAN_ONLY and FILTERED. For the exact position where the strategy is + * changed, the same as for setLevel() applies. + * + * @param stgy + * the new compression strategy. + * @exception IllegalArgumentException + * if stgy is not one of DEFAULT_STRATEGY, FILTERED or + * HUFFMAN_ONLY. + */ + public void setStrategy(final int stgy) { + if ((stgy != DEFAULT_STRATEGY) && (stgy != FILTERED) + && (stgy != HUFFMAN_ONLY)) { + throw new IllegalArgumentException(); + } + engine.setStrategy(stgy); + } + + /** + * Deflates the current input block to the given array. It returns the + * number of compressed bytes written to output, or 0 if either + * needsInput() or finished() returns true or length is zero. This call is + * equivalent to deflate(output, 0, output.length). + * + * @param output + * the buffer where to write the compressed data. + * @return the number of bytes written into output. + */ + public int deflate(final byte[] output) { + return deflate(output, 0, output.length); + } + + /** + * Deflates the current input block to the given array. It returns the + * number of bytes compressed, or 0 if either needsInput() or finished() + * returns true or length is zero. + * + * @param output + * the buffer where to write the compressed data. + * @param offset + * the offset into the output array. + * @param length + * the maximum number of bytes that may be written. + * @exception IllegalStateException + * if end() was called. + * @exception IndexOutOfBoundsException + * if offset and/or length don't match the array length.
+ */ + public int deflate(final byte[] output, int offset, int length) { + final int origLength = length; + + if (state == CLOSED_STATE) { + throw new IllegalStateException("Deflater closed"); + } + + if (state < BUSY_STATE) { + /* output header */ + int header = (DEFLATED + ((DeflaterConstants.MAX_WBITS - 8) << 4)) << 8; + int level_flags = (level - 1) >> 1; + if ((level_flags < 0) || (level_flags > 3)) { + level_flags = 3; + } + header |= level_flags << 6; + if ((state & IS_SETDICT) != 0) { + /* Dictionary was set */ + header |= DeflaterConstants.PRESET_DICT; + } + header += 31 - (header % 31); + + pending.writeShortMSB(header); + if ((state & IS_SETDICT) != 0) { + final int chksum = engine.getAdler(); + engine.resetAdler(); + pending.writeShortMSB(chksum >> 16); + pending.writeShortMSB(chksum & 0xffff); + } + + state = BUSY_STATE | (state & (IS_FLUSHING | IS_FINISHING)); + } + + for (;;) { + final int count = pending.flush(output, offset, length); + offset += count; + totalOut += count; + length -= count; + if ((length == 0) || (state == FINISHED_STATE)) { + break; + } + + if (!engine.deflate((state & IS_FLUSHING) != 0, + (state & IS_FINISHING) != 0)) { + if (state == BUSY_STATE) { + /* We need more input now */ + return origLength - length; + } else if (state == FLUSHING_STATE) { + if (level != NO_COMPRESSION) { + /* + * We have to supply some lookahead. 8 bit lookahead are + * needed by the zlib inflater, and we must fill the + * next byte, so that all bits are flushed. 
+ */ + int neededbits = 8 + ((-pending.getBitCount()) & 7); + while (neededbits > 0) { + /* + * write a static tree block consisting solely of an + * EOF: + */ + pending.writeBits(2, 10); + neededbits -= 10; + } + } + state = BUSY_STATE; + } else if (state == FINISHING_STATE) { + pending.alignToByte(); + /* We have completed the stream */ + if (!noHeader) { + final int adler = engine.getAdler(); + pending.writeShortMSB(adler >> 16); + pending.writeShortMSB(adler & 0xffff); + } + state = FINISHED_STATE; + } + } + } + + return origLength - length; + } + + /** + * Sets the dictionary which should be used in the deflate process. This + * call is equivalent to setDictionary(dict, 0, + * dict.length). + * + * @param dict + * the dictionary. + * @exception IllegalStateException + * if setInput () or deflate () were already called or + * another dictionary was already set. + */ + public void setDictionary(final byte[] dict) { + setDictionary(dict, 0, dict.length); + } + + /** + * Sets the dictionary which should be used in the deflate process. The + * dictionary should be a byte array containing strings that are likely to + * occur in the data which should be compressed. The dictionary is not + * stored in the compressed output, only a checksum. To decompress the + * output you need to supply the same dictionary again. + * + * @param dict + * the dictionary. + * @param offset + * an offset into the dictionary. + * @param length + * the length of the dictionary. + * @exception IllegalStateException + * if setInput () or deflate () were already called or + * another dictionary was already set. 
+ */ + public void setDictionary(final byte[] dict, final int offset, + final int length) { + if (state != INIT_STATE) { + throw new IllegalStateException(); + } + + state = SETDICT_STATE; + engine.setDictionary(dict, offset, length); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/DeflaterConstants.java b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterConstants.java new file mode 100644 index 00000000..b3985f99 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterConstants.java @@ -0,0 +1,77 @@ +/* net.sf.jazzlib.DeflaterConstants + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. 
+ +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +interface DeflaterConstants { + final static boolean DEBUGGING = false; + + final static int STORED_BLOCK = 0; + final static int STATIC_TREES = 1; + final static int DYN_TREES = 2; + final static int PRESET_DICT = 0x20; + + final static int DEFAULT_MEM_LEVEL = 8; + + final static int MAX_MATCH = 258; + final static int MIN_MATCH = 3; + + final static int MAX_WBITS = 15; + final static int WSIZE = 1 << MAX_WBITS; + final static int WMASK = WSIZE - 1; + + final static int HASH_BITS = DEFAULT_MEM_LEVEL + 7; + final static int HASH_SIZE = 1 << HASH_BITS; + final static int HASH_MASK = HASH_SIZE - 1; + final static int HASH_SHIFT = ((HASH_BITS + MIN_MATCH) - 1) / MIN_MATCH; + + final static int MIN_LOOKAHEAD = MAX_MATCH + MIN_MATCH + 1; + final static int MAX_DIST = WSIZE - MIN_LOOKAHEAD; + + final static int PENDING_BUF_SIZE = 1 << (DEFAULT_MEM_LEVEL + 8); + final static int MAX_BLOCK_SIZE = Math.min(65535, PENDING_BUF_SIZE - 5); + + final static int DEFLATE_STORED = 0; + final static int DEFLATE_FAST = 1; + final static int DEFLATE_SLOW = 2; + + final static int GOOD_LENGTH[] = { 0, 4, 4, 4, 4, 8, 8, 8, 32, 32 }; + final static int MAX_LAZY[] = { 0, 4, 5, 6, 4, 16, 16, 32, 128, 258 }; + final static int NICE_LENGTH[] = { 0, 8, 
16, 32, 16, 32, 128, 128, 258, 258 }; + final static int MAX_CHAIN[] = { 0, 4, 8, 32, 16, 32, 128, 256, 1024, 4096 }; + final static int COMPR_FUNC[] = { 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/DeflaterEngine.java b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterEngine.java new file mode 100644 index 00000000..814d0c32 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterEngine.java @@ -0,0 +1,674 @@ +/* net.sf.jazzlib.DeflaterEngine + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. 
An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +class DeflaterEngine implements DeflaterConstants { + private final static int TOO_FAR = 4096; + + private int ins_h; + + /** + * Hashtable, hashing three characters to an index for window, so that + * window[index]..window[index+2] have this hash code. Note that the array + * should really be unsigned short, so you need to and the values with + * 0xffff. + */ + private final short[] head; + + /** + * prev[index & WMASK] points to the previous index that has the same hash + * code as the string starting at index. This way entries with the same hash + * code are in a linked list. Note that the array should really be unsigned + * short, so you need to and the values with 0xffff. + */ + private final short[] prev; + + private int matchStart, matchLen; + private boolean prevAvailable; + private int blockStart; + + /** + * strstart points to the current character in window. + */ + private int strstart; + + /** + * lookahead is the number of characters starting at strstart in window that + * are valid. So window[strstart] until window[strstart+lookahead-1] are + * valid characters. + */ + private int lookahead; + + /** + * This array contains the part of the uncompressed stream that is of + * relevance. The current character is indexed by strstart. + */ + private final byte[] window; + + private int strategy, max_chain, max_lazy, niceLength, goodLength; + + /** The current compression function. */ + private int comprFunc; + + /** The input data for compression. */ + private byte[] inputBuf; + + /** The total bytes of input read. */ + private int totalIn; + + /** The offset into inputBuf, where input data starts. 
*/ + private int inputOff; + + /** The end offset (exclusive) of the input data. */ + private int inputEnd; + + /** Output buffer handed in by the constructor and passed on to the Huffman encoder. */ private final DeflaterPending pending; + /** Huffman encoder; receives the tallyLit()/tallyDist() calls and flushes blocks. */ private final DeflaterHuffman huffman; + + /** The adler checksum, updated with the input copied into the window and with a preset dictionary. */ + private final Adler32 adler; + + /* + * DEFLATE ALGORITHM: + * + * The uncompressed stream is inserted into the window array. When the + * window array is full the first half is thrown away and the second half is + * copied to the beginning. + * + * The head array is a hash table. Three characters build a hash value and + * the value points to the corresponding index in window of the last + * string with this hash. The prev array implements a linked list of matches + * with the same hash: prev[index & WMASK] points to the previous index with + * the same hash. + */ + + DeflaterEngine(final DeflaterPending pending) { + this.pending = pending; + huffman = new DeflaterHuffman(pending); + adler = new Adler32(); + + /* The window holds two WSIZE halves so that the upper half can be moved down by slideWindow(). */ window = new byte[2 * WSIZE]; + head = new short[HASH_SIZE]; + prev = new short[WSIZE]; + + /* + * We start at index 1, to avoid a implementation deficiency, that we + * cannot build a repeat pattern at index 0.
+ */ + blockStart = strstart = 1; + } + + public void reset() { + huffman.reset(); + adler.reset(); + blockStart = strstart = 1; + lookahead = 0; + totalIn = 0; + prevAvailable = false; + matchLen = MIN_MATCH - 1; + for (int i = 0; i < HASH_SIZE; i++) { + head[i] = 0; + } + for (int i = 0; i < WSIZE; i++) { + prev[i] = 0; + } + } + + public final void resetAdler() { + adler.reset(); + } + + public final int getAdler() { + final int chksum = (int) adler.getValue(); + return chksum; + } + + public final int getTotalIn() { + return totalIn; + } + + public final void setStrategy(final int strat) { + strategy = strat; + } + + public void setLevel(final int lvl) { + goodLength = DeflaterConstants.GOOD_LENGTH[lvl]; + max_lazy = DeflaterConstants.MAX_LAZY[lvl]; + niceLength = DeflaterConstants.NICE_LENGTH[lvl]; + max_chain = DeflaterConstants.MAX_CHAIN[lvl]; + + if (DeflaterConstants.COMPR_FUNC[lvl] != comprFunc) { + if (DeflaterConstants.DEBUGGING) { + System.err.println("Change from " + comprFunc + " to " + + DeflaterConstants.COMPR_FUNC[lvl]); + } + switch (comprFunc) { + case DEFLATE_STORED: + if (strstart > blockStart) { + huffman.flushStoredBlock(window, blockStart, strstart + - blockStart, false); + blockStart = strstart; + } + updateHash(); + break; + case DEFLATE_FAST: + if (strstart > blockStart) { + huffman.flushBlock(window, blockStart, strstart + - blockStart, false); + blockStart = strstart; + } + break; + case DEFLATE_SLOW: + if (prevAvailable) { + huffman.tallyLit(window[strstart - 1] & 0xff); + } + if (strstart > blockStart) { + huffman.flushBlock(window, blockStart, strstart + - blockStart, false); + blockStart = strstart; + } + prevAvailable = false; + matchLen = MIN_MATCH - 1; + break; + } + comprFunc = COMPR_FUNC[lvl]; + } + } + + private final void updateHash() { + if (DEBUGGING) { + System.err.println("updateHash: " + strstart); + } + ins_h = (window[strstart] << HASH_SHIFT) ^ window[strstart + 1]; + } + + /** + * Inserts the current string in the 
head hash and returns the previous + * value for this hash. + */ + private final int insertString() { + short match; + final int hash = ((ins_h << HASH_SHIFT) ^ window[strstart + + (MIN_MATCH - 1)]) + & HASH_MASK; + + if (DEBUGGING) { + if (hash != (((window[strstart] << (2 * HASH_SHIFT)) + ^ (window[strstart + 1] << HASH_SHIFT) ^ (window[strstart + 2])) & HASH_MASK)) { + throw new InternalError("hash inconsistent: " + hash + "/" + + window[strstart] + "," + window[strstart + 1] + "," + + window[strstart + 2] + "," + HASH_SHIFT); + } + } + + prev[strstart & WMASK] = match = head[hash]; + head[hash] = (short) strstart; + ins_h = hash; + return match & 0xffff; + } + + private void slideWindow() { + System.arraycopy(window, WSIZE, window, 0, WSIZE); + matchStart -= WSIZE; + strstart -= WSIZE; + blockStart -= WSIZE; + + /* + * Slide the hash table (could be avoided with 32 bit values at the + * expense of memory usage). + */ + for (int i = 0; i < HASH_SIZE; i++) { + final int m = head[i] & 0xffff; + head[i] = m >= WSIZE ? (short) (m - WSIZE) : 0; + } + + /* + * Slide the prev table. + */ + for (int i = 0; i < WSIZE; i++) { + final int m = prev[i] & 0xffff; + prev[i] = m >= WSIZE ? (short) (m - WSIZE) : 0; + } + } + + /** + * Fill the window when the lookahead becomes insufficient. Updates strstart + * and lookahead. + * + * OUT assertions: strstart + lookahead <= 2*WSIZE lookahead >= + * MIN_LOOKAHEAD or inputOff == inputEnd + */ + private void fillWindow() { + /* + * If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. 
+ */ + if (strstart >= (WSIZE + MAX_DIST)) { + slideWindow(); + } + + /* + * If there is not enough lookahead, but still some input left, read in + * the input + */ + while ((lookahead < DeflaterConstants.MIN_LOOKAHEAD) + && (inputOff < inputEnd)) { + int more = (2 * WSIZE) - lookahead - strstart; + + if (more > (inputEnd - inputOff)) { + more = inputEnd - inputOff; + } + + System.arraycopy(inputBuf, inputOff, window, strstart + lookahead, + more); + adler.update(inputBuf, inputOff, more); + inputOff += more; + totalIn += more; + lookahead += more; + } + + if (lookahead >= MIN_MATCH) { + updateHash(); + } + }

    /**
     * Find the best (longest) string in the window matching the string starting
     * at strstart.
     *
     * Preconditions: strstart + MAX_MATCH <= window.length.
     *
     * The candidates are visited by following the hash chain stored in
     * {@code prev}, starting at {@code curMatch}. The walk stops when the next
     * chain entry is not greater than {@code strstart - MAX_DIST} (floored at
     * 0), when {@code max_chain} candidates have been visited (a quarter of
     * that if the incoming match already reaches {@code goodLength}), or when
     * a match of at least {@code niceLength} bytes has been found.
     *
     * Side effects: {@code matchStart} is set to the start of the best
     * candidate whenever a longer match is found, and {@code matchLen} is set
     * to the best length, clamped to {@code lookahead}.
     *
     * @param curMatch
     *            window index of the first candidate on the hash chain.
     * @return true if the resulting {@code matchLen} is at least MIN_MATCH.
     */
    private boolean findLongestMatch(int curMatch) {
        int chainLength = this.max_chain;
        int niceLength = this.niceLength;
        final short[] prev = this.prev;
        int scan = this.strstart;
        int match;
        /* Index of the last byte covered by the best match known so far. */
        int best_end = this.strstart + matchLen;
        int best_len = Math.max(matchLen, MIN_MATCH - 1);

        /* Chain entries at or below this index are too far back to be used. */
        final int limit = Math.max(strstart - MAX_DIST, 0);

        final int strend = (scan + MAX_MATCH) - 1;
        byte scan_end1 = window[best_end - 1];
        byte scan_end = window[best_end];

        /* Do not waste too much time if we already have a good match: */
        if (best_len >= this.goodLength) {
            chainLength >>= 2;
        }

        /*
         * Do not look for matches beyond the end of the input. This is
         * necessary to make deflate deterministic.
         */
        if (niceLength > lookahead) {
            niceLength = lookahead;
        }

        if (DeflaterConstants.DEBUGGING
                && (strstart > ((2 * WSIZE) - MIN_LOOKAHEAD))) {
            throw new InternalError("need lookahead");
        }

        do {
            if (DeflaterConstants.DEBUGGING && (curMatch >= strstart)) {
                throw new InternalError("future match");
            }
            /*
             * Cheap rejection test: a candidate can only beat the current
             * best if it agrees on the two bytes at the end of the current
             * best length and on the first two bytes. "continue" jumps to the
             * loop condition below, which advances curMatch along the chain.
             */
            if ((window[curMatch + best_len] != scan_end)
                    || (window[(curMatch + best_len) - 1] != scan_end1)
                    || (window[curMatch] != window[scan])
                    || (window[curMatch + 1] != window[scan + 1])) {
                continue;
            }

            /* The first two bytes are already known to be equal. */
            match = curMatch + 2;
            scan += 2;

            /*
             * We check for insufficient lookahead only every 8th comparison;
             * the 256th check will be made at strstart+258.
             */
            while ((window[++scan] == window[++match])
                    && (window[++scan] == window[++match])
                    && (window[++scan] == window[++match])
                    && (window[++scan] == window[++match])
                    && (window[++scan] == window[++match])
                    && (window[++scan] == window[++match])
                    && (window[++scan] == window[++match])
                    && (window[++scan] == window[++match]) && (scan < strend)) {
                ;
            }

            if (scan > best_end) {
                // if (DeflaterConstants.DEBUGGING && ins_h == 0)
                // System.err.println("Found match: "+curMatch+"-"+(scan-strstart));
                matchStart = curMatch;
                best_end = scan;
                best_len = scan - strstart;
                if (best_len >= niceLength) {
                    break;
                }

                /* Refresh the end bytes used by the rejection test above. */
                scan_end1 = window[best_end - 1];
                scan_end = window[best_end];
            }
            /* Rewind the scan pointer for the next candidate. */
            scan = strstart;
        } while (((curMatch = (prev[curMatch & WMASK] & 0xffff)) > limit)
                && (--chainLength != 0));

        /* The unrolled scan may overrun the valid input; clamp to lookahead. */
        matchLen = Math.min(best_len, lookahead);
        return matchLen >= MIN_MATCH;
    }

    /**
     * Preloads the window with a dictionary.
     *
     * The whole dictionary is fed into the Adler checksum. If it is at least
     * MIN_MATCH bytes long, at most its last MAX_DIST bytes are copied into
     * the window at {@code strstart}, hash entries are inserted for them, and
     * {@code blockStart} is moved to the new {@code strstart} so the
     * dictionary bytes are not part of the next block.
     *
     * @param buffer
     *            the array holding the dictionary.
     * @param offset
     *            index of the first dictionary byte in {@code buffer}.
     * @param length
     *            number of dictionary bytes.
     * @throws IllegalStateException
     *             only when DeflaterConstants.DEBUGGING is set and
     *             {@code strstart} is not 1.
     */
    void setDictionary(final byte[] buffer, int offset, int length) {
        if (DeflaterConstants.DEBUGGING && (strstart != 1)) {
            throw new IllegalStateException("strstart not 1");
        }
        adler.update(buffer, offset, length);
        if (length < MIN_MATCH) {
            /* Too short to produce any match; only the checksum is affected. */
            return;
        }
        if (length > MAX_DIST) {
            /* Keep only the tail that can still be referenced by a match. */
            offset += length - MAX_DIST;
            length = MAX_DIST;
        }

        System.arraycopy(buffer, offset, window, strstart, length);

        updateHash();
        /*
         * Insert a hash entry for every position except the last two, which
         * are skipped by the "strstart += 2" below.
         */
        length--;
        while (--length > 0) {
            insertString();
            strstart++;
        }
        strstart += 2;
        blockStart = strstart;
    }

    /**
     * Compression function that emits the input as stored blocks.
     *
     * All available lookahead is consumed at once. A stored block is written
     * when the pending data reaches DeflaterConstants.MAX_BLOCK_SIZE, when the
     * block start could otherwise slide out of the window, or when flushing.
     *
     * @param flush
     *            true if pending data must be written out now.
     * @param finish
     *            true if the block written may be marked as the last block.
     * @return false if there was nothing to do or the last block was written,
     *         true otherwise.
     */
    private boolean deflateStored(final boolean flush, final boolean finish) {
        if (!flush && (lookahead == 0)) {
            return false;
        }

        strstart += lookahead;
        lookahead = 0;

        int storedLen = strstart - blockStart;

        if ((storedLen >= DeflaterConstants.MAX_BLOCK_SIZE)
        /* Block is full */
        || ((blockStart < WSIZE) && (storedLen >= MAX_DIST))
        /* Block may move out of window */
        || flush) {
            boolean lastBlock = finish;
            if (storedLen > DeflaterConstants.MAX_BLOCK_SIZE) {
                /* More data remains after this block, so it cannot be last. */
                storedLen = DeflaterConstants.MAX_BLOCK_SIZE;
                lastBlock = false;
            }

            if (DeflaterConstants.DEBUGGING) {
                System.err.println("storedBlock[" + storedLen + "," + lastBlock
                        + "]");
            }

            huffman.flushStoredBlock(window, blockStart, storedLen, lastBlock);
            blockStart += storedLen;
            return !lastBlock;
        }
        return true;
    }

    /**
     * Compression function that emits a match as soon as one is found at the
     * current position, and a literal otherwise.
     *
     * @param flush
     *            true if the remaining lookahead must be processed and the
     *            block flushed.
     * @param finish
     *            true if a flushed block may be marked as the last block.
     * @return false if more input is needed, if everything was flushed, or if
     *         the last block was written; true otherwise.
     */
    private boolean deflateFast(final boolean flush, final boolean finish) {
        if ((lookahead < MIN_LOOKAHEAD) && !flush) {
            return false;
        }

        while ((lookahead >= MIN_LOOKAHEAD) || flush) {
            if (lookahead == 0) {
                /* We are flushing everything */
                huffman.flushBlock(window, blockStart, strstart - blockStart,
                        finish);
                blockStart = strstart;
                return false;
            }

            if (strstart > ((2 * WSIZE) - MIN_LOOKAHEAD)) {
                /*
                 * slide window, as findLongestMatch needs this. This should
                 * only happen when flushing and the window is almost full.
                 */
                slideWindow();
            }

            int hashHead;
            if ((lookahead >= MIN_MATCH) && ((hashHead = insertString()) != 0)
                    && (strategy != Deflater.HUFFMAN_ONLY)
                    && ((strstart - hashHead) <= MAX_DIST)
                    && findLongestMatch(hashHead)) {
                /* longestMatch sets matchStart and matchLen */
                if (DeflaterConstants.DEBUGGING) {
                    for (int i = 0; i < matchLen; i++) {
                        if (window[strstart + i] != window[matchStart + i]) {
                            throw new InternalError();
                        }
                    }
                }
                huffman.tallyDist(strstart - matchStart, matchLen);

                lookahead -= matchLen;
                if ((matchLen <= max_lazy) && (lookahead >= MIN_MATCH)) {
                    /* Short match: insert a hash entry for every covered byte. */
                    while (--matchLen > 0) {
                        strstart++;
                        insertString();
                    }
                    strstart++;
                } else {
                    /* Long match: skip it and only re-seed the running hash. */
                    strstart += matchLen;
                    if (lookahead >= (MIN_MATCH - 1)) {
                        updateHash();
                    }
                }
                matchLen = MIN_MATCH - 1;
                /* Skips the isFull() check below for this iteration. */
                continue;
            } else {
                /* No match found */
                huffman.tallyLit(window[strstart] & 0xff);
                strstart++;
                lookahead--;
            }

            if (huffman.isFull()) {
                final boolean lastBlock = finish && (lookahead == 0);
                huffman.flushBlock(window, blockStart, strstart - blockStart,
                        lastBlock);
                blockStart = strstart;
                return !lastBlock;
            }
        }
        return true;
    }

    /**
     * Compression function that defers each decision by one position: the
     * match found at the previous position is emitted only if the match at
     * the current position is not longer; otherwise the previous byte is
     * emitted as a literal and the current match becomes the new candidate.
     *
     * {@code prevAvailable} records that the byte at {@code strstart - 1} has
     * not been emitted yet.
     *
     * @param flush
     *            true if the remaining lookahead must be processed and the
     *            block flushed.
     * @param finish
     *            true if a flushed block may be marked as the last block.
     * @return false if more input is needed, if everything was flushed, or if
     *         the last block was written; true otherwise.
     */
    private boolean deflateSlow(final boolean flush, final boolean finish) {
        if ((lookahead < MIN_LOOKAHEAD) && !flush) {
            return false;
        }

        while ((lookahead >= MIN_LOOKAHEAD) || flush) {
            if (lookahead == 0) {
                /* Emit the byte that was still being held back. */
                if (prevAvailable) {
                    huffman.tallyLit(window[strstart - 1] & 0xff);
                }
                prevAvailable = false;

                /* We are flushing everything */
                if (DeflaterConstants.DEBUGGING && !flush) {
                    throw new InternalError("Not flushing, but no lookahead");
                }
                huffman.flushBlock(window, blockStart, strstart - blockStart,
                        finish);
                blockStart = strstart;
                return false;
            }

            if (strstart >= ((2 * WSIZE) - MIN_LOOKAHEAD)) {
                /*
                 * slide window, as findLongestMatch needs this. This should
                 * only happen when flushing and the window is almost full.
                 */
                slideWindow();
            }

            /* Remember the match found at the previous position. */
            final int prevMatch = matchStart;
            int prevLen = matchLen;
            if (lookahead >= MIN_MATCH) {
                final int hashHead = insertString();
                if ((strategy != Deflater.HUFFMAN_ONLY) && (hashHead != 0)
                        && ((strstart - hashHead) <= MAX_DIST)
                        && findLongestMatch(hashHead)) {
                    /* longestMatch sets matchStart and matchLen */

                    /* Discard match if too small and too far away */
                    if ((matchLen <= 5)
                            && ((strategy == Deflater.FILTERED) || ((matchLen == MIN_MATCH) && ((strstart - matchStart) > TOO_FAR)))) {
                        matchLen = MIN_MATCH - 1;
                    }
                }
            }

            /* previous match was better */
            if ((prevLen >= MIN_MATCH) && (matchLen <= prevLen)) {
                if (DeflaterConstants.DEBUGGING) {
                    for (int i = 0; i < matchLen; i++) {
                        if (window[(strstart - 1) + i] != window[prevMatch + i]) {
                            throw new InternalError();
                        }
                    }
                }
                /* The previous match started one byte before strstart. */
                huffman.tallyDist(strstart - 1 - prevMatch, prevLen);
                prevLen -= 2;
                do {
                    strstart++;
                    lookahead--;
                    if (lookahead >= MIN_MATCH) {
                        insertString();
                    }
                } while (--prevLen > 0);
                strstart++;
                lookahead--;
                prevAvailable = false;
                matchLen = MIN_MATCH - 1;
            } else {
                /* The held-back byte loses against the new match: emit it. */
                if (prevAvailable) {
                    huffman.tallyLit(window[strstart - 1] & 0xff);
                }
                prevAvailable = true;
                strstart++;
                lookahead--;
            }

            if (huffman.isFull()) {
                int len = strstart - blockStart;
                /* A held-back byte is not part of the block being flushed. */
                if (prevAvailable) {
                    len--;
                }
                final boolean lastBlock = (finish && (lookahead == 0) && !prevAvailable);
                huffman.flushBlock(window, blockStart, len, lastBlock);
                blockStart += len;
                return !lastBlock;
            }
        }
        return true;
    }

    /**
     * Runs the compression function selected by {@code comprFunc} until it
     * reports no progress or there is pending output.
     *
     * Each round first refills the window from the input buffer. Flushing is
     * only passed on to the compression function once the input buffer has
     * been consumed completely.
     *
     * @param flush
     *            true if all input should be flushed to the pending buffer.
     * @param finish
     *            true if the stream should be finished.
     * @return the progress flag returned by the last call of the compression
     *         function.
     */
    public boolean deflate(final boolean flush, final boolean finish) {
        boolean progress;
        do {
            fillWindow();
            final boolean canFlush = flush && (inputOff == inputEnd);
            if (DeflaterConstants.DEBUGGING) {
                System.err.println("window: [" + blockStart + "," + strstart
                        + "," + lookahead + "], " + comprFunc + "," + canFlush);
            }
            switch (comprFunc) {
            case DEFLATE_STORED:
                progress = deflateStored(canFlush, finish);
                break;
            case DEFLATE_FAST:
                progress = deflateFast(canFlush, finish);
                break;
            case DEFLATE_SLOW:
                progress = deflateSlow(canFlush, finish);
                break;
            default:
                throw new InternalError();
            }
        } while (pending.isFlushed() /* repeat while we have no pending output */
                && progress); /* and progress was made */

        return progress;
    }

    /**
     * Sets the input buffer to compress from.
     *
     * The array is not copied; the engine keeps a reference to it.
     *
     * @param buf
     *            the array holding the input.
     * @param off
     *            index of the first input byte.
     * @param len
     *            number of input bytes.
     * @throws IllegalStateException
     *             if the previous input has not been consumed completely.
     * @throws ArrayIndexOutOfBoundsException
     *             if the range {@code off .. off + len} is not inside
     *             {@code buf}.
     */
    public void setInput(final byte[] buf, final int off, final int len) {
        if (inputOff < inputEnd) {
            throw new IllegalStateException(
                    "Old input was not completely processed");
        }

        final int end = off + len;

        /*
         * We want to throw an ArrayIndexOutOfBoundsException early. The check
         * is very tricky: it also handles integer wrap around.
         */
        if ((0 > off) || (off > end) || (end > buf.length)) {
            throw new ArrayIndexOutOfBoundsException();
        }

        inputBuf = buf;
        inputOff = off;
        inputEnd = end;
    }

    /**
     * Tells whether the input buffer has been consumed completely.
     *
     * @return true if there is no unread input left.
     */
    public final boolean needsInput() {
        return inputEnd == inputOff;
    }
}
diff --git a/epublib-core/src/main/java/net/sf/jazzlib/DeflaterHuffman.java b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterHuffman.java new file mode 100644 index 00000000..75913ac6 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterHuffman.java @@ -0,0 +1,748 @@ +/* net.sf.jazzlib.DeflaterHuffman + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING.
If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/** + * This is the DeflaterHuffman class. + * + * This class is not thread safe. This is inherent in the API, due to the + * split of deflate and setInput. 
+ * + * @author Jochen Hoenicke + * @date Jan 6, 2000 + */
class DeflaterHuffman {
    /** Capacity of the literal/distance buffers; see {@link #isFull()}. */
    private static final int BUFSIZE = 1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6);
    /** Number of symbols in the literal/length tree. */
    private static final int LITERAL_NUM = 286;
    /** Number of symbols in the distance tree. */
    private static final int DIST_NUM = 30;
    /** Number of symbols in the bit-length tree. */
    private static final int BITLEN_NUM = 19;
    /** Bit-length symbol: repeat previous length; followed by 2 extra bits. */
    private static final int REP_3_6 = 16;
    /** Bit-length symbol: run of zero lengths; followed by 3 extra bits. */
    private static final int REP_3_10 = 17;
    /** Bit-length symbol: longer run of zero lengths; followed by 7 extra bits. */
    private static final int REP_11_138 = 18;
    /** Literal tree symbol written at the end of every compressed block. */
    private static final int EOF_SYMBOL = 256;
    /** Order in which the bit-length code lengths are sent in sendAllTrees. */
    private static final int[] BL_ORDER = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
            11, 4, 12, 3, 13, 2, 14, 1, 15 };

    /** Lookup table: the char at index n is n with its four bits reversed. */
    private final static String bit4Reverse = "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017";

    /**
     * One Huffman tree (literal/length, distance or bit-length): the symbol
     * frequencies collected for the current block and the codes and code
     * lengths derived from them. Writes go to the enclosing instance's
     * {@code pending} buffer.
     */
    class Tree {
        /** Frequency of each symbol in the current block. */
        short[] freqs;
        /** Code of each symbol, already bit-reversed for writing. */
        short[] codes;
        /** Code length in bits of each symbol; 0 means the symbol is unused. */
        byte[] length;
        /** bl_counts[i] is the number of symbols with code length i + 1. */
        int[] bl_counts;
        int minNumCodes, numCodes;
        /** Maximum code length allowed for this tree. */
        int maxLength;

        /**
         * @param elems
         *            number of symbols of this tree.
         * @param minCodes
         *            lower bound for {@code numCodes}.
         * @param maxLength
         *            maximum code length in bits.
         */
        Tree(final int elems, final int minCodes, final int maxLength) {
            this.minNumCodes = minCodes;
            this.maxLength = maxLength;
            freqs = new short[elems];
            bl_counts = new int[maxLength];
        }

        /** Clears all frequencies and drops the codes and lengths. */
        void reset() {
            for (int i = 0; i < freqs.length; i++) {
                freqs[i] = 0;
            }
            codes = null;
            length = null;
        }

        /**
         * Writes the code of the given symbol to the pending buffer.
         *
         * When debugging, the symbol's frequency is decremented so that
         * {@link #checkEmpty()} can verify that every tallied symbol has been
         * written.
         */
        final void writeSymbol(final int code) {
            if (DeflaterConstants.DEBUGGING) {
                freqs[code]--;
                // System.err.print("writeSymbol("+freqs.length+","+code+"): ");
            }
            pending.writeBits(codes[code] & 0xffff, length[code]);
        }

        /**
         * Debugging aid: throws an InternalError if any frequency is not zero,
         * after printing the offending entries to System.err.
         */
        final void checkEmpty() {
            boolean empty = true;
            for (int i = 0; i < freqs.length; i++) {
                if (freqs[i] != 0) {
                    System.err.println("freqs[" + i + "] == " + freqs[i]);
                    empty = false;
                }
            }
            if (!empty) {
                throw new InternalError();
            }
            System.err.println("checkEmpty suceeded!");
        }

        /** Installs precomputed codes and lengths; the arrays are not copied. */
        void setStaticCodes(final short[] stCodes, final byte[] stLength) {
            codes = stCodes;
            length = stLength;
        }

        /**
         * Assigns a code to every symbol with a non-zero length, based on
         * {@code length} and {@code bl_counts}. Codes are handed out in
         * increasing symbol order within each length and stored bit-reversed.
         */
        public void buildCodes() {
            final int[] nextCode = new int[maxLength];
            int code = 0;
            codes = new short[freqs.length];

            if (DeflaterConstants.DEBUGGING) {
                System.err.println("buildCodes: " + freqs.length);
            }
            /*
             * Codes are kept left-aligned in 16 bits here; nextCode[bits] is
             * the first code of length bits + 1.
             */
            for (int bits = 0; bits < maxLength; bits++) {
                nextCode[bits] = code;
                code += bl_counts[bits] << (15 - bits);
                if (DeflaterConstants.DEBUGGING) {
                    System.err.println("bits: " + (bits + 1) + " count: "
                            + bl_counts[bits] + " nextCode: "
                            + Integer.toHexString(code));
                }
            }
            if (DeflaterConstants.DEBUGGING && (code != 65536)) {
                throw new RuntimeException("Inconsistent bl_counts!");
            }

            for (int i = 0; i < numCodes; i++) {
                final int bits = length[i];
                if (bits > 0) {
                    if (DeflaterConstants.DEBUGGING) {
                        System.err.println("codes[" + i + "] = rev("
                                + Integer.toHexString(nextCode[bits - 1])
                                + ")," + bits);
                    }
                    codes[i] = bitReverse(nextCode[bits - 1]);
                    nextCode[bits - 1] += 1 << (16 - bits);
                }
            }
        }

        /**
         * Computes the code length of every symbol from the tree built by
         * {@link #buildTree()} and limits the lengths to {@code maxLength}.
         *
         * @param childs
         *            the tree as pairs: entries 2*i and 2*i+1 are the children
         *            of node i. For a leaf, entry 2*i+1 is -1 and entry 2*i is
         *            the symbol. The root is the last node.
         */
        private void buildLength(final int childs[]) {
            this.length = new byte[freqs.length];
            final int numNodes = childs.length / 2;
            final int numLeafs = (numNodes + 1) / 2;
            int overflow = 0;

            for (int i = 0; i < maxLength; i++) {
                bl_counts[i] = 0;
            }

            /* First calculate optimal bit lengths */
            final int lengths[] = new int[numNodes];
            lengths[numNodes - 1] = 0;
            /* Walk from the root downwards; children get parent depth + 1. */
            for (int i = numNodes - 1; i >= 0; i--) {
                if (childs[(2 * i) + 1] != -1) {
                    int bitLength = lengths[i] + 1;
                    if (bitLength > maxLength) {
                        bitLength = maxLength;
                        overflow++;
                    }
                    lengths[childs[2 * i]] = lengths[childs[(2 * i) + 1]] = bitLength;
                } else {
                    /* A leaf node */
                    final int bitLength = lengths[i];
                    bl_counts[bitLength - 1]++;
                    this.length[childs[2 * i]] = (byte) lengths[i];
                }
            }

            if (DeflaterConstants.DEBUGGING) {
                System.err.println("Tree " + freqs.length + " lengths:");
                for (int i = 0; i < numLeafs; i++) {
                    System.err.println("Node " + childs[2 * i] + " freq: "
                            + freqs[childs[2 * i]] + " len: "
                            + length[childs[2 * i]]);
                }
            }

            if (overflow == 0) {
                return;
            }

            int incrBitLen = maxLength - 1;
            do {
                /* Find the first bit length which could increase: */
                while (bl_counts[--incrBitLen] == 0) {
                    ;
                }

                /*
                 * Move this node one down and remove a corresponding amount of
                 * overflow nodes.
                 */
                do {
                    bl_counts[incrBitLen]--;
                    bl_counts[++incrBitLen]++;
                    overflow -= 1 << (maxLength - 1 - incrBitLen);
                } while ((overflow > 0) && (incrBitLen < (maxLength - 1)));
            } while (overflow > 0);

            /*
             * We may have overshot above. Move some nodes from maxLength to
             * maxLength-1 in that case.
             */
            bl_counts[maxLength - 1] += overflow;
            bl_counts[maxLength - 2] -= overflow;

            /*
             * Now recompute all bit lengths, scanning in increasing frequency.
             * It is simpler to reconstruct all lengths instead of fixing only
             * the wrong ones. This idea is taken from 'ar' written by Haruhiko
             * Okumura.
             *
             * The nodes were inserted with decreasing frequency into the childs
             * array.
             */
            int nodePtr = 2 * numLeafs;
            for (int bits = maxLength; bits != 0; bits--) {
                int n = bl_counts[bits - 1];
                while (n > 0) {
                    final int childPtr = 2 * childs[nodePtr++];
                    if (childs[childPtr + 1] == -1) {
                        /* We found another leaf */
                        length[childs[childPtr]] = (byte) bits;
                        n--;
                    }
                }
            }
            if (DeflaterConstants.DEBUGGING) {
                System.err.println("*** After overflow elimination. ***");
                for (int i = 0; i < numLeafs; i++) {
                    System.err.println("Node " + childs[2 * i] + " freq: "
                            + freqs[childs[2 * i]] + " len: "
                            + length[childs[2 * i]]);
                }
            }
        }

        /**
         * Builds the Huffman tree for the current frequencies, sets
         * {@code numCodes} and computes the code lengths via
         * {@link #buildLength(int[])}.
         */
        void buildTree() {
            final int numSymbols = freqs.length;

            /*
             * heap is a priority queue, sorted by frequency, least frequent
             * nodes first. The heap is a binary tree, with the property, that
             * the parent node is smaller than both child nodes. This assures
             * that the smallest node is the first parent.
             *
             * The binary tree is encoded in an array: 0 is root node and the
             * nodes 2*n+1, 2*n+2 are the child nodes of node n.
             */
            final int[] heap = new int[numSymbols];
            int heapLen = 0;
            int maxCode = 0;
            for (int n = 0; n < numSymbols; n++) {
                final int freq = freqs[n];
                if (freq != 0) {
                    /* Insert n into heap */
                    int pos = heapLen++;
                    int ppos;
                    while ((pos > 0)
                            && (freqs[heap[ppos = (pos - 1) / 2]] > freq)) {
                        heap[pos] = heap[ppos];
                        pos = ppos;
                    }
                    heap[pos] = n;
                    maxCode = n;
                }
            }

            /*
             * We could encode a single literal with 0 bits but then we don't
             * see the literals. Therefore we force at least two literals to
             * avoid this case. We don't care about order in this case, both
             * literals get a 1 bit code.
             */
            while (heapLen < 2) {
                final int node = maxCode < 2 ? ++maxCode : 0;
                heap[heapLen++] = node;
            }

            numCodes = Math.max(maxCode + 1, minNumCodes);

            final int numLeafs = heapLen;
            final int[] childs = new int[(4 * heapLen) - 2];
            final int[] values = new int[(2 * heapLen) - 1];
            int numNodes = numLeafs;
            /*
             * Turn every heap entry into a leaf node. From here on the heap
             * holds node indices and is ordered by values[], which carries the
             * frequency shifted left by 8; the low byte is used when nodes are
             * combined (see mindepth below).
             */
            for (int i = 0; i < heapLen; i++) {
                final int node = heap[i];
                childs[2 * i] = node;
                childs[(2 * i) + 1] = -1;
                values[i] = freqs[node] << 8;
                heap[i] = i;
            }

            /*
             * Construct the Huffman tree by repeatedly combining the least two
             * frequent nodes.
             */
            do {
                final int first = heap[0];
                int last = heap[--heapLen];

                /* Propagate the hole to the leafs of the heap */
                int ppos = 0;
                int path = 1;
                while (path < heapLen) {
                    if (((path + 1) < heapLen)
                            && (values[heap[path]] > values[heap[path + 1]])) {
                        path++;
                    }

                    heap[ppos] = heap[path];
                    ppos = path;
                    path = (path * 2) + 1;
                }

                /*
                 * Now propagate the last element down along path. Normally it
                 * shouldn't go too deep.
                 */
                int lastVal = values[last];
                while (((path = ppos) > 0)
                        && (values[heap[ppos = (path - 1) / 2]] > lastVal)) {
                    heap[path] = heap[ppos];
                }
                heap[path] = last;

                final int second = heap[0];

                /* Create a new node father of first and second */
                last = numNodes++;
                childs[2 * last] = first;
                childs[(2 * last) + 1] = second;
                final int mindepth = Math.min(values[first] & 0xff,
                        values[second] & 0xff);
                values[last] = lastVal = ((values[first] + values[second]) - mindepth) + 1;

                /* Again, propagate the hole to the leafs */
                ppos = 0;
                path = 1;
                while (path < heapLen) {
                    if (((path + 1) < heapLen)
                            && (values[heap[path]] > values[heap[path + 1]])) {
                        path++;
                    }

                    heap[ppos] = heap[path];
                    ppos = path;
                    path = (ppos * 2) + 1;
                }

                /* Now propagate the new element down along path */
                while (((path = ppos) > 0)
                        && (values[heap[ppos = (path - 1) / 2]] > lastVal)) {
                    heap[path] = heap[ppos];
                }
                heap[path] = last;
            } while (heapLen > 1);

            /* The remaining heap entry must be the node created last. */
            if (heap[0] != ((childs.length / 2) - 1)) {
                throw new RuntimeException("Weird!");
            }

            buildLength(childs);
        }

        /**
         * @return the sum over all symbols of frequency times code length,
         *         i.e. the number of bits needed to write all tallied symbols
         *         with the current lengths.
         */
        int getEncodedLength() {
            int len = 0;
            for (int i = 0; i < freqs.length; i++) {
                len += freqs[i] * length[i];
            }
            return len;
        }

        /**
         * Tallies, into {@code blTree.freqs}, the bit-length symbols that
         * {@link #writeTree(Tree)} will write for this tree's code lengths.
         * Both methods must scan the lengths in exactly the same way.
         */
        void calcBLFreq(final Tree blTree) {
            int max_count; /* max repeat count */
            int min_count; /* min repeat count */
            int count; /* repeat count of the current code */
            int curlen = -1; /* length of current code */

            int i = 0;
            while (i < numCodes) {
                count = 1;
                final int nextlen = length[i];
                if (nextlen == 0) {
                    max_count = 138;
                    min_count = 3;
                } else {
                    max_count = 6;
                    min_count = 3;
                    if (curlen != nextlen) {
                        /* A new non-zero length is always sent literally once. */
                        blTree.freqs[nextlen]++;
                        count = 0;
                    }
                }
                curlen = nextlen;
                i++;

                /* Count how often the same length repeats. */
                while ((i < numCodes) && (curlen == length[i])) {
                    i++;
                    if (++count >= max_count) {
                        break;
                    }
                }

                if (count < min_count) {
                    blTree.freqs[curlen] += count;
                } else if (curlen != 0) {
                    blTree.freqs[REP_3_6]++;
                } else if (count <= 10) {
                    blTree.freqs[REP_3_10]++;
                } else {
                    blTree.freqs[REP_11_138]++;
                }
            }
        }

        /**
         * Writes this tree's code lengths to the pending buffer, run-length
         * encoded with the symbols of {@code blTree}.
         */
        void writeTree(final Tree blTree) {
            int max_count; /* max repeat count */
            int min_count; /* min repeat count */
            int count; /* repeat count of the current code */
            int curlen = -1; /* length of current code */

            int i = 0;
            while (i < numCodes) {
                count = 1;
                final int nextlen = length[i];
                if (nextlen == 0) {
                    max_count = 138;
                    min_count = 3;
                } else {
                    max_count = 6;
                    min_count = 3;
                    if (curlen != nextlen) {
                        blTree.writeSymbol(nextlen);
                        count = 0;
                    }
                }
                curlen = nextlen;
                i++;

                while ((i < numCodes) && (curlen == length[i])) {
                    i++;
                    if (++count >= max_count) {
                        break;
                    }
                }

                if (count < min_count) {
                    while (count-- > 0) {
                        blTree.writeSymbol(curlen);
                    }
                } else if (curlen != 0) {
                    blTree.writeSymbol(REP_3_6);
                    pending.writeBits(count - 3, 2);
                } else if (count <= 10) {
                    blTree.writeSymbol(REP_3_10);
                    pending.writeBits(count - 3, 3);
                } else {
                    blTree.writeSymbol(REP_11_138);
                    pending.writeBits(count - 11, 7);
                }
            }
        }
    }

    /** Output buffer that receives all bits written by this class. */
    DeflaterPending pending;
    private final Tree literalTree, distTree, blTree;

    /** Distance of each buffered entry; 0 marks a literal. */
    private final short d_buf[];
    /** Literal byte, or match length minus 3, of each buffered entry. */
    private final byte l_buf[];
    /** Number of entries currently held in d_buf / l_buf. */
    private int last_lit;
    /** Number of extra bits needed by the buffered length/distance codes. */
    private int extra_bits;

    private static short staticLCodes[];
    private static byte staticLLength[];
    private static short staticDCodes[];
    private static byte staticDLength[];

    /**
     * Reverse the bits of a 16 bit value.
     */
    static short bitReverse(final int value) {
        return (short) ((bit4Reverse.charAt(value & 0xf) << 12)
                | (bit4Reverse.charAt((value >> 4) & 0xf) << 8)
                | (bit4Reverse.charAt((value >> 8) & 0xf) << 4) | bit4Reverse
                .charAt(value >> 12));
    }

    static {
        /* See RFC 1951 3.2.6 */
        /* Literal codes */
        staticLCodes = new short[LITERAL_NUM];
        staticLLength = new byte[LITERAL_NUM];
        int i = 0;
        while (i < 144) {
            staticLCodes[i] = bitReverse((0x030 + i) << 8);
            staticLLength[i++] = 8;
        }
        while (i < 256) {
            staticLCodes[i] = bitReverse(((0x190 - 144) + i) << 7);
            staticLLength[i++] = 9;
        }
        while (i < 280) {
            staticLCodes[i] = bitReverse(((0x000 - 256) + i) << 9);
            staticLLength[i++] = 7;
        }
        while (i < LITERAL_NUM) {
            staticLCodes[i] = bitReverse(((0x0c0 - 280) + i) << 8);
            staticLLength[i++] = 8;
        }

        /* Distance codes */
        staticDCodes = new short[DIST_NUM];
        staticDLength = new byte[DIST_NUM];
        for (i = 0; i < DIST_NUM; i++) {
            staticDCodes[i] = bitReverse(i << 11);
            staticDLength[i] = 5;
        }
    }

    /**
     * Creates a Huffman encoder that writes to the given pending buffer.
     *
     * @param pending
     *            the buffer receiving the encoded output.
     */
    public DeflaterHuffman(final DeflaterPending pending) {
        this.pending = pending;

        literalTree = new Tree(LITERAL_NUM, 257, 15);
        distTree = new Tree(DIST_NUM, 1, 15);
        blTree = new Tree(BITLEN_NUM, 4, 7);

        d_buf = new short[BUFSIZE];
        l_buf = new byte[BUFSIZE];
    }

    /** Discards all buffered entries and resets the three trees. */
    public final void reset() {
        last_lit = 0;
        extra_bits = 0;
        literalTree.reset();
        distTree.reset();
        blTree.reset();
    }

    /**
     * Maps a match length minus 3 (0..255) to its literal tree symbol.
     */
    private final int l_code(int len) {
        if (len == 255) {
            return 285;
        }

        int code = 257;
        while (len >= 8) {
            code += 4;
            len >>= 1;
        }
        return code + len;
    }

    /**
     * Maps a match distance minus 1 to its distance tree symbol.
     */
    private final int d_code(int distance) {
        int code = 0;
        while (distance >= 4) {
            code += 2;
            distance >>= 1;
        }
        return code + distance;
    }

    /**
     * Writes the header of a dynamic block: the three code counts, the
     * bit-length code lengths in BL_ORDER, and the run-length encoded lengths
     * of the literal and distance trees.
     *
     * @param blTreeCodes
     *            number of bit-length code lengths to send.
     */
    public void sendAllTrees(final int blTreeCodes) {
        blTree.buildCodes();
        literalTree.buildCodes();
        distTree.buildCodes();
        pending.writeBits(literalTree.numCodes - 257, 5);
        pending.writeBits(distTree.numCodes - 1, 5);
        pending.writeBits(blTreeCodes - 4, 4);
        for (int rank = 0; rank < blTreeCodes; rank++) {
            pending.writeBits(blTree.length[BL_ORDER[rank]], 3);
        }
        literalTree.writeTree(blTree);
        distTree.writeTree(blTree);
        if (DeflaterConstants.DEBUGGING) {
            blTree.checkEmpty();
        }
    }

    /**
     * Writes all buffered literals and matches with the codes currently
     * installed in the literal and distance trees, followed by EOF_SYMBOL.
     */
    public void compressBlock() {
        for (int i = 0; i < last_lit; i++) {
            final int litlen = l_buf[i] & 0xff;
            int dist = d_buf[i];
            /* A zero distance marks a literal; otherwise dist becomes dist - 1. */
            if (dist-- != 0) {
                if (DeflaterConstants.DEBUGGING) {
                    System.err.print("[" + (dist + 1) + "," + (litlen + 3)
                            + "]: ");
                }

                final int lc = l_code(litlen);
                literalTree.writeSymbol(lc);

                /* Number of extra bits that follow this length symbol. */
                int bits = (lc - 261) / 4;
                if ((bits > 0) && (bits <= 5)) {
                    pending.writeBits(litlen & ((1 << bits) - 1), bits);
                }

                final int dc = d_code(dist);
                distTree.writeSymbol(dc);

                /* Number of extra bits that follow this distance symbol. */
                bits = (dc / 2) - 1;
                if (bits > 0) {
                    pending.writeBits(dist & ((1 << bits) - 1), bits);
                }
            } else {
                if (DeflaterConstants.DEBUGGING) {
                    if ((litlen > 32) && (litlen < 127)) {
                        System.err.print("(" + (char) litlen + "): ");
                    } else {
                        System.err.print("{" + litlen + "}: ");
                    }
                }
                literalTree.writeSymbol(litlen);
            }
        }
        if (DeflaterConstants.DEBUGGING) {
            System.err.print("EOF: ");
        }
        literalTree.writeSymbol(EOF_SYMBOL);
        if (DeflaterConstants.DEBUGGING) {
            literalTree.checkEmpty();
            distTree.checkEmpty();
        }
    }

    /**
     * Writes the given bytes as a stored block and resets this encoder.
     *
     * @param stored
     *            the array holding the raw bytes.
     * @param stored_offset
     *            index of the first byte to store.
     * @param stored_len
     *            number of bytes to store.
     * @param lastBlock
     *            true if the block is to be marked as the last block.
     */
    public void flushStoredBlock(final byte[] stored, final int stored_offset,
            final int stored_len, final boolean lastBlock) {
        if (DeflaterConstants.DEBUGGING) {
            System.err.println("Flushing stored block " + stored_len);
        }
        pending.writeBits((DeflaterConstants.STORED_BLOCK << 1)
                + (lastBlock ? 1 : 0), 3);
        pending.alignToByte();
        /* The length is followed by its one's complement. */
        pending.writeShort(stored_len);
        pending.writeShort(~stored_len);
        pending.writeBlock(stored, stored_offset, stored_len);
        reset();
    }

    /**
     * Writes the buffered literals and matches as one block, choosing the
     * smallest of three encodings (stored, static trees, dynamic trees), and
     * resets this encoder.
     *
     * @param stored
     *            the array holding the raw bytes of the block.
     * @param stored_offset
     *            index of the first raw byte; a negative value rules out a
     *            stored block.
     * @param stored_len
     *            number of raw bytes of the block.
     * @param lastBlock
     *            true if the block is to be marked as the last block.
     */
    public void flushBlock(final byte[] stored, final int stored_offset,
            final int stored_len, final boolean lastBlock) {
        literalTree.freqs[EOF_SYMBOL]++;

        /* Build trees */
        literalTree.buildTree();
        distTree.buildTree();

        /* Calculate bitlen frequency */
        literalTree.calcBLFreq(blTree);
        distTree.calcBLFreq(blTree);

        /* Build bitlen tree */
        blTree.buildTree();

        /* Trailing unused bit-length codes (in BL_ORDER) need not be sent. */
        int blTreeCodes = 4;
        for (int i = 18; i > blTreeCodes; i--) {
            if (blTree.length[BL_ORDER[i]] > 0) {
                blTreeCodes = i + 1;
            }
        }
        /* Size in bits of the block when encoded with dynamic trees. */
        int opt_len = 14 + (blTreeCodes * 3) + blTree.getEncodedLength()
                + literalTree.getEncodedLength() + distTree.getEncodedLength()
                + extra_bits;

        /* Size in bits of the block when encoded with the static trees. */
        int static_len = extra_bits;
        for (int i = 0; i < LITERAL_NUM; i++) {
            static_len += literalTree.freqs[i] * staticLLength[i];
        }
        for (int i = 0; i < DIST_NUM; i++) {
            static_len += distTree.freqs[i] * staticDLength[i];
        }
        if (opt_len >= static_len) {
            /* Force static trees */
            opt_len = static_len;
        }

        /* opt_len is in bits, stored_len in bytes. */
        if ((stored_offset >= 0) && ((stored_len + 4) < (opt_len >> 3))) {
            /* Store Block */
            if (DeflaterConstants.DEBUGGING) {
                System.err.println("Storing, since " + stored_len + " < "
                        + opt_len + " <= " + static_len);
            }
            flushStoredBlock(stored, stored_offset, stored_len, lastBlock);
        } else if (opt_len == static_len) {
            /* Encode with static tree */
            pending.writeBits((DeflaterConstants.STATIC_TREES << 1)
                    + (lastBlock ? 1 : 0), 3);
            literalTree.setStaticCodes(staticLCodes, staticLLength);
            distTree.setStaticCodes(staticDCodes, staticDLength);
            compressBlock();
            reset();
        } else {
            /* Encode with dynamic tree */
            pending.writeBits((DeflaterConstants.DYN_TREES << 1)
                    + (lastBlock ? 1 : 0), 3);
            sendAllTrees(blTreeCodes);
            compressBlock();
            reset();
        }
    }

    /**
     * @return true if the literal/distance buffers are full and the block
     *         must be flushed.
     */
    public final boolean isFull() {
        return last_lit == BUFSIZE;
    }

    /**
     * Buffers a literal byte and counts it in the literal tree.
     *
     * @param lit
     *            the literal, used as index into the literal frequencies.
     * @return true if the buffers are full afterwards.
     */
    public final boolean tallyLit(final int lit) {
        if (DeflaterConstants.DEBUGGING) {
            if ((lit > 32) && (lit < 127)) {
                System.err.println("(" + (char) lit + ")");
            } else {
                System.err.println("{" + lit + "}");
            }
        }
        d_buf[last_lit] = 0;
        l_buf[last_lit++] = (byte) lit;
        literalTree.freqs[lit]++;
        return last_lit == BUFSIZE;
    }

    /**
     * Buffers a match and counts its length and distance symbols, including
     * the extra bits they will need.
     *
     * @param dist
     *            the match distance.
     * @param len
     *            the match length; stored as {@code len - 3}.
     * @return true if the buffers are full afterwards.
     */
    public final boolean tallyDist(final int dist, final int len) {
        if (DeflaterConstants.DEBUGGING) {
            System.err.println("[" + dist + "," + len + "]");
        }

        d_buf[last_lit] = (short) dist;
        l_buf[last_lit++] = (byte) (len - 3);

        final int lc = l_code(len - 3);
        literalTree.freqs[lc]++;
        if ((lc >= 265) && (lc < 285)) {
            extra_bits += (lc - 261) / 4;
        }

        final int dc = d_code(dist - 1);
        distTree.freqs[dc]++;
        if (dc >= 4) {
            extra_bits += (dc / 2) - 1;
        }
        return last_lit == BUFSIZE;
    }
}
diff --git a/epublib-core/src/main/java/net/sf/jazzlib/DeflaterOutputStream.java b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterOutputStream.java new file mode 100644 index 00000000..bbc021fd --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterOutputStream.java @@ -0,0 +1,210 @@ +/* DeflaterOutputStream.java - Output filter for compressing. + Copyright (C) 1999, 2000, 2001, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/* Written using on-line Java Platform 1.2 API Specification + * and JCL book. + * Believed complete and correct. + */ + +/** + * This is a special FilterOutputStream deflating the bytes that are written + * through it. It uses the Deflater for deflating. + * + * A special thing to be noted is that flush() doesn't flush everything in Sun's + * JDK, but it does so in jazzlib. This is because Sun's Deflater doesn't have a + * way to flush() everything, without finishing the stream. 
+ * + * @author Tom Tromey, Jochen Hoenicke + * @date Jan 11, 2001 + */ +public class DeflaterOutputStream extends FilterOutputStream { + /** + * This buffer is used temporarily to retrieve the bytes from the deflater + * and write them to the underlying output stream. + */ + protected byte[] buf; + + /** + * The deflater which is used to deflate the stream. + */ + protected Deflater def; + + /** + * Deflates everything in the def's input buffers. This will call + * def.deflate() until all bytes from the input buffers are + * processed. + */ + protected void deflate() throws IOException { + while (!def.needsInput()) { + final int len = def.deflate(buf, 0, buf.length); + + // System.err.println("DOS deflated " + len + " out of " + + // buf.length); + if (len <= 0) { + break; + } + out.write(buf, 0, len); + } + + if (!def.needsInput()) { + throw new InternalError("Can't deflate all input?"); + } + } + + /** + * Creates a new DeflaterOutputStream with a default Deflater and default + * buffer size. + * + * @param out + * the output stream where deflated output should be written. + */ + public DeflaterOutputStream(final OutputStream out) { + this(out, new Deflater(), 512); + } + + /** + * Creates a new DeflaterOutputStream with the given Deflater and default + * buffer size. + * + * @param out + * the output stream where deflated output should be written. + * @param defl + * the underlying deflater. + */ + public DeflaterOutputStream(final OutputStream out, final Deflater defl) { + this(out, defl, 512); + } + + /** + * Creates a new DeflaterOutputStream with the given Deflater and buffer + * size. + * + * @param out + * the output stream where deflated output should be written. + * @param defl + * the underlying deflater. + * @param bufsize + * the buffer size. + * @exception IllegalArgumentException + * if bufsize isn't positive. 
+ */ + public DeflaterOutputStream(final OutputStream out, final Deflater defl, + final int bufsize) { + super(out); + if (bufsize <= 0) { + throw new IllegalArgumentException("bufsize <= 0"); + } + buf = new byte[bufsize]; + def = defl; + } + + /** + * Flushes the stream by calling flush() on the deflater and then on the + * underlying stream. This ensures that all bytes are flushed. This function + * doesn't work in Sun's JDK, but only in jazzlib. + */ + @Override + public void flush() throws IOException { + def.flush(); + deflate(); + out.flush(); + } + + /** + * Finishes the stream by calling finish() on the deflater. This was the + * only way to ensure that all bytes are flushed in Sun's JDK. + */ + public void finish() throws IOException { + def.finish(); + while (!def.finished()) { + final int len = def.deflate(buf, 0, buf.length); + if (len <= 0) { + break; + } + out.write(buf, 0, len); + } + if (!def.finished()) { + throw new InternalError("Can't deflate all input?"); + } + out.flush(); + } + + /** + * Calls finish () and closes the stream. + */ + @Override + public void close() throws IOException { + finish(); + out.close(); + } + + /** + * Writes a single byte to the compressed output stream. + * + * @param bval + * the byte value. + */ + @Override + public void write(final int bval) throws IOException { + final byte[] b = new byte[1]; + b[0] = (byte) bval; + write(b, 0, 1); + } + + /** + * Writes a len bytes from an array to the compressed stream. + * + * @param buf + * the byte array. + * @param off + * the offset into the byte array where to start. + * @param len + * the number of bytes to write. 
+ */ + @Override + public void write(final byte[] buf, final int off, final int len) + throws IOException { + def.setInput(buf, off, len); + deflate(); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/DeflaterPending.java b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterPending.java new file mode 100644 index 00000000..e3f0dcaa --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/DeflaterPending.java @@ -0,0 +1,51 @@ +/* net.sf.jazzlib.DeflaterPending + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. 
If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/** + * This class stores the pending output of the Deflater. + * + * @author Jochen Hoenicke + * @date Jan 5, 2000 + */ + +class DeflaterPending extends PendingBuffer { + public DeflaterPending() { + super(DeflaterConstants.PENDING_BUF_SIZE); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/GZIPInputStream.java b/epublib-core/src/main/java/net/sf/jazzlib/GZIPInputStream.java new file mode 100644 index 00000000..e9111ede --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/GZIPInputStream.java @@ -0,0 +1,369 @@ +/* GZIPInputStream.java - Input filter for reading gzip file + Copyright (C) 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. 
/**
 * This filter stream is used to decompress a "GZIP" format stream. The "GZIP"
 * format is described in RFC 1952.
 *
 * The header is parsed lazily on the first read; the uncompressed data is
 * checksummed while it is read and verified against the trailer once the
 * inflater reports the end of the deflate stream. Only the first GZIP member
 * of the input is decoded.
 *
 * @author John Leuner
 * @author Tom Tromey
 * @since JDK 1.1
 */
public class GZIPInputStream extends InflaterInputStream {
    /**
     * The magic number found at the start of a GZIP stream.
     */
    public static final int GZIP_MAGIC = 0x1f8b;

    /**
     * The mask for bit 0 of the flag byte.
     */
    static final int FTEXT = 0x1;

    /**
     * The mask for bit 1 of the flag byte.
     */
    static final int FHCRC = 0x2;

    /**
     * The mask for bit 2 of the flag byte.
     */
    static final int FEXTRA = 0x4;

    /**
     * The mask for bit 3 of the flag byte.
     */
    static final int FNAME = 0x8;

    /**
     * The mask for bit 4 of the flag byte.
     */
    static final int FCOMMENT = 0x10;

    /**
     * The mask for the reserved bits 5, 6 and 7 of the flag byte, which must
     * all be zero.
     */
    private static final int RESERVED_FLAGS = 0xe0;

    /**
     * Size in bytes of the GZIP trailer (CRC-32 followed by the uncompressed
     * size).
     */
    private static final int FOOTER_SIZE = 8;

    /**
     * The CRC-32 checksum value for uncompressed data.
     */
    protected CRC32 crc;

    /**
     * Indicates whether or not the end of the stream has been reached.
     */
    protected boolean eos;

    /**
     * Indicates whether or not the GZIP header has been read in.
     */
    private boolean readGZIPHeader;

    /**
     * Creates a GZIPInputStream with the default buffer size.
     *
     * @param in
     *            The stream to read compressed data from (in GZIP format).
     *
     * @throws IOException
     *             if an error occurs during an I/O operation.
     */
    public GZIPInputStream(final InputStream in) throws IOException {
        this(in, 4096);
    }

    /**
     * Creates a GZIPInputStream with the specified buffer size.
     *
     * @param in
     *            The stream to read compressed data from (in GZIP format).
     * @param size
     *            The size of the buffer to use.
     *
     * @throws IOException
     *             if an error occurs during an I/O operation.
     * @throws IllegalArgumentException
     *             if <code>size</code> is less than or equal to 0.
     */
    public GZIPInputStream(final InputStream in, final int size)
            throws IOException {
        // "true" selects raw deflate data: the GZIP framing is handled here.
        super(in, new Inflater(true), size);
        crc = new CRC32();
    }

    /**
     * Closes the input stream.
     *
     * @throws IOException
     *             if an error occurs during an I/O operation.
     */
    @Override
    public void close() throws IOException {
        // Nothing to do here.
        super.close();
    }

    /**
     * Reads in GZIP-compressed data and stores it in uncompressed form into an
     * array of bytes. The method will block until either enough input data
     * becomes available or the compressed stream reaches its end.
     *
     * @param buf
     *            the buffer into which the uncompressed data will be stored.
     * @param offset
     *            the offset indicating where in <code>buf</code> the
     *            uncompressed data should be placed.
     * @param len
     *            the number of uncompressed bytes to be read.
     * @return the number of bytes stored in <code>buf</code>, or -1 once the
     *         end of the stream has been reached.
     * @throws IOException
     *             if the header or trailer is malformed, a checksum does not
     *             match, or the underlying stream fails.
     */
    @Override
    public int read(final byte[] buf, final int offset, final int len)
            throws IOException {
        // We first have to slurp in the GZIP header, then we feed all the
        // rest of the data to the superclass, updating the CRC32 as we go.
        // Once the data is finished, we check the CRC32. No buffer of our own
        // is needed, as everything is done in the superclass.
        if (!readGZIPHeader) {
            readHeader();
        }

        if (eos) {
            return -1;
        }

        final int numRead = super.read(buf, offset, len);
        if (numRead > 0) {
            crc.update(buf, offset, numRead);
        }

        if (inf.finished()) {
            readFooter();
        }
        return numRead;
    }

    /**
     * Reads and validates the GZIP header as laid out in RFC 1952. An input
     * that ends before the first header byte marks the stream as finished
     * instead of failing.
     *
     * @throws IOException
     *             if the header is malformed or its optional CRC16 does not
     *             match.
     */
    private void readHeader() throws IOException {
        final CRC32 headCRC = new CRC32();

        /* 1. Check the two magic bytes */
        int magic = in.read();
        if (magic < 0) {
            eos = true;
            return;
        }
        headCRC.update(magic);
        if (magic != (GZIP_MAGIC >> 8)) {
            throw new IOException(
                    "Error in GZIP header, first byte doesn't match");
        }

        magic = in.read();
        if (magic != (GZIP_MAGIC & 0xff)) {
            throw new IOException(
                    "Error in GZIP header, second byte doesn't match");
        }
        headCRC.update(magic);

        /* 2. Check the compression type (must be 8) */
        final int cm = in.read();
        if (cm != 8) {
            throw new IOException(
                    "Error in GZIP header, data not in deflate format");
        }
        headCRC.update(cm);

        /* 3. Check the flags */
        final int flags = readHeaderByte(headCRC, "Early EOF in GZIP header");

        /*
         * This flag byte is divided into individual bits as follows:
         *
         * bit 0 FTEXT, bit 1 FHCRC, bit 2 FEXTRA, bit 3 FNAME, bit 4 FCOMMENT,
         * bits 5-7 reserved.
         */

        /* 3.1 Check the reserved bits are zero */
        if ((flags & RESERVED_FLAGS) != 0) {
            throw new IOException("Reserved flag bits in GZIP header != 0");
        }

        /* 4.-6. Skip the modification time, extra flags, and OS type */
        for (int i = 0; i < 6; i++) {
            readHeaderByte(headCRC, "Early EOF in GZIP header");
        }

        /* 7. Skip the extra field: 2-byte little-endian XLEN, then XLEN bytes */
        if ((flags & FEXTRA) != 0) {
            final int lenLow = readHeaderByte(headCRC,
                    "Early EOF in GZIP header");
            final int lenHigh = readHeaderByte(headCRC,
                    "Early EOF in GZIP header");
            final int extraLen = lenLow | (lenHigh << 8);
            for (int i = 0; i < extraLen; i++) {
                readHeaderByte(headCRC, "Early EOF in GZIP header");
            }
        }

        /* 8. Skip the zero-terminated file name */
        if ((flags & FNAME) != 0) {
            skipZeroTerminated(headCRC, "Early EOF in GZIP file name");
        }

        /* 9. Skip the zero-terminated comment */
        if ((flags & FCOMMENT) != 0) {
            skipZeroTerminated(headCRC, "Early EOF in GZIP comment");
        }

        /*
         * 10. Check the header CRC: the low 16 bits of the CRC-32 of all
         * preceding header bytes, stored least-significant byte first.
         */
        if ((flags & FHCRC) != 0) {
            final int crcLow = in.read();
            if (crcLow < 0) {
                throw new EOFException("Early EOF in GZIP header");
            }
            final int crcHigh = in.read();
            if (crcHigh < 0) {
                throw new EOFException("Early EOF in GZIP header");
            }

            final int crcval = crcLow | (crcHigh << 8);
            if (crcval != ((int) headCRC.getValue() & 0xffff)) {
                throw new IOException("Header CRC value mismatch");
            }
        }

        readGZIPHeader = true;
    }

    /**
     * Reads one header byte and folds it into the running header checksum.
     *
     * @param headCRC
     *            the checksum over the header bytes read so far.
     * @param eofMessage
     *            the message of the EOFException thrown if the input ends.
     * @return the byte read, in the range 0-255.
     * @throws EOFException
     *             if the underlying stream has no more bytes.
     */
    private int readHeaderByte(final CRC32 headCRC, final String eofMessage)
            throws IOException {
        final int value = in.read();
        if (value < 0) {
            throw new EOFException(eofMessage);
        }
        headCRC.update(value);
        return value;
    }

    /**
     * Skips a zero-terminated header string (including its terminator),
     * folding every byte into the running header checksum.
     */
    private void skipZeroTerminated(final CRC32 headCRC,
            final String eofMessage) throws IOException {
        while (readHeaderByte(headCRC, eofMessage) != 0) {
            // keep consuming until the terminating zero byte
        }
    }

    /**
     * Reads the 8-byte GZIP trailer and checks the CRC-32 and the length of
     * the uncompressed data against what was actually decoded, then marks the
     * stream as finished.
     *
     * @throws IOException
     *             if the trailer is truncated or does not match the data.
     */
    private void readFooter() throws IOException {
        final byte[] footer = new byte[FOOTER_SIZE];

        // Part of the trailer may already sit in the inflater's input buffer,
        // behind the deflate data it has consumed.
        int avail = inf.getRemaining();
        if (avail > FOOTER_SIZE) {
            avail = FOOTER_SIZE;
        }
        System.arraycopy(buf, len - inf.getRemaining(), footer, 0, avail);

        // The rest has to come from the underlying stream.
        int needed = FOOTER_SIZE - avail;
        while (needed > 0) {
            final int count = in.read(footer, FOOTER_SIZE - needed, needed);
            if (count <= 0) {
                throw new EOFException("Early EOF in GZIP footer");
            }
            needed -= count; // Jewel Jan 16
        }

        final int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8)
                | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
        if (crcval != (int) crc.getValue()) {
            throw new IOException("GZIP crc sum mismatch, theirs \""
                    + Integer.toHexString(crcval) + "\" and ours \""
                    + Integer.toHexString((int) crc.getValue()));
        }

        final int total = (footer[4] & 0xff) | ((footer[5] & 0xff) << 8)
                | ((footer[6] & 0xff) << 16) | (footer[7] << 24);
        if (total != inf.getTotalOut()) {
            throw new IOException("Number of bytes mismatch");
        }

        /*
         * FIXME: Should we support multiple members? Difficult, since there
         * may be some bytes still in buf.
         */
        eos = true;
    }
}
/**
 * This filter stream is used to compress a stream into a "GZIP" stream. The
 * "GZIP" format is described in RFC 1952.
 *
 * The 10-byte GZIP header is written by the constructor; the trailer (CRC-32
 * and length of the uncompressed data) is written by {@link #finish()}.
 *
 * @author John Leuner
 * @author Tom Tromey
 * @since JDK 1.1
 */

/*
 * Written using on-line Java Platform 1.2 API Specification and JCL book.
 * Believed complete and correct.
 */

public class GZIPOutputStream extends DeflaterOutputStream {
    /**
     * CRC-32 value for uncompressed data
     */
    protected CRC32 crc;

    /**
     * Set once the GZIP trailer has been written, so that calling finish()
     * again (for instance through close()) does not append a second trailer.
     */
    private boolean trailerWritten;

    /**
     * Creates a GZIPOutputStream with the default buffer size
     *
     * @param out
     *            The stream the compressed (GZIP format) data is written to
     * @throws IOException
     *             if writing the GZIP header fails
     */
    public GZIPOutputStream(final OutputStream out) throws IOException {
        this(out, 4096);
    }

    /**
     * Creates a GZIPOutputStream with the specified buffer size and writes
     * the GZIP header to the underlying stream.
     *
     * @param out
     *            The stream the compressed (GZIP format) data is written to
     * @param size
     *            Size of the buffer to use
     * @throws IOException
     *             if writing the GZIP header fails
     */
    public GZIPOutputStream(final OutputStream out, final int size)
            throws IOException {
        // "true" selects raw deflate output: the GZIP framing is added here.
        super(out, new Deflater(Deflater.DEFAULT_COMPRESSION, true), size);

        crc = new CRC32();
        final int mod_time = (int) (System.currentTimeMillis() / 1000L);
        final byte[] gzipHeader = {
                /* The two magic bytes */
                (byte) (GZIPInputStream.GZIP_MAGIC >> 8),
                (byte) GZIPInputStream.GZIP_MAGIC,

                /* The compression type */
                (byte) Deflater.DEFLATED,

                /* The flags (not set) */
                0,

                /* The modification time, least-significant byte first */
                (byte) mod_time, (byte) (mod_time >> 8),
                (byte) (mod_time >> 16), (byte) (mod_time >> 24),

                /* The extra flags */
                0,

                /* The OS type (unknown) */
                (byte) 255 };

        out.write(gzipHeader);
    }

    /**
     * Compresses <code>len</code> bytes of <code>buf</code> starting at
     * <code>off</code> and adds them to the running CRC-32 of the
     * uncompressed data.
     *
     * @param buf
     *            the data to compress
     * @param off
     *            the offset of the first byte to compress
     * @param len
     *            the number of bytes to compress
     * @throws IOException
     *             if writing to the underlying stream fails
     */
    @Override
    public synchronized void write(final byte[] buf, final int off,
            final int len) throws IOException {
        super.write(buf, off, len);
        crc.update(buf, off, len);
    }

    /**
     * Writes remaining compressed output data to the output stream and closes
     * it.
     */
    @Override
    public void close() throws IOException {
        finish();
        out.close();
    }

    /**
     * Finishes the compressed data and writes the GZIP trailer: the CRC-32 of
     * the uncompressed data followed by its length, both least-significant
     * byte first. Calling this method again after it has completed is a
     * no-op.
     *
     * @throws IOException
     *             if writing to the underlying stream fails
     */
    @Override
    public void finish() throws IOException {
        if (trailerWritten) {
            // close() always calls finish(); a stream that was already
            // finished explicitly must not get a second trailer.
            return;
        }

        super.finish();

        final int totalin = def.getTotalIn();
        final int crcval = (int) crc.getValue();

        final byte[] gzipFooter = { (byte) crcval, (byte) (crcval >> 8),
                (byte) (crcval >> 16), (byte) (crcval >> 24),

                (byte) totalin, (byte) (totalin >> 8), (byte) (totalin >> 16),
                (byte) (totalin >> 24) };

        out.write(gzipFooter);
        trailerWritten = true;
    }
}
+ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/* Written using on-line Java Platform 1.2 API Specification + * and JCL book. + * Believed complete and correct. + */ + +/** + * Inflater is used to decompress data that has been compressed according to the + * "deflate" standard described in rfc1950. + * + * The usage is as following. First you have to set some input with + * setInput(), then inflate() it. If inflate doesn't inflate any + * bytes there may be three reasons: + *

+ * Once the first output byte is produced, a dictionary will not be needed at a + * later stage. + * + * @author John Leuner, Jochen Hoenicke + * @author Tom Tromey + * @date May 17, 1999 + * @since JDK 1.1 + */ +public class Inflater { + /* Copy lengths for literal codes 257..285 */ + private static final int CPLENS[] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, + 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, + 227, 258 }; + + /* Extra bits for literal codes 257..285 */ + private static final int CPLEXT[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + + /* Copy offsets for distance codes 0..29 */ + private static final int CPDIST[] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, + 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, + 4097, 6145, 8193, 12289, 16385, 24577 }; + + /* Extra bits for distance codes */ + private static final int CPDEXT[] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 }; + + /* This are the state in which the inflater can be. */ + private static final int DECODE_HEADER = 0; + private static final int DECODE_DICT = 1; + private static final int DECODE_BLOCKS = 2; + private static final int DECODE_STORED_LEN1 = 3; + private static final int DECODE_STORED_LEN2 = 4; + private static final int DECODE_STORED = 5; + private static final int DECODE_DYN_HEADER = 6; + private static final int DECODE_HUFFMAN = 7; + private static final int DECODE_HUFFMAN_LENBITS = 8; + private static final int DECODE_HUFFMAN_DIST = 9; + private static final int DECODE_HUFFMAN_DISTBITS = 10; + private static final int DECODE_CHKSUM = 11; + private static final int FINISHED = 12; + + /** This variable contains the current state. */ + private int mode; + + /** + * The adler checksum of the dictionary or of the decompressed stream, as it + * is written in the header resp. footer of the compressed stream.
+ * + * Only valid if mode is DECODE_DICT or DECODE_CHKSUM. + */ + private int readAdler; + /** + * The number of bits needed to complete the current state. This is valid, + * if mode is DECODE_DICT, DECODE_CHKSUM, DECODE_HUFFMAN_LENBITS or + * DECODE_HUFFMAN_DISTBITS. + */ + private int neededBits; + private int repLength, repDist; + private int uncomprLen; + /** + * True, if the last block flag was set in the last block of the inflated + * stream. This means that the stream ends after the current block. + */ + private boolean isLastBlock; + + /** + * The total number of inflated bytes. + */ + private int totalOut; + /** + * The total number of bytes set with setInput(). This is not the value + * returned by getTotalIn(), since this also includes the unprocessed input. + */ + private int totalIn; + /** + * This variable stores the nowrap flag that was given to the constructor. + * True means, that the inflated stream doesn't contain a header nor the + * checksum in the footer. + */ + private final boolean nowrap; + + private StreamManipulator input; + private OutputWindow outputWindow; + private InflaterDynHeader dynHeader; + private InflaterHuffmanTree litlenTree, distTree; + private Adler32 adler; + + /** + * Creates a new inflater. + */ + public Inflater() { + this(false); + } + + /** + * Creates a new inflater. + * + * @param nowrap + * true if no header and checksum field appears in the stream. + * This is used for GZIPed input. For compatibility with Sun JDK + * you should provide one byte of input more than needed in this + * case. + */ + public Inflater(final boolean nowrap) { + this.nowrap = nowrap; + this.adler = new Adler32(); + input = new StreamManipulator(); + outputWindow = new OutputWindow(); + mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER; + } + + /** + * Finalizes this object. + */ + @Override + protected void finalize() { + /* Exists only for compatibility */ + } + + /** + * Frees all objects allocated by the inflater. 
There's no reason to call + * this, since you can just rely on garbage collection (even for the Sun + * implementation). Exists only for compatibility with Sun's JDK, where the + * compressor allocates native memory. If you call any method (even reset) + * afterwards the behaviour is undefined. + * + * @deprecated Just clear all references to inflater instead. + */ + @Deprecated + public void end() { + outputWindow = null; + input = null; + dynHeader = null; + litlenTree = null; + distTree = null; + adler = null; + } + + /** + * Returns true, if the inflater has finished. This means, that no input is + * needed and no output can be produced. + */ + public boolean finished() { + return (mode == FINISHED) && (outputWindow.getAvailable() == 0); + } + + /** + * Gets the adler checksum. This is either the checksum of all uncompressed + * bytes returned by inflate(), or if needsDictionary() returns true (and + * thus no output was yet produced) this is the adler checksum of the + * expected dictionary. + * + * @returns the adler checksum. + */ + public int getAdler() { + return needsDictionary() ? readAdler : (int) adler.getValue(); + } + + /** + * Gets the number of unprocessed input. Useful, if the end of the stream is + * reached and you want to further process the bytes after the deflate + * stream. + * + * @return the number of bytes of the input which were not processed. + */ + public int getRemaining() { + return input.getAvailableBytes(); + } + + /** + * Gets the total number of processed compressed input bytes. + * + * @return the total number of bytes of processed input bytes. + */ + public int getTotalIn() { + return totalIn - getRemaining(); + } + + /** + * Gets the total number of output bytes returned by inflate(). + * + * @return the total number of output bytes. + */ + public int getTotalOut() { + return totalOut; + } + + /** + * Inflates the compressed stream to the output buffer. 
If this returns 0, + * you should check, whether needsDictionary(), needsInput() or finished() + * returns true, to determine why no further output is produced. + * + * @param buffer + * the output buffer. + * @return the number of bytes written to the buffer, 0 if no further output + * can be produced. + * @exception DataFormatException + * if deflated stream is invalid. + * @exception IllegalArgumentException + * if buf has length 0. + */ + public int inflate(final byte[] buf) throws DataFormatException { + return inflate(buf, 0, buf.length); + } + + /** + * Inflates the compressed stream to the output buffer. If this returns 0, + * you should check, whether needsDictionary(), needsInput() or finished() + * returns true, to determine why no further output is produced. + * + * @param buffer + * the output buffer. + * @param off + * the offset into buffer where the output should start. + * @param len + * the maximum length of the output. + * @return the number of bytes written to the buffer, 0 if no further output + * can be produced. + * @exception DataFormatException + * if deflated stream is invalid. + * @exception IndexOutOfBoundsException + * if the off and/or len are wrong. + */ + public int inflate(final byte[] buf, int off, int len) + throws DataFormatException { + /* Special case: len may be zero */ + if (len == 0) { + return 0; + } + /* Check for correct buff, off, len triple */ + if ((0 > off) || (off > (off + len)) || ((off + len) > buf.length)) { + throw new ArrayIndexOutOfBoundsException(); + } + int count = 0; + int more; + do { + if (mode != DECODE_CHKSUM) { + /* + * Don't give away any output, if we are waiting for the + * checksum in the input stream. + * + * With this trick we have always: needsInput() and not + * finished() implies more output can be produced. 
+ */ + more = outputWindow.copyOutput(buf, off, len); + adler.update(buf, off, more); + off += more; + count += more; + totalOut += more; + len -= more; + if (len == 0) { + return count; + } + } + } while (decode() + || ((outputWindow.getAvailable() > 0) && (mode != DECODE_CHKSUM))); + return count; + } + + /** + * Returns true, if a preset dictionary is needed to inflate the input. + */ + public boolean needsDictionary() { + return (mode == DECODE_DICT) && (neededBits == 0); + } + + /** + * Returns true, if the input buffer is empty. You should then call + * setInput().
+ * + * NOTE: This method also returns true when the stream is finished. + */ + public boolean needsInput() { + return input.needsInput(); + } + + /** + * Resets the inflater so that a new stream can be decompressed. All pending + * input and output will be discarded. + */ + public void reset() { + mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER; + totalIn = totalOut = 0; + input.reset(); + outputWindow.reset(); + dynHeader = null; + litlenTree = null; + distTree = null; + isLastBlock = false; + adler.reset(); + } + + /** + * Sets the preset dictionary. This should only be called, if + * needsDictionary() returns true and it should set the same dictionary, + * that was used for deflating. The getAdler() function returns the checksum + * of the dictionary needed. + * + * @param buffer + * the dictionary. + * @exception IllegalStateException + * if no dictionary is needed. + * @exception IllegalArgumentException + * if the dictionary checksum is wrong. + */ + public void setDictionary(final byte[] buffer) { + setDictionary(buffer, 0, buffer.length); + } + + /** + * Sets the preset dictionary. This should only be called, if + * needsDictionary() returns true and it should set the same dictionary, + * that was used for deflating. The getAdler() function returns the checksum + * of the dictionary needed. + * + * @param buffer + * the dictionary. + * @param off + * the offset into buffer where the dictionary starts. + * @param len + * the length of the dictionary. + * @exception IllegalStateException + * if no dictionary is needed. + * @exception IllegalArgumentException + * if the dictionary checksum is wrong. + * @exception IndexOutOfBoundsException + * if the off and/or len are wrong. 
+ */ + public void setDictionary(final byte[] buffer, final int off, final int len) { + if (!needsDictionary()) { + throw new IllegalStateException(); + } + + adler.update(buffer, off, len); + if ((int) adler.getValue() != readAdler) { + throw new IllegalArgumentException("Wrong adler checksum"); + } + adler.reset(); + outputWindow.copyDict(buffer, off, len); + mode = DECODE_BLOCKS; + } + + /** + * Sets the input. This should only be called, if needsInput() returns true. + * + * @param buffer + * the input. + * @exception IllegalStateException + * if no input is needed. + */ + public void setInput(final byte[] buf) { + setInput(buf, 0, buf.length); + } + + /** + * Sets the input. This should only be called, if needsInput() returns true. + * + * @param buffer + * the input. + * @param off + * the offset into buffer where the input starts. + * @param len + * the length of the input. + * @exception IllegalStateException + * if no input is needed. + * @exception IndexOutOfBoundsException + * if the off and/or len are wrong. + */ + public void setInput(final byte[] buf, final int off, final int len) { + input.setInput(buf, off, len); + totalIn += len; + } + + /** + * Decodes the deflate header. + * + * @return false if more input is needed. + * @exception DataFormatException + * if header is invalid. + */ + private boolean decodeHeader() throws DataFormatException { + int header = input.peekBits(16); + if (header < 0) { + return false; + } + input.dropBits(16); + + /* The header is written in "wrong" byte order */ + header = ((header << 8) | (header >> 8)) & 0xffff; + if ((header % 31) != 0) { + throw new DataFormatException("Header checksum illegal"); + } + + if ((header & 0x0f00) != (Deflater.DEFLATED << 8)) { + throw new DataFormatException("Compression Method unknown"); + } + + /* + * Maximum size of the backwards window in bits. We currently ignore + * this, but we could use it to make the inflater window more space + * efficient. 
On the other hand the full window (15 bits) is needed most + * times, anyway. int max_wbits = ((header & 0x7000) >> 12) + 8; + */ + + if ((header & 0x0020) == 0) // Dictionary flag? + { + mode = DECODE_BLOCKS; + } else { + mode = DECODE_DICT; + neededBits = 32; + } + return true; + } + + /** + * Decodes the dictionary checksum after the deflate header. + * + * @return false if more input is needed. + */ + private boolean decodeDict() { + while (neededBits > 0) { + final int dictByte = input.peekBits(8); + if (dictByte < 0) { + return false; + } + input.dropBits(8); + readAdler = (readAdler << 8) | dictByte; + neededBits -= 8; + } + return false; + } + + /** + * Decodes the huffman encoded symbols in the input stream. + * + * @return false if more input is needed, true if output window is full or + * the current block ends. + * @exception DataFormatException + * if deflated stream is invalid. + */ + private boolean decodeHuffman() throws DataFormatException { + int free = outputWindow.getFreeSpace(); + while (free >= 258) { + int symbol; + switch (mode) { + case DECODE_HUFFMAN: + /* This is the inner loop so it is optimized a bit */ + while (((symbol = litlenTree.getSymbol(input)) & ~0xff) == 0) { + outputWindow.write(symbol); + if (--free < 258) { + return true; + } + } + if (symbol < 257) { + if (symbol < 0) { + return false; + } else { + /* symbol == 256: end of block */ + distTree = null; + litlenTree = null; + mode = DECODE_BLOCKS; + return true; + } + } + + try { + repLength = CPLENS[symbol - 257]; + neededBits = CPLEXT[symbol - 257]; + } catch (final ArrayIndexOutOfBoundsException ex) { + throw new DataFormatException("Illegal rep length code"); + } + /* fall through */ + case DECODE_HUFFMAN_LENBITS: + if (neededBits > 0) { + mode = DECODE_HUFFMAN_LENBITS; + final int i = input.peekBits(neededBits); + if (i < 0) { + return false; + } + input.dropBits(neededBits); + repLength += i; + } + mode = DECODE_HUFFMAN_DIST; + /* fall through */ + case 
DECODE_HUFFMAN_DIST: + symbol = distTree.getSymbol(input); + if (symbol < 0) { + return false; + } + try { + repDist = CPDIST[symbol]; + neededBits = CPDEXT[symbol]; + } catch (final ArrayIndexOutOfBoundsException ex) { + throw new DataFormatException("Illegal rep dist code"); + } + /* fall through */ + case DECODE_HUFFMAN_DISTBITS: + if (neededBits > 0) { + mode = DECODE_HUFFMAN_DISTBITS; + final int i = input.peekBits(neededBits); + if (i < 0) { + return false; + } + input.dropBits(neededBits); + repDist += i; + } + outputWindow.repeat(repLength, repDist); + free -= repLength; + mode = DECODE_HUFFMAN; + break; + default: + throw new IllegalStateException(); + } + } + return true; + } + + /** + * Decodes the adler checksum after the deflate stream. + * + * @return false if more input is needed. + * @exception DataFormatException + * if checksum doesn't match. + */ + private boolean decodeChksum() throws DataFormatException { + while (neededBits > 0) { + final int chkByte = input.peekBits(8); + if (chkByte < 0) { + return false; + } + input.dropBits(8); + readAdler = (readAdler << 8) | chkByte; + neededBits -= 8; + } + if ((int) adler.getValue() != readAdler) { + throw new DataFormatException("Adler chksum doesn't match: " + + Integer.toHexString((int) adler.getValue()) + " vs. " + + Integer.toHexString(readAdler)); + } + mode = FINISHED; + return false; + } + + /** + * Decodes the deflated stream. + * + * @return false if more input is needed, or if finished. + * @exception DataFormatException + * if deflated stream is invalid. 
+ */ + private boolean decode() throws DataFormatException { + switch (mode) { + case DECODE_HEADER: + return decodeHeader(); + case DECODE_DICT: + return decodeDict(); + case DECODE_CHKSUM: + return decodeChksum(); + + case DECODE_BLOCKS: + if (isLastBlock) { + if (nowrap) { + mode = FINISHED; + return false; + } else { + input.skipToByteBoundary(); + neededBits = 32; + mode = DECODE_CHKSUM; + return true; + } + } + + final int type = input.peekBits(3); + if (type < 0) { + return false; + } + input.dropBits(3); + + if ((type & 1) != 0) { + isLastBlock = true; + } + switch (type >> 1) { + case DeflaterConstants.STORED_BLOCK: + input.skipToByteBoundary(); + mode = DECODE_STORED_LEN1; + break; + case DeflaterConstants.STATIC_TREES: + litlenTree = InflaterHuffmanTree.defLitLenTree; + distTree = InflaterHuffmanTree.defDistTree; + mode = DECODE_HUFFMAN; + break; + case DeflaterConstants.DYN_TREES: + dynHeader = new InflaterDynHeader(); + mode = DECODE_DYN_HEADER; + break; + default: + throw new DataFormatException("Unknown block type " + type); + } + return true; + + case DECODE_STORED_LEN1: { + if ((uncomprLen = input.peekBits(16)) < 0) { + return false; + } + input.dropBits(16); + mode = DECODE_STORED_LEN2; + } + /* fall through */ + case DECODE_STORED_LEN2: { + final int nlen = input.peekBits(16); + if (nlen < 0) { + return false; + } + input.dropBits(16); + if (nlen != (uncomprLen ^ 0xffff)) { + throw new DataFormatException("broken uncompressed block"); + } + mode = DECODE_STORED; + } + /* fall through */ + case DECODE_STORED: { + final int more = outputWindow.copyStored(input, uncomprLen); + uncomprLen -= more; + if (uncomprLen == 0) { + mode = DECODE_BLOCKS; + return true; + } + return !input.needsInput(); + } + + case DECODE_DYN_HEADER: + if (!dynHeader.decode(input)) { + return false; + } + litlenTree = dynHeader.buildLitLenTree(); + distTree = dynHeader.buildDistTree(); + mode = DECODE_HUFFMAN; + /* fall through */ + case DECODE_HUFFMAN: + case 
DECODE_HUFFMAN_LENBITS: + case DECODE_HUFFMAN_DIST: + case DECODE_HUFFMAN_DISTBITS: + return decodeHuffman(); + case FINISHED: + return false; + default: + throw new IllegalStateException(); + } + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/InflaterDynHeader.java b/epublib-core/src/main/java/net/sf/jazzlib/InflaterDynHeader.java new file mode 100644 index 00000000..47e1eac5 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/InflaterDynHeader.java @@ -0,0 +1,195 @@ +/* net.sf.jazzlib.InflaterDynHeader + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. 
An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +class InflaterDynHeader { + private static final int LNUM = 0; + private static final int DNUM = 1; + private static final int BLNUM = 2; + private static final int BLLENS = 3; + private static final int LENS = 4; + private static final int REPS = 5; + + private static final int repMin[] = { 3, 3, 11 }; + private static final int repBits[] = { 2, 3, 7 }; + + private byte[] blLens; + private byte[] litdistLens; + + private InflaterHuffmanTree blTree; + + private int mode; + private int lnum, dnum, blnum, num; + private int repSymbol; + private byte lastLen; + private int ptr; + + private static final int[] BL_ORDER = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, + 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + + public InflaterDynHeader() { + } + + public boolean decode(final StreamManipulator input) + throws DataFormatException { + decode_loop: for (;;) { + switch (mode) { + case LNUM: + lnum = input.peekBits(5); + if (lnum < 0) { + return false; + } + lnum += 257; + input.dropBits(5); + // System.err.println("LNUM: "+lnum); + mode = DNUM; + /* fall through */ + case DNUM: + dnum = input.peekBits(5); + if (dnum < 0) { + return false; + } + dnum++; + input.dropBits(5); + // System.err.println("DNUM: "+dnum); + num = lnum + dnum; + litdistLens = new byte[num]; + mode = BLNUM; + /* fall through */ + case BLNUM: + blnum = input.peekBits(4); + if (blnum < 0) { + return false; + } + blnum += 4; + input.dropBits(4); + blLens = new byte[19]; + ptr = 0; + // System.err.println("BLNUM: "+blnum); + mode = BLLENS; + /* fall through */ + case BLLENS: + while (ptr < blnum) { + final int len = input.peekBits(3); + if (len < 0) { + return false; + } + input.dropBits(3); + // 
System.err.println("blLens["+BL_ORDER[ptr]+"]: "+len); + blLens[BL_ORDER[ptr]] = (byte) len; + ptr++; + } + blTree = new InflaterHuffmanTree(blLens); + blLens = null; + ptr = 0; + mode = LENS; + /* fall through */ + case LENS: { + int symbol; + while (((symbol = blTree.getSymbol(input)) & ~15) == 0) { + /* Normal case: symbol in [0..15] */ + + // System.err.println("litdistLens["+ptr+"]: "+symbol); + litdistLens[ptr++] = lastLen = (byte) symbol; + + if (ptr == num) { + /* Finished */ + return true; + } + } + + /* need more input ? */ + if (symbol < 0) { + return false; + } + + /* otherwise repeat code */ + if (symbol >= 17) { + /* repeat zero */ + // System.err.println("repeating zero"); + lastLen = 0; + } else { + if (ptr == 0) { + throw new DataFormatException(); + } + } + repSymbol = symbol - 16; + mode = REPS; + } + /* fall through */ + + case REPS: { + final int bits = repBits[repSymbol]; + int count = input.peekBits(bits); + if (count < 0) { + return false; + } + input.dropBits(bits); + count += repMin[repSymbol]; + // System.err.println("litdistLens repeated: "+count); + + if ((ptr + count) > num) { + throw new DataFormatException(); + } + while (count-- > 0) { + litdistLens[ptr++] = lastLen; + } + + if (ptr == num) { + /* Finished */ + return true; + } + } + mode = LENS; + continue decode_loop; + } + } + } + + public InflaterHuffmanTree buildLitLenTree() throws DataFormatException { + final byte[] litlenLens = new byte[lnum]; + System.arraycopy(litdistLens, 0, litlenLens, 0, lnum); + return new InflaterHuffmanTree(litlenLens); + } + + public InflaterHuffmanTree buildDistTree() throws DataFormatException { + final byte[] distLens = new byte[dnum]; + System.arraycopy(litdistLens, lnum, distLens, 0, dnum); + return new InflaterHuffmanTree(distLens); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/InflaterHuffmanTree.java b/epublib-core/src/main/java/net/sf/jazzlib/InflaterHuffmanTree.java new file mode 100644 index 00000000..164fabac --- 
/dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/InflaterHuffmanTree.java @@ -0,0 +1,199 @@ +/* net.sf.jazzlib.InflaterHuffmanTree + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. 
*/ + +package net.sf.jazzlib; + +public class InflaterHuffmanTree { + private final static int MAX_BITLEN = 15; + private short[] tree; + + public static InflaterHuffmanTree defLitLenTree, defDistTree; + + static { + try { + byte[] codeLengths = new byte[288]; + int i = 0; + while (i < 144) { + codeLengths[i++] = 8; + } + while (i < 256) { + codeLengths[i++] = 9; + } + while (i < 280) { + codeLengths[i++] = 7; + } + while (i < 288) { + codeLengths[i++] = 8; + } + defLitLenTree = new InflaterHuffmanTree(codeLengths); + + codeLengths = new byte[32]; + i = 0; + while (i < 32) { + codeLengths[i++] = 5; + } + defDistTree = new InflaterHuffmanTree(codeLengths); + } catch (final DataFormatException ex) { + throw new InternalError( + "InflaterHuffmanTree: static tree length illegal"); + } + } + + /** + * Constructs a Huffman tree from the array of code lengths. + * + * @param codeLengths + * the array of code lengths + */ + public InflaterHuffmanTree(final byte[] codeLengths) + throws DataFormatException { + buildTree(codeLengths); + } + + private void buildTree(final byte[] codeLengths) throws DataFormatException { + final int[] blCount = new int[MAX_BITLEN + 1]; + final int[] nextCode = new int[MAX_BITLEN + 1]; + for (final byte codeLength : codeLengths) { + final int bits = codeLength; + if (bits > 0) { + blCount[bits]++; + } + } + + int code = 0; + int treeSize = 512; + for (int bits = 1; bits <= MAX_BITLEN; bits++) { + nextCode[bits] = code; + code += blCount[bits] << (16 - bits); + if (bits >= 10) { + /* We need an extra table for bit lengths >= 10. */ + final int start = nextCode[bits] & 0x1ff80; + final int end = code & 0x1ff80; + treeSize += (end - start) >> (16 - bits); + } + } + if (code != 65536) { + throw new DataFormatException("Code lengths don't add up properly."); + } + + /* + * Now create and fill the extra tables from longest to shortest bit + * len. This way the sub trees will be aligned. 
+ */ + tree = new short[treeSize]; + int treePtr = 512; + for (int bits = MAX_BITLEN; bits >= 10; bits--) { + final int end = code & 0x1ff80; + code -= blCount[bits] << (16 - bits); + final int start = code & 0x1ff80; + for (int i = start; i < end; i += 1 << 7) { + tree[DeflaterHuffman.bitReverse(i)] = (short) ((-treePtr << 4) | bits); + treePtr += 1 << (bits - 9); + } + } + + for (int i = 0; i < codeLengths.length; i++) { + final int bits = codeLengths[i]; + if (bits == 0) { + continue; + } + code = nextCode[bits]; + int revcode = DeflaterHuffman.bitReverse(code); + if (bits <= 9) { + do { + tree[revcode] = (short) ((i << 4) | bits); + revcode += 1 << bits; + } while (revcode < 512); + } else { + int subTree = tree[revcode & 511]; + final int treeLen = 1 << (subTree & 15); + subTree = -(subTree >> 4); + do { + tree[subTree | (revcode >> 9)] = (short) ((i << 4) | bits); + revcode += 1 << bits; + } while (revcode < treeLen); + } + nextCode[bits] = code + (1 << (16 - bits)); + } + } + + /** + * Reads the next symbol from input. The symbol is encoded using the huffman + * tree. + * + * @param input + * the input source. + * @return the next symbol, or -1 if not enough input is available. 
+ */ + public int getSymbol(final StreamManipulator input) + throws DataFormatException { + int lookahead, symbol; + if ((lookahead = input.peekBits(9)) >= 0) { + if ((symbol = tree[lookahead]) >= 0) { + input.dropBits(symbol & 15); + return symbol >> 4; + } + final int subtree = -(symbol >> 4); + final int bitlen = symbol & 15; + if ((lookahead = input.peekBits(bitlen)) >= 0) { + symbol = tree[subtree | (lookahead >> 9)]; + input.dropBits(symbol & 15); + return symbol >> 4; + } else { + final int bits = input.getAvailableBits(); + lookahead = input.peekBits(bits); + symbol = tree[subtree | (lookahead >> 9)]; + if ((symbol & 15) <= bits) { + input.dropBits(symbol & 15); + return symbol >> 4; + } else { + return -1; + } + } + } else { + final int bits = input.getAvailableBits(); + lookahead = input.peekBits(bits); + symbol = tree[lookahead]; + if ((symbol >= 0) && ((symbol & 15) <= bits)) { + input.dropBits(symbol & 15); + return symbol >> 4; + } else { + return -1; + } + } + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/InflaterInputStream.java b/epublib-core/src/main/java/net/sf/jazzlib/InflaterInputStream.java new file mode 100644 index 00000000..3241aa23 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/InflaterInputStream.java @@ -0,0 +1,260 @@ +/* InflaterInputStream.java - Input stream filter for decompressing + Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 + Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * This filter stream is used to decompress data compressed in the "deflate" + * format. The "deflate" format is described in RFC 1951. + * + * This stream may form the basis for other decompression filters, such as the + * GZIPInputStream. 
+ * + * @author John Leuner + * @author Tom Tromey + * @since 1.1 + */ +public class InflaterInputStream extends FilterInputStream { + /** + * Decompressor for this filter + */ + protected Inflater inf; + + /** + * Byte array used as a buffer + */ + protected byte[] buf; + + /** + * Size of buffer + */ + protected int len; + + /* + * We just use this if we are decoding one byte at a time with the read() + * call + */ + private final byte[] onebytebuffer = new byte[1]; + + /** + * Create an InflaterInputStream with the default decompresseor and a + * default buffer size. + * + * @param in + * the InputStream to read bytes from + */ + public InflaterInputStream(final InputStream in) { + this(in, new Inflater(), 4096); + } + + /** + * Create an InflaterInputStream with the specified decompresseor and a + * default buffer size. + * + * @param in + * the InputStream to read bytes from + * @param inf + * the decompressor used to decompress data read from in + */ + public InflaterInputStream(final InputStream in, final Inflater inf) { + this(in, inf, 4096); + } + + /** + * Create an InflaterInputStream with the specified decompresseor and a + * specified buffer size. + * + * @param in + * the InputStream to read bytes from + * @param inf + * the decompressor used to decompress data read from in + * @param size + * size of the buffer to use + */ + public InflaterInputStream(final InputStream in, final Inflater inf, + final int size) { + super(in); + this.len = 0; + + if (in == null) { + throw new NullPointerException("in may not be null"); + } + if (inf == null) { + throw new NullPointerException("inf may not be null"); + } + if (size < 0) { + throw new IllegalArgumentException("size may not be negative"); + } + + this.inf = inf; + this.buf = new byte[size]; + } + + /** + * Returns 0 once the end of the stream (EOF) has been reached. Otherwise + * returns 1. 
+ */ + @Override + public int available() throws IOException { + // According to the JDK 1.2 docs, this should only ever return 0 + // or 1 and should not be relied upon by Java programs. + return inf.finished() ? 0 : 1; + } + + /** + * Closes the input stream + */ + @Override + public synchronized void close() throws IOException { + if (in != null) { + in.close(); + } + in = null; + } + + /** + * Fills the buffer with more data to decompress. + */ + protected void fill() throws IOException { + if (in == null) { + throw new ZipException("InflaterInputStream is closed"); + } + + len = in.read(buf, 0, buf.length); + + if (len < 0) { + throw new ZipException("Deflated stream ends early."); + } + + inf.setInput(buf, 0, len); + } + + /** + * Reads one byte of decompressed data. + * + * The byte is in the lower 8 bits of the int. + */ + @Override + public int read() throws IOException { + final int nread = read(onebytebuffer, 0, 1); // read one byte + + if (nread > 0) { + return onebytebuffer[0] & 0xff; + } + + return -1; + } + + /** + * Decompresses data into the byte array + * + * @param b + * the array to read and decompress data into + * @param off + * the offset indicating where the data should be placed + * @param len + * the number of bytes to decompress + */ + @Override + public int read(final byte[] b, final int off, final int len) + throws IOException { + if (len == 0) { + return 0; + } + + for (;;) { + int count; + + try { + count = inf.inflate(b, off, len); + } catch (final DataFormatException dfe) { + throw new ZipException(dfe.getMessage()); + } + + if (count > 0) { + return count; + } + + if (inf.needsDictionary() | inf.finished()) { + return -1; + } else if (inf.needsInput()) { + fill(); + } else { + throw new InternalError("Don't know what to do"); + } + } + } + + /** + * Skip specified number of bytes of uncompressed data + * + * @param n + * number of bytes to skip + */ + @Override + public long skip(long n) throws IOException { + if (n < 0) { + throw 
new IllegalArgumentException(); + } + + if (n == 0) { + return 0; + } + + // Implementation copied from InputStream + // Throw away n bytes by reading them into a temp byte[]. + // Limit the temp array to 2Kb so we don't grab too much memory. + final int buflen = n > 2048 ? 2048 : (int) n; + final byte[] tmpbuf = new byte[buflen]; + final long origN = n; + + while (n > 0L) { + final int numread = read(tmpbuf, 0, n > buflen ? buflen : (int) n); + if (numread <= 0) { + break; + } + n -= numread; + } + + return origN - n; + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/OutputWindow.java b/epublib-core/src/main/java/net/sf/jazzlib/OutputWindow.java new file mode 100644 index 00000000..c06b33ae --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/OutputWindow.java @@ -0,0 +1,168 @@ +/* net.sf.jazzlib.OutputWindow + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. 
+ +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/* + * Contains the output from the Inflation process. + * + * We need to have a window so that we can refer backwards into the output stream + * to repeat stuff. + * + * @author John Leuner + * @since JDK 1.1 + */ + +class OutputWindow { + private final int WINDOW_SIZE = 1 << 15; + private final int WINDOW_MASK = WINDOW_SIZE - 1; + + private final byte[] window = new byte[WINDOW_SIZE]; // The window is 2^15 + // bytes + private int window_end = 0; + private int window_filled = 0; + + public void write(final int abyte) { + if (window_filled++ == WINDOW_SIZE) { + throw new IllegalStateException("Window full"); + } + window[window_end++] = (byte) abyte; + window_end &= WINDOW_MASK; + } + + private final void slowRepeat(int rep_start, int len, final int dist) { + while (len-- > 0) { + window[window_end++] = window[rep_start++]; + window_end &= WINDOW_MASK; + rep_start &= WINDOW_MASK; + } + } + + public void repeat(int len, final int dist) { + if ((window_filled += len) > WINDOW_SIZE) { + throw new IllegalStateException("Window full"); + } + + int rep_start = (window_end - dist) & WINDOW_MASK; + final int border = WINDOW_SIZE - len; + if ((rep_start <= border) && (window_end < border)) { + if (len <= dist) { + 
System.arraycopy(window, rep_start, window, window_end, len); + window_end += len; + } else { + /* + * We have to copy manually, since the repeat pattern overlaps. + */ + while (len-- > 0) { + window[window_end++] = window[rep_start++]; + } + } + } else { + slowRepeat(rep_start, len, dist); + } + } + + public int copyStored(final StreamManipulator input, int len) { + len = Math.min(Math.min(len, WINDOW_SIZE - window_filled), + input.getAvailableBytes()); + int copied; + + final int tailLen = WINDOW_SIZE - window_end; + if (len > tailLen) { + copied = input.copyBytes(window, window_end, tailLen); + if (copied == tailLen) { + copied += input.copyBytes(window, 0, len - tailLen); + } + } else { + copied = input.copyBytes(window, window_end, len); + } + + window_end = (window_end + copied) & WINDOW_MASK; + window_filled += copied; + return copied; + } + + public void copyDict(final byte[] dict, int offset, int len) { + if (window_filled > 0) { + throw new IllegalStateException(); + } + + if (len > WINDOW_SIZE) { + offset += len - WINDOW_SIZE; + len = WINDOW_SIZE; + } + System.arraycopy(dict, offset, window, 0, len); + window_end = len & WINDOW_MASK; + } + + public int getFreeSpace() { + return WINDOW_SIZE - window_filled; + } + + public int getAvailable() { + return window_filled; + } + + public int copyOutput(final byte[] output, int offset, int len) { + int copy_end = window_end; + if (len > window_filled) { + len = window_filled; + } else { + copy_end = ((window_end - window_filled) + len) & WINDOW_MASK; + } + + final int copied = len; + final int tailLen = len - copy_end; + + if (tailLen > 0) { + System.arraycopy(window, WINDOW_SIZE - tailLen, output, offset, + tailLen); + offset += tailLen; + len = copy_end; + } + System.arraycopy(window, copy_end - len, output, offset, len); + window_filled -= copied; + if (window_filled < 0) { + throw new IllegalStateException(); + } + return copied; + } + + public void reset() { + window_filled = window_end = 0; + } +} diff 
--git a/epublib-core/src/main/java/net/sf/jazzlib/PendingBuffer.java b/epublib-core/src/main/java/net/sf/jazzlib/PendingBuffer.java new file mode 100644 index 00000000..8966d860 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/PendingBuffer.java @@ -0,0 +1,199 @@ +/* net.sf.jazzlib.PendingBuffer + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. 
If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/** + * This class is general purpose class for writing data to a buffer. + * + * It allows you to write bits as well as bytes + * + * Based on DeflaterPending.java + * + * @author Jochen Hoenicke + * @date Jan 5, 2000 + */ + +class PendingBuffer { + protected byte[] buf; + int start; + int end; + + int bits; + int bitCount; + + public PendingBuffer() { + this(4096); + } + + public PendingBuffer(final int bufsize) { + buf = new byte[bufsize]; + } + + public final void reset() { + start = end = bitCount = 0; + } + + public final void writeByte(final int b) { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new IllegalStateException(); + } + buf[end++] = (byte) b; + } + + public final void writeShort(final int s) { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new IllegalStateException(); + } + buf[end++] = (byte) s; + buf[end++] = (byte) (s >> 8); + } + + public final void writeInt(final int s) { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new IllegalStateException(); + } + buf[end++] = (byte) s; + buf[end++] = (byte) (s >> 8); + buf[end++] = (byte) (s >> 16); + buf[end++] = (byte) (s >> 24); + } + + public final void writeBlock(final byte[] block, final int offset, + final int len) { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new IllegalStateException(); + } + System.arraycopy(block, offset, buf, end, len); + end += len; + } + + public final int getBitCount() { + return bitCount; + } + + public final void alignToByte() { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new IllegalStateException(); + } + if (bitCount > 0) { + buf[end++] = (byte) bits; + if (bitCount > 8) { + buf[end++] = (byte) (bits >>> 8); + } + } + bits = 0; + bitCount = 0; + } + + public final void writeBits(final int b, final int count) { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new 
IllegalStateException(); + } + if (DeflaterConstants.DEBUGGING) { + System.err.println("writeBits(" + Integer.toHexString(b) + "," + + count + ")"); + } + bits |= b << bitCount; + bitCount += count; + if (bitCount >= 16) { + buf[end++] = (byte) bits; + buf[end++] = (byte) (bits >>> 8); + bits >>>= 16; + bitCount -= 16; + } + } + + public final void writeShortMSB(final int s) { + if (DeflaterConstants.DEBUGGING && (start != 0)) { + throw new IllegalStateException(); + } + buf[end++] = (byte) (s >> 8); + buf[end++] = (byte) s; + } + + public final boolean isFlushed() { + return end == 0; + } + + /** + * Flushes the pending buffer into the given output array. If the output + * array is to small, only a partial flush is done. + * + * @param output + * the output array; + * @param offset + * the offset into output array; + * @param length + * the maximum number of bytes to store; + * @exception IndexOutOfBoundsException + * if offset or length are invalid. + */ + public final int flush(final byte[] output, final int offset, int length) { + if (bitCount >= 8) { + buf[end++] = (byte) bits; + bits >>>= 8; + bitCount -= 8; + } + if (length > (end - start)) { + length = end - start; + System.arraycopy(buf, start, output, offset, length); + start = 0; + end = 0; + } else { + System.arraycopy(buf, start, output, offset, length); + start += length; + } + return length; + } + + /** + * Flushes the pending buffer and returns that data in a new array + * + * @param output + * the output stream + */ + + public final byte[] toByteArray() { + final byte[] ret = new byte[end - start]; + System.arraycopy(buf, start, ret, 0, ret.length); + start = 0; + end = 0; + return ret; + } + +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/StreamManipulator.java b/epublib-core/src/main/java/net/sf/jazzlib/StreamManipulator.java new file mode 100644 index 00000000..d0a8fc8c --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/StreamManipulator.java @@ -0,0 +1,215 @@ +/* 
net.sf.jazzlib.StreamManipulator + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +/** + * This class allows us to retrieve a specified amount of bits from the input + * buffer, as well as copy big byte blocks. + * + * It uses an int buffer to store up to 31 bits for direct manipulation. 
/**
 * Bit-level reader over a byte array, used by the Inflater to pull an
 * arbitrary number of bits from the input and to copy whole byte blocks.
 *
 * Bits are kept in an int buffer that is refilled two bytes at a time, so
 * the number of unread bytes in the input array is kept even. A caller must
 * peek before dropping bits. This is not a general purpose class; it is
 * tailored to the access pattern of the Inflater.
 *
 * @author John Leuner, Jochen Hoenicke
 */
class StreamManipulator {
    /** Current input array; bytes in [windowStart, windowEnd) are unread. */
    private byte[] window;
    private int windowStart = 0;
    private int windowEnd = 0;

    /** Buffered bits; the lowest bit is the next bit of the stream. */
    private int bitBuffer = 0;
    private int bitsInBuffer = 0;

    public StreamManipulator() {
    }

    /**
     * Returns the next n bits without consuming them.
     *
     * @return the value of the bits, or -1 if not enough bits are available.
     */
    public final int peekBits(final int n) {
        if (bitsInBuffer < n) {
            if (windowStart == windowEnd) {
                return -1;
            }
            // Refill with two bytes, low byte first.
            final int low = window[windowStart++] & 0xff;
            final int high = window[windowStart++] & 0xff;
            bitBuffer |= (low | (high << 8)) << bitsInBuffer;
            bitsInBuffer += 16;
        }
        return bitBuffer & ((1 << n) - 1);
    }

    /**
     * Consumes n bits. A peekBits call with at least the same n should have
     * succeeded before, so that the bits are actually in the bit buffer.
     */
    public final void dropBits(final int n) {
        bitBuffer >>>= n;
        bitsInBuffer -= n;
    }

    /**
     * Reads and consumes the next n bits.
     *
     * @return the value of the bits, or -1 if not enough bits are available
     *         (nothing is consumed in that case).
     */
    public final int getBits(final int n) {
        final int value = peekBits(n);
        if (value < 0) {
            return value;
        }
        dropBits(n);
        return value;
    }

    /**
     * Returns the number of bits currently held in the bit buffer.
     *
     * @return the number of bits available.
     */
    public final int getAvailableBits() {
        return bitsInBuffer;
    }

    /**
     * Returns the number of whole bytes available, counting both the unread
     * part of the input array and the bit buffer.
     *
     * @return the number of bytes available.
     */
    public final int getAvailableBytes() {
        return (windowEnd - windowStart) + (bitsInBuffer >> 3);
    }

    /**
     * Discards buffered bits up to the next byte boundary.
     */
    public void skipToByteBoundary() {
        bitBuffer >>= (bitsInBuffer & 7);
        bitsInBuffer &= ~7;
    }

    /**
     * @return true if the input array has been read completely.
     */
    public final boolean needsInput() {
        return windowStart == windowEnd;
    }

    /**
     * Copies up to length bytes to output, starting at output[offset]. The
     * bit buffer must be byte aligned. Bytes still held in the bit buffer are
     * delivered first, the rest comes from the input array.
     *
     * @param length
     *            the number of bytes to copy, 0 is allowed.
     * @return the number of bytes copied, 0 if no byte is available.
     */
    public int copyBytes(final byte[] output, int offset, int length) {
        if (length < 0) {
            throw new IllegalArgumentException("length negative");
        }
        if ((bitsInBuffer & 7) != 0) {
            /* the bit buffer must hold a whole number of bytes */
            throw new IllegalStateException("Bit buffer is not aligned!");
        }

        int fromBitBuffer = 0;
        for (; (bitsInBuffer > 0) && (length > 0); length--, fromBitBuffer++) {
            output[offset++] = (byte) bitBuffer;
            bitBuffer >>>= 8;
            bitsInBuffer -= 8;
        }
        if (length == 0) {
            return fromBitBuffer;
        }

        length = Math.min(length, windowEnd - windowStart);
        System.arraycopy(window, windowStart, output, offset, length);
        windowStart += length;

        if (((windowStart - windowEnd) & 1) != 0) {
            /* keep an even number of unread bytes in the array, see peekBits */
            bitBuffer = window[windowStart++] & 0xff;
            bitsInBuffer = 8;
        }
        return fromBitBuffer + length;
    }

    /**
     * Forgets the current input and all buffered bits.
     */
    public void reset() {
        windowStart = windowEnd = bitBuffer = bitsInBuffer = 0;
    }

    /**
     * Supplies new input. The previous input must have been read completely.
     *
     * @exception IllegalStateException
     *                if unread bytes of the old input remain.
     * @exception ArrayIndexOutOfBoundsException
     *                if off and len do not describe a range inside buf.
     */
    public void setInput(final byte[] buf, int off, final int len) {
        if (windowStart < windowEnd) {
            throw new IllegalStateException(
                    "Old input was not completely processed");
        }

        final int end = off + len;

        /*
         * Fail early on a bad range. "end < off" also catches integer
         * wrap-around of off + len.
         */
        if ((off < 0) || (end < off) || (buf.length < end)) {
            throw new ArrayIndexOutOfBoundsException();
        }

        if ((len & 1) != 0) {
            /* keep an even number of unread bytes in the array, see peekBits */
            bitBuffer |= (buf[off++] & 0xff) << bitsInBuffer;
            bitsInBuffer += 8;
        }

        window = buf;
        windowStart = off;
        windowEnd = end;
    }
}
/**
 * Sizes, signatures and field offsets of the records that make up a zip
 * archive. Signatures are the four bytes 'P', 'K', x, y read as a
 * little-endian int. All other values are byte counts or byte offsets
 * within the respective record.
 */
interface ZipConstants {
    /* Local file header */
    int LOCSIG = 0x04034b50; // 'P' 'K' 3 4
    int LOCHDR = 30;

    int LOCVER = 4;
    int LOCFLG = 6;
    int LOCHOW = 8;
    int LOCTIM = 10;
    int LOCCRC = 14;
    int LOCSIZ = 18;
    int LOCLEN = 22;
    int LOCNAM = 26;
    int LOCEXT = 28;

    /* Data descriptor */
    int EXTSIG = 0x08074b50; // 'P' 'K' 7 8
    int EXTHDR = 16;

    int EXTCRC = 4;
    int EXTSIZ = 8;
    int EXTLEN = 12;

    /* Central directory file header */
    int CENSIG = 0x02014b50; // 'P' 'K' 1 2
    int CENHDR = 46;

    int CENVEM = 4;
    int CENVER = 6;
    int CENFLG = 8;
    int CENHOW = 10;
    int CENTIM = 12;
    int CENCRC = 16;
    int CENSIZ = 20;
    int CENLEN = 24;
    int CENNAM = 28;
    int CENEXT = 30;
    int CENCOM = 32;
    int CENDSK = 34;
    int CENATT = 36;
    int CENATX = 38;
    int CENOFF = 42;

    /* End of central directory record */
    int ENDSIG = 0x06054b50; // 'P' 'K' 5 6
    int ENDHDR = 22;

    /* The following two fields are missing in SUN JDK */
    int ENDNRD = 4;
    int ENDDCD = 6;
    int ENDSUB = 8;
    int ENDTOT = 10;
    int ENDSIZ = 12;
    int ENDOFF = 16;
    int ENDCOM = 20;
}
a/epublib-core/src/main/java/net/sf/jazzlib/ZipEntry.java b/epublib-core/src/main/java/net/sf/jazzlib/ZipEntry.java new file mode 100644 index 00000000..33f5c9dd --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/ZipEntry.java @@ -0,0 +1,409 @@ +/* net.sf.jazzlib.ZipEntry + Copyright (C) 2001, 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. 
If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.util.Calendar; +import java.util.Date; + +/** + * This class represents a member of a zip archive. ZipFile and ZipInputStream + * will give you instances of this class as information about the members in an + * archive. On the other hand ZipOutputStream needs an instance of this class to + * create a new member. + * + * @author Jochen Hoenicke + */ +public class ZipEntry implements ZipConstants, Cloneable { + private static int KNOWN_SIZE = 1; + private static int KNOWN_CSIZE = 2; + private static int KNOWN_CRC = 4; + private static int KNOWN_TIME = 8; + + private static Calendar cal; + + private final String name; + private int size; + private int compressedSize; + private int crc; + private int dostime; + private short known = 0; + private short method = -1; + private byte[] extra = null; + private String comment = null; + + int flags; /* used by ZipOutputStream */ + int offset; /* used by ZipFile and ZipOutputStream */ + + /** + * Compression method. This method doesn't compress at all. + */ + public final static int STORED = 0; + /** + * Compression method. This method uses the Deflater. + */ + public final static int DEFLATED = 8; + + /** + * Creates a zip entry with the given name. + * + * @param name + * the name. May include directory components separated by '/'. + * + * @exception NullPointerException + * when name is null. + * @exception IllegalArgumentException + * when name is bigger then 65535 chars. + */ + public ZipEntry(final String name) { + final int length = name.length(); + if (length > 65535) { + throw new IllegalArgumentException("name length is " + length); + } + this.name = name; + } + + /** + * Creates a copy of the given zip entry. + * + * @param e + * the entry to copy. 
+ */ + public ZipEntry(final ZipEntry e) { + name = e.name; + known = e.known; + size = e.size; + compressedSize = e.compressedSize; + crc = e.crc; + dostime = e.dostime; + method = e.method; + extra = e.extra; + comment = e.comment; + } + + final void setDOSTime(final int dostime) { + this.dostime = dostime; + known |= KNOWN_TIME; + } + + final int getDOSTime() { + if ((known & KNOWN_TIME) == 0) { + return 0; + } else { + return dostime; + } + } + + /** + * Creates a copy of this zip entry. + * + * The extra data array, when set, is copied as well, so the clone does + * not share it with this entry. + */ + @Override + public Object clone() { + try { + // The JCL says that the `extra' field is also copied. + final ZipEntry clone = (ZipEntry) super.clone(); + if (extra != null) { + clone.extra = extra.clone(); + } + return clone; + } catch (final CloneNotSupportedException ex) { + throw new InternalError(); + } + } + + /** + * Returns the entry name. The path components in the entry are always + * separated by slashes ('/'). + */ + public String getName() { + return name; + } + + /** + * Sets the time of last modification of the entry. The time is stored as a packed DOS date/time, which has a two-second resolution. + * + * @param time the time of last modification, in milliseconds since the epoch. + */ + public void setTime(final long time) { + final Calendar cal = getCalendar(); + synchronized (cal) { + cal.setTime(new Date(time)); + dostime = (((cal.get(Calendar.YEAR) - 1980) & 0x7f) << 25) + | ((cal.get(Calendar.MONTH) + 1) << 21) + | ((cal.get(Calendar.DAY_OF_MONTH)) << 16) + | ((cal.get(Calendar.HOUR_OF_DAY)) << 11) + | ((cal.get(Calendar.MINUTE)) << 5) + | ((cal.get(Calendar.SECOND)) >> 1); + } + this.known |= KNOWN_TIME; + } + + /** + * Gets the time of last modification of the entry. + * + * @return the time of last modification in milliseconds since the epoch, or -1 if unknown.
+ */ + public long getTime() { + if ((known & KNOWN_TIME) == 0) { + return -1; + } + + final int sec = 2 * (dostime & 0x1f); + final int min = (dostime >> 5) & 0x3f; + final int hrs = (dostime >> 11) & 0x1f; + final int day = (dostime >> 16) & 0x1f; + final int mon = ((dostime >> 21) & 0xf) - 1; + final int year = ((dostime >> 25) & 0x7f) + 1980; /* DOS years count from 1980 */ + + try { + final Calendar cal = getCalendar(); + synchronized (cal) { + cal.set(year, mon, day, hrs, min, sec); + cal.set(Calendar.MILLISECOND, 0); /* the calendar is shared: drop milliseconds left by an earlier use */ + return cal.getTime().getTime(); + } + } catch (final RuntimeException ex) { + /* Ignore illegal time stamp */ + known &= ~KNOWN_TIME; + return -1; + } + } + + private static synchronized Calendar getCalendar() { + if (cal == null) { + cal = Calendar.getInstance(); + } + + return cal; + } + + /** + * Sets the size of the uncompressed data. + * + * @exception IllegalArgumentException + * if size is not in 0..0xffffffffL + */ + public void setSize(final long size) { + if ((size & 0xffffffff00000000L) != 0) { + throw new IllegalArgumentException(); + } + this.size = (int) size; + this.known |= KNOWN_SIZE; + } + + /** + * Gets the size of the uncompressed data. + * + * @return the size or -1 if unknown. + */ + public long getSize() { + return (known & KNOWN_SIZE) != 0 ? size & 0xffffffffL : -1L; + } + + /** + * Sets the size of the compressed data. + * + * @exception IllegalArgumentException + * if size is not in 0..0xffffffffL + */ + public void setCompressedSize(final long csize) { + if ((csize & 0xffffffff00000000L) != 0) { + throw new IllegalArgumentException(); + } + this.compressedSize = (int) csize; + this.known |= KNOWN_CSIZE; + } + + /** + * Gets the size of the compressed data. + * + * @return the size or -1 if unknown. + */ + public long getCompressedSize() { + return (known & KNOWN_CSIZE) != 0 ? compressedSize & 0xffffffffL : -1L; + } + + /** + * Sets the crc of the uncompressed data.
+ * + * @exception IllegalArgumentException + * if crc is not in 0..0xffffffffL + */ + public void setCrc(final long crc) { + if ((crc & 0xffffffff00000000L) != 0) { + throw new IllegalArgumentException(); + } + this.crc = (int) crc; + this.known |= KNOWN_CRC; + } + + /** + * Gets the crc of the uncompressed data. + * + * @return the crc or -1 if unknown. + */ + public long getCrc() { + return (known & KNOWN_CRC) != 0 ? crc & 0xffffffffL : -1L; + } + + /** + * Sets the compression method. Only DEFLATED and STORED are supported. + * + * @exception IllegalArgumentException + * if method is not supported. + * @see ZipOutputStream#DEFLATED + * @see ZipOutputStream#STORED + */ + public void setMethod(final int method) { + if ((method != ZipOutputStream.STORED) + && (method != ZipOutputStream.DEFLATED)) { + throw new IllegalArgumentException(); + } + this.method = (short) method; + } + + /** + * Gets the compression method. + * + * @return the compression method or -1 if unknown. + */ + public int getMethod() { + return method; + } + + /** + * Sets the extra data. If the data holds an extended time stamp field (signature 0x5455) with a modification time, that time is applied to the entry. + * + * @exception IllegalArgumentException + * if extra is longer than 0xffff bytes. + */ + public void setExtra(final byte[] extra) { + if (extra == null) { + this.extra = null; + return; + } + + if (extra.length > 0xffff) { + throw new IllegalArgumentException(); + } + this.extra = extra; + try { + int pos = 0; + while (pos < extra.length) { + final int sig = (extra[pos++] & 0xff) + | ((extra[pos++] & 0xff) << 8); + final int len = (extra[pos++] & 0xff) + | ((extra[pos++] & 0xff) << 8); + if (sig == 0x5455) { + /* extended time stamp: modification time in seconds, setTime expects milliseconds */ + final int flags = extra[pos]; + if ((flags & 1) != 0) { + final long time = ((extra[pos + 1] & 0xff) + | ((extra[pos + 2] & 0xff) << 8) + | ((extra[pos + 3] & 0xff) << 16) | ((extra[pos + 4] & 0xff) << 24)); + setTime(time * 1000L); + } + } + pos += len; + } + } catch (final ArrayIndexOutOfBoundsException ex) { + /* be lenient */ + return; + } + } + + /** + * Gets the extra data.
+ * + * @return the extra data or null if not set. + */ + public byte[] getExtra() { + return extra; + } + + /** + * Sets the entry comment. + * + * @exception IllegalArgumentException + * if comment is longer than 0xffff. + */ + public void setComment(final String comment) { + if ((comment != null) && (comment.length() > 0xffff)) { + throw new IllegalArgumentException(); + } + this.comment = comment; + } + + /** + * Gets the comment. + * + * @return the comment or null if not set. + */ + public String getComment() { + return comment; + } + + /** + * Gets true, if the entry is a directory. This is solely determined by the + * name, a trailing slash '/' marks a directory. + */ + public boolean isDirectory() { + final int nlen = name.length(); + return (nlen > 0) && (name.charAt(nlen - 1) == '/'); + } + + /** + * Gets the string representation of this ZipEntry. This is just the name as + * returned by getName(). + */ + @Override + public String toString() { + return name; + } + + /** + * Gets the hashCode of this ZipEntry. This is just the hashCode of the + * name. Note that the equals method isn't changed, though. + */ + @Override + public int hashCode() { + return name.hashCode(); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/ZipException.java b/epublib-core/src/main/java/net/sf/jazzlib/ZipException.java new file mode 100644 index 00000000..61d8b157 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/ZipException.java @@ -0,0 +1,70 @@ +/* ZipException.java - exception representing a zip related error + Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.IOException; + +/** + * Thrown during the creation or input of a zip file. + * + * @author Jochen Hoenicke + * @author Per Bothner + * @status updated to 1.4 + */ +public class ZipException extends IOException { + /** + * Compatible with JDK 1.0+. + */ + private static final long serialVersionUID = 8000196834066748623L; + + /** + * Create an exception without a message. + */ + public ZipException() { + } + + /** + * Create an exception with a message. 
+ * + * @param msg + * the message + */ + public ZipException(final String msg) { + super(msg); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/ZipFile.java b/epublib-core/src/main/java/net/sf/jazzlib/ZipFile.java new file mode 100644 index 00000000..2b6b0482 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/ZipFile.java @@ -0,0 +1,557 @@ +/* net.sf.jazzlib.ZipFile + Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. 
If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.BufferedInputStream; +import java.io.DataInput; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; + +/** + * This class represents a Zip archive. You can ask for the contained entries, + * or get an input stream for a file entry. The entry is automatically + * decompressed. + * + * This class is thread safe: You can open input streams for arbitrary entries + * in different threads. + * + * @author Jochen Hoenicke + * @author Artur Biesiadowski + */ +public class ZipFile implements ZipConstants { + + /** + * Mode flag to open a zip file for reading. + */ + public static final int OPEN_READ = 0x1; + + /** + * Mode flag to delete a zip file after reading. + */ + public static final int OPEN_DELETE = 0x4; + + // Name of this zip file. + private final String name; + + // File from which zip entries are read. + private final RandomAccessFile raf; + + // The entries of this zip file when initialized and not yet closed. + private Map entries; + + private boolean closed = false; + + /** + * Opens a Zip file with the given name for reading. + * + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the file doesn't contain a valid zip archive. + */ + public ZipFile(final String name) throws ZipException, IOException { + this.raf = new RandomAccessFile(name, "r"); + this.name = name; + } + + /** + * Opens a Zip file reading the given File. + * + * @exception IOException + * if a i/o error occured. 
+ * @exception ZipException + * if the file doesn't contain a valid zip archive. + */ + public ZipFile(final File file) throws ZipException, IOException { + this.raf = new RandomAccessFile(file, "r"); + this.name = file.getPath(); + } + + /** + * Opens a Zip file reading the given File in the given mode. + * + * If the OPEN_DELETE mode is specified, the zip file will be deleted at + * some time moment after it is opened. It will be deleted before the zip + * file is closed or the Virtual Machine exits. + * + * The contents of the zip file will be accessible until it is closed. + * + * The OPEN_DELETE mode is currently unimplemented in this library + * + * @since JDK1.3 + * @param mode + * Must be one of OPEN_READ or OPEN_READ | OPEN_DELETE + * + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the file doesn't contain a valid zip archive. + */ + public ZipFile(final File file, final int mode) throws ZipException, + IOException { + if ((mode & OPEN_DELETE) != 0) { + throw new IllegalArgumentException( + "OPEN_DELETE mode not supported yet in net.sf.jazzlib.ZipFile"); + } + this.raf = new RandomAccessFile(file, "r"); + this.name = file.getPath(); + } + + /** + * Read an unsigned short in little endian byte order from the given + * DataInput stream using the given byte buffer. + * + * @param di + * DataInput stream to read from. + * @param b + * the byte buffer to read in (must be at least 2 bytes long). + * @return The value read. + * + * @exception IOException + * if a i/o error occured. + * @exception EOFException + * if the file ends prematurely + */ + private final int readLeShort(final DataInput di, final byte[] b) + throws IOException { + di.readFully(b, 0, 2); + return (b[0] & 0xff) | ((b[1] & 0xff) << 8); + } + + /** + * Read an int in little endian byte order from the given DataInput stream + * using the given byte buffer. + * + * @param di + * DataInput stream to read from. 
+ * @param b + * the byte buffer to read in (must be at least 4 bytes long). + * @return The value read. + * + * @exception IOException + * if a i/o error occured. + * @exception EOFException + * if the file ends prematurely + */ + private final int readLeInt(final DataInput di, final byte[] b) + throws IOException { + di.readFully(b, 0, 4); + return ((b[0] & 0xff) | ((b[1] & 0xff) << 8)) + | (((b[2] & 0xff) | ((b[3] & 0xff) << 8)) << 16); + } + + /** + * Read an unsigned short in little endian byte order from the given byte + * buffer at the given offset. + * + * @param b + * the byte array to read from. + * @param off + * the offset to read from. + * @return The value read. + */ + private final int readLeShort(final byte[] b, final int off) { + return (b[off] & 0xff) | ((b[off + 1] & 0xff) << 8); + } + + /** + * Read an int in little endian byte order from the given byte buffer at the + * given offset. + * + * @param b + * the byte array to read from. + * @param off + * the offset to read from. + * @return The value read. + */ + private final int readLeInt(final byte[] b, final int off) { + return ((b[off] & 0xff) | ((b[off + 1] & 0xff) << 8)) + | (((b[off + 2] & 0xff) | ((b[off + 3] & 0xff) << 8)) << 16); + } + + /** + * Read the central directory of a zip file and fill the entries array. This + * is called exactly once when first needed. It is called while holding the + * lock on raf. + * + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the central directory is malformed + */ + private void readEntries() throws ZipException, IOException { + /* + * Search for the End Of Central Directory. When a zip comment is + * present the directory may start earlier. FIXME: This searches the + * whole file in a very slow manner if the file isn't a zip file. 
+ */ + long pos = raf.length() - ENDHDR; + final byte[] ebs = new byte[CENHDR]; + + do { + if (pos < 0) { + throw new ZipException( + "central directory not found, probably not a zip file: " + + name); + } + raf.seek(pos--); + } while (readLeInt(raf, ebs) != ENDSIG); + + if (raf.skipBytes(ENDTOT - ENDNRD) != (ENDTOT - ENDNRD)) { + throw new EOFException(name); + } + final int count = readLeShort(raf, ebs); + if (raf.skipBytes(ENDOFF - ENDSIZ) != (ENDOFF - ENDSIZ)) { + throw new EOFException(name); + } + final int centralOffset = readLeInt(raf, ebs); + + entries = new HashMap(count + (count / 2)); + raf.seek(centralOffset); + + byte[] buffer = new byte[16]; + for (int i = 0; i < count; i++) { + raf.readFully(ebs); + if (readLeInt(ebs, 0) != CENSIG) { + throw new ZipException("Wrong Central Directory signature: " + + name); + } + + final int method = readLeShort(ebs, CENHOW); + final int dostime = readLeInt(ebs, CENTIM); + final int crc = readLeInt(ebs, CENCRC); + final int csize = readLeInt(ebs, CENSIZ); + final int size = readLeInt(ebs, CENLEN); + final int nameLen = readLeShort(ebs, CENNAM); + final int extraLen = readLeShort(ebs, CENEXT); + final int commentLen = readLeShort(ebs, CENCOM); + + final int offset = readLeInt(ebs, CENOFF); + + final int needBuffer = Math.max(nameLen, commentLen); + if (buffer.length < needBuffer) { + buffer = new byte[needBuffer]; + } + + raf.readFully(buffer, 0, nameLen); + final String name = new String(buffer, 0, 0, nameLen); + + final ZipEntry entry = new ZipEntry(name); + entry.setMethod(method); + entry.setCrc(crc & 0xffffffffL); + entry.setSize(size & 0xffffffffL); + entry.setCompressedSize(csize & 0xffffffffL); + entry.setDOSTime(dostime); + if (extraLen > 0) { + final byte[] extra = new byte[extraLen]; + raf.readFully(extra); + entry.setExtra(extra); + } + if (commentLen > 0) { + raf.readFully(buffer, 0, commentLen); + entry.setComment(new String(buffer, 0, commentLen)); + } + entry.offset = offset; + entries.put(name, 
entry); + } + } + + /** + * Closes the ZipFile. This also closes all input streams given by this + * class. After this is called, no further method should be called. + * + * @exception IOException + * if a i/o error occured. + */ + public void close() throws IOException { + synchronized (raf) { + closed = true; + entries = null; + raf.close(); + } + } + + /** + * Calls the close() method when this ZipFile has not yet been + * explicitly closed. + */ + @Override + protected void finalize() throws IOException { + if (!closed && (raf != null)) { + close(); + } + } + + /** + * Returns an enumeration of all Zip entries in this Zip file. + */ + public Enumeration entries() { + try { + return new ZipEntryEnumeration(getEntries().values().iterator()); + } catch (final IOException ioe) { + return null; + } + } + + /** + * Checks that the ZipFile is still open and reads entries when necessary. + * + * @exception IllegalStateException + * when the ZipFile has already been closed. + * @exception IOEexception + * when the entries could not be read. + */ + private Map getEntries() throws IOException { + synchronized (raf) { + if (closed) { + throw new IllegalStateException("ZipFile has closed: " + name); + } + + if (entries == null) { + readEntries(); + } + + return entries; + } + } + + /** + * Searches for a zip entry in this archive with the given name. + * + * @param the + * name. May contain directory components separated by slashes + * ('/'). + * @return the zip entry, or null if no entry with that name exists. + */ + public ZipEntry getEntry(final String name) { + try { + final Map entries = getEntries(); + final ZipEntry entry = entries.get(name); + return entry != null ? 
(ZipEntry) entry.clone() : null; + } catch (final IOException ioe) { + return null; + } + } + + // access should be protected by synchronized(raf) + private final byte[] locBuf = new byte[LOCHDR]; + + /** + * Checks, if the local header of the entry at index i matches the central + * directory, and returns the offset to the data. + * + * @param entry + * to check. + * @return the start offset of the (compressed) data. + * + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the local header doesn't match the central directory + * header + */ + private long checkLocalHeader(final ZipEntry entry) throws IOException { + synchronized (raf) { + raf.seek(entry.offset); + raf.readFully(locBuf); + + if (readLeInt(locBuf, 0) != LOCSIG) { + throw new ZipException("Wrong Local header signature: " + name); + } + + if (entry.getMethod() != readLeShort(locBuf, LOCHOW)) { + throw new ZipException("Compression method mismatch: " + name); + } + + if (entry.getName().length() != readLeShort(locBuf, LOCNAM)) { + throw new ZipException("file name length mismatch: " + name); + } + + final int extraLen = entry.getName().length() + + readLeShort(locBuf, LOCEXT); + return entry.offset + LOCHDR + extraLen; + } + } + + /** + * Creates an input stream reading the given zip entry as uncompressed data. + * Normally zip entry should be an entry returned by getEntry() or + * entries(). + * + * @param entry + * the entry to create an InputStream for. + * @return the input stream. + * + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the Zip archive is malformed. 
+ */ + public InputStream getInputStream(final ZipEntry entry) throws IOException { + final Map entries = getEntries(); + final String name = entry.getName(); + final ZipEntry zipEntry = (ZipEntry) entries.get(name); /* look the entry up by name: the offset lives in our own copy */ + if (zipEntry == null) { + throw new NoSuchElementException(name); + } + + final long start = checkLocalHeader(zipEntry); + final int method = zipEntry.getMethod(); + final InputStream is = new BufferedInputStream(new PartialInputStream( + raf, start, zipEntry.getCompressedSize())); + switch (method) { + case ZipOutputStream.STORED: + return is; + case ZipOutputStream.DEFLATED: + return new InflaterInputStream(is, new Inflater(true)); + default: + throw new ZipException("Unknown compression method " + method); + } + } + + /** + * Returns the (path) name of this zip file. + */ + public String getName() { + return name; + } + + /** + * Returns the number of entries in this zip file, or 0 if the entries could not be read. + */ + public int size() { + try { + return getEntries().size(); + } catch (final IOException ioe) { + return 0; + } + } + + private static class ZipEntryEnumeration implements Enumeration { + private final Iterator elements; + + public ZipEntryEnumeration(final Iterator elements) { + this.elements = elements; + } + + @Override + public boolean hasMoreElements() { + return elements.hasNext(); + } + + @Override + public Object nextElement() { + /* + * We return a clone, just to be safe that the user doesn't change + * the entry.
+ */ + return ((ZipEntry) elements.next()).clone(); + } + } + + private static class PartialInputStream extends InputStream { + private final RandomAccessFile raf; + long filepos, end; + + public PartialInputStream(final RandomAccessFile raf, final long start, + final long len) { + this.raf = raf; + filepos = start; + end = start + len; + } + + @Override + public int available() { + final long amount = end - filepos; + if (amount > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + return (int) amount; + } + + @Override + public int read() throws IOException { + if (filepos == end) { + return -1; + } + synchronized (raf) { + raf.seek(filepos++); + return raf.read(); + } + } + + @Override + public int read(final byte[] b, final int off, int len) + throws IOException { + if (len > (end - filepos)) { + len = (int) (end - filepos); + if (len == 0) { + return -1; + } + } + synchronized (raf) { + raf.seek(filepos); + final int count = raf.read(b, off, len); + if (count > 0) { + filepos += count; /* advance by the bytes actually read, which may be fewer than len */ + } + return count; + } + } + + @Override + public long skip(long amount) { + if (amount < 0) { + throw new IllegalArgumentException(); + } + if (amount > (end - filepos)) { + amount = end - filepos; + } + filepos += amount; + return amount; + } + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/ZipInputStream.java b/epublib-core/src/main/java/net/sf/jazzlib/ZipInputStream.java new file mode 100644 index 00000000..8caf80f3 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/ZipInputStream.java @@ -0,0 +1,380 @@ +/* net.sf.jazzlib.ZipInputStream + Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version.
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +/** + * This is a FilterInputStream that reads the files in an zip archive one after + * another. It has a special method to get the zip entry of the next file. The + * zip entry contains information about the file name size, compressed size, + * CRC, etc. + * + * It includes support for STORED and DEFLATED entries. 
+ * + * @author Jochen Hoenicke + */ +public class ZipInputStream extends InflaterInputStream implements ZipConstants { + private CRC32 crc = new CRC32(); + private ZipEntry entry = null; + + private int csize; + private int size; + private int method; + private int flags; + private int avail; + private boolean entryAtEOF; + + /** + * Creates a new Zip input stream, reading a zip archive. + */ + public ZipInputStream(final InputStream in) { + super(in, new Inflater(true)); + } + + private void fillBuf() throws IOException { + avail = len = in.read(buf, 0, buf.length); + } + + private int readBuf(final byte[] out, final int offset, int length) + throws IOException { + if (avail <= 0) { + fillBuf(); + if (avail <= 0) { + return -1; + } + } + if (length > avail) { + length = avail; + } + System.arraycopy(buf, len - avail, out, offset, length); + avail -= length; + return length; + } + + private void readFully(final byte[] out) throws IOException { + int off = 0; + int len = out.length; + while (len > 0) { + final int count = readBuf(out, off, len); + if (count == -1) { + throw new EOFException(); + } + off += count; + len -= count; + } + } + + private final int readLeByte() throws IOException { + if (avail <= 0) { + fillBuf(); + if (avail <= 0) { + throw new ZipException("EOF in header"); + } + } + return buf[len - avail--] & 0xff; + } + + /** + * Read an unsigned short in little endian byte order. + */ + private final int readLeShort() throws IOException { + return readLeByte() | (readLeByte() << 8); + } + + /** + * Read an int in little endian byte order. + */ + private final int readLeInt() throws IOException { + return readLeShort() | (readLeShort() << 16); + } + + /** + * Open the next entry from the zip archive, and return its description. If + * the previous entry wasn't closed, this method will close it. 
+ */ + public ZipEntry getNextEntry() throws IOException { + if (crc == null) { + throw new IOException("Stream closed."); + } + if (entry != null) { + closeEntry(); + } + + final int header = readLeInt(); + if (header == CENSIG) { + /* Central Header reached. */ + close(); + return null; + } + if (header != LOCSIG) { + throw new ZipException("Wrong Local header signature: " + + Integer.toHexString(header)); + } + /* skip version */ + readLeShort(); + flags = readLeShort(); + method = readLeShort(); + final int dostime = readLeInt(); + final int crc = readLeInt(); + csize = readLeInt(); + size = readLeInt(); + final int nameLen = readLeShort(); + final int extraLen = readLeShort(); + + if ((method == ZipOutputStream.STORED) && (csize != size)) { + throw new ZipException("Stored, but compressed != uncompressed"); + } + + final byte[] buffer = new byte[nameLen]; + readFully(buffer); + final String name = new String(buffer); + + entry = createZipEntry(name); + entryAtEOF = false; + entry.setMethod(method); + if ((flags & 8) == 0) { + entry.setCrc(crc & 0xffffffffL); + entry.setSize(size & 0xffffffffL); + entry.setCompressedSize(csize & 0xffffffffL); + } + entry.setDOSTime(dostime); + if (extraLen > 0) { + final byte[] extra = new byte[extraLen]; + readFully(extra); + entry.setExtra(extra); + } + + if ((method == ZipOutputStream.DEFLATED) && (avail > 0)) { + System.arraycopy(buf, len - avail, buf, 0, avail); + len = avail; + avail = 0; + inf.setInput(buf, 0, len); + } + return entry; + } + + private void readDataDescr() throws IOException { + if (readLeInt() != EXTSIG) { + throw new ZipException("Data descriptor signature not found"); + } + entry.setCrc(readLeInt() & 0xffffffffL); + csize = readLeInt(); + size = readLeInt(); + entry.setSize(size & 0xffffffffL); + entry.setCompressedSize(csize & 0xffffffffL); + } + + /** + * Closes the current zip entry and moves to the next one. 
+ */ + public void closeEntry() throws IOException { + if (crc == null) { + throw new IOException("Stream closed."); + } + if (entry == null) { + return; + } + + if (method == ZipOutputStream.DEFLATED) { + if ((flags & 8) != 0) { + /* We don't know how much we must skip, read until end. */ + final byte[] tmp = new byte[2048]; + while (read(tmp) > 0) { + ; + } + /* read will close this entry */ + return; + } + csize -= inf.getTotalIn(); + avail = inf.getRemaining(); + } + + if ((avail > csize) && (csize >= 0)) { + avail -= csize; + } else { + csize -= avail; + avail = 0; + while (csize != 0) { + final long skipped = in.skip(csize & 0xffffffffL); + if (skipped <= 0) { + throw new ZipException("zip archive ends early."); + } + csize -= skipped; + } + } + + size = 0; + crc.reset(); + if (method == ZipOutputStream.DEFLATED) { + inf.reset(); + } + entry = null; + entryAtEOF = true; + } + + @Override + public int available() throws IOException { + return entryAtEOF ? 0 : 1; + } + + /** + * Reads a byte from the current zip entry. + * + * @return the byte or -1 on EOF. + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the deflated stream is corrupted. + */ + @Override + public int read() throws IOException { + final byte[] b = new byte[1]; + if (read(b, 0, 1) <= 0) { + return -1; + } + return b[0] & 0xff; + } + + /** + * Reads a block of bytes from the current zip entry. + * + * @return the number of bytes read (may be smaller, even before EOF), or -1 + * on EOF. + * @exception IOException + * if a i/o error occured. + * @exception ZipException + * if the deflated stream is corrupted. 
+ */ + @Override + public int read(final byte[] b, final int off, int len) throws IOException { + if (len == 0) { + return 0; + } + if (crc == null) { + throw new IOException("Stream closed."); + } + if (entry == null) { + return -1; + } + boolean finished = false; + switch (method) { + case ZipOutputStream.DEFLATED: + len = super.read(b, off, len); + if (len < 0) { + if (!inf.finished()) { + throw new ZipException("Inflater not finished!?"); + } + avail = inf.getRemaining(); + if ((flags & 8) != 0) { + readDataDescr(); + } + + if ((inf.getTotalIn() != csize) || (inf.getTotalOut() != size)) { + throw new ZipException("size mismatch: " + csize + ";" + + size + " <-> " + inf.getTotalIn() + ";" + + inf.getTotalOut()); + } + inf.reset(); + finished = true; + } + break; + + case ZipOutputStream.STORED: + + if ((len > csize) && (csize >= 0)) { + len = csize; + } + + len = readBuf(b, off, len); + if (len > 0) { + csize -= len; + size -= len; + } + + if (csize == 0) { + finished = true; + } else if (len < 0) { + throw new ZipException("EOF in stored block"); + } + break; + } + + if (len > 0) { + crc.update(b, off, len); + } + + if (finished) { + final long entryCrc = entry.getCrc(); + if ((entryCrc >= 0) && ((crc.getValue() & 0xffffffffL) != entryCrc)) { + throw new ZipException("CRC mismatch"); + } + crc.reset(); + entry = null; + entryAtEOF = true; + } + return len; + } + + /** + * Closes the zip file. + * + * @exception IOException + * if a i/o error occured. + */ + @Override + public void close() throws IOException { + super.close(); + crc = null; + entry = null; + entryAtEOF = true; + } + + /** + * Creates a new zip entry for the given name. This is equivalent to new + * ZipEntry(name). + * + * @param name + * the name of the zip entry. 
+ */ + protected ZipEntry createZipEntry(final String name) { + return new ZipEntry(name); + } +} diff --git a/epublib-core/src/main/java/net/sf/jazzlib/ZipOutputStream.java b/epublib-core/src/main/java/net/sf/jazzlib/ZipOutputStream.java new file mode 100644 index 00000000..0ae94b85 --- /dev/null +++ b/epublib-core/src/main/java/net/sf/jazzlib/ZipOutputStream.java @@ -0,0 +1,425 @@ +/* net.sf.jazzlib.ZipOutputStream + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. 
If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package net.sf.jazzlib; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Enumeration; +import java.util.Vector; + +/** + * This is a FilterOutputStream that writes the files into a zip archive one + * after another. It has a special method to start a new zip entry. The zip + * entries contains information about the file name size, compressed size, CRC, + * etc. + * + * It includes support for STORED and DEFLATED entries. + * + * This class is not thread safe. + * + * @author Jochen Hoenicke + */ +public class ZipOutputStream extends DeflaterOutputStream implements +ZipConstants { + private Vector entries = new Vector(); + private final CRC32 crc = new CRC32(); + private ZipEntry curEntry = null; + + private int curMethod; + private int size; + private int offset = 0; + + private byte[] zipComment = new byte[0]; + private int defaultMethod = DEFLATED; + + /** + * Our Zip version is hard coded to 1.0 resp. 2.0 + */ + private final static int ZIP_STORED_VERSION = 10; + private final static int ZIP_DEFLATED_VERSION = 20; + + /** + * Compression method. This method doesn't compress at all. + */ + public final static int STORED = 0; + /** + * Compression method. This method uses the Deflater. + */ + public final static int DEFLATED = 8; + + /** + * Creates a new Zip output stream, writing a zip archive. + * + * @param out + * the output stream to which the zip archive is written. + */ + public ZipOutputStream(final OutputStream out) { + super(out, new Deflater(Deflater.DEFAULT_COMPRESSION, true)); + } + + /** + * Set the zip file comment. + * + * @param comment + * the comment. + * @exception IllegalArgumentException + * if encoding of comment is longer than 0xffff bytes. 
+ */ + public void setComment(final String comment) { + byte[] commentBytes; + commentBytes = comment.getBytes(); + if (commentBytes.length > 0xffff) { + throw new IllegalArgumentException("Comment too long."); + } + zipComment = commentBytes; + } + + /** + * Sets default compression method. If the Zip entry specifies another + * method its method takes precedence. + * + * @param method + * the method. + * @exception IllegalArgumentException + * if method is not supported. + * @see #STORED + * @see #DEFLATED + */ + public void setMethod(final int method) { + if ((method != STORED) && (method != DEFLATED)) { + throw new IllegalArgumentException("Method not supported."); + } + defaultMethod = method; + } + + /** + * Sets default compression level. The new level will be activated + * immediately. + * + * @exception IllegalArgumentException + * if level is not supported. + * @see Deflater + */ + public void setLevel(final int level) { + def.setLevel(level); + } + + /** + * Write an unsigned short in little endian byte order. + */ + private final void writeLeShort(final int value) throws IOException { + out.write(value & 0xff); + out.write((value >> 8) & 0xff); + } + + /** + * Write an int in little endian byte order. + */ + private final void writeLeInt(final int value) throws IOException { + writeLeShort(value); + writeLeShort(value >> 16); + } + + /** + * Starts a new Zip entry. It automatically closes the previous entry if + * present. If the compression method is stored, the entry must have a valid + * size and crc, otherwise all elements (except name) are optional, but must + * be correct if present. If the time is not set in the entry, the current + * time is used. + * + * @param entry + * the entry. + * @exception IOException + * if an I/O error occured. + * @exception ZipException + * if stream was finished. 
+ */ + public void putNextEntry(final ZipEntry entry) throws IOException { + if (entries == null) { + throw new ZipException("ZipOutputStream was finished"); + } + + int method = entry.getMethod(); + int flags = 0; + if (method == -1) { + method = defaultMethod; + } + + if (method == STORED) { + if (entry.getCompressedSize() >= 0) { + if (entry.getSize() < 0) { + entry.setSize(entry.getCompressedSize()); + } else if (entry.getSize() != entry.getCompressedSize()) { + throw new ZipException( + "Method STORED, but compressed size != size"); + } + } else { + entry.setCompressedSize(entry.getSize()); + } + + if (entry.getSize() < 0) { + throw new ZipException("Method STORED, but size not set"); + } + if (entry.getCrc() < 0) { + throw new ZipException("Method STORED, but crc not set"); + } + } else if (method == DEFLATED) { + if ((entry.getCompressedSize() < 0) || (entry.getSize() < 0) + || (entry.getCrc() < 0)) { + flags |= 8; + } + } + + if (curEntry != null) { + closeEntry(); + } + + if (entry.getTime() < 0) { + entry.setTime(System.currentTimeMillis()); + } + + entry.flags = flags; + entry.offset = offset; + entry.setMethod(method); + curMethod = method; + /* Write the local file header */ + writeLeInt(LOCSIG); + writeLeShort(method == STORED ? ZIP_STORED_VERSION + : ZIP_DEFLATED_VERSION); + writeLeShort(flags); + writeLeShort(method); + writeLeInt(entry.getDOSTime()); + if ((flags & 8) == 0) { + writeLeInt((int) entry.getCrc()); + writeLeInt((int) entry.getCompressedSize()); + writeLeInt((int) entry.getSize()); + } else { + writeLeInt(0); + writeLeInt(0); + writeLeInt(0); + } + final byte[] name = entry.getName().getBytes(); + if (name.length > 0xffff) { + throw new ZipException("Name too long."); + } + byte[] extra = entry.getExtra(); + if (extra == null) { + extra = new byte[0]; + } + writeLeShort(name.length); + writeLeShort(extra.length); + out.write(name); + out.write(extra); + + offset += LOCHDR + name.length + extra.length; + + /* Activate the entry. 
*/ + + curEntry = entry; + crc.reset(); + if (method == DEFLATED) { + def.reset(); + } + size = 0; + } + + /** + * Closes the current entry. + * + * @exception IOException + * if an I/O error occured. + * @exception ZipException + * if no entry is active. + */ + public void closeEntry() throws IOException { + if (curEntry == null) { + throw new ZipException("No open entry"); + } + + /* First finish the deflater, if appropriate */ + if (curMethod == DEFLATED) { + super.finish(); + } + + final int csize = curMethod == DEFLATED ? def.getTotalOut() : size; + + if (curEntry.getSize() < 0) { + curEntry.setSize(size); + } else if (curEntry.getSize() != size) { + throw new ZipException("size was " + size + ", but I expected " + + curEntry.getSize()); + } + + if (curEntry.getCompressedSize() < 0) { + curEntry.setCompressedSize(csize); + } else if (curEntry.getCompressedSize() != csize) { + throw new ZipException("compressed size was " + csize + + ", but I expected " + curEntry.getSize()); + } + + if (curEntry.getCrc() < 0) { + curEntry.setCrc(crc.getValue()); + } else if (curEntry.getCrc() != crc.getValue()) { + throw new ZipException("crc was " + + Long.toHexString(crc.getValue()) + ", but I expected " + + Long.toHexString(curEntry.getCrc())); + } + + offset += csize; + + /* Now write the data descriptor entry if needed. */ + if ((curMethod == DEFLATED) && ((curEntry.flags & 8) != 0)) { + writeLeInt(EXTSIG); + writeLeInt((int) curEntry.getCrc()); + writeLeInt((int) curEntry.getCompressedSize()); + writeLeInt((int) curEntry.getSize()); + offset += EXTHDR; + } + + entries.addElement(curEntry); + curEntry = null; + } + + /** + * Writes the given buffer to the current entry. + * + * @exception IOException + * if an I/O error occured. + * @exception ZipException + * if no entry is active. 
+ */ + @Override + public void write(final byte[] b, final int off, final int len) + throws IOException { + if (curEntry == null) { + throw new ZipException("No open entry."); + } + + switch (curMethod) { + case DEFLATED: + super.write(b, off, len); + break; + + case STORED: + out.write(b, off, len); + break; + } + + crc.update(b, off, len); + size += len; + } + + /** + * Finishes the stream. This will write the central directory at the end of + * the zip file and flush the stream. + * + * @exception IOException + * if an I/O error occured. + */ + @Override + public void finish() throws IOException { + if (entries == null) { + return; + } + if (curEntry != null) { + closeEntry(); + } + + int numEntries = 0; + int sizeEntries = 0; + + final Enumeration elements = entries.elements(); + while (elements.hasMoreElements()) { + final ZipEntry entry = (ZipEntry) elements.nextElement(); + + final int method = entry.getMethod(); + writeLeInt(CENSIG); + writeLeShort(method == STORED ? ZIP_STORED_VERSION + : ZIP_DEFLATED_VERSION); + writeLeShort(method == STORED ? ZIP_STORED_VERSION + : ZIP_DEFLATED_VERSION); + writeLeShort(entry.flags); + writeLeShort(method); + writeLeInt(entry.getDOSTime()); + writeLeInt((int) entry.getCrc()); + writeLeInt((int) entry.getCompressedSize()); + writeLeInt((int) entry.getSize()); + + final byte[] name = entry.getName().getBytes(); + if (name.length > 0xffff) { + throw new ZipException("Name too long."); + } + byte[] extra = entry.getExtra(); + if (extra == null) { + extra = new byte[0]; + } + final String strComment = entry.getComment(); + final byte[] comment = strComment != null ? 
strComment.getBytes() + : new byte[0]; + if (comment.length > 0xffff) { + throw new ZipException("Comment too long."); + } + + writeLeShort(name.length); + writeLeShort(extra.length); + writeLeShort(comment.length); + writeLeShort(0); /* disk number */ + writeLeShort(0); /* internal file attr */ + writeLeInt(0); /* external file attr */ + writeLeInt(entry.offset); + + out.write(name); + out.write(extra); + out.write(comment); + numEntries++; + sizeEntries += CENHDR + name.length + extra.length + comment.length; + } + + writeLeInt(ENDSIG); + writeLeShort(0); /* disk number */ + writeLeShort(0); /* disk with start of central dir */ + writeLeShort(numEntries); + writeLeShort(numEntries); + writeLeInt(sizeEntries); + writeLeInt(offset); + writeLeShort(zipComment.length); + out.write(zipComment); + out.flush(); + entries = null; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/Constants.java b/epublib-core/src/main/java/nl/siegmann/epublib/Constants.java new file mode 100644 index 00000000..20147b5c --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/Constants.java @@ -0,0 +1,12 @@ +package nl.siegmann.epublib; + + + +public interface Constants { + String CHARACTER_ENCODING = "UTF-8"; + String DOCTYPE_XHTML = ""; + String NAMESPACE_XHTML = "http://www.w3.org/1999/xhtml"; + String EPUBLIB_GENERATOR_NAME = "EPUBLib version 3.0"; + char FRAGMENT_SEPARATOR_CHAR = '#'; + String DEFAULT_TOC_ID = "toc"; +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationEvent.java b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationEvent.java new file mode 100644 index 00000000..e3a96b04 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationEvent.java @@ -0,0 +1,154 @@ +package nl.siegmann.epublib.browsersupport; + +import java.util.EventObject; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import 
nl.siegmann.epublib.util.StringUtil; + +/** + * Used to tell NavigationEventListener just what kind of navigation action the user just did. + * + * @author paul + * + */ +public class NavigationEvent extends EventObject { + + private static final long serialVersionUID = -6346750144308952762L; + + private Resource oldResource; + private int oldSpinePos; + private Navigator navigator; + private Book oldBook; + private int oldSectionPos; + private String oldFragmentId; + + public NavigationEvent(Object source) { + super(source); + } + + public NavigationEvent(Object source, Navigator navigator) { + super(source); + this.navigator = navigator; + this.oldBook = navigator.getBook(); + this.oldFragmentId = navigator.getCurrentFragmentId(); + this.oldSectionPos = navigator.getCurrentSectionPos(); + this.oldResource = navigator.getCurrentResource(); + this.oldSpinePos = navigator.getCurrentSpinePos(); + } + + /** + * The previous position within the section. + * + * @return The previous position within the section. 
+     */
+    public int getOldSectionPos() {
+        return oldSectionPos;
+    }
+
+    public Navigator getNavigator() {
+        return navigator;
+    }
+
+    public String getOldFragmentId() {
+        return oldFragmentId;
+    }
+
+    // package
+    void setOldFragmentId(String oldFragmentId) {
+        this.oldFragmentId = oldFragmentId;
+    }
+
+    public Book getOldBook() {
+        return oldBook;
+    }
+
+    // package
+    void setOldPagePos(int oldPagePos) {
+        this.oldSectionPos = oldPagePos;
+    }
+
+    public int getCurrentSectionPos() {
+        return navigator.getCurrentSectionPos();
+    }
+
+    public int getOldSpinePos() {
+        return oldSpinePos;
+    }
+
+    public int getCurrentSpinePos() {
+        return navigator.getCurrentSpinePos();
+    }
+
+    public String getCurrentFragmentId() {
+        return navigator.getCurrentFragmentId();
+    }
+
+    /**
+     * @return true if there was no old book, or if the navigator's book is a different object than the old book.
+     */
+    public boolean isBookChanged() {
+        if (oldBook == null) {
+            return true;
+        }
+        return oldBook != navigator.getBook();
+    }
+
+    /**
+     * @return true if the old spine position differs from the navigator's current spine position.
+     */
+    public boolean isSpinePosChanged() {
+        return getOldSpinePos() != getCurrentSpinePos();
+    }
+
+    /**
+     * @return true if the old fragment id differs from the navigator's current fragment id.
+     */
+    public boolean isFragmentChanged() {
+        // "changed" means the two ids are NOT equal, in line with the other is...Changed() methods
+        return !StringUtil.equals(getOldFragmentId(), getCurrentFragmentId());
+    }
+
+    public Resource getOldResource() {
+        return oldResource;
+    }
+
+    public Resource getCurrentResource() {
+        return navigator.getCurrentResource();
+    }
+    public void setOldResource(Resource oldResource) {
+        this.oldResource = oldResource;
+    }
+
+
+    public void setOldSpinePos(int oldSpinePos) {
+        this.oldSpinePos = oldSpinePos;
+    }
+
+
+    public void setNavigator(Navigator navigator) {
+        this.navigator = navigator;
+    }
+
+
+    public void setOldBook(Book oldBook) {
+        this.oldBook = oldBook;
+    }
+
+    public Book getCurrentBook() {
+        return getNavigator().getBook();
+    }
+
+    /**
+     * @return true if the old resource is a different object than the navigator's current resource.
+     */
+    public boolean isResourceChanged() {
+        return oldResource != getCurrentResource();
+    }
+
+    public String toString() {
+        return StringUtil.toString(
+                "oldSectionPos", oldSectionPos,
+                "oldResource", oldResource,
+                "oldBook", oldBook,
+                "oldFragmentId", oldFragmentId,
+                "oldSpinePos", oldSpinePos,
+                "currentPagePos",
getCurrentSectionPos(), + "currentResource", getCurrentResource(), + "currentBook", getCurrentBook(), + "currentFragmentId", getCurrentFragmentId(), + "currentSpinePos", getCurrentSpinePos() + ); + } + + public boolean isSectionPosChanged() { + return oldSectionPos != getCurrentSectionPos(); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationEventListener.java b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationEventListener.java new file mode 100644 index 00000000..12b40a01 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationEventListener.java @@ -0,0 +1,17 @@ +package nl.siegmann.epublib.browsersupport; + +/** + * Implemented by classes that want to be notified if the user moves to another location in the book. + * + * @author paul + * + */ +public interface NavigationEventListener { + + /** + * Called whenever the user navigates to another position in the book. + * + * @param navigationEvent + */ + public void navigationPerformed(NavigationEvent navigationEvent); +} \ No newline at end of file diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationHistory.java b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationHistory.java new file mode 100644 index 00000000..1d7aed5c --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/NavigationHistory.java @@ -0,0 +1,200 @@ +package nl.siegmann.epublib.browsersupport; + +import java.util.ArrayList; +import java.util.List; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; + + + +/** + * A history of the user's locations with the epub. 
+ * + * @author paul.siegmann + * + */ +public class NavigationHistory implements NavigationEventListener { + + public static final int DEFAULT_MAX_HISTORY_SIZE = 1000; + private static final long DEFAULT_HISTORY_WAIT_TIME = 1000; + + private static class Location { + private String href; + + public Location(String href) { + super(); + this.href = href; + } + + @SuppressWarnings("unused") + public void setHref(String href) { + this.href = href; + } + + public String getHref() { + return href; + } + } + + private long lastUpdateTime = 0; + private List locations = new ArrayList(); + private Navigator navigator; + private int currentPos = -1; + private int currentSize = 0; + private int maxHistorySize = DEFAULT_MAX_HISTORY_SIZE; + private long historyWaitTime = DEFAULT_HISTORY_WAIT_TIME; + + public NavigationHistory(Navigator navigator) { + this.navigator = navigator; + navigator.addNavigationEventListener(this); + initBook(navigator.getBook()); + } + + public int getCurrentPos() { + return currentPos; + } + + + public int getCurrentSize() { + return currentSize; + } + + public void initBook(Book book) { + if (book == null) { + return; + } + locations = new ArrayList(); + currentPos = -1; + currentSize = 0; + if (navigator.getCurrentResource() != null) { + addLocation(navigator.getCurrentResource().getHref()); + } + } + + /** + * If the time between a navigation event is less than the historyWaitTime then the new location is not added to the history. + * When a user is rapidly viewing many pages using the slider we do not want all of them to be added to the history. 
+ * + * @return the time we wait before adding the page to the history + */ + public long getHistoryWaitTime() { + return historyWaitTime; + } + + public void setHistoryWaitTime(long historyWaitTime) { + this.historyWaitTime = historyWaitTime; + } + + public void addLocation(Resource resource) { + if (resource == null) { + return; + } + addLocation(resource.getHref()); + } + + /** + * Adds the location after the current position. + * If the currentposition is not the end of the list then the elements between the current element and the end of the list will be discarded. + * Does nothing if the new location matches the current location. + *
+     * If the number of locations becomes larger than the maxHistorySize then the first item(s) will be removed.
+     *
+     * @param location the location to add; its href is compared with the href at the current position
+     */
+    public void addLocation(Location location) {
+        // do nothing if the new location matches the current location
+        if (currentPos >= 0 && currentPos < locations.size()) {
+            String currentHref = locations.get(currentPos).getHref();
+            String newHref = location.getHref();
+            // null-safe comparison: a location without href must not cause a NullPointerException
+            if (newHref == null ? currentHref == null : newHref.equals(currentHref)) {
+                return;
+            }
+        }
+        currentPos++;
+        // Drop everything from the new position onwards before appending. Overwriting a single
+        // slot would leave stale forward entries in the list, and a later add() would then
+        // append after them, so currentPos would point at an old location.
+        if (currentPos < locations.size()) {
+            locations.subList(currentPos, locations.size()).clear();
+        }
+        locations.add(location);
+        checkHistorySize();
+        currentSize = currentPos + 1;
+    }
+
+    /**
+     * Removes the oldest elements from the history until it holds at most maxHistorySize elements.
+     * The current position and size are shifted along with every removed element.
+     */
+    private void checkHistorySize() {
+        // the isEmpty() check keeps a negative maxHistorySize from removing from an empty list
+        while (!locations.isEmpty() && locations.size() > maxHistorySize) {
+            locations.remove(0);
+            currentSize--;
+            currentPos--;
+        }
+    }
+
+    /**
+     * Adds a new location with the given href.
+     *
+     * @param href the href of the location to add
+     */
+    public void addLocation(String href) {
+        addLocation(new Location(href));
+    }
+
+    /**
+     * @param pos index into the history
+     * @return the href of the location at pos, null if pos is outside the history
+     */
+    private String getLocationHref(int pos) {
+        if (pos < 0 || pos >= locations.size()) {
+            return null;
+        }
+        // read the requested position, not currentPos
+        return locations.get(pos).getHref();
+    }
+
+    /**
+     * Moves the current positions delta positions.
+     *
+     * move(-1) to go one position back in history.
+ * move(1) to go one position forward.
+ * + * @param delta + * + * @return Whether we actually moved. If the requested value is illegal it will return false, true otherwise. + */ + public boolean move(int delta) { + if (((currentPos + delta) < 0) + || ((currentPos + delta) >= currentSize)) { + return false; + } + currentPos += delta; + navigator.gotoResource(getLocationHref(currentPos), this); + return true; + } + + + /** + * If this is not the source of the navigationEvent then the addLocation will be called with the href of the currentResource in the navigationEvent. + */ + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + if (this == navigationEvent.getSource()) { + return; + } + if (navigationEvent.getCurrentResource() == null) { + return; + } + + if ((System.currentTimeMillis() - this.lastUpdateTime) > historyWaitTime) { + // if the user scrolled rapidly through the pages then the last page will not be added to the history. We fix that here: + addLocation(navigationEvent.getOldResource()); + + addLocation(navigationEvent.getCurrentResource().getHref()); + } + lastUpdateTime = System.currentTimeMillis(); + } + + public String getCurrentHref() { + if (currentPos < 0 || currentPos >= locations.size()) { + return null; + } + return locations.get(currentPos).getHref(); + } + + public void setMaxHistorySize(int maxHistorySize) { + this.maxHistorySize = maxHistorySize; + } + + public int getMaxHistorySize() { + return maxHistorySize; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/Navigator.java b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/Navigator.java new file mode 100644 index 00000000..7f1e6643 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/Navigator.java @@ -0,0 +1,220 @@ +package nl.siegmann.epublib.browsersupport; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import nl.siegmann.epublib.domain.Book; +import 
nl.siegmann.epublib.domain.Resource; + + +/** + * A helper class for epub browser applications. + * + * It helps moving from one resource to the other, from one resource to the other and keeping other + * elements of the application up-to-date by calling the NavigationEventListeners. + * + * @author paul + * + */ +public class Navigator implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 1076126986424925474L; + private Book book; + private int currentSpinePos; + private Resource currentResource; + private int currentPagePos; + private String currentFragmentId; + + private List eventListeners = new ArrayList(); + + public Navigator() { + this(null); + } + public Navigator(Book book) { + this.book = book; + this.currentSpinePos = 0; + if (book != null) { + this.currentResource = book.getCoverPage(); + } + this.currentPagePos = 0; + } + + private synchronized void handleEventListeners(NavigationEvent navigationEvent) { + for (int i = 0; i < eventListeners.size(); i++) { + NavigationEventListener navigationEventListener = eventListeners.get(i); + navigationEventListener.navigationPerformed(navigationEvent); + } + } + + public boolean addNavigationEventListener(NavigationEventListener navigationEventListener) { + return this.eventListeners.add(navigationEventListener); + } + + + public boolean removeNavigationEventListener(NavigationEventListener navigationEventListener) { + return this.eventListeners.remove(navigationEventListener); + } + + public int gotoFirstSpineSection(Object source) { + return gotoSpineSection(0, source); + } + + public int gotoPreviousSpineSection(Object source) { + return gotoPreviousSpineSection(0, source); + } + + public int gotoPreviousSpineSection(int pagePos, Object source) { + if (currentSpinePos < 0) { + return gotoSpineSection(0, pagePos, source); + } else { + return gotoSpineSection(currentSpinePos - 1, pagePos, source); + } + } + + public boolean hasNextSpineSection() { + return (currentSpinePos < 
(book.getSpine().size() - 1)); + } + + public boolean hasPreviousSpineSection() { + return (currentSpinePos > 0); + } + + public int gotoNextSpineSection(Object source) { + if (currentSpinePos < 0) { + return gotoSpineSection(0, source); + } else { + return gotoSpineSection(currentSpinePos + 1, source); + } + } + + public int gotoResource(String resourceHref, Object source) { + Resource resource = book.getResources().getByHref(resourceHref); + return gotoResource(resource, source); + } + + + public int gotoResource(Resource resource, Object source) { + return gotoResource(resource, 0, null, source); + } + + public int gotoResource(Resource resource, String fragmentId, Object source) { + return gotoResource(resource, 0, fragmentId, source); + } + + public int gotoResource(Resource resource, int pagePos, Object source) { + return gotoResource(resource, pagePos, null, source); + } + + public int gotoResource(Resource resource, int pagePos, String fragmentId, Object source) { + if (resource == null) { + return -1; + } + NavigationEvent navigationEvent = new NavigationEvent(source, this); + this.currentResource = resource; + this.currentSpinePos = book.getSpine().getResourceIndex(currentResource); + this.currentPagePos = pagePos; + this.currentFragmentId = fragmentId; + handleEventListeners(navigationEvent); + + return currentSpinePos; + } + + public int gotoResourceId(String resourceId, Object source) { + return gotoSpineSection(book.getSpine().findFirstResourceById(resourceId), source); + } + + public int gotoSpineSection(int newSpinePos, Object source) { + return gotoSpineSection(newSpinePos, 0, source); + } + + /** + * Go to a specific section. + * Illegal spine positions are silently ignored. 
+ * + * @param newSpinePos + * @param source + * @return The current position within the spine + */ + public int gotoSpineSection(int newSpinePos, int newPagePos, Object source) { + if (newSpinePos == currentSpinePos) { + return currentSpinePos; + } + if (newSpinePos < 0 || newSpinePos >= book.getSpine().size()) { + return currentSpinePos; + } + NavigationEvent navigationEvent = new NavigationEvent(source, this); + currentSpinePos = newSpinePos; + currentPagePos = newPagePos; + currentResource = book.getSpine().getResource(currentSpinePos); + handleEventListeners(navigationEvent); + return currentSpinePos; + } + + public int gotoLastSpineSection(Object source) { + return gotoSpineSection(book.getSpine().size() - 1, source); + } + + public void gotoBook(Book book, Object source) { + NavigationEvent navigationEvent = new NavigationEvent(source, this); + this.book = book; + this.currentFragmentId = null; + this.currentPagePos = 0; + this.currentResource = null; + this.currentSpinePos = book.getSpine().getResourceIndex(currentResource); + handleEventListeners(navigationEvent); + } + + + /** + * The current position within the spine. + * + * @return something < 0 if the current position is not within the spine. + */ + public int getCurrentSpinePos() { + return currentSpinePos; + } + + public Resource getCurrentResource() { + return currentResource; + } + + /** + * Sets the current index and resource without calling the eventlisteners. + * + * If you want the eventListeners called use gotoSection(index); + * + * @param currentIndex + */ + public void setCurrentSpinePos(int currentIndex) { + this.currentSpinePos = currentIndex; + this.currentResource = book.getSpine().getResource(currentIndex); + } + + public Book getBook() { + return book; + } + + /** + * Sets the current index and resource without calling the eventlisteners. 
+ * + * If you want the eventListeners called use gotoSection(index); + * + */ + public int setCurrentResource(Resource currentResource) { + this.currentSpinePos = book.getSpine().getResourceIndex(currentResource); + this.currentResource = currentResource; + return currentSpinePos; + } + + public String getCurrentFragmentId() { + return currentFragmentId; + } + + public int getCurrentSectionPos() { + return currentPagePos; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/package-info.java b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/package-info.java new file mode 100644 index 00000000..098f2e05 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/browsersupport/package-info.java @@ -0,0 +1,7 @@ +/** + * Provides classes that help make an epub reader application. + * + * These classes have no dependencies on graphic toolkits, they're purely + * to help with the browsing/navigation logic. + */ +package nl.siegmann.epublib.browsersupport; diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Author.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Author.java new file mode 100644 index 00000000..4b3772ad --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Author.java @@ -0,0 +1,90 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; + +import nl.siegmann.epublib.util.StringUtil; + +/** + * Represents one of the authors of the book + * + * @author paul + * + */ +public class Author implements Serializable { + + private static final long serialVersionUID = 6663408501416574200L; + + private String firstname; + private String lastname; + private Relator relator; + + public Scheme getScheme() { + return scheme; + } + + public void setScheme(Scheme scheme) { + this.scheme = scheme; + } + + private Scheme scheme; + + public Author(String singleName) { + this("", singleName); + } + + + public Author(String firstname, String lastname) { + 
this.firstname = firstname; + this.lastname = lastname; + } + + public String getFirstname() { + return firstname; + } + public void setFirstname(String firstname) { + this.firstname = firstname; + } + public String getLastname() { + return lastname; + } + public void setLastname(String lastname) { + this.lastname = lastname; + } + + public String toString() { + return lastname + ", " + firstname; + } + + public int hashCode() { + return StringUtil.hashCode(firstname, lastname); + } + + + public boolean equals(Object authorObject) { + if(! (authorObject instanceof Author)) { + return false; + } + Author other = (Author) authorObject; + return StringUtil.equals(firstname, other.firstname) + && StringUtil.equals(lastname, other.lastname); + } + + public Relator setRole(String code) { + Relator result = Relator.byCode(code); + if (result == null) { + result = Relator.AUTHOR; + } + this.relator = result; + return result; + } + + + public Relator getRelator() { + return relator; + } + + + public void setRelator(Relator relator) { + this.relator = relator; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Book.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Book.java new file mode 100644 index 00000000..b0400c7d --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Book.java @@ -0,0 +1,530 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + + + + +/** + * Representation of a Book. + * + * All resources of a Book (html, css, xml, fonts, images) are represented as Resources. See getResources() for access to these.
+ * A Book has 3 indexes into these Resources, as per the epub specification.
+ *
+ *
Spine
+ *
these are the Resources to be shown when a user reads the book from start to finish.
+ *
Table of Contents
+ *
The table of contents. Table of Contents references may be in a different order and contain different Resources than the spine, and often do. + *
Guide
+ *
The Guide has references to a set of special Resources like the cover page, the Glossary, the copyright page, etc. + *
+ *

+ * The complication is that these 3 indexes may and usually do point to different pages. + * A chapter may be split up in 2 pieces to fit it into memory. Then the spine will contain both pieces, but the Table of Contents only the first. + * The Content page may be in the Table of Contents, the Guide, but not in the Spine. + * Etc. + *

+ + + + + + + + + + + + + + + + + + + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Spine + + + + + + + +Table of Contents + + + + + + + +Guide + +Chapter 1 + +Chapter 1 + +Part 2 + +Chapter 2 + +Chapter 1 + +Chapter 2 + +Cover + +Resources + +Preface + + + + + + + + + + + + + + + + + + + + + * @author paul + * + */ +public class Book implements Serializable { + + private static final long serialVersionUID = 2068355170895770100L; + + private Resources resources = new Resources(); + private Metadata metadata = new Metadata(); + private Spine spine = new Spine(); + private TableOfContents tableOfContents = new TableOfContents(); + private Guide guide = new Guide(); + private OpfResource opfResource; + private Resource ncxResource; + private Resource coverImage; + + /** + * Adds the resource to the table of contents of the book as a child section of the given parentSection + * + * @param parentSection + * @param sectionTitle + * @param resource + * @return The table of contents + */ + public TOCReference addSection(TOCReference parentSection, String sectionTitle, + Resource resource) { + getResources().add(resource); + if (spine.findFirstResourceById(resource.getId()) < 0) { + spine.addSpineReference(new SpineReference(resource)); + } + return parentSection.addChildSection(new TOCReference(sectionTitle, resource)); + } + + public void generateSpineFromTableOfContents() { + Spine spine = new Spine(tableOfContents); + + // in case the tocResource was already found and assigned + spine.setTocResource(this.spine.getTocResource()); + + this.spine = spine; + } + + /** + * Adds a resource to the book's set of resources, table of contents 
and if there is no resource with the id in the spine also adds it to the spine. + * + * @param title + * @param resource + * @return The table of contents + */ + public TOCReference addSection(String title, Resource resource) { + getResources().add(resource); + TOCReference tocReference = tableOfContents.addTOCReference(new TOCReference(title, resource)); + if (spine.findFirstResourceById(resource.getId()) < 0) { + spine.addSpineReference(new SpineReference(resource)); + } + return tocReference; + } + + + /** + * The Book's metadata (titles, authors, etc) + * + * @return The Book's metadata (titles, authors, etc) + */ + public Metadata getMetadata() { + return metadata; + } + public void setMetadata(Metadata metadata) { + this.metadata = metadata; + } + + + public void setResources(Resources resources) { + this.resources = resources; + } + + + public Resource addResource(Resource resource) { + return resources.add(resource); + } + + /** + * The collection of all images, chapters, sections, xhtml files, stylesheets, etc that make up the book. + * + * @return The collection of all images, chapters, sections, xhtml files, stylesheets, etc that make up the book. + */ + public Resources getResources() { + return resources; + } + + + /** + * The sections of the book that should be shown if a user reads the book from start to finish. + * + * @return The Spine + */ + public Spine getSpine() { + return spine; + } + + + public void setSpine(Spine spine) { + this.spine = spine; + } + + + /** + * The Table of Contents of the book. + * + * @return The Table of Contents of the book. + */ + public TableOfContents getTableOfContents() { + return tableOfContents; + } + + + public void setTableOfContents(TableOfContents tableOfContents) { + this.tableOfContents = tableOfContents; + } + + /** + * The book's cover page as a Resource. + * An XHTML document containing a link to the cover image. 
+ * + * @return The book's cover page as a Resource + */ + public Resource getCoverPage() { + Resource coverPage = guide.getCoverPage(); + if (coverPage == null) { + coverPage = spine.getResource(0); + } + return coverPage; + } + + + public void setCoverPage(Resource coverPage) { + if (coverPage == null) { + return; + } + if (! resources.containsByHref(coverPage.getHref())) { + resources.add(coverPage); + } + guide.setCoverPage(coverPage); + } + + /** + * Gets the first non-blank title from the book's metadata. + * + * @return the first non-blank title from the book's metadata. + */ + public Title getTitle() { + return getMetadata().getFirstTitle(); + } + + + /** + * The book's cover image. + * + * @return The book's cover image. + */ + public Resource getCoverImage() { + return coverImage; + } + + public void setCoverImage(Resource coverImage) { + if (coverImage == null) { + return; + } + if (! resources.containsByHref(coverImage.getHref())) { + resources.add(coverImage); + } + this.coverImage = coverImage; + } + + /** + * The guide; contains references to special sections of the book like colophon, glossary, etc. + * + * @return The guide; contains references to special sections of the book like colophon, glossary, etc. + */ + public Guide getGuide() { + return guide; + } + + /** + * All Resources of the Book that can be reached via the Spine, the TableOfContents or the Guide. + *

+ * Consists of a list of "reachable" resources: + *

    + *
  • The coverpage
  • + *
  • The resources of the Spine that are not already in the result
  • + *
  • The resources of the Table of Contents that are not already in the result
  • + *
  • The resources of the Guide that are not already in the result
  • + *
+ * To get all html files that make up the epub file use {@link #getResources()} + * @return All Resources of the Book that can be reached via the Spine, the TableOfContents or the Guide. + */ + public List getContents() { + Map result = new LinkedHashMap(); + addToContentsResult(getCoverPage(), result); + + for (SpineReference spineReference: getSpine().getSpineReferences()) { + addToContentsResult(spineReference.getResource(), result); + } + + for (Resource resource: getTableOfContents().getAllUniqueResources()) { + addToContentsResult(resource, result); + } + + for (GuideReference guideReference: getGuide().getReferences()) { + addToContentsResult(guideReference.getResource(), result); + } + + return new ArrayList(result.values()); + } + + private static void addToContentsResult(Resource resource, Map allReachableResources){ + if (resource != null && (! allReachableResources.containsKey(resource.getHref()))) { + allReachableResources.put(resource.getHref(), resource); + } + } + + public OpfResource getOpfResource() { + return opfResource; + } + + public void setOpfResource(OpfResource opfResource) { + this.opfResource = opfResource; + } + + public void setNcxResource(Resource ncxResource) { + this.ncxResource = ncxResource; + } + + public Resource getNcxResource() { + return ncxResource; + } +} + diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Date.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Date.java new file mode 100644 index 00000000..a8040b4c --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Date.java @@ -0,0 +1,100 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.text.SimpleDateFormat; + +import nl.siegmann.epublib.epub.PackageDocumentBase; + +/** + * A Date used by the book's metadata. 
+ * + * Examples: creation-date, modification-date, etc + * + * @author paul + * + */ +public class Date implements Serializable { + /** + * + */ + private static final long serialVersionUID = 7533866830395120136L; + + public enum Event { + PUBLICATION("publication"), + MODIFICATION("modification"), + CREATION("creation"); + + private final String value; + + Event(String v) { + value = v; + } + + public static Event fromValue(String v) { + for (Event c : Event.values()) { + if (c.value.equals(v)) { + return c; + } + } + return null; + } + + public String toString() { + return value; + } + }; + + private Event event; + private String dateString; + + public Date(java.util.Date date) { + this(date, (Event) null); + } + + public Date(String dateString) { + this(dateString, (Event) null); + } + + public Date(java.util.Date date, Event event) { + this((new SimpleDateFormat(PackageDocumentBase.dateFormat)).format(date), event); + } + + public Date(String dateString, Event event) { + this.dateString = dateString; + this.event = event; + } + + public Date(java.util.Date date, String event) { + this((new SimpleDateFormat(PackageDocumentBase.dateFormat)).format(date), event); + } + + public Date(String dateString, String event) { + this(checkDate(dateString), Event.fromValue(event)); + this.dateString = dateString; + } + + private static String checkDate(String dateString) { + if (dateString == null) { + throw new IllegalArgumentException("Cannot create a date from a blank string"); + } + return dateString; + } + public String getValue() { + return dateString; + } + public Event getEvent() { + return event; + } + + public void setEvent(Event event) { + this.event = event; + } + + public String toString() { + if (event == null) { + return dateString; + } + return "" + event + ":" + dateString; + } +} + diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Guide.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Guide.java new file mode 100644 index 
00000000..e18d7167 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Guide.java @@ -0,0 +1,123 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * The guide is a selection of special pages of the book. + * Examples of these are the cover, list of illustrations, etc. + * + * It is an optional part of an epub, and support for the various types of references varies by reader. + * + * The only part of this that is heavily used is the cover page. + * + * @author paul + * + */ +public class Guide implements Serializable { + + /** + * + */ + private static final long serialVersionUID = -6256645339915751189L; + + public static final String DEFAULT_COVER_TITLE = GuideReference.COVER; + + private List references = new ArrayList(); + private static final int COVERPAGE_NOT_FOUND = -1; + private static final int COVERPAGE_UNITIALIZED = -2; + + private int coverPageIndex = -1; + + public List getReferences() { + return references; + } + + public void setReferences(List references) { + this.references = references; + uncheckCoverPage(); + } + + private void uncheckCoverPage() { + coverPageIndex = COVERPAGE_UNITIALIZED; + } + + public GuideReference getCoverReference() { + checkCoverPage(); + if (coverPageIndex >= 0) { + return references.get(coverPageIndex); + } + return null; + } + + public int setCoverReference(GuideReference guideReference) { + if (coverPageIndex >= 0) { + references.set(coverPageIndex, guideReference); + } else { + references.add(0, guideReference); + coverPageIndex = 0; + } + return coverPageIndex; + } + + private void checkCoverPage() { + if (coverPageIndex == COVERPAGE_UNITIALIZED) { + initCoverPage(); + } + } + + + private void initCoverPage() { + int result = COVERPAGE_NOT_FOUND; + for (int i = 0; i < references.size(); i++) { + GuideReference guideReference = references.get(i); + if (guideReference.getType().equals(GuideReference.COVER)) { + 
result = i; + break; + } + } + coverPageIndex = result; + } + + /** + * The coverpage of the book. + * + * @return The coverpage of the book. + */ + public Resource getCoverPage() { + GuideReference guideReference = getCoverReference(); + if (guideReference == null) { + return null; + } + return guideReference.getResource(); + } + + public void setCoverPage(Resource coverPage) { + GuideReference coverpageGuideReference = new GuideReference(coverPage, GuideReference.COVER, DEFAULT_COVER_TITLE); + setCoverReference(coverpageGuideReference); + } + + + public ResourceReference addReference(GuideReference reference) { + this.references.add(reference); + uncheckCoverPage(); + return reference; + } + + /** + * A list of all GuideReferences that have the given referenceTypeName (ignoring case). + * + * @param referenceTypeName + * @return A list of all GuideReferences that have the given referenceTypeName (ignoring case). + */ + public List getGuideReferencesByType(String referenceTypeName) { + List result = new ArrayList(); + for (GuideReference guideReference: references) { + if (referenceTypeName.equalsIgnoreCase(guideReference.getType())) { + result.add(guideReference); + } + } + return result; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/GuideReference.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/GuideReference.java new file mode 100644 index 00000000..9a9e3ca5 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/GuideReference.java @@ -0,0 +1,102 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; + +import nl.siegmann.epublib.util.StringUtil; + + +/** + * These are references to elements of the book's guide. 
+ * + * @see nl.siegmann.epublib.domain.Guide + * + * @author paul + * + */ +public class GuideReference extends TitledResourceReference implements Serializable { + + /** + * + */ + private static final long serialVersionUID = -316179702440631834L; + + /** + * the book cover(s), jacket information, etc. + */ + public static String COVER = "cover"; + + /** + * human-readable page with title, author, publisher, and other metadata + */ + public static String TITLE_PAGE = "title-page"; + + /** + * Human-readable table of contents. + * Not to be confused with the epub file's table of contents + * + */ + public static String TOC = "toc"; + + /** + * back-of-book style index + */ + public static String INDEX = "index"; + public static String GLOSSARY = "glossary"; + public static String ACKNOWLEDGEMENTS = "acknowledgements"; + public static String BIBLIOGRAPHY = "bibliography"; + public static String COLOPHON = "colophon"; + public static String COPYRIGHT_PAGE = "copyright-page"; + public static String DEDICATION = "dedication"; + + /** + * an epigraph is a phrase, quotation, or poem that is set at the beginning of a document or component. + * source: http://en.wikipedia.org/wiki/Epigraph_%28literature%29 + */ + public static String EPIGRAPH = "epigraph"; + + public static String FOREWORD = "foreword"; + + /** + * list of illustrations + */ + public static String LOI = "loi"; + + /** + * list of tables + */ + public static String LOT = "lot"; + public static String NOTES = "notes"; + public static String PREFACE = "preface"; + + /** + * A page of content (e.g.
"Chapter 1") + */ + public static String TEXT = "text"; + + private String type; + + public GuideReference(Resource resource) { + this(resource, null); + } + + public GuideReference(Resource resource, String title) { + super(resource, title); + } + + public GuideReference(Resource resource, String type, String title) { + this(resource, type, title, null); + } + + public GuideReference(Resource resource, String type, String title, String fragmentId) { + super(resource, title, fragmentId); + this.type = StringUtil.isNotBlank(type) ? type.toLowerCase() : null; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Identifier.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Identifier.java new file mode 100644 index 00000000..ce810d2d --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Identifier.java @@ -0,0 +1,119 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.List; +import java.util.UUID; + +import nl.siegmann.epublib.util.StringUtil; + +/** + * A Book's identifier. + * + * Defaults to a random UUID and scheme "UUID" + * + * @author paul + * + */ +public class Identifier implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 955949951416391810L; + + private boolean bookId = false; + private Scheme scheme; + private String value; + + /** + * Creates an Identifier with as value a random UUID and scheme "UUID" + */ + public Identifier() { + this(Scheme.UUID, UUID.randomUUID().toString()); + } + + + public Identifier(Scheme scheme, String value) { + this.scheme = scheme; + this.value = value; + } + + public Scheme getScheme() { + return scheme; + } + + public void setScheme(Scheme scheme) { + this.scheme = scheme; + } + + /** + * The first identifier for which the bookId is true is made the bookId identifier. 
+ * If no identifier has bookId == true then the first bookId identifier is written as the primary. + * + * @param identifiers + * @return The first identifier for which the bookId is true is made the bookId identifier. + */ + public static Identifier getBookIdIdentifier(List identifiers) { + if(identifiers == null || identifiers.isEmpty()) { + return null; + } + + Identifier result = null; + for(Identifier identifier: identifiers) { + if(identifier.isBookId()) { + result = identifier; + break; + } + } + + if(result == null) { + result = identifiers.get(0); + } + + return result; + } + + public String getValue() { + return value; + } + public void setValue(String value) { + this.value = value; + } + + + public void setBookId(boolean bookId) { + this.bookId = bookId; + } + + + /** + * This bookId property allows the book creator to add multiple ids and tell the epubwriter which one to write out as the bookId. + * + * The Dublin Core metadata spec allows multiple identifiers for a Book. + * The epub spec requires exactly one identifier to be marked as the book id. + * + * @return whether this is the unique book id. + */ + public boolean isBookId() { + return bookId; + } + + public int hashCode() { + return StringUtil.defaultIfNull(scheme.getName()).hashCode() ^ StringUtil.defaultIfNull(value).hashCode(); + } + + public boolean equals(Object otherIdentifier) { + if(! 
(otherIdentifier instanceof Identifier)) { + return false; + } + return StringUtil.equals(scheme.getName(), ((Identifier) otherIdentifier).scheme.getName()) + && StringUtil.equals(value, ((Identifier) otherIdentifier).value); + } + + public String toString() { + if (StringUtil.isBlank(scheme.getName())) { + return "" + value; + } + return "" + scheme + ":" + value; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/LazyResource.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/LazyResource.java new file mode 100644 index 00000000..1983dd31 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/LazyResource.java @@ -0,0 +1,166 @@ +package nl.siegmann.epublib.domain; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.IOUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Resource that loads its data only on-demand. + * This way larger books can fit into memory and can be opened faster. + * + */ +public class LazyResource extends Resource { + + + /** + * + */ + private static final long serialVersionUID = 5089400472352002866L; + private String filename; + private long cachedSize; + + private static final Logger LOG = LoggerFactory.getLogger(LazyResource.class); + + /** + * Creates a Lazy resource, by not actually loading the data for this entry. + * + * The data will be loaded on the first call to getData() + * + * @param filename the file name for the epub we're created from. + * @param size the size of this resource. + * @param href The resource's href within the epub. 
+ */ + public LazyResource(String filename, long size, String href) { + super( null, null, href, MediatypeService.determineMediaType(href)); + this.filename = filename; + this.cachedSize = size; + } + + /** + * Creates a Resource that tries to load the data, but falls back to lazy loading. + * + * If the size of the resource is known ahead of time we can use that to allocate + * a matching byte[]. If this succeeds we can safely load the data. + * + * If it fails we leave the data null for now and it will be lazy-loaded when + * it is accessed. + * + * @param in + * @param fileName + * @param length + * @param href + * @throws IOException + */ + public LazyResource(InputStream in, String filename, int length, String href) throws IOException { + super(null, IOUtil.toByteArray(in, length), href, MediatypeService.determineMediaType(href)); + this.filename = filename; + this.cachedSize = length; + } + + /** + * Gets the contents of the Resource as an InputStream. + * + * @return The contents of the Resource. + * + * @throws IOException + */ + public InputStream getInputStream() throws IOException { + if (isInitialized()) { + return new ByteArrayInputStream(getData()); + } else { + return getResourceStream(); + } + } + + /** + * Initializes the resource by loading its data into memory. + * + * @throws IOException + */ + public void initialize() throws IOException { + getData(); + } + + /** + * The contents of the resource as a byte[] + * + * If this resource was lazy-loaded and the data was not yet loaded, + * it will be loaded into memory at this point. + * This included opening the zip file, so expect a first load to be slow. 
+ * + * @return The contents of the resource + */ + public byte[] getData() throws IOException { + + if ( data == null ) { + + LOG.debug("Initializing lazy resource " + filename + "#" + this.getHref() ); + + InputStream in = getResourceStream(); + byte[] readData = IOUtil.toByteArray(in, (int) this.cachedSize); + if ( readData == null ) { + throw new IOException("Could not load the contents of entry " + this.getHref() + " from epub file " + filename); + } else { + this.data = readData; + } + + in.close(); + } + + return data; + } + + + private InputStream getResourceStream() throws FileNotFoundException, + IOException { + ZipFile zipFile = new ZipFile(filename); + ZipEntry zipEntry = zipFile.getEntry(originalHref); + if (zipEntry == null) { + zipFile.close(); + throw new IllegalStateException("Cannot find entry " + originalHref + " in epub file " + filename); + } + return new ResourceInputStream(zipFile.getInputStream(zipEntry), zipFile); + } + + /** + * Tells this resource to release its cached data. + * + * If this resource was not lazy-loaded, this is a no-op. + */ + public void close() { + if ( this.filename != null ) { + this.data = null; + } + } + + /** + * Returns if the data for this resource has been loaded into memory. + * + * @return true if data was loaded. + */ + public boolean isInitialized() { + return data != null; + } + + /** + * Returns the size of this resource in bytes. + * + * @return the size. 
+ */ + public long getSize() { + if ( data != null ) { + return data.length; + } + + return cachedSize; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestItemProperties.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestItemProperties.java new file mode 100644 index 00000000..f02f81cc --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestItemProperties.java @@ -0,0 +1,21 @@ +package nl.siegmann.epublib.domain; + +public enum ManifestItemProperties implements ManifestProperties { + COVER_IMAGE("cover-image"), + MATHML("mathml"), + NAV("nav"), + REMOTE_RESOURCES("remote-resources"), + SCRIPTED("scripted"), + SVG("svg"), + SWITCH("switch"); + + private String name; + + private ManifestItemProperties(String name) { + this.name = name; + } + + public String getName() { + return name; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestItemRefProperties.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestItemRefProperties.java new file mode 100644 index 00000000..b9662597 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestItemRefProperties.java @@ -0,0 +1,16 @@ +package nl.siegmann.epublib.domain; + +public enum ManifestItemRefProperties implements ManifestProperties { + PAGE_SPREAD_LEFT("page-spread-left"), + PAGE_SPREAD_RIGHT("page-spread-right"); + + private String name; + + private ManifestItemRefProperties(String name) { + this.name = name; + } + + public String getName() { + return name; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestProperties.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestProperties.java new file mode 100644 index 00000000..04273151 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ManifestProperties.java @@ -0,0 +1,6 @@ +package nl.siegmann.epublib.domain; + +public interface ManifestProperties { + + public 
String getName(); +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/MediaType.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/MediaType.java new file mode 100644 index 00000000..15cb6508 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/MediaType.java @@ -0,0 +1,76 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Collection; + +/** + * MediaType is used to tell the type of content a resource is. + * + * Examples of mediatypes are image/gif, text/css and application/xhtml+xml + * + * All allowed mediaTypes are maintained bye the MediaTypeService. + * + * @see nl.siegmann.epublib.service.MediatypeService + * + * @author paul + * + */ +public class MediaType implements Serializable { + /** + * + */ + private static final long serialVersionUID = -7256091153727506788L; + private String name; + private String defaultExtension; + private Collection extensions; + + public MediaType(String name, String defaultExtension) { + this(name, defaultExtension, new String[] {defaultExtension}); + } + + public MediaType(String name, String defaultExtension, + String[] extensions) { + this(name, defaultExtension, Arrays.asList(extensions)); + } + + public int hashCode() { + if (name == null) { + return 0; + } + return name.hashCode(); + } + public MediaType(String name, String defaultExtension, + Collection extensions) { + super(); + this.name = name; + this.defaultExtension = defaultExtension; + this.extensions = extensions; + } + + + public String getName() { + return name; + } + + + public String getDefaultExtension() { + return defaultExtension; + } + + + public Collection getExtensions() { + return extensions; + } + + public boolean equals(Object otherMediaType) { + if(! 
(otherMediaType instanceof MediaType)) { + return false; + } + return name.equals(((MediaType) otherMediaType).getName()); + } + + public String toString() { + return name; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Metadata.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Metadata.java new file mode 100644 index 00000000..a78f3dfc --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Metadata.java @@ -0,0 +1,218 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.xml.namespace.QName; + +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.StringUtil; + +/** + * A Book's collection of Metadata. + * In the future it should contain all Dublin Core attributes, for now it contains a set of often-used ones. + * + * @author paul + * + */ +public class Metadata implements Serializable { + + /** + * + */ + private static final long serialVersionUID = -2437262888962149444L; + + public static final String DEFAULT_LANGUAGE = "en"; + + private boolean autoGeneratedId = true; + private List authors = new ArrayList(); + private List contributors = new ArrayList(); + private List dates = new ArrayList(); + private String language = DEFAULT_LANGUAGE; + private Map otherProperties = new HashMap(); + private List rights = new ArrayList(); + private List titles = new ArrayList<>(); + private List<Identifier> identifiers = new ArrayList<Identifier>(); + private List<String> subjects = new ArrayList<String>(); + private String format = MediatypeService.EPUB.getName(); + private List<String> types = new ArrayList<String>(); + private List<String> descriptions = new ArrayList<String>(); + private List<String> publishers = new ArrayList<String>(); + private Map<String, String> metaAttributes = new HashMap<String, String>(); + + public Metadata() { + 
identifiers.add(new Identifier()); + autoGeneratedId = true; + } + + public boolean isAutoGeneratedId() { + return autoGeneratedId; + } + + /** + * Metadata properties not hard-coded like the author, title, etc. + * + * @return Metadata properties not hard-coded like the author, title, etc. + */ + public Map<QName, String> getOtherProperties() { + return otherProperties; + } + public void setOtherProperties(Map<QName, String> otherProperties) { + this.otherProperties = otherProperties; + } + + public Date addDate(Date date) { + this.dates.add(date); + return date; + } + + public List<Date> getDates() { + return dates; + } + public void setDates(List<Date> dates) { + this.dates = dates; + } + + public Author addAuthor(Author author) { + authors.add(author); + return author; + } + + public List<Author> getAuthors() { + return authors; + } + public void setAuthors(List<Author> authors) { + this.authors = authors; + } + + public Author addContributor(Author contributor) { + contributors.add(contributor); + return contributor; + } + + public List<Author> getContributors() { + return contributors; + } + public void setContributors(List<Author> contributors) { + this.contributors = contributors; + } + + public String getLanguage() { + return language; + } + public void setLanguage(String language) { + this.language = language; + } + public List<String> getSubjects() { + return subjects; + } + public void setSubjects(List<String> subjects) { + this.subjects = subjects; + } + public void setRights(List<String> rights) { + this.rights = rights; + } + public List<String> getRights() { + return rights; + } + + + /** + * Gets the first non-blank title of the book. + * Will return "" if no title found. + * + * @return the first non-blank title of the book. 
+ */ + public Title getFirstTitle() { + if (titles == null || titles.isEmpty()) { + return Title.EMPTY; + } + for (Title title: titles) { + if (StringUtil.isNotBlank(title.value)) { + return title; + } + } + return Title.EMPTY; + } + + + public Title addTitle(Title title) { + this.titles.add(title); + return title; + } + public void setTitles(List<Title> titles) { + this.titles = titles; + } + public List<Title> getTitles() { + return titles; + } + + public String addPublisher(String publisher) { + this.publishers.add(publisher); + return publisher; + } + public void setPublishers(List<String> publishers) { + this.publishers = publishers; + } + public List<String> getPublishers() { + return publishers; + } + + public String addDescription(String description) { + this.descriptions.add(description); + return description; + } + public void setDescriptions(List<String> descriptions) { + this.descriptions = descriptions; + } + public List<String> getDescriptions() { + return descriptions; + } + + public Identifier addIdentifier(Identifier identifier) { + if (autoGeneratedId && (! 
(identifiers.isEmpty()))) { + identifiers.set(0, identifier); + } else { + identifiers.add(identifier); + } + autoGeneratedId = false; + return identifier; + } + public void setIdentifiers(List<Identifier> identifiers) { + this.identifiers = identifiers; + autoGeneratedId = false; + } + + public List<Identifier> getIdentifiers() { + return identifiers; + } + public void setFormat(String format) { + this.format = format; + } + public String getFormat() { + return format; + } + + public String addType(String type) { + this.types.add(type); + return type; + } + + public List<String> getTypes() { + return types; + } + public void setTypes(List<String> types) { + this.types = types; + } + + public String getMetaAttribute(String name) { + return metaAttributes.get(name); + } + + public void setMetaAttributes(Map<String, String> metaAttributes) { + this.metaAttributes = metaAttributes; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/OpfResource.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/OpfResource.java new file mode 100644 index 00000000..603d3b9d --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/OpfResource.java @@ -0,0 +1,38 @@ +package nl.siegmann.epublib.domain; + +import java.io.IOException; + +public class OpfResource extends Resource { + + public static final String DEFAULT_VERSION = "2.0"; + + private String version; + + private String prefix; + + public OpfResource(Resource resource) throws IOException { + super( + resource.getId(), + resource.getData(), + resource.getHref(), + resource.getMediaType(), + resource.getInputEncoding() + ); + } + + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } + + public String getPrefix() { + return prefix; + } + + public void setPrefix(String prefix) { + this.prefix = prefix; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Relator.java 
b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Relator.java new file mode 100644 index 00000000..4ce05796 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Relator.java @@ -0,0 +1,1143 @@ +package nl.siegmann.epublib.domain; + + +/** + * A relator denotes which role a certain individual had in the creation/modification of the ebook. + * + * Examples are 'creator', 'blurb writer', etc. + * + * This contains the complete Library of Congress relator list. + * + * @see <a href="http://www.loc.gov/marc/relators/relaterm.html">MARC Code List for Relators</a> + * + * @author paul + * + */ +public enum Relator { + + /** + * Use for a person or organization who principally exhibits acting skills in a musical or dramatic presentation or entertainment. + */ + ACTOR("act", "Actor"), + + /** + * Use for a person or organization who 1) reworks a musical composition, usually for a different medium, or 2) rewrites novels or stories for motion pictures or other audiovisual medium. + */ + ADAPTER("adp", "Adapter"), + + /** + * Use for a person or organization that reviews, examines and interprets data or information in a specific area. + */ + ANALYST("anl", "Analyst"), + + /** + * Use for a person or organization who draws the two-dimensional figures, manipulates the three dimensional objects and/or also programs the computer to move objects and images for the purpose of animated film processing. Animation cameras, stands, celluloid screens, transparencies and inks are some of the tools of the animator. + */ + ANIMATOR("anm", "Animator"), + + /** + * Use for a person who writes manuscript annotations on a printed item. + */ + ANNOTATOR("ann", "Annotator"), + + /** + * Use for a person or organization responsible for the submission of an application or who is named as eligible for the results of the processing of the application (e.g., bestowing of rights, reward, title, position). 
+ */ + APPLICANT("app", "Applicant"), + + /** + * Use for a person or organization who designs structures or oversees their construction. + */ + ARCHITECT("arc", "Architect"), + + /** + * Use for a person or organization who transcribes a musical composition, usually for a different medium from that of the original; in an arrangement the musical substance remains essentially unchanged. + */ + ARRANGER("arr", "Arranger"), + + /** + * Use for a person (e.g., a painter or sculptor) who makes copies of works of visual art. + */ + ART_COPYIST("acp", "Art copyist"), + + /** + * Use for a person (e.g., a painter) or organization who conceives, and perhaps also implements, an original graphic design or work of art, if specific codes (e.g., [egr], [etr]) are not desired. For book illustrators, prefer Illustrator [ill]. + */ + ARTIST("art", "Artist"), + + /** + * Use for a person responsible for controlling the development of the artistic style of an entire production, including the choice of works to be presented and selection of senior production staff. + */ + ARTISTIC_DIRECTOR("ard", "Artistic director"), + + /** + * Use for a person or organization to whom a license for printing or publishing has been transferred. + */ + ASSIGNEE("asg", "Assignee"), + + /** + * Use for a person or organization associated with or found in an item or collection, which cannot be determined to be that of a Former owner [fmo] or other designated relator indicative of provenance. + */ + ASSOCIATED_NAME("asn", "Associated name"), + + /** + * Use for an author, artist, etc., relating him/her to a work for which there is or once was substantial authority for designating that person as author, creator, etc. of the work. + */ + ATTRIBUTED_NAME("att", "Attributed name"), + + /** + * Use for a person or organization in charge of the estimation and public auctioning of goods, particularly books, artistic works, etc. 
+ */ + AUCTIONEER("auc", "Auctioneer"), + + /** + * Use for a person or organization chiefly responsible for the intellectual or artistic content of a work, usually printed text. This term may also be used when more than one person or body bears such responsibility. + */ + AUTHOR("aut", "Author"), + + /** + * Use for a person or organization whose work is largely quoted or extracted in works to which he or she did not contribute directly. Such quotations are found particularly in exhibition catalogs, collections of photographs, etc. + */ + AUTHOR_IN_QUOTATIONS_OR_TEXT_EXTRACTS("aqt", "Author in quotations or text extracts"), + + /** + * Use for a person or organization responsible for an afterword, postface, colophon, etc. but who is not the chief author of a work. + */ + AUTHOR_OF_AFTERWORD_COLOPHON_ETC("aft", "Author of afterword, colophon, etc."), + + /** + * Use for a person or organization responsible for the dialog or spoken commentary for a screenplay or sound recording. + */ + AUTHOR_OF_DIALOG("aud", "Author of dialog"), + + /** + * Use for a person or organization responsible for an introduction, preface, foreword, or other critical introductory matter, but who is not the chief author. + */ + AUTHOR_OF_INTRODUCTION_ETC("aui", "Author of introduction, etc."), + + /** + * Use for a person or organization responsible for a motion picture screenplay, dialog, spoken commentary, etc. + */ + AUTHOR_OF_SCREENPLAY_ETC("aus", "Author of screenplay, etc."), + + /** + * Use for a person or organization responsible for a work upon which the work represented by the catalog record is based. This may be appropriate for adaptations, sequels, continuations, indexes, etc. + */ + BIBLIOGRAPHIC_ANTECEDENT("ant", "Bibliographic antecedent"), + + /** + * Use for a person or organization responsible for the binding of printed or manuscript materials. 
+ */ + BINDER("bnd", "Binder"), + + /** + * Use for a person or organization responsible for the binding design of a book, including the type of binding, the type of materials used, and any decorative aspects of the binding. + */ + BINDING_DESIGNER("bdd", "Binding designer"), + + /** + * Use for the named entity responsible for writing a commendation or testimonial for a work, which appears on or within the publication itself, frequently on the back or dust jacket of print publications or on advertising material for all media. + */ + BLURB_WRITER("blw", "Blurb writer"), + + /** + * Use for a person or organization responsible for the entire graphic design of a book, including arrangement of type and illustration, choice of materials, and process used. + */ + BOOK_DESIGNER("bkd", "Book designer"), + + /** + * Use for a person or organization responsible for the production of books and other print media, if specific codes (e.g., [bkd], [egr], [tyd], [prt]) are not desired. + */ + BOOK_PRODUCER("bkp", "Book producer"), + + /** + * Use for a person or organization responsible for the design of flexible covers designed for or published with a book, including the type of materials used, and any decorative aspects of the bookjacket. + */ + BOOKJACKET_DESIGNER("bjd", "Bookjacket designer"), + + /** + * Use for a person or organization responsible for the design of a book owner's identification label that is most commonly pasted to the inside front cover of a book. + */ + BOOKPLATE_DESIGNER("bpd", "Bookplate designer"), + + /** + * Use for a person or organization who makes books and other bibliographic materials available for purchase. Interest in the materials is primarily lucrative. + */ + BOOKSELLER("bsl", "Bookseller"), + + /** + * Use for a person or organization who writes in an artistic hand, usually as a copyist and/or engrosser. 
+ */ + CALLIGRAPHER("cll", "Calligrapher"), + + /** + * Use for a person or organization responsible for the creation of maps and other cartographic materials. + */ + CARTOGRAPHER("ctg", "Cartographer"), + + /** + * Use for a censor, bowdlerizer, expurgator, etc., official or private. + */ + CENSOR("cns", "Censor"), + + /** + * Use for a person or organization who composes or arranges dances or other movements (e.g., "master of swords") for a musical or dramatic presentation or entertainment. + */ + CHOREOGRAPHER("chr", "Choreographer"), + + /** + * Use for a person or organization who is in charge of the images captured for a motion picture film. The cinematographer works under the supervision of a director, and may also be referred to as director of photography. Do not confuse with videographer. + */ + CINEMATOGRAPHER("cng", "Cinematographer"), + + /** + * Use for a person or organization for whom another person or organization is acting. + */ + CLIENT("cli", "Client"), + + /** + * Use for a person or organization that takes a limited part in the elaboration of a work of another person or organization that brings complements (e.g., appendices, notes) to the work. + */ + COLLABORATOR("clb", "Collaborator"), + + /** + * Use for a person or organization who has brought together material from various sources that has been arranged, described, and cataloged as a collection. A collector is neither the creator of the material nor a person to whom manuscripts in the collection may have been addressed. + */ + COLLECTOR("col", "Collector"), + + /** + * Use for a person or organization responsible for the production of photographic prints from film or other colloid that has ink-receptive and ink-repellent surfaces. + */ + COLLOTYPER("clt", "Collotyper"), + + /** + * Use for the named entity responsible for applying color to drawings, prints, photographs, maps, moving images, etc. 
+ */ + COLORIST("clr", "Colorist"), + + /** + * Use for a person or organization who provides interpretation, analysis, or a discussion of the subject matter on a recording, motion picture, or other audiovisual medium. + */ + COMMENTATOR("cmm", "Commentator"), + + /** + * Use for a person or organization responsible for the commentary or explanatory notes about a text. For the writer of manuscript annotations in a printed book, use Annotator [ann]. + */ + COMMENTATOR_FOR_WRITTEN_TEXT("cwt", "Commentator for written text"), + + /** + * Use for a person or organization who produces a work or publication by selecting and putting together material from the works of various persons or bodies. + */ + COMPILER("com", "Compiler"), + + /** + * Use for the party who applies to the courts for redress, usually in an equity proceeding. + */ + COMPLAINANT("cpl", "Complainant"), + + /** + * Use for a complainant who takes an appeal from one court or jurisdiction to another to reverse the judgment, usually in an equity proceeding. + */ + COMPLAINANT_APPELLANT("cpt", "Complainant-appellant"), + + /** + * Use for a complainant against whom an appeal is taken from one court or jurisdiction to another to reverse the judgment, usually in an equity proceeding. + */ + COMPLAINANT_APPELLEE("cpe", "Complainant-appellee"), + + /** + * Use for a person or organization who creates a musical work, usually a piece of music in manuscript or printed form. + */ + COMPOSER("cmp", "Composer"), + + /** + * Use for a person or organization responsible for the creation of metal slug, or molds made of other materials, used to produce the text and images in printed matter. + */ + COMPOSITOR("cmt", "Compositor"), + + /** + * Use for a person or organization responsible for the original idea on which a work is based, this includes the scientific author of an audio-visual item and the conceptor of an advertisement. 
+ */ + CONCEPTOR("ccp", "Conceptor"), + + /** + * Use for a person who directs a performing group (orchestra, chorus, opera, etc.) in a musical or dramatic presentation or entertainment. + */ + CONDUCTOR("cnd", "Conductor"), + + /** + * Use for the named entity responsible for documenting, preserving, or treating printed or manuscript material, works of art, artifacts, or other media. + */ + CONSERVATOR("con", "Conservator"), + + /** + * Use for a person or organization relevant to a resource, who is called upon for professional advice or services in a specialized field of knowledge or training. + */ + CONSULTANT("csl", "Consultant"), + + /** + * Use for a person or organization relevant to a resource, who is engaged specifically to provide an intellectual overview of a strategic or operational task and by analysis, specification, or instruction, to create or propose a cost-effective course of action or solution. + */ + CONSULTANT_TO_A_PROJECT("csp", "Consultant to a project"), + + /** + * Use for the party who opposes, resists, or disputes, in a court of law, a claim, decision, result, etc. + */ + CONTESTANT("cos", "Contestant"), + + /** + * Use for a contestant who takes an appeal from one court of law or jurisdiction to another to reverse the judgment. + */ + CONTESTANT_APPELLANT("cot", "Contestant-appellant"), + + /** + * Use for a contestant against whom an appeal is taken from one court of law or jurisdiction to another to reverse the judgment. + */ + CONTESTANT_APPELLEE("coe", "Contestant-appellee"), + + /** + * Use for the party defending a claim, decision, result, etc. being opposed, resisted, or disputed in a court of law. + */ + CONTESTEE("cts", "Contestee"), + + /** + * Use for a contestee who takes an appeal from one court or jurisdiction to another to reverse the judgment. 
+ */ + CONTESTEE_APPELLANT("ctt", "Contestee-appellant"), + + /** + * Use for a contestee against whom an appeal is taken from one court or jurisdiction to another to reverse the judgment. + */ + CONTESTEE_APPELLEE("cte", "Contestee-appellee"), + + /** + * Use for a person or organization relevant to a resource, who enters into a contract with another person or organization to perform a specific task. + */ + CONTRACTOR("ctr", "Contractor"), + + /** + * Use for a person or organization whose work has been contributed to a larger work, such as an anthology, serial publication, or other compilation of individual works. Do not use if the sole function in relation to a work is as author, editor, compiler or translator. + */ + CONTRIBUTOR("ctb", "Contributor"), + + /** + * Use for a person or organization listed as a copyright owner at the time of registration. Copyright can be granted or later transferred to another person or organization, at which time the claimant becomes the copyright holder. + */ + COPYRIGHT_CLAIMANT("cpc", "Copyright claimant"), + + /** + * Use for a person or organization to whom copy and legal rights have been granted or transferred for the intellectual content of a work. The copyright holder, although not necessarily the creator of the work, usually has the exclusive right to benefit financially from the sale and use of the work to which the associated copyright protection applies. + */ + COPYRIGHT_HOLDER("cph", "Copyright holder"), + + /** + * Use for a person or organization who is a corrector of manuscripts, such as the scriptorium official who corrected the work of a scribe. For printed matter, use Proofreader. + */ + CORRECTOR("crr", "Corrector"), + + /** + * Use for a person or organization who was either the writer or recipient of a letter or other communication. 
+ */ + CORRESPONDENT("crp", "Correspondent"), + + /** + * Use for a person or organization who designs or makes costumes, fixes hair, etc., for a musical or dramatic presentation or entertainment. + */ + COSTUME_DESIGNER("cst", "Costume designer"), + + /** + * Use for a person or organization responsible for the graphic design of a book cover, album cover, slipcase, box, container, etc. For a person or organization responsible for the graphic design of an entire book, use Book designer; for book jackets, use Bookjacket designer. + */ + COVER_DESIGNER("cov", "Cover designer"), + + /** + * Use for a person or organization responsible for the intellectual or artistic content of a work. + */ + CREATOR("cre", "Creator"), + + /** + * Use for a person or organization responsible for conceiving and organizing an exhibition. + */ + CURATOR_OF_AN_EXHIBITION("cur", "Curator of an exhibition"), + + /** + * Use for a person or organization who principally exhibits dancing skills in a musical or dramatic presentation or entertainment. + */ + DANCER("dnc", "Dancer"), + + /** + * Use for a person or organization that submits data for inclusion in a database or other collection of data. + */ + DATA_CONTRIBUTOR("dtc", "Data contributor"), + + /** + * Use for a person or organization responsible for managing databases or other data sources. + */ + DATA_MANAGER("dtm", "Data manager"), + + /** + * Use for a person or organization to whom a book, manuscript, etc., is dedicated (not the recipient of a gift). + */ + DEDICATEE("dte", "Dedicatee"), + + /** + * Use for the author of a dedication, which may be a formal statement or in epistolary or verse form. + */ + DEDICATOR("dto", "Dedicator"), + + /** + * Use for the party defending or denying allegations made in a suit and against whom relief or recovery is sought in the courts, usually in a legal action. 
+ */ + DEFENDANT("dfd", "Defendant"), + + /** + * Use for a defendant who takes an appeal from one court or jurisdiction to another to reverse the judgment, usually in a legal action. + */ + DEFENDANT_APPELLANT("dft", "Defendant-appellant"), + + /** + * Use for a defendant against whom an appeal is taken from one court or jurisdiction to another to reverse the judgment, usually in a legal action. + */ + DEFENDANT_APPELLEE("dfe", "Defendant-appellee"), + + /** + * Use for the organization granting a degree for which the thesis or dissertation described was presented. + */ + DEGREE_GRANTOR("dgg", "Degree grantor"), + + /** + * Use for a person or organization executing technical drawings from others' designs. + */ + DELINEATOR("dln", "Delineator"), + + /** + * Use for an entity depicted or portrayed in a work, particularly in a work of art. + */ + DEPICTED("dpc", "Depicted"), + + /** + * Use for a person or organization placing material in the physical custody of a library or repository without transferring the legal title. + */ + DEPOSITOR("dpt", "Depositor"), + + /** + * Use for a person or organization responsible for the design if more specific codes (e.g., [bkd], [tyd]) are not desired. + */ + DESIGNER("dsr", "Designer"), + + /** + * Use for a person or organization who is responsible for the general management of a work or who supervises the production of a performance for stage, screen, or sound recording. + */ + DIRECTOR("drt", "Director"), + + /** + * Use for a person who presents a thesis for a university or higher-level educational degree. + */ + DISSERTANT("dis", "Dissertant"), + + /** + * Use for the name of a place from which a resource, e.g., a serial, is distributed. + */ + DISTRIBUTION_PLACE("dbp", "Distribution place"), + + /** + * Use for a person or organization that has exclusive or shared marketing rights for an item. 
+ */ + DISTRIBUTOR("dst", "Distributor"), + + /** + * Use for a person or organization who is the donor of a book, manuscript, etc., to its present owner. Donors to previous owners are designated as Former owner [fmo] or Inscriber [ins]. + */ + DONOR("dnr", "Donor"), + + /** + * Use for a person or organization who prepares artistic or technical drawings. + */ + DRAFTSMAN("drm", "Draftsman"), + + /** + * Use for a person or organization to which authorship has been dubiously or incorrectly ascribed. + */ + DUBIOUS_AUTHOR("dub", "Dubious author"), + + /** + * Use for a person or organization who prepares for publication a work not primarily his/her own, such as by elucidating text, adding introductory or other critical matter, or technically directing an editorial staff. + */ + EDITOR("edt", "Editor"), + + /** + * Use for a person responsible for setting up a lighting rig and focusing the lights for a production, and running the lighting at a performance. + */ + ELECTRICIAN("elg", "Electrician"), + + /** + * Use for a person or organization who creates a duplicate printing surface by pressure molding and electrodepositing of metal that is then backed up with lead for printing. + */ + ELECTROTYPER("elt", "Electrotyper"), + + /** + * Use for a person or organization that is responsible for technical planning and design, particularly with construction. + */ + ENGINEER("eng", "Engineer"), + + /** + * Use for a person or organization who cuts letters, figures, etc. on a surface, such as a wooden or metal plate, for printing. + */ + ENGRAVER("egr", "Engraver"), + + /** + * Use for a person or organization who produces text or images for printing by subjecting metal, glass, or some other surface to acid or the corrosive action of some other substance. + */ + ETCHER("etr", "Etcher"), + + /** + * Use for the name of the place where an event such as a conference or a concert took place. 
+ */ + EVENT_PLACE("evp", "Event place"), + + /** + * Use for a person or organization in charge of the description and appraisal of the value of goods, particularly rare items, works of art, etc. + */ + EXPERT("exp", "Expert"), + + /** + * Use for a person or organization that executed the facsimile. + */ + FACSIMILIST("fac", "Facsimilist"), + + /** + * Use for a person or organization that manages or supervises the work done to collect raw data or do research in an actual setting or environment (typically applies to the natural and social sciences). + */ + FIELD_DIRECTOR("fld", "Field director"), + + /** + * Use for a person or organization who is an editor of a motion picture film. This term is used regardless of the medium upon which the motion picture is produced or manufactured (e.g., acetate film, video tape). + */ + FILM_EDITOR("flm", "Film editor"), + + /** + * Use for a person or organization who is identified as the only party or the party of the first part. In the case of transfer of right, this is the assignor, transferor, licensor, grantor, etc. Multiple parties can be named jointly as the first party. + */ + FIRST_PARTY("fpy", "First party"), + + /** + * Use for a person or organization who makes or imitates something of value or importance, especially with the intent to defraud. + */ + FORGER("frg", "Forger"), + + /** + * Use for a person or organization who owned an item at any time in the past. Includes those to whom the material was once presented. A person or organization giving the item to the present owner is designated as Donor [dnr]. + */ + FORMER_OWNER("fmo", "Former owner"), + + /** + * Use for a person or organization that furnished financial support for the production of the work. + */ + FUNDER("fnd", "Funder"), + + /** + * Use for a person responsible for geographic information system (GIS) development and integration with global positioning system data. 
+ */ + GEOGRAPHIC_INFORMATION_SPECIALIST("gis", "Geographic information specialist"), + + /** + * Use for a person or organization in memory or honor of whom a book, manuscript, etc. is donated. + */ + HONOREE("hnr", "Honoree"), + + /** + * Use for a person who is invited or regularly leads a program (often broadcast) that includes other guests, performers, etc. (e.g., talk show host). + */ + HOST("hst", "Host"), + + /** + * Use for a person or organization responsible for the decoration of a work (especially manuscript material) with precious metals or color, usually with elaborate designs and motifs. + */ + ILLUMINATOR("ilu", "Illuminator"), + + /** + * Use for a person or organization who conceives, and perhaps also implements, a design or illustration, usually to accompany a written text. + */ + ILLUSTRATOR("ill", "Illustrator"), + + /** + * Use for a person who signs a presentation statement. + */ + INSCRIBER("ins", "Inscriber"), + + /** + * Use for a person or organization who principally plays an instrument in a musical or dramatic presentation or entertainment. + */ + INSTRUMENTALIST("itr", "Instrumentalist"), + + /** + * Use for a person or organization who is interviewed at a consultation or meeting, usually by a reporter, pollster, or some other information gathering agent. + */ + INTERVIEWEE("ive", "Interviewee"), + + /** + * Use for a person or organization who acts as a reporter, pollster, or other information gathering agent in a consultation or meeting involving one or more individuals. + */ + INTERVIEWER("ivr", "Interviewer"), + + /** + * Use for a person or organization who first produces a particular useful item, or develops a new process for obtaining a known item or result. + */ + INVENTOR("inv", "Inventor"), + + /** + * Use for an institution that provides scientific analyses of material samples. 
+ */ + LABORATORY("lbr", "Laboratory"), + + /** + * Use for a person or organization that manages or supervises work done in a controlled setting or environment. + */ + LABORATORY_DIRECTOR("ldr", "Laboratory director"), + + /** + * Use for a person or organization whose work involves coordinating the arrangement of existing and proposed land features and structures. + */ + LANDSCAPE_ARCHITECT("lsa", "Landscape architect"), + + /** + * Use to indicate that a person or organization takes primary responsibility for a particular activity or endeavor. Use with another relator term or code to show the greater importance this person or organization has regarding that particular role. If more than one relator is assigned to a heading, use the Lead relator only if it applies to all the relators. + */ + LEAD("led", "Lead"), + + /** + * Use for a person or organization permitting the temporary use of a book, manuscript, etc., such as for photocopying or microfilming. + */ + LENDER("len", "Lender"), + + /** + * Use for the party who files a libel in an ecclesiastical or admiralty case. + */ + LIBELANT("lil", "Libelant"), + + /** + * Use for a libelant who takes an appeal from one ecclesiastical court or admiralty to another to reverse the judgment. + */ + LIBELANT_APPELLANT("lit", "Libelant-appellant"), + + /** + * Use for a libelant against whom an appeal is taken from one ecclesiastical court or admiralty to another to reverse the judgment. + */ + LIBELANT_APPELLEE("lie", "Libelant-appellee"), + + /** + * Use for a party against whom a libel has been filed in an ecclesiastical court or admiralty. + */ + LIBELEE("lel", "Libelee"), + + /** + * Use for a libelee who takes an appeal from one ecclesiastical court or admiralty to another to reverse the judgment. + */ + LIBELEE_APPELLANT("let", "Libelee-appellant"), + + /** + * Use for a libelee against whom an appeal is taken from one ecclesiastical court or admiralty to another to reverse the judgment. 
+ */ + LIBELEE_APPELLEE("lee", "Libelee-appellee"), + + /** + * Use for a person or organization who is a writer of the text of an opera, oratorio, etc. + */ + LIBRETTIST("lbt", "Librettist"), + + /** + * Use for a person or organization who is an original recipient of the right to print or publish. + */ + LICENSEE("lse", "Licensee"), + + /** + * Use for a person or organization who is a signer of the license, imprimatur, etc. + */ + LICENSOR("lso", "Licensor"), + + /** + * Use for a person or organization who designs the lighting scheme for a theatrical presentation, entertainment, motion picture, etc. + */ + LIGHTING_DESIGNER("lgd", "Lighting designer"), + + /** + * Use for a person or organization who prepares the stone or plate for lithographic printing, including a graphic artist creating a design directly on the surface from which printing will be done. + */ + LITHOGRAPHER("ltg", "Lithographer"), + + /** + * Use for a person or organization who is a writer of the text of a song. + */ + LYRICIST("lyr", "Lyricist"), + + /** + * Use for a person or organization that makes an artifactual work (an object made or modified by one or more persons). Examples of artifactual works include vases, cannons or pieces of furniture. + */ + MANUFACTURER("mfr", "Manufacturer"), + + /** + * Use for the named entity responsible for marbling paper, cloth, leather, etc. used in construction of a resource. + */ + MARBLER("mrb", "Marbler"), + + /** + * Use for a person or organization performing the coding of SGML, HTML, or XML markup of metadata, text, etc. + */ + MARKUP_EDITOR("mrk", "Markup editor"), + + /** + * Use for a person or organization primarily responsible for compiling and maintaining the original description of a metadata set (e.g., geospatial metadata set). + */ + METADATA_CONTACT("mdc", "Metadata contact"), + + /** + * Use for a person or organization responsible for decorations, illustrations, letters, etc. cut on a metal surface for printing or decoration. 
+ */ + METAL_ENGRAVER("mte", "Metal-engraver"), + + /** + * Use for a person who leads a program (often broadcast) where topics are discussed, usually with participation of experts in fields related to the discussion. + */ + MODERATOR("mod", "Moderator"), + + /** + * Use for a person or organization that supervises compliance with the contract and is responsible for the report and controls its distribution. Sometimes referred to as the grantee, or controlling agency. + */ + MONITOR("mon", "Monitor"), + + /** + * Use for a person who transcribes or copies musical notation. + */ + MUSIC_COPYIST("mcp", "Music copyist"), + + /** + * Use for a person responsible for basic music decisions about a production, including coordinating the work of the composer, the sound editor, and sound mixers, selecting musicians, and organizing and/or conducting sound for rehearsals and performances. + */ + MUSICAL_DIRECTOR("msd", "Musical director"), + + /** + * Use for a person or organization who performs music or contributes to the musical content of a work when it is not possible or desirable to identify the function more precisely. + */ + MUSICIAN("mus", "Musician"), + + /** + * Use for a person who is a speaker relating the particulars of an act, occurrence, or course of events. + */ + NARRATOR("nrt", "Narrator"), + + /** + * Use for a person or organization responsible for opposing a thesis or dissertation. + */ + OPPONENT("opn", "Opponent"), + + /** + * Use for a person or organization responsible for organizing a meeting for which an item is the report or proceedings. + */ + ORGANIZER_OF_MEETING("orm", "Organizer of meeting"), + + /** + * Use for a person or organization performing the work, i.e., the name of a person or organization associated with the intellectual content of the work. This category does not include the publisher or personal affiliation, or sponsor except where it is also the corporate author. 
+ */ + ORIGINATOR("org", "Originator"), + + /** + * Use for relator codes from other lists which have no equivalent in the MARC list or for terms which have not been assigned a code. + */ + OTHER("oth", "Other"), + + /** + * Use for a person or organization that currently owns an item or collection. + */ + OWNER("own", "Owner"), + + /** + * Use for a person or organization responsible for the production of paper, usually from wood, cloth, or other fibrous material. + */ + PAPERMAKER("ppm", "Papermaker"), + + /** + * Use for a person or organization that applied for a patent. + */ + PATENT_APPLICANT("pta", "Patent applicant"), + + /** + * Use for a person or organization that was granted the patent referred to by the item. + */ + PATENT_HOLDER("pth", "Patent holder"), + + /** + * Use for a person or organization responsible for commissioning a work. Usually a patron uses his or her means or influence to support the work of artists, writers, etc. This includes those who commission and pay for individual works. + */ + PATRON("pat", "Patron"), + + /** + * Use for a person or organization who exhibits musical or acting skills in a musical or dramatic presentation or entertainment, if specific codes for those functions ([act], [dnc], [itr], [voc], etc.) are not used. If specific codes are used, [prf] is used for a person whose principal skill is not known or specified. + */ + PERFORMER("prf", "Performer"), + + /** + * Use for an authority (usually a government agency) that issues permits under which work is accomplished. + */ + PERMITTING_AGENCY("pma", "Permitting agency"), + + /** + * Use for a person or organization responsible for taking photographs, whether they are used in their original form or as reproductions. + */ + PHOTOGRAPHER("pht", "Photographer"), + + /** + * Use for the party who complains or sues in court in a personal action, usually in a legal proceeding. 
+ */ + PLAINTIFF("ptf", "Plaintiff"), + + /** + * Use for a plaintiff who takes an appeal from one court or jurisdiction to another to reverse the judgment, usually in a legal proceeding. + */ + PLAINTIFF_APPELLANT("ptt", "Plaintiff-appellant"), + + /** + * Use for a plaintiff against whom an appeal is taken from one court or jurisdiction to another to reverse the judgment, usually in a legal proceeding. + */ + PLAINTIFF_APPELLEE("pte", "Plaintiff-appellee"), + + /** + * Use for a person or organization responsible for the production of plates, usually for the production of printed images and/or text. + */ + PLATEMAKER("plt", "Platemaker"), + + /** + * Use for a person or organization who prints texts, whether from type or plates. + */ + PRINTER("prt", "Printer"), + + /** + * Use for a person or organization who prints illustrations from plates. + */ + PRINTER_OF_PLATES("pop", "Printer of plates"), + + /** + * Use for a person or organization who makes a relief, intaglio, or planographic printing surface. + */ + PRINTMAKER("prm", "Printmaker"), + + /** + * Use for a person or organization primarily responsible for performing or initiating a process, such as is done with the collection of metadata sets. + */ + PROCESS_CONTACT("prc", "Process contact"), + + /** + * Use for a person or organization responsible for the making of a motion picture, including business aspects, management of the productions, and the commercial success of the work. + */ + PRODUCER("pro", "Producer"), + + /** + * Use for a person responsible for all technical and business matters in a production. + */ + PRODUCTION_MANAGER("pmn", "Production manager"), + + /** + * Use for a person or organization associated with the production (props, lighting, special effects, etc.) of a musical or dramatic presentation or entertainment. 
+ */ + PRODUCTION_PERSONNEL("prd", "Production personnel"), + + /** + * Use for a person or organization responsible for the creation and/or maintenance of computer program design documents, source code, and machine-executable digital files and supporting documentation. + */ + PROGRAMMER("prg", "Programmer"), + + /** + * Use for a person or organization with primary responsibility for all essential aspects of a project, or that manages a very large project that demands senior level responsibility, or that has overall responsibility for managing projects, or provides overall direction to a project manager. + */ + PROJECT_DIRECTOR("pdr", "Project director"), + + /** + * Use for a person who corrects printed matter. For manuscripts, use Corrector [crr]. + */ + PROOFREADER("pfr", "Proofreader"), + + /** + * Use for the name of the place where a resource is published. + */ + PUBLICATION_PLACE("pup", "Publication place"), + + /** + * Use for a person or organization that makes printed matter, often text, but also printed music, artwork, etc. available to the public. + */ + PUBLISHER("pbl", "Publisher"), + + /** + * Use for a person or organization who presides over the elaboration of a collective work to ensure its coherence or continuity. This includes editors-in-chief, literary editors, editors of series, etc. + */ + PUBLISHING_DIRECTOR("pbd", "Publishing director"), + + /** + * Use for a person or organization who manipulates, controls, or directs puppets or marionettes in a musical or dramatic presentation or entertainment. + */ + PUPPETEER("ppt", "Puppeteer"), + + /** + * Use for a person or organization to whom correspondence is addressed. + */ + RECIPIENT("rcp", "Recipient"), + + /** + * Use for a person or organization who supervises the technical aspects of a sound or video recording session. 
+ */ + RECORDING_ENGINEER("rce", "Recording engineer"), + + /** + * Use for a person or organization who writes or develops the framework for an item without being intellectually responsible for its content. + */ + REDACTOR("red", "Redactor"), + + /** + * Use for a person or organization who prepares drawings of architectural designs (i.e., renderings) in accurate, representational perspective to show what the project will look like when completed. + */ + RENDERER("ren", "Renderer"), + + /** + * Use for a person or organization who writes or presents reports of news or current events on air or in print. + */ + REPORTER("rpt", "Reporter"), + + /** + * Use for an agency that hosts data or material culture objects and provides services to promote long term, consistent and shared use of those data or objects. + */ + REPOSITORY("rps", "Repository"), + + /** + * Use for a person who directed or managed a research project. + */ + RESEARCH_TEAM_HEAD("rth", "Research team head"), + + /** + * Use for a person who participated in a research project but whose role did not involve direction or management of it. + */ + RESEARCH_TEAM_MEMBER("rtm", "Research team member"), + + /** + * Use for a person or organization responsible for performing research. + */ + RESEARCHER("res", "Researcher"), + + /** + * Use for the party who makes an answer to the courts pursuant to an application for redress, usually in an equity proceeding. + */ + RESPONDENT("rsp", "Respondent"), + + /** + * Use for a respondent who takes an appeal from one court or jurisdiction to another to reverse the judgment, usually in an equity proceeding. + */ + RESPONDENT_APPELLANT("rst", "Respondent-appellant"), + + /** + * Use for a respondent against whom an appeal is taken from one court or jurisdiction to another to reverse the judgment, usually in an equity proceeding. 
+ */ + RESPONDENT_APPELLEE("rse", "Respondent-appellee"), + + /** + * Use for a person or organization legally responsible for the content of the published material. + */ + RESPONSIBLE_PARTY("rpy", "Responsible party"), + + /** + * Use for a person or organization, other than the original choreographer or director, responsible for restaging a choreographic or dramatic work and who contributes minimal new content. + */ + RESTAGER("rsg", "Restager"), + + /** + * Use for a person or organization responsible for the review of a book, motion picture, performance, etc. + */ + REVIEWER("rev", "Reviewer"), + + /** + * Use for a person or organization responsible for parts of a work, often headings or opening parts of a manuscript, that appear in a distinctive color, usually red. + */ + RUBRICATOR("rbr", "Rubricator"), + + /** + * Use for a person or organization who is the author of a motion picture screenplay. + */ + SCENARIST("sce", "Scenarist"), + + /** + * Use for a person or organization who brings scientific, pedagogical, or historical competence to the conception and realization on a work, particularly in the case of audio-visual items. + */ + SCIENTIFIC_ADVISOR("sad", "Scientific advisor"), + + /** + * Use for a person who is an amanuensis and for a writer of manuscripts proper. For a person who makes pen-facsimiles, use Facsimilist [fac]. + */ + SCRIBE("scr", "Scribe"), + + /** + * Use for a person or organization who models or carves figures that are three-dimensional representations. + */ + SCULPTOR("scl", "Sculptor"), + + /** + * Use for a person or organization who is identified as the party of the second part. In the case of transfer of right, this is the assignee, transferee, licensee, grantee, etc. Multiple parties can be named jointly as the second party. + */ + SECOND_PARTY("spy", "Second party"), + + /** + * Use for a person or organization who is a recorder, redactor, or other person responsible for expressing the views of a organization. 
+ */ + SECRETARY("sec", "Secretary"), + + /** + * Use for a person or organization who translates the rough sketches of the art director into actual architectural structures for a theatrical presentation, entertainment, motion picture, etc. Set designers draw the detailed guides and specifications for building the set. + */ + SET_DESIGNER("std", "Set designer"), + + /** + * Use for a person whose signature appears without a presentation or other statement indicative of provenance. When there is a presentation statement, use Inscriber [ins]. + */ + SIGNER("sgn", "Signer"), + + /** + * Use for a person or organization who uses his/her/their voice with or without instrumental accompaniment to produce music. A performance may or may not include actual words. + */ + SINGER("sng", "Singer"), + + /** + * Use for a person who produces and reproduces the sound score (both live and recorded), the installation of microphones, the setting of sound levels, and the coordination of sources of sound for a production. + */ + SOUND_DESIGNER("sds", "Sound designer"), + + /** + * Use for a person who participates in a program (often broadcast) and makes a formalized contribution or presentation generally prepared in advance. + */ + SPEAKER("spk", "Speaker"), + + /** + * Use for a person or organization that issued a contract or under the auspices of which a work has been written, printed, published, etc. + */ + SPONSOR("spn", "Sponsor"), + + /** + * Use for a person who is in charge of everything that occurs on a performance stage, and who acts as chief of all crews and assistant to a director during rehearsals. + */ + STAGE_MANAGER("stm", "Stage manager"), + + /** + * Use for an organization responsible for the development or enforcement of a standard. + */ + STANDARDS_BODY("stn", "Standards body"), + + /** + * Use for a person or organization who creates a new plate for printing by molding or copying another printing surface. 
+ */ + STEREOTYPER("str", "Stereotyper"), + + /** + * Use for a person relaying a story with creative and/or theatrical interpretation. + */ + STORYTELLER("stl", "Storyteller"), + + /** + * Use for a person or organization that supports (by allocating facilities, staff, or other resources) a project, program, meeting, event, data objects, material culture objects, or other entities capable of support. + */ + SUPPORTING_HOST("sht", "Supporting host"), + + /** + * Use for a person or organization who does measurements of tracts of land, etc. to determine location, forms, and boundaries. + */ + SURVEYOR("srv", "Surveyor"), + + /** + * Use for a person who, in the context of a resource, gives instruction in an intellectual subject or demonstrates while teaching physical skills. + */ + TEACHER("tch", "Teacher"), + + /** + * Use for a person who is ultimately in charge of scenery, props, lights and sound for a production. + */ + TECHNICAL_DIRECTOR("tcd", "Technical director"), + + /** + * Use for a person under whose supervision a degree candidate develops and presents a thesis, mémoire, or text of a dissertation. + */ + THESIS_ADVISOR("ths", "Thesis advisor"), + + /** + * Use for a person who prepares a handwritten or typewritten copy from original material, including from dictated or orally recorded material. For makers of pen-facsimiles, use Facsimilist [fac]. + */ + TRANSCRIBER("trc", "Transcriber"), + + /** + * Use for a person or organization who renders a text from one language into another, or from an older form of a language into the modern form. + */ + TRANSLATOR("trl", "Translator"), + + /** + * Use for a person or organization who designed the type face used in a particular item. + */ + TYPE_DESIGNER("tyd", "Type designer"), + + /** + * Use for a person or organization primarily responsible for choice and arrangement of type used in an item. 
If the typographer is also responsible for other aspects of the graphic design of a book (e.g., Book designer [bkd]), codes for both functions may be needed. + */ + TYPOGRAPHER("tyg", "Typographer"), + + /** + * Use for the name of a place where a university that is associated with a resource is located, for example, a university where an academic dissertation or thesis was presented. + */ + UNIVERSITY_PLACE("uvp", "University place"), + + /** + * Use for a person or organization in charge of a video production, e.g. the video recording of a stage production as opposed to a commercial motion picture. The videographer may be the camera operator or may supervise one or more camera operators. Do not confuse with cinematographer. + */ + VIDEOGRAPHER("vdg", "Videographer"), + + /** + * Use for a person or organization who principally exhibits singing skills in a musical or dramatic presentation or entertainment. + */ + VOCALIST("voc", "Vocalist"), + + /** + * Use for a person who verifies the truthfulness of an event or action. + */ + WITNESS("wit", "Witness"), + + /** + * Use for a person or organization who makes prints by cutting the image in relief on the end-grain of a wood block. + */ + WOOD_ENGRAVER("wde", "Wood-engraver"), + + /** + * Use for a person or organization who makes prints by cutting the image in relief on the plank side of a wood block. + */ + WOODCUTTER("wdc", "Woodcutter"), + + /** + * Use for a person or organization who writes significant material which accompanies a sound recording or other audiovisual material. 
+ */ + WRITER_OF_ACCOMPANYING_MATERIAL("wam", "Writer of accompanying material"); + + private final String code; + private final String name; + + Relator(String code, String name) { + this.code = code; + this.name = name; + } + + public String getCode() { + return code; + } + + public String getName() { + return name; + } + + public static Relator byCode(String code) { + for (Relator relator : Relator.values()) { + if (relator.getCode().equalsIgnoreCase(code)) { + return relator; + } + } + return null; + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Resource.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Resource.java new file mode 100644 index 00000000..9cbd7efc --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Resource.java @@ -0,0 +1,341 @@ +package nl.siegmann.epublib.domain; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.Serializable; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.IOUtil; +import nl.siegmann.epublib.util.StringUtil; +import nl.siegmann.epublib.util.commons.io.XmlStreamReader; + +/** + * Represents a resource that is part of the epub. + * A resource can be a html file, image, xml, etc. + * + * @author paul + * + */ +public class Resource implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 1043946707835004037L; + private String id; + private String title; + private String href; + protected String originalHref; + private MediaType mediaType; + private String inputEncoding = Constants.CHARACTER_ENCODING; + protected byte[] data; + private boolean isNav; + private boolean containingSvg; + private boolean isScripted; + + /** + * Creates an empty Resource with the given href. 
+ * + * Assumes that if the data is of a text type (html/css/etc) then the encoding will be UTF-8 + * + * @param href The location of the resource within the epub. Example: "chapter1.html". + */ + public Resource(String href) { + this(null, new byte[0], href, MediatypeService.determineMediaType(href)); + } + + /** + * Creates a Resource with the given data and MediaType. + * The href will be automatically generated. + * + * Assumes that if the data is of a text type (html/css/etc) then the encoding will be UTF-8 + * + * @param data The Resource's contents + * @param mediaType The MediaType of the Resource + */ + public Resource(byte[] data, MediaType mediaType) { + this(null, data, null, mediaType); + } + + /** + * Creates a resource with the given data at the specified href. + * The MediaType will be determined based on the href extension. + * + * Assumes that if the data is of a text type (html/css/etc) then the encoding will be UTF-8 + * + * @see nl.siegmann.epublib.service.MediatypeService#determineMediaType(String) + * + * @param data The Resource's contents + * @param href The location of the resource within the epub. Example: "chapter1.html". + */ + public Resource(byte[] data, String href) { + this(null, data, href, MediatypeService.determineMediaType(href), Constants.CHARACTER_ENCODING); + } + + /** + * Creates a resource with the data from the given Reader at the specified href. + * The MediaType will be determined based on the href extension. + * + * @see nl.siegmann.epublib.service.MediatypeService#determineMediaType(String) + * + * @param in The Resource's contents + * @param href The location of the resource within the epub. Example: "cover.jpg". 
+ */ + public Resource(Reader in, String href) throws IOException { + this(null, IOUtil.toByteArray(in, Constants.CHARACTER_ENCODING), href, MediatypeService.determineMediaType(href), Constants.CHARACTER_ENCODING); + } + + /** + * Creates a resource with the data from the given InputStream at the specified href. + * The MediaType will be determined based on the href extension. + * + * @see nl.siegmann.epublib.service.MediatypeService#determineMediaType(String) + * + * Assumes that if the data is of a text type (html/css/etc) then the encoding will be UTF-8 + * + * It is recommended to us the {@link #Resource(Reader, String)} method for creating textual + * (html/css/etc) resources to prevent encoding problems. + * Use this method only for binary Resources like images, fonts, etc. + * + * + * @param in The Resource's contents + * @param href The location of the resource within the epub. Example: "cover.jpg". + */ + public Resource(InputStream in, String href) throws IOException { + this(null, IOUtil.toByteArray(in), href, MediatypeService.determineMediaType(href)); + } + + /** + * Creates a resource with the given id, data, mediatype at the specified href. + * Assumes that if the data is of a text type (html/css/etc) then the encoding will be UTF-8 + * + * @param id The id of the Resource. Internal use only. Will be auto-generated if it has a null-value. + * @param data The Resource's contents + * @param href The location of the resource within the epub. Example: "chapter1.html". + * @param mediaType The resources MediaType + */ + public Resource(String id, byte[] data, String href, MediaType mediaType) { + this(id, data, href, mediaType, Constants.CHARACTER_ENCODING); + } + + + /** + * Creates a resource with the given id, data, mediatype at the specified href. + * If the data is of a text type (html/css/etc) then it will use the given inputEncoding. + * + * @param id The id of the Resource. Internal use only. Will be auto-generated if it has a null-value. 
+ * @param data The Resource's contents + * @param href The location of the resource within the epub. Example: "chapter1.html". + * @param mediaType The resources MediaType + * @param inputEncoding If the data is of a text type (html/css/etc) then it will use the given inputEncoding. + */ + public Resource(String id, byte[] data, String href, MediaType mediaType, String inputEncoding) { + this.id = id; + this.href = href; + this.originalHref = href; + this.mediaType = mediaType; + this.inputEncoding = inputEncoding; + this.data = data; + } + + public boolean isNav() { + return isNav; + } + + public void setNav(boolean nav) { + isNav = nav; + } + + public boolean isContainingSvg() { + return containingSvg; + } + + public void setContainingSvg(boolean containingSvg) { + this.containingSvg = containingSvg; + } + + public boolean isScripted() { + return isScripted; + } + + public void setScripted(boolean scripted) { + isScripted = scripted; + } + + /** + * Gets the contents of the Resource as an InputStream. + * + * @return The contents of the Resource. + * + * @throws IOException + */ + public InputStream getInputStream() throws IOException { + return new ByteArrayInputStream(getData()); + } + + /** + * The contents of the resource as a byte[] + * + * @return The contents of the resource + */ + public byte[] getData() throws IOException { + return data; + } + + /** + * Tells this resource to release its cached data. + * + * If this resource was not lazy-loaded, this is a no-op. + */ + public void close() { + } + + /** + * Sets the data of the Resource. + * If the data is a of a different type then the original data then make sure to change the MediaType. + * + * @param data + */ + public void setData(byte[] data) { + this.data = data; + } + + /** + * Returns the size of this resource in bytes. + * + * @return the size. 
+ */ + public long getSize() { + return data.length; + } + + /** + * If the title is found by scanning the underlying html document then it is cached here. + * + * @return the title + */ + public String getTitle() { + return title; + } + + /** + * Sets the Resource's id: Make sure it is unique and a valid identifier. + * + * @param id + */ + public void setId(String id) { + this.id = id; + } + + /** + * The resources Id. + * + * Must be both unique within all the resources of this book and a valid identifier. + * @return The resources Id. + */ + public String getId() { + return id; + } + + /** + * The location of the resource within the contents folder of the epub file. + * + * Example:<br/> + * images/cover.jpg<br/> + * content/chapter1.xhtml<br/> + * + * @return The location of the resource within the contents folder of the epub file. + */ + public String getHref() { + return href; + } + + /** + * Sets the Resource's href. + * + * @param href + */ + public void setHref(String href) { + this.href = href; + } + + /** + * The character encoding of the resource. + * Is allowed to be null for non-text resources like images. + * + * @return The character encoding of the resource. + */ + public String getInputEncoding() { + return inputEncoding; + } + + /** + * Sets the Resource's input character encoding. + * + * @param encoding + */ + public void setInputEncoding(String encoding) { + this.inputEncoding = encoding; + } + + /** + * Gets the contents of the Resource as Reader. + * + * Does all sorts of smart things (courtesy of apache commons io XMLStreamREader) to handle encodings, byte order markers, etc. + * + * @return the contents of the Resource as Reader. + * @throws IOException + */ + public Reader getReader() throws IOException { + return new XmlStreamReader(new ByteArrayInputStream(getData()), getInputEncoding()); + } + + /** + * Gets the hashCode of the Resource's href. 
+ * + */ + public int hashCode() { + return href.hashCode(); + } + + /** + * Checks to see of the given resourceObject is a resource and whether its href is equal to this one. + * + * @return whether the given resourceObject is a resource and whether its href is equal to this one. + */ + public boolean equals(Object resourceObject) { + if (! (resourceObject instanceof Resource)) { + return false; + } + return href.equals(((Resource) resourceObject).getHref()); + } + + /** + * This resource's mediaType. + * + * @return This resource's mediaType. + */ + public MediaType getMediaType() { + return mediaType; + } + + public void setMediaType(MediaType mediaType) { + this.mediaType = mediaType; + } + + public void setTitle(String title) { + this.title = title; + } + + public String toString() { + return StringUtil.toString("id", id, + "title", title, + "encoding", inputEncoding, + "mediaType", mediaType, + "href", href, + "size", (data == null ? 0 : data.length)); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/ResourceInputStream.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ResourceInputStream.java new file mode 100644 index 00000000..92b305ff --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/ResourceInputStream.java @@ -0,0 +1,37 @@ +package nl.siegmann.epublib.domain; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.ZipFile; + + +/** + * A wrapper class for closing a ZipFile object when the InputStream derived + * from it is closed. + * + * @author ttopalov + * + */ +public class ResourceInputStream extends FilterInputStream { + + private final ZipFile zipFile; + + /** + * Constructor. + * + * @param in + * The InputStream object. + * @param zipFile + * The ZipFile object. 
/**
 * A wrapper class for closing a ZipFile object when the InputStream derived
 * from it is closed.
 *
 * @author ttopalov
 *
 */
public class ResourceInputStream extends FilterInputStream {

    private final ZipFile zipFile;

    /**
     * Constructor.
     *
     * @param in
     *            The InputStream object.
     * @param zipFile
     *            The ZipFile object that is closed together with this stream.
     */
    public ResourceInputStream(InputStream in, ZipFile zipFile) {
        super(in);
        this.zipFile = zipFile;
    }

    /**
     * Closes the wrapped stream and then the ZipFile.
     *
     * The ZipFile is closed even when closing the wrapped stream fails, so the
     * underlying file handle is never leaked.
     *
     * @throws IOException if closing the wrapped stream or the ZipFile fails
     */
    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            zipFile.close();
        }
    }
}
+ */ + public String getResourceId() { + if (resource != null) { + return resource.getId(); + } + return null; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Resources.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Resources.java new file mode 100644 index 00000000..a04eda99 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Resources.java @@ -0,0 +1,385 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.StringUtil; + +/** + * All the resources that make up the book. + * XHTML files, images and epub xml documents must be here. + * + * @author paul + * + */ +public class Resources implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 2450876953383871451L; + private static final String IMAGE_PREFIX = "image_"; + private static final String ITEM_PREFIX = "item_"; + private int lastId = 1; + private Resource navResource; + + private Map<String, Resource> resources = new HashMap<String, Resource>(); + + public Resource getNavResource() { + return navResource; + } + + public void setNavResource(Resource navResource) { + this.navResource = navResource; + } + + + /** + * Adds a resource to the resources. + * + * Fixes the resources id and href if necessary. + * + * @param resource + * @return the newly added resource + */ + public Resource add(Resource resource) { + fixResourceHref(resource); + fixResourceId(resource); + this.resources.put(resource.getHref(), resource); + return resource; + } + + /** + * Checks the id of the given resource and changes to a unique identifier if it isn't one already. 
+ * + * @param resource + */ + public void fixResourceId(Resource resource) { + String resourceId = resource.getId(); + + // first try and create a unique id based on the resource's href + if (StringUtil.isBlank(resource.getId())) { + resourceId = StringUtil.substringBeforeLast(resource.getHref(), '.'); + resourceId = StringUtil.substringAfterLast(resourceId, '/'); + } + + resourceId = makeValidId(resourceId, resource); + + // check if the id is unique. if not: create one from scratch + if (StringUtil.isBlank(resourceId) || containsId(resourceId)) { + resourceId = createUniqueResourceId(resource); + } + resource.setId(resourceId); + } + + /** + * Check if the id is a valid identifier. if not: prepend with valid identifier + * + * @param resource + * @return a valid id + */ + private String makeValidId(String resourceId, Resource resource) { + if (StringUtil.isNotBlank(resourceId) && ! Character.isJavaIdentifierStart(resourceId.charAt(0))) { + resourceId = getResourceItemPrefix(resource) + resourceId; + } + return resourceId; + } + + private String getResourceItemPrefix(Resource resource) { + String result; + if (MediatypeService.isBitmapImage(resource.getMediaType())) { + result = IMAGE_PREFIX; + } else { + result = ITEM_PREFIX; + } + return result; + } + + /** + * Creates a new resource id that is guaranteed to be unique for this set of Resources + * + * @param resource + * @return a new resource id that is guaranteed to be unique for this set of Resources + */ + private String createUniqueResourceId(Resource resource) { + int counter = lastId; + if (counter == Integer.MAX_VALUE) { + if (resources.size() == Integer.MAX_VALUE) { + throw new IllegalArgumentException("Resources contains " + Integer.MAX_VALUE + " elements: no new elements can be added"); + } else { + counter = 1; + } + } + String prefix = getResourceItemPrefix(resource); + String result = prefix + counter; + while (containsId(result)) { + result = prefix + (++ counter); + } + lastId = counter; + 
return result; + } + + /** + * Whether the map of resources already contains a resource with the given id. + * + * @param id + * @return Whether the map of resources already contains a resource with the given id. + */ + public boolean containsId(String id) { + if (StringUtil.isBlank(id)) { + return false; + } + for (Resource resource: resources.values()) { + if (id.equals(resource.getId())) { + return true; + } + } + return false; + } + + /** + * Gets the resource with the given id. + * + * @param id + * @return null if not found + */ + public Resource getById(String id) { + if (StringUtil.isBlank(id)) { + return null; + } + for (Resource resource: resources.values()) { + if (id.equals(resource.getId())) { + return resource; + } + } + return null; + } + + /** + * Remove the resource with the given href. + * + * @param href + * @return the removed resource, null if not found + */ + public Resource remove(String href) { + return resources.remove(href); + } + + private void fixResourceHref(Resource resource) { + if(StringUtil.isNotBlank(resource.getHref()) + && ! 
resources.containsKey(resource.getHref())) { + return; + } + if(StringUtil.isBlank(resource.getHref())) { + if(resource.getMediaType() == null) { + throw new IllegalArgumentException("Resource must have either a MediaType or a href"); + } + int i = 1; + String href = createHref(resource.getMediaType(), i); + while(resources.containsKey(href)) { + href = createHref(resource.getMediaType(), (++i)); + } + resource.setHref(href); + } + } + + private String createHref(MediaType mediaType, int counter) { + if(MediatypeService.isBitmapImage(mediaType)) { + return "image_" + counter + mediaType.getDefaultExtension(); + } else { + return "item_" + counter + mediaType.getDefaultExtension(); + } + } + + + public boolean isEmpty() { + return resources.isEmpty(); + } + + /** + * The number of resources + * @return The number of resources + */ + public int size() { + return resources.size(); + } + + /** + * The resources that make up this book. + * Resources can be xhtml pages, images, xml documents, etc. + * The returned map is the internal map itself, not a copy. + * @return The resources that make up this book. + */ + public Map<String, Resource> getResourceMap() { + return resources; + } + + public Collection<Resource> getAll() { + return resources.values(); + } + + + /** + * Whether there exists a resource with the given href. Only the part of the href before the fragment separator is used for the lookup. + * @param href the href to look for; a blank href is never considered present + * @return Whether there exists a resource with the given href + */ + public boolean containsByHref(String href) { + if (StringUtil.isBlank(href)) { + return false; + } + return resources.containsKey(StringUtil.substringBefore(href, Constants.FRAGMENT_SEPARATOR_CHAR)); + } + + /** + * Sets the collection of Resources to the given collection of resources + * + * @param resources the new resources; the current content is cleared before they are added + */ + public void set(Collection<Resource> resources) { + this.resources.clear(); + addAll(resources); + } + + /** + * Adds all resources from the given Collection of resources to the existing collection. 
+ * + * @param resources the resources to add; a resource with a blank href gets a generated href first + */ + public void addAll(Collection<Resource> resources) { + for(Resource resource: resources) { + fixResourceHref(resource); + this.resources.put(resource.getHref(), resource); + } + } + + /** + * Replaces the current resources with a copy of the given map of resources + * + * @param resources A map with as keys the resources href and as values the Resources + */ + public void set(Map<String, Resource> resources) { + this.resources = new HashMap<String, Resource>(resources); + } + + + /** + * First tries to find a resource with as id the given idOrHref, if that + * fails it tries to find one with the idOrHref as href. + * + * @param idOrHref the id or the href of the wanted resource + * @return the found Resource, null if neither lookup finds one + */ + public Resource getByIdOrHref(String idOrHref) { + Resource resource = getById(idOrHref); + if (resource == null) { + resource = getByHref(idOrHref); + } + return resource; + } + + + /** + * Gets the resource with the given href. + * If the given href contains a fragmentId then that fragment id will be ignored. + * + * @param href the href of the wanted resource + * @return null if the href is blank or not found. + */ + public Resource getByHref(String href) { + if (StringUtil.isBlank(href)) { + return null; + } + href = StringUtil.substringBefore(href, Constants.FRAGMENT_SEPARATOR_CHAR); + Resource result = resources.get(href); + return result; + } + + /** + * Gets the first resource (random order) with the given mediatype. + * + * Useful for looking up the table of contents as it's supposed to be the only resource with NCX mediatype. + * + * @param mediaType the media type to look for + * @return the first resource (random order) with the given mediatype, null if there is none. + */ + public Resource findFirstResourceByMediaType(MediaType mediaType) { + return findFirstResourceByMediaType(resources.values(), mediaType); + } + + /** + * Gets the first resource (random order) with the given mediatype. + * + * Useful for looking up the table of contents as it's supposed to be the only resource with NCX mediatype. 
+ * @param resources the resources to search + * @param mediaType the media type to look for; compared by object identity (==) + * @return the first resource (random order) with the given mediatype, null if there is none. + */ + public static Resource findFirstResourceByMediaType(Collection<Resource> resources, MediaType mediaType) { + for (Resource resource: resources) { + if (resource.getMediaType() == mediaType) { + return resource; + } + } + return null; + } + + /** + * All resources that have the given MediaType. + * + * @param mediaType the media type to match, compared by object identity (==); null gives an empty list + * @return All resources that have the given MediaType. + */ + public List<Resource> getResourcesByMediaType(MediaType mediaType) { + List<Resource> result = new ArrayList<Resource>(); + if (mediaType == null) { + return result; + } + for (Resource resource: getAll()) { + if (resource.getMediaType() == mediaType) { + result.add(resource); + } + } + return result; + } + + /** + * All Resources that match any of the given list of MediaTypes + * + * @param mediaTypes the media types to match; null gives an empty list + * @return All Resources that match any of the given list of MediaTypes + */ + public List<Resource> getResourcesByMediaTypes(MediaType[] mediaTypes) { + List<Resource> result = new ArrayList<Resource>(); + if (mediaTypes == null) { + return result; + } + + // this is the fastest way of doing this according to + // http://stackoverflow.com/questions/1128723/in-java-how-can-i-test-if-an-array-contains-a-certain-value + List<MediaType> mediaTypesList = Arrays.asList(mediaTypes); + for (Resource resource: getAll()) { + if (mediaTypesList.contains(resource.getMediaType())) { + result.add(resource); + } + } + return result; + } + + + /** + * All resource hrefs + * + * @return all resource hrefs, as the key set of the internal map + */ + public Collection<String> getAllHrefs() { + return resources.keySet(); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Scheme.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Scheme.java new file mode 100644 index 00000000..dd8818c5 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Scheme.java @@ -0,0 +1,36 @@ +package 
nl.siegmann.epublib.domain; + +public class Scheme { + + public final static Scheme UUID = new Scheme("UUID"); + + public final static Scheme ISBN = new Scheme("ISBN"); + + private String name; + private String value; + + public Scheme(String name) { + this.name = name; + } + + public Scheme(String name, String value) { + this.name = name; + this.value = value; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Spine.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Spine.java new file mode 100644 index 00000000..069ac5f7 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Spine.java @@ -0,0 +1,191 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import nl.siegmann.epublib.util.StringUtil; + +/** + * The spine sections are the sections of the book in the order in which the book should be read. + * + * This contrasts with the Table of Contents sections, which are an index into the Book's sections. + * + * @see nl.siegmann.epublib.domain.TableOfContents + * + * @author paul + * + */ +public class Spine implements Serializable { + + /** + * Version id used by Java serialization. + */ + private static final long serialVersionUID = 3878483958947357246L; + private Resource tocResource; + private List<SpineReference> spineReferences; + + public Spine() { + this(new ArrayList<SpineReference>()); + } + + /** + * Creates a spine out of all the resources in the table of contents. 
+ * + * @param tableOfContents the table of contents whose unique resources become the spine references, in the same order + */ + public Spine(TableOfContents tableOfContents) { + this.spineReferences = createSpineReferences(tableOfContents.getAllUniqueResources()); + } + + public Spine(List<SpineReference> spineReferences) { + this.spineReferences = spineReferences; + } + + public static List<SpineReference> createSpineReferences(Collection<Resource> resources) { + List<SpineReference> result = new ArrayList<SpineReference>(resources.size()); + for (Resource resource: resources) { + result.add(new SpineReference(resource)); + } + return result; + } + + public List<SpineReference> getSpineReferences() { + return spineReferences; + } + public void setSpineReferences(List<SpineReference> spineReferences) { + this.spineReferences = spineReferences; + } + + /** + * Gets the resource at the given index. + * Null if the index is out of range. + * + * @param index the position within the spine + * @return the resource at the given index. + */ + public Resource getResource(int index) { + if (index < 0 || index >= spineReferences.size()) { + return null; + } + return spineReferences.get(index).getResource(); + } + + /** + * Finds the index of the first spine reference whose resource id equals the given resourceId. + * + * -1 if not found or if the given resourceId is blank. + * + * @param resourceId the resource id to look for + * @return the index of the first spine reference with the given resourceId, -1 if there is none. + */ + public int findFirstResourceById(String resourceId) { + if (StringUtil.isBlank(resourceId)) { + return -1; + } + + for (int i = 0; i < spineReferences.size(); i++) { + SpineReference spineReference = spineReferences.get(i); + if (resourceId.equals(spineReference.getResourceId())) { + return i; + } + } + return -1; + } + + /** + * Adds the given spineReference to the spine references and returns it. 
+ * + * @param spineReference the reference to add; if the list of spine references is null a new list is created first + * @return the given spineReference + */ + public SpineReference addSpineReference(SpineReference spineReference) { + if (spineReferences == null) { + this.spineReferences = new ArrayList<SpineReference>(); + } + spineReferences.add(spineReference); + return spineReference; + } + + /** + * Wraps the given resource in a new SpineReference, adds that to the spine references and returns it. + * + * @return the newly created SpineReference + */ + public SpineReference addResource(Resource resource) { + return addSpineReference(new SpineReference(resource)); + } + + /** + * The number of elements in the spine. + * + * @return The number of elements in the spine. + */ + public int size() { + return spineReferences.size(); + } + + /** + * As per the epub file format the spine officially maintains a reference to the Table of Contents. + * The epubwriter will look for it here first, followed by some clever tricks to find it elsewhere if not found. + * Put it here to be sure of the expected behaviours. + * + * @param tocResource the resource containing the table of contents + */ + public void setTocResource(Resource tocResource) { + this.tocResource = tocResource; + } + + /** + * The resource containing the XML for the tableOfContents. + * When saving an epub file this resource needs to be in this place. + * + * @return The resource containing the XML for the tableOfContents. + */ + public Resource getTocResource() { + return tocResource; + } + + /** + * The position within the spine of the given resource. + * + * @param currentResource the resource to look for, matched by its href + * @return the index of the resource, -1 if the resource is null or not found. + * + */ + public int getResourceIndex(Resource currentResource) { + if (currentResource == null) { + return -1; + } + return getResourceIndex(currentResource.getHref()); + } + + /** + * The first position within the spine of a resource with the given href. + * + * @return the index of the first match, -1 if the href is blank or not found. 
+ * + */ + public int getResourceIndex(String resourceHref) { + int result = -1; + if (StringUtil.isBlank(resourceHref)) { + return result; + } + for (int i = 0; i < spineReferences.size(); i++) { + if (resourceHref.equals(spineReferences.get(i).getResource().getHref())) { + result = i; + break; + } + } + return result; + } + + /** + * Whether the spine has no references + * @return true if the spine has no references + */ + public boolean isEmpty() { + return spineReferences.isEmpty(); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/SpineReference.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/SpineReference.java new file mode 100644 index 00000000..6b545f43 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/SpineReference.java @@ -0,0 +1,56 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; + + +/** + * A Section of a book. + * Represents both an item in the package document and an item in the index. + * + * @author paul + * + */ +public class SpineReference extends ResourceReference implements Serializable { + + /** + * Version id used by Java serialization. + */ + private static final long serialVersionUID = -7921609197351510248L; + private boolean linear = true; + + public SpineReference(Resource resource) { + this(resource, true); + } + + + public SpineReference(Resource resource, boolean linear) { + super(resource); + this.linear = linear; + } + + /** + * Linear denotes whether the section is Primary or Auxiliary. + * Usually the cover page has linear set to false and all the other sections + * have it set to true. + * + * It's an optional property that readers may also ignore. + * + * <blockquote>primary or auxiliary is useful for Reading Systems which + * opt to present auxiliary content differently than primary content. + * For example, a Reading System might opt to render auxiliary content in + * a popup window apart from the main window which presents the primary + * content. 
(For an example of the types of content that may be considered + * auxiliary, refer to the example below and the subsequent discussion.)</blockquote> + * @see <a href="http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.4">OPF Spine specification</a> + * + * @return true if the section is Primary (linear), false if it is Auxiliary. + */ + public boolean isLinear() { + return linear; + } + + public void setLinear(boolean linear) { + this.linear = linear; + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/TOCReference.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/TOCReference.java new file mode 100644 index 00000000..5dae8aa1 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/TOCReference.java @@ -0,0 +1,64 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +/** + * An item in the Table of Contents. + * Each item holds a list of child items, which makes the table of contents a tree. + * @see nl.siegmann.epublib.domain.TableOfContents + * + * @author paul + * + */ +public class TOCReference extends TitledResourceReference implements Serializable { + + /** + * Version id used by Java serialization. + */ + private static final long serialVersionUID = 5787958246077042456L; + private List<TOCReference> children; + private static final Comparator<TOCReference> COMPARATOR_BY_TITLE_IGNORE_CASE = new Comparator<TOCReference>() { + + @Override + public int compare(TOCReference tocReference1, TOCReference tocReference2) { + return String.CASE_INSENSITIVE_ORDER.compare(tocReference1.getTitle(), tocReference2.getTitle()); + } + }; + + public TOCReference() { + this(null, null, null); + } + + public TOCReference(String name, Resource resource) { + this(name, resource, null); + } + + public TOCReference(String name, Resource resource, String fragmentId) { + this(name, resource, fragmentId, new ArrayList<TOCReference>()); + } + + public TOCReference(String title, Resource resource, String fragmentId, List<TOCReference> children) { + 
super(resource, title, fragmentId); + this.children = children; + } + + public static Comparator<TOCReference> getComparatorByTitleIgnoreCase() { + return COMPARATOR_BY_TITLE_IGNORE_CASE; + } + + public List<TOCReference> getChildren() { + return children; + } + + public TOCReference addChildSection(TOCReference childSection) { + this.children.add(childSection); + return childSection; + } + + public void setChildren(List<TOCReference> children) { + this.children = children; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/TableOfContents.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/TableOfContents.java new file mode 100644 index 00000000..56cf6d0e --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/TableOfContents.java @@ -0,0 +1,254 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * The table of contents of the book. + * The TableOfContents is a tree structure: at the root it is a list of TOCReferences, each of which may have as children another list of TOCReferences. + * + * The table of contents is used by epub as a quick index to chapters and sections within chapters. + * It may contain duplicate entries, may decide not to point to certain chapters, etc. + * + * See the spine for the complete list of sections in the order in which they should be read. 
+ * + * @see nl.siegmann.epublib.domain.Spine + * + * @author paul + * + */ +public class TableOfContents implements Serializable { + + /** + * Version id used by Java serialization. + */ + private static final long serialVersionUID = -3147391239966275152L; + + public static final String DEFAULT_PATH_SEPARATOR = "/"; + + private List<TOCReference> tocReferences; + + public TableOfContents() { + this(new ArrayList<TOCReference>()); + } + + public TableOfContents(List<TOCReference> tocReferences) { + this.tocReferences = tocReferences; + } + + public List<TOCReference> getTocReferences() { + return tocReferences; + } + + public void setTocReferences(List<TOCReference> tocReferences) { + this.tocReferences = tocReferences; + } + + /** + * Calls addSection(Resource, String, String) with the DEFAULT_PATH_SEPARATOR as path separator. + * @return the TOCReference at the end of the path + */ + public TOCReference addSection(Resource resource, String path) { + return addSection(resource, path, DEFAULT_PATH_SEPARATOR); + } + + /** + * Calls addSection(Resource, String[]) after splitting the path on the given pathSeparator. + * The separator is taken literally (it is quoted before splitting), so separators like "." or "|" work as expected. + * @param resource the resource the last path element will point to + * @param path the titles of the sections, separated by the pathSeparator + * @param pathSeparator the literal text that separates the path elements + * @return the TOCReference at the end of the path + */ + public TOCReference addSection(Resource resource, String path, String pathSeparator) { + String[] pathElements = path.split(java.util.regex.Pattern.quote(pathSeparator)); + return addSection(resource, pathElements); + } + + /** + * Finds the first TOCReference in the given list that has the same title as the given Title. + * + * @param title the title to look for + * @param tocReferences the references to search; children are not searched + * @return null if not found. + */ + private static TOCReference findTocReferenceByTitle(String title, List<TOCReference> tocReferences) { + for (TOCReference tocReference: tocReferences) { + if (title.equals(tocReference.getTitle())) { + return tocReference; + } + } + return null; + } + + /** + * Adds the given Resources to the TableOfContents at the location specified by the pathElements. 
+ * + * Example: + * Calling this method with a Resource and new String[] {"chapter1", "paragraph1"} will result in the following: + * <ul> + * <li>a TOCReference with the title "chapter1" at the root level.<br/> + * If this TOCReference did not yet exist it will have been created and does not point to any resource</li> + * <li>A TOCReference that has the title "paragraph1". This TOCReference will be the child of TOCReference "chapter1" and + * will point to the given Resource</li> + * </ul> + * + * @param resource the resource the TOCReference at the end of the path will point to + * @param pathElements the titles that lead to the TOCReference, starting at the root level + * @return the TOCReference at the end of the path (existing or newly created), null if pathElements is null or empty + */ + public TOCReference addSection(Resource resource, String[] pathElements) { + if (pathElements == null || pathElements.length == 0) { + return null; + } + TOCReference result = null; + List<TOCReference> currentTocReferences = this.tocReferences; + for (int i = 0; i < pathElements.length; i++) { + String currentTitle = pathElements[i]; + result = findTocReferenceByTitle(currentTitle, currentTocReferences); + if (result == null) { + result = new TOCReference(currentTitle, null); + currentTocReferences.add(result); + } + currentTocReferences = result.getChildren(); + } + result.setResource(resource); + return result; + } + + /** + * Adds the given Resources to the TableOfContents at the location specified by the pathElements. 
+ * + * Example: + * Calling this method with a Resource and new int[] {0, 0} will result in the following: + * <ul> + * <li>a TOCReference at the root level.<br/> + * If this TOCReference did not yet exist it will have been created with a generated title (the sectionTitlePrefix followed by its 1-based section number) and does not point to any resource</li> + * <li>A TOCReference that points to the given resource and is a child of the previously created TOCReference.<br/> + * If this TOCReference didn't exist yet it will be created with a generated title (the sectionTitlePrefix followed by the 1-based section numbers of its path, joined by the sectionNumberSeparator)</li> + * </ul> + * + * @param resource the resource the TOCReference at the end of the path will point to + * @param pathElements the 0-based indexes that lead to the TOCReference, starting at the root level + * @return the TOCReference at the end of the path (existing or newly created), null if pathElements is null or empty + */ + public TOCReference addSection(Resource resource, int[] pathElements, String sectionTitlePrefix, String sectionNumberSeparator) { + if (pathElements == null || pathElements.length == 0) { + return null; + } + TOCReference result = null; + List<TOCReference> currentTocReferences = this.tocReferences; + for (int i = 0; i < pathElements.length; i++) { + int currentIndex = pathElements[i]; + if (currentIndex > 0 && currentIndex < (currentTocReferences.size() - 1)) { + result = currentTocReferences.get(currentIndex); + } else { + result = null; + } + if (result == null) { + paddTOCReferences(currentTocReferences, pathElements, i, sectionTitlePrefix, sectionNumberSeparator); + result = currentTocReferences.get(currentIndex); + } + currentTocReferences = result.getChildren(); + } + result.setResource(resource); + return result; + } + + private void paddTOCReferences(List<TOCReference> currentTocReferences, + int[] pathElements, int pathPos, String sectionPrefix, String sectionNumberSeparator) { + for (int i = currentTocReferences.size(); i <= pathElements[pathPos]; i++) { + String sectionTitle = createSectionTitle(pathElements, pathPos, i, sectionPrefix, + sectionNumberSeparator); + currentTocReferences.add(new TOCReference(sectionTitle, null)); + } + } + + private String createSectionTitle(int[] pathElements, int pathPos, int lastPos, + String sectionPrefix, String 
sectionNumberSeparator) { + StringBuilder title = new StringBuilder(sectionPrefix); + for (int i = 0; i < pathPos; i++) { + if (i > 0) { + title.append(sectionNumberSeparator); + } + title.append(pathElements[i] + 1); + } + if (pathPos > 0) { + title.append(sectionNumberSeparator); + } + title.append(lastPos + 1); + return title.toString(); + } + + public TOCReference addTOCReference(TOCReference tocReference) { + if (tocReferences == null) { + tocReferences = new ArrayList<TOCReference>(); + } + tocReferences.add(tocReference); + return tocReference; + } + + /** + * All unique resources (unique by href) in the order in which they are referenced in the table of contents. + * References that do not point to a resource are skipped. + * @return All unique resources (unique by href) in the order in which they are referenced in the table of contents. + */ + public List<Resource> getAllUniqueResources() { + Set<String> uniqueHrefs = new HashSet<String>(); + List<Resource> result = new ArrayList<Resource>(); + getAllUniqueResources(uniqueHrefs, result, tocReferences); + return result; + } + + + private static void getAllUniqueResources(Set<String> uniqueHrefs, List<Resource> result, List<TOCReference> tocReferences) { + for (TOCReference tocReference: tocReferences) { + Resource resource = tocReference.getResource(); + if (resource != null && ! uniqueHrefs.contains(resource.getHref())) { + uniqueHrefs.add(resource.getHref()); + result.add(resource); + } + getAllUniqueResources(uniqueHrefs, result, tocReference.getChildren()); + } + } + + /** + * The total number of references in this table of contents, children at all levels included. + * + * @return The total number of references in this table of contents. 
+ */ + public int size() { + return getTotalSize(tocReferences); + } + + private static int getTotalSize(Collection<TOCReference> tocReferences) { + int result = tocReferences.size(); + for (TOCReference tocReference: tocReferences) { + result += getTotalSize(tocReference.getChildren()); + } + return result; + } + + /** + * The maximum depth of the reference tree + * @return The maximum depth of the reference tree: 0 for an empty table of contents, 1 if there are only top-level references + */ + public int calculateDepth() { + return calculateDepth(tocReferences, 0); + } + + private int calculateDepth(List<TOCReference> tocReferences, int currentDepth) { + int maxChildDepth = 0; + for (TOCReference tocReference: tocReferences) { + int childDepth = calculateDepth(tocReference.getChildren(), 1); + if (childDepth > maxChildDepth) { + maxChildDepth = childDepth; + } + } + return currentDepth + maxChildDepth; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/Title.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Title.java new file mode 100644 index 00000000..dd960681 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/Title.java @@ -0,0 +1,49 @@ +package nl.siegmann.epublib.domain; + +import java.util.Objects; + +public class Title { + + public static final Title EMPTY = new Title(""); + + String value; + String type; + + public Title(String value) { + this.value = value; + } + + public Title(String value, String type) { + this.value = value; + this.type = type; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Title title = (Title) o; + return Objects.equals(value, title.value) && Objects.equals(type, title.type); + } + + @Override + public int hashCode() { + 
return Objects.hash(value, type); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/domain/TitledResourceReference.java b/epublib-core/src/main/java/nl/siegmann/epublib/domain/TitledResourceReference.java new file mode 100644 index 00000000..81cee4b3 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/domain/TitledResourceReference.java @@ -0,0 +1,73 @@ +package nl.siegmann.epublib.domain; + +import java.io.Serializable; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.util.StringUtil; + +public class TitledResourceReference extends ResourceReference implements Serializable { + + /** + * Version id used by Java serialization. + */ + private static final long serialVersionUID = 3918155020095190080L; + private String fragmentId; + private String title; + + public TitledResourceReference(Resource resource) { + this(resource, null); + } + + public TitledResourceReference(Resource resource, String title) { + this(resource, title, null); + } + + public TitledResourceReference(Resource resource, String title, String fragmentId) { + super(resource); + this.title = title; + this.fragmentId = fragmentId; + } + + public String getFragmentId() { + return fragmentId; + } + + public void setFragmentId(String fragmentId) { + this.fragmentId = fragmentId; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + + /** + * If the fragmentId is blank it returns the resource href, otherwise it returns the resource href + '#' + the fragmentId. + * NOTE(review): the resource is dereferenced without a null check, so a reference without a resource causes a NullPointerException here. + * @return If the fragmentId is blank it returns the resource href, otherwise it returns the resource href + '#' + the fragmentId. 
+ */ + public String getCompleteHref() { + if (StringUtil.isBlank(fragmentId)) { + return resource.getHref(); + } else { + return resource.getHref() + Constants.FRAGMENT_SEPARATOR_CHAR + fragmentId; + } + } + + public void setResource(Resource resource, String fragmentId) { + super.setResource(resource); + this.fragmentId = fragmentId; + } + + /** + * Sets the resource to the given resource and sets the fragmentId to null. + * @param resource the new resource + */ + public void setResource(Resource resource) { + setResource(resource, null); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/BookProcessor.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/BookProcessor.java new file mode 100644 index 00000000..3cbc296b --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/BookProcessor.java @@ -0,0 +1,27 @@ +package nl.siegmann.epublib.epub; + +import nl.siegmann.epublib.domain.Book; + +/** + * Post-processes a book. + * + * Can be used to clean up a book after reading or before writing. + * + * @author paul + * + */ +public interface BookProcessor { + + /** + * A BookProcessor that returns the input book unchanged. 
+ */ + public BookProcessor IDENTITY_BOOKPROCESSOR = new BookProcessor() { + + @Override + public Book processBook(Book book) { + return book; + } + }; + + Book processBook(Book book); +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/BookProcessorPipeline.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/BookProcessorPipeline.java new file mode 100644 index 00000000..84f797b5 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/BookProcessorPipeline.java @@ -0,0 +1,74 @@ +package nl.siegmann.epublib.epub; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import nl.siegmann.epublib.domain.Book; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A book processor that combines several other bookprocessors + * The processors run in the order of the list; an exception thrown by a processor is logged and the next processor continues with the book as it was before the failing step. + * Fixes coverpage/coverimage. + * Cleans up the XHTML. + * + * @author paul.siegmann + * + */ +public class BookProcessorPipeline implements BookProcessor { + + private Logger log = LoggerFactory.getLogger(BookProcessorPipeline.class); + private List<BookProcessor> bookProcessors; + + public BookProcessorPipeline() { + this(null); + } + + public BookProcessorPipeline(List<BookProcessor> bookProcessingPipeline) { + this.bookProcessors = bookProcessingPipeline; + } + + + @Override + public Book processBook(Book book) { + if (bookProcessors == null) { + return book; + } + for(BookProcessor bookProcessor: bookProcessors) { + try { + book = bookProcessor.processBook(book); + } catch(Exception e) { + log.error(e.getMessage(), e); + } + } + return book; + } + + public void addBookProcessor(BookProcessor bookProcessor) { + if (this.bookProcessors == null) { + bookProcessors = new ArrayList<BookProcessor>(); + } + this.bookProcessors.add(bookProcessor); + } + + public void addBookProcessors(Collection<BookProcessor> bookProcessors) { + if (this.bookProcessors == null) { + this.bookProcessors = new ArrayList<BookProcessor>(); + } + 
this.bookProcessors.addAll(bookProcessors); + } + + + public List<BookProcessor> getBookProcessors() { + return bookProcessors; + } + + + public void setBookProcessingPipeline(List<BookProcessor> bookProcessingPipeline) { + this.bookProcessors = bookProcessingPipeline; + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/DOMUtil.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/DOMUtil.java new file mode 100644 index 00000000..5b157e8b --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/DOMUtil.java @@ -0,0 +1,125 @@ +package nl.siegmann.epublib.epub; + +import java.util.ArrayList; +import java.util.List; + +import nl.siegmann.epublib.util.StringUtil; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; + +/** + * Utility methods for working with the DOM. + * + * @author paul + * + */ +// package +class DOMUtil { + + + /** + * First tries to find the attribute with the given namespace and name (getNamedItemNS); if that finds nothing it looks the attribute up by name without namespace (getNamedItem). + * + * @param element the node whose attributes are searched + * @param namespace the namespace of the attribute + * @param attribute the name of the attribute + * @return the value of the attribute, an empty string if the attribute is not present + */ + public static String getAttribute(Node element, String namespace, String attribute) { + Node node = element.getAttributes().getNamedItemNS(namespace, attribute); + if (node == null) { + node = element.getAttributes().getNamedItem(attribute); + } + return node != null ? node.getNodeValue() : ""; + } + + /** + * Gets all descendant elements of the given parentElement with the given namespace and tagname and returns their text child as a list of String. 
+ * + * @param parentElement the element whose descendants are searched + * @param namespace the namespace of the wanted elements + * @param tagname the local name of the wanted elements + * @return the trimmed text content of every matching element, in document order + */ + public static List<String> getElementsTextChild(Element parentElement, String namespace, String tagname) { + NodeList elements = parentElement.getElementsByTagNameNS(namespace, tagname); + List<String> result = new ArrayList<String>(elements.getLength()); + for(int i = 0; i < elements.getLength(); i++) { + result.add(getTextChildrenContent((Element) elements.item(i))); + } + return result; + } + + /** + * Finds in the current document the first element with the given namespace and elementName and with the given findAttributeName and findAttributeValue. + * It then returns the value of the given resultAttributeName. + * The attribute value is compared ignoring case; elements whose resultAttributeName is blank are skipped. + * @param document the document to search + * @param namespace the namespace of the wanted element + * @param elementName the local name of the wanted element + * @param findAttributeName the name of the attribute that selects the element + * @param findAttributeValue the value the selecting attribute must have + * @param resultAttributeName the name of the attribute whose value is returned + * @return the value of the resultAttributeName of the first matching element, null if there is none + */ + public static String getFindAttributeValue(Document document, String namespace, String elementName, String findAttributeName, String findAttributeValue, String resultAttributeName) { + NodeList metaTags = document.getElementsByTagNameNS(namespace, elementName); + for(int i = 0; i < metaTags.getLength(); i++) { + Element metaElement = (Element) metaTags.item(i); + if(findAttributeValue.equalsIgnoreCase(metaElement.getAttribute(findAttributeName)) + && StringUtil.isNotBlank(metaElement.getAttribute(resultAttributeName))) { + return metaElement.getAttribute(resultAttributeName); + } + } + return null; + } + + /** + * Gets the first element that is a descendant of the parentElement and has the given namespace and tagName + * + * @param parentElement the element whose descendants are searched + * @param namespace the namespace of the wanted element + * @param tagName the local name of the wanted element + * @return the first matching element, null if there is none + */ + public static Element getFirstElementByTagNameNS(Element parentElement, String namespace, String tagName) { + NodeList nodes = parentElement.getElementsByTagNameNS(namespace, tagName); + if(nodes.getLength() == 0) { + return null; + } + return (Element) nodes.item(0); + } + + /** + * The contents of 
all Text nodes that are children of the given parentElement. + * The result is trim()-ed. + * + * The reason for this more complicated procedure instead of just returning the data of the firstChild is that + * when the text is Chinese characters then on Android each Characater is represented in the DOM as + * an individual Text node. + * + * @param parentElement + * @return + */ + public static String getTextChildrenContent(Element parentElement) { + if(parentElement == null) { + return null; + } + StringBuilder result = new StringBuilder(); + NodeList childNodes = parentElement.getChildNodes(); + for (int i = 0; i < childNodes.getLength(); i++) { + Node node = childNodes.item(i); + if ((node == null) || + (node.getNodeType() != Node.TEXT_NODE)) { + continue; + } + result.append(((Text) node).getData()); + } + return result.toString().trim(); + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubProcessorSupport.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubProcessorSupport.java new file mode 100644 index 00000000..70faba6a --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubProcessorSupport.java @@ -0,0 +1,121 @@ +package nl.siegmann.epublib.epub; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.net.URL; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import nl.siegmann.epublib.Constants; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xmlpull.v1.XmlPullParserFactory; +import org.xmlpull.v1.XmlSerializer; + +/** + * Various low-level support methods for reading/writing epubs. 
+ * + * @author paul.siegmann + * + */ +public class EpubProcessorSupport { + + private static final Logger log = LoggerFactory.getLogger(EpubProcessorSupport.class); + + protected static DocumentBuilderFactory documentBuilderFactory; + + static { + init(); + } + + static class EntityResolverImpl implements EntityResolver { + private String previousLocation; + + @Override + public InputSource resolveEntity(String publicId, String systemId) + throws SAXException, IOException { + String resourcePath; + if (systemId.startsWith("http:")) { + URL url = new URL(systemId); + resourcePath = "dtd/" + url.getHost() + url.getPath(); + previousLocation = resourcePath.substring(0, resourcePath.lastIndexOf('/')); + } else { + resourcePath = previousLocation + systemId.substring(systemId.lastIndexOf('/')); + } + + if (this.getClass().getClassLoader().getResource(resourcePath) == null) { + throw new RuntimeException("remote resource is not cached : [" + systemId + "] cannot continue"); + } + + InputStream in = EpubProcessorSupport.class.getClassLoader().getResourceAsStream(resourcePath); + return new InputSource(in); + } + } + + + private static void init() { + EpubProcessorSupport.documentBuilderFactory = DocumentBuilderFactory.newInstance(); + documentBuilderFactory.setNamespaceAware(true); + documentBuilderFactory.setValidating(false); + } + + public static XmlSerializer createXmlSerializer(OutputStream out) throws UnsupportedEncodingException { + return createXmlSerializer(new OutputStreamWriter(out, Constants.CHARACTER_ENCODING)); + } + + public static XmlSerializer createXmlSerializer(Writer out) { + XmlSerializer result = null; + try { + XmlPullParserFactory factory = XmlPullParserFactory.newInstance(); + factory.setValidating(true); + result = factory.newSerializer(); + result.setFeature("http://xmlpull.org/v1/doc/features.html#indent-output", true); + result.setOutput(out); + } catch (Exception e) { + log.error("When creating XmlSerializer: " + e.getClass().getName() + ": 
" + e.getMessage()); + } + return result; + } + + /** + * Gets an EntityResolver that loads dtd's and such from the epublib classpath. + * In order to enable the loading of relative urls the given EntityResolver contains the previousLocation. + * Because of a new EntityResolver is created every time this method is called. + * Fortunately the EntityResolver created uses up very little memory per instance. + * + * @return an EntityResolver that loads dtd's and such from the epublib classpath. + */ + public static EntityResolver getEntityResolver() { + return new EntityResolverImpl(); + } + + public DocumentBuilderFactory getDocumentBuilderFactory() { + return documentBuilderFactory; + } + + /** + * Creates a DocumentBuilder that looks up dtd's and schema's from epublib's classpath. + * + * @return a DocumentBuilder that looks up dtd's and schema's from epublib's classpath. + */ + public static DocumentBuilder createDocumentBuilder() { + DocumentBuilder result = null; + try { + result = documentBuilderFactory.newDocumentBuilder(); + result.setEntityResolver(getEntityResolver()); + } catch (ParserConfigurationException e) { + log.error(e.getMessage()); + } + return result; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubReader.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubReader.java new file mode 100644 index 00000000..340cd7ae --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubReader.java @@ -0,0 +1,159 @@ +package nl.siegmann.epublib.epub; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +import net.sf.jazzlib.ZipFile; +import net.sf.jazzlib.ZipInputStream; +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.*; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.ResourceUtil; +import nl.siegmann.epublib.util.StringUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; 
+import org.w3c.dom.Document; +import org.w3c.dom.Element; + +/** + * Reads an epub file. + * + * @author paul + * + */ +public class EpubReader { + + private static final Logger log = LoggerFactory.getLogger(EpubReader.class); + private BookProcessor bookProcessor = BookProcessor.IDENTITY_BOOKPROCESSOR; + + public Book readEpub(InputStream in) throws IOException { + return readEpub(in, Constants.CHARACTER_ENCODING); + } + + public Book readEpub(ZipInputStream in) throws IOException { + return readEpub(in, Constants.CHARACTER_ENCODING); + } + + public Book readEpub(ZipFile zipfile) throws IOException { + return readEpub(zipfile, Constants.CHARACTER_ENCODING); + } + + /** + * Read epub from inputstream + * + * @param in the inputstream from which to read the epub + * @param encoding the encoding to use for the html files within the epub + * @return the Book as read from the inputstream + * @throws IOException + */ + public Book readEpub(InputStream in, String encoding) throws IOException { + return readEpub(new ZipInputStream(in), encoding); + } + + + + /** + * Reads this EPUB without loading any resources into memory. + * + * @param zipFile the file to load + * @param encoding the encoding for XHTML files + * + * @return this Book without loading all resources into memory. + * @throws IOException + */ + public Book readEpubLazy(ZipFile zipFile, String encoding ) throws IOException { + return readEpubLazy(zipFile, encoding, Arrays.asList(MediatypeService.mediatypes) ); + } + + public Book readEpub(ZipInputStream in, String encoding) throws IOException { + return readEpub(ResourcesLoader.loadResources(in, encoding)); + } + + public Book readEpub(ZipFile in, String encoding) throws IOException { + return readEpub(ResourcesLoader.loadResources(in, encoding)); + } + + /** + * Reads this EPUB without loading all resources into memory. 
+ * + * @param zipFile the file to load + * @param encoding the encoding for XHTML files + * @param lazyLoadedTypes a list of the MediaType to load lazily + * @return this Book without loading all resources into memory. + * @throws IOException + */ + public Book readEpubLazy(ZipFile zipFile, String encoding, List<MediaType> lazyLoadedTypes ) throws IOException { + Resources resources = ResourcesLoader.loadResources(zipFile, encoding, lazyLoadedTypes); + return readEpub(resources); + } + + public Book readEpub(Resources resources) throws IOException{ + return readEpub(resources, new Book()); + } + + public Book readEpub(Resources resources, Book result) throws IOException{ + if (result == null) { + result = new Book(); + } + handleMimeType(result, resources); + String packageResourceHref = getPackageResourceHref(resources); + OpfResource packageResource = processPackageResource(packageResourceHref, result, resources); + result.setOpfResource(packageResource); + Resource ncxResource = processNcxResource(packageResource, result); + result.setNcxResource(ncxResource); + result = postProcessBook(result); + return result; + } + + + private Book postProcessBook(Book book) { + if (bookProcessor != null) { + book = bookProcessor.processBook(book); + } + return book; + } + + private Resource processNcxResource(Resource packageResource, Book book) { + return NCXDocument.read(book, this); + } + + private OpfResource processPackageResource(String packageResourceHref, Book book, Resources resources) throws IOException { + OpfResource packageResource = new OpfResource( + resources.remove(packageResourceHref) + ); + try { + PackageDocumentReader.read(packageResource, this, book, resources); + } catch (Exception e) { + log.error(e.getMessage(), e); + } + return packageResource; + } + + private String getPackageResourceHref(Resources resources) { + String defaultResult = "OEBPS/content.opf"; + String result = defaultResult; + + Resource containerResource = 
resources.remove("META-INF/container.xml"); + if(containerResource == null) { + return result; + } + try { + Document document = ResourceUtil.getAsDocument(containerResource); + Element rootFileElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("rootfiles").item(0)).getElementsByTagName("rootfile").item(0); + result = rootFileElement.getAttribute("full-path"); + } catch (Exception e) { + log.error(e.getMessage(), e); + } + if(StringUtil.isBlank(result)) { + result = defaultResult; + } + return result; + } + + private void handleMimeType(Book result, Resources resources) { + resources.remove("mimetype"); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubWriter.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubWriter.java new file mode 100644 index 00000000..cb1d1eec --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubWriter.java @@ -0,0 +1,186 @@ +package nl.siegmann.epublib.epub; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import nl.siegmann.epublib.domain.OpfResource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xmlpull.v1.XmlSerializer; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.IOUtil; + +/** + * Generates an epub file. Not thread-safe, single use object. 
+ * + * @author paul + * + */ +public class EpubWriter { + + private final static Logger log = LoggerFactory.getLogger(EpubWriter.class); + + // package + static final String EMPTY_NAMESPACE_PREFIX = ""; + + private BookProcessor bookProcessor = BookProcessor.IDENTITY_BOOKPROCESSOR; + + public EpubWriter() { + this(BookProcessor.IDENTITY_BOOKPROCESSOR); + } + + + public EpubWriter(BookProcessor bookProcessor) { + this.bookProcessor = bookProcessor; + } + + + public void write(Book book, OutputStream out) throws IOException { + book = processBook(book); + ZipOutputStream resultStream = new ZipOutputStream(out); + writeMimeType(resultStream); + writeContainer(resultStream); + if( + null == book.getOpfResource() + || OpfResource.DEFAULT_VERSION.equals(book.getOpfResource().getVersion()) + || !book.getTableOfContents().getTocReferences().isEmpty() + ) { + initTOCResource(book); + } + writeResources(book, resultStream); + writePackageDocument(book, resultStream); + resultStream.close(); + } + + private Book processBook(Book book) { + if (bookProcessor != null) { + book = bookProcessor.processBook(book); + } + return book; + } + + private void initTOCResource(Book book) { + Resource tocResource; + try { + tocResource = NCXDocument.createNCXResource(book); + Resource currentTocResource = book.getSpine().getTocResource(); + if (currentTocResource != null) { + book.getResources().remove(currentTocResource.getHref()); + } + book.getSpine().setTocResource(tocResource); + book.getResources().add(tocResource); + } catch (Exception e) { + log.error("Error writing table of contents: " + e.getClass().getName() + ": " + e.getMessage()); + } + } + + + private void writeResources(Book book, ZipOutputStream resultStream) throws IOException { + for(Resource resource: book.getResources().getAll()) { + writeResource(resource, resultStream); + } + } + + /** + * Writes the resource to the resultStream. 
+ * + * @param resource + * @param resultStream + * @throws IOException + */ + private void writeResource(Resource resource, ZipOutputStream resultStream) + throws IOException { + if(resource == null) { + return; + } + try { + resultStream.putNextEntry(new ZipEntry("OEBPS/" + resource.getHref())); + InputStream inputStream = resource.getInputStream(); + IOUtil.copy(inputStream, resultStream); + inputStream.close(); + } catch(Exception e) { + log.error(e.getMessage(), e); + } + } + + + private void writePackageDocument(Book book, ZipOutputStream resultStream) throws IOException { + resultStream.putNextEntry(new ZipEntry("OEBPS/content.opf")); + XmlSerializer xmlSerializer = EpubProcessorSupport.createXmlSerializer(resultStream); + PackageDocumentWriter.write(this, xmlSerializer, book); + xmlSerializer.flush(); +// String resultAsString = result.toString(); +// resultStream.write(resultAsString.getBytes(Constants.ENCODING)); + } + + /** + * Writes the META-INF/container.xml file. + * + * @param resultStream + * @throws IOException + */ + private void writeContainer(ZipOutputStream resultStream) throws IOException { + resultStream.putNextEntry(new ZipEntry("META-INF/container.xml")); + Writer out = new OutputStreamWriter(resultStream); + out.write("<?xml version=\"1.0\"?>\n"); + out.write("<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n"); + out.write("\t<rootfiles>\n"); + out.write("\t\t<rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n"); + out.write("\t</rootfiles>\n"); + out.write("</container>"); + out.flush(); + } + + /** + * Stores the mimetype as an uncompressed file in the ZipOutputStream. 
+ * + * @param resultStream + * @throws IOException + */ + private void writeMimeType(ZipOutputStream resultStream) throws IOException { + ZipEntry mimetypeZipEntry = new ZipEntry("mimetype"); + mimetypeZipEntry.setMethod(ZipEntry.STORED); + byte[] mimetypeBytes = MediatypeService.EPUB.getName().getBytes(); + mimetypeZipEntry.setSize(mimetypeBytes.length); + mimetypeZipEntry.setCrc(calculateCrc(mimetypeBytes)); + resultStream.putNextEntry(mimetypeZipEntry); + resultStream.write(mimetypeBytes); + } + + private long calculateCrc(byte[] data) { + CRC32 crc = new CRC32(); + crc.update(data); + return crc.getValue(); + } + + String getNcxId() { + return "ncx"; + } + + String getNcxHref() { + return "toc.ncx"; + } + + String getNcxMediaType() { + return MediatypeService.NCX.getName(); + } + + public BookProcessor getBookProcessor() { + return bookProcessor; + } + + + public void setBookProcessor(BookProcessor bookProcessor) { + this.bookProcessor = bookProcessor; + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/HtmlProcessor.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/HtmlProcessor.java new file mode 100644 index 00000000..19f2022b --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/HtmlProcessor.java @@ -0,0 +1,10 @@ +package nl.siegmann.epublib.epub; + +import java.io.OutputStream; + +import nl.siegmann.epublib.domain.Resource; + +public interface HtmlProcessor { + + void processHtmlResource(Resource resource, OutputStream out); +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/Main.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/Main.java new file mode 100644 index 00000000..bbfa9f3e --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/Main.java @@ -0,0 +1,5 @@ +package nl.siegmann.epublib.epub; + +public class Main { + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/NCXDocument.java 
b/epublib-core/src/main/java/nl/siegmann/epublib/epub/NCXDocument.java new file mode 100644 index 00000000..14565e23 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/NCXDocument.java @@ -0,0 +1,280 @@ +package nl.siegmann.epublib.epub; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import javax.xml.stream.FactoryConfigurationError; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.Author; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Identifier; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.TOCReference; +import nl.siegmann.epublib.domain.TableOfContents; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.ResourceUtil; +import nl.siegmann.epublib.util.StringUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xmlpull.v1.XmlSerializer; + +/** + * Writes the ncx document as defined by namespace http://www.daisy.org/z3986/2005/ncx/ + * + * @author paul + * + */ +public class NCXDocument { + + public static final String NAMESPACE_NCX = "http://www.daisy.org/z3986/2005/ncx/"; + public static final String PREFIX_NCX = "ncx"; + public static final String NCX_ITEM_ID = "ncx"; + public static final String DEFAULT_NCX_HREF = "toc.ncx"; + public static final String PREFIX_DTB = "dtb"; + + private static final Logger log = LoggerFactory.getLogger(NCXDocument.class); + + private interface NCXTags { + String ncx = "ncx"; + String meta = "meta"; + String navPoint = "navPoint"; + String navMap = "navMap"; + String navLabel = "navLabel"; + String content = "content"; + String 
text = "text"; + String docTitle = "docTitle"; + String docAuthor = "docAuthor"; + String head = "head"; + } + + private interface NCXAttributes { + String src = "src"; + String name = "name"; + String content = "content"; + String id = "id"; + String playOrder = "playOrder"; + String clazz = "class"; + String version = "version"; + } + + private interface NCXAttributeValues { + + String chapter = "chapter"; + String version = "2005-1"; + + } + + public static Resource read(Book book, EpubReader epubReader) { + Resource ncxResource = null; + if(book.getSpine().getTocResource() == null) { + log.error("Book does not contain a table of contents file"); + return ncxResource; + } + try { + ncxResource = book.getSpine().getTocResource(); + if(ncxResource == null) { + return ncxResource; + } + Document ncxDocument = ResourceUtil.getAsDocument(ncxResource); + Element navMapElement = DOMUtil.getFirstElementByTagNameNS(ncxDocument.getDocumentElement(), NAMESPACE_NCX, NCXTags.navMap); + if (null != navMapElement) { + TableOfContents tableOfContents = new TableOfContents(readTOCReferences(navMapElement.getChildNodes(), book)); + book.setTableOfContents(tableOfContents); + } + } catch (Exception e) { + log.error(e.getMessage(), e); + } + return ncxResource; + } + + private static List<TOCReference> readTOCReferences(NodeList navpoints, Book book) { + if(navpoints == null) { + return new ArrayList<TOCReference>(); + } + List<TOCReference> result = new ArrayList<TOCReference>(navpoints.getLength()); + for(int i = 0; i < navpoints.getLength(); i++) { + Node node = navpoints.item(i); + if (node.getNodeType() != Document.ELEMENT_NODE) { + continue; + } + if (! 
(node.getLocalName().equals(NCXTags.navPoint))) { + continue; + } + TOCReference tocReference = readTOCReference((Element) node, book); + result.add(tocReference); + } + return result; + } + + static TOCReference readTOCReference(Element navpointElement, Book book) { + String label = readNavLabel(navpointElement); + String tocResourceRoot = StringUtil.substringBeforeLast(book.getSpine().getTocResource().getHref(), '/'); + if (tocResourceRoot.length() == book.getSpine().getTocResource().getHref().length()) { + tocResourceRoot = ""; + } else { + tocResourceRoot = tocResourceRoot + "/"; + } + String reference = StringUtil.collapsePathDots(tocResourceRoot + readNavReference(navpointElement)); + String href = StringUtil.substringBefore(reference, Constants.FRAGMENT_SEPARATOR_CHAR); + String fragmentId = StringUtil.substringAfter(reference, Constants.FRAGMENT_SEPARATOR_CHAR); + Resource resource = book.getResources().getByHref(href); + if (resource == null) { + log.error("Resource with href " + href + " in NCX document not found"); + } + TOCReference result = new TOCReference(label, resource, fragmentId); + List<TOCReference> childTOCReferences = readTOCReferences(navpointElement.getChildNodes(), book); + result.setChildren(childTOCReferences); + return result; + } + + private static String readNavReference(Element navpointElement) { + Element contentElement = DOMUtil.getFirstElementByTagNameNS(navpointElement, NAMESPACE_NCX, NCXTags.content); + String result = DOMUtil.getAttribute(contentElement, NAMESPACE_NCX, NCXAttributes.src); + try { + result = URLDecoder.decode(result, Constants.CHARACTER_ENCODING); + } catch (UnsupportedEncodingException e) { + log.error(e.getMessage()); + } + return result; + } + + private static String readNavLabel(Element navpointElement) { + Element navLabel = DOMUtil.getFirstElementByTagNameNS(navpointElement, NAMESPACE_NCX, NCXTags.navLabel); + return DOMUtil.getTextChildrenContent(DOMUtil.getFirstElementByTagNameNS(navLabel, NAMESPACE_NCX, 
NCXTags.text)); + } + + + public static void write(EpubWriter epubWriter, Book book, ZipOutputStream resultStream) throws IOException { + resultStream.putNextEntry(new ZipEntry(book.getSpine().getTocResource().getHref())); + XmlSerializer out = EpubProcessorSupport.createXmlSerializer(resultStream); + write(out, book); + out.flush(); + } + + + /** + * Generates a resource containing an xml document containing the table of contents of the book in ncx format. + * + * @param xmlSerializer the serializer used + * @param book the book to serialize + * + * @throws FactoryConfigurationError + * @throws IOException + * @throws IllegalStateException + * @throws IllegalArgumentException + */ + public static void write(XmlSerializer xmlSerializer, Book book) throws IllegalArgumentException, IllegalStateException, IOException { + write(xmlSerializer, book.getMetadata().getIdentifiers(), book.getTitle().getValue(), book.getMetadata().getAuthors(), book.getTableOfContents()); + } + + public static Resource createNCXResource(Book book) throws IllegalArgumentException, IllegalStateException, IOException { + return createNCXResource(book.getMetadata().getIdentifiers(), book.getTitle().getValue(), book.getMetadata().getAuthors(), book.getTableOfContents()); + } + public static Resource createNCXResource(List<Identifier> identifiers, String title, List<Author> authors, TableOfContents tableOfContents) throws IllegalArgumentException, IllegalStateException, IOException { + ByteArrayOutputStream data = new ByteArrayOutputStream(); + XmlSerializer out = EpubProcessorSupport.createXmlSerializer(data); + write(out, identifiers, title, authors, tableOfContents); + Resource resource = new Resource(NCX_ITEM_ID, data.toByteArray(), DEFAULT_NCX_HREF, MediatypeService.NCX); + return resource; + } + + public static void write(XmlSerializer serializer, List<Identifier> identifiers, String title, List<Author> authors, TableOfContents tableOfContents) throws IllegalArgumentException, 
IllegalStateException, IOException { + serializer.startDocument(Constants.CHARACTER_ENCODING, false); + serializer.setPrefix(EpubWriter.EMPTY_NAMESPACE_PREFIX, NAMESPACE_NCX); + serializer.startTag(NAMESPACE_NCX, NCXTags.ncx); +// serializer.writeNamespace("ncx", NAMESPACE_NCX); +// serializer.attribute("xmlns", NAMESPACE_NCX); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.version, NCXAttributeValues.version); + serializer.startTag(NAMESPACE_NCX, NCXTags.head); + + for(Identifier identifier: identifiers) { + writeMetaElement(identifier.getScheme().getName(), identifier.getValue(), serializer); + } + + writeMetaElement("generator", Constants.EPUBLIB_GENERATOR_NAME, serializer); + writeMetaElement("depth", String.valueOf(tableOfContents.calculateDepth()), serializer); + writeMetaElement("totalPageCount", "0", serializer); + writeMetaElement("maxPageNumber", "0", serializer); + + serializer.endTag(NAMESPACE_NCX, "head"); + + serializer.startTag(NAMESPACE_NCX, NCXTags.docTitle); + serializer.startTag(NAMESPACE_NCX, NCXTags.text); + // write the first title + serializer.text(StringUtil.defaultIfNull(title)); + serializer.endTag(NAMESPACE_NCX, NCXTags.text); + serializer.endTag(NAMESPACE_NCX, NCXTags.docTitle); + + for(Author author: authors) { + serializer.startTag(NAMESPACE_NCX, NCXTags.docAuthor); + serializer.startTag(NAMESPACE_NCX, NCXTags.text); + serializer.text(author.getLastname() + ", " + author.getFirstname()); + serializer.endTag(NAMESPACE_NCX, NCXTags.text); + serializer.endTag(NAMESPACE_NCX, NCXTags.docAuthor); + } + + serializer.startTag(NAMESPACE_NCX, NCXTags.navMap); + writeNavPoints(tableOfContents.getTocReferences(), 1, serializer); + serializer.endTag(NAMESPACE_NCX, NCXTags.navMap); + + serializer.endTag(NAMESPACE_NCX, "ncx"); + serializer.endDocument(); + } + + + private static void writeMetaElement(String dtbName, String content, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { 
+ serializer.startTag(NAMESPACE_NCX, NCXTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.name, PREFIX_DTB + ":" + dtbName); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.content, content); + serializer.endTag(NAMESPACE_NCX, NCXTags.meta); + } + + private static int writeNavPoints(List<TOCReference> tocReferences, int playOrder, + XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { + for(TOCReference tocReference: tocReferences) { + if (tocReference.getResource() == null) { + playOrder = writeNavPoints(tocReference.getChildren(), playOrder, serializer); + continue; + } + writeNavPointStart(tocReference, playOrder, serializer); + playOrder++; + if(! tocReference.getChildren().isEmpty()) { + playOrder = writeNavPoints(tocReference.getChildren(), playOrder, serializer); + } + writeNavPointEnd(tocReference, serializer); + } + return playOrder; + } + + + private static void writeNavPointStart(TOCReference tocReference, int playOrder, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { + serializer.startTag(NAMESPACE_NCX, NCXTags.navPoint); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.id, "navPoint-" + playOrder); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.playOrder, String.valueOf(playOrder)); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.clazz, NCXAttributeValues.chapter); + serializer.startTag(NAMESPACE_NCX, NCXTags.navLabel); + serializer.startTag(NAMESPACE_NCX, NCXTags.text); + serializer.text(tocReference.getTitle()); + serializer.endTag(NAMESPACE_NCX, NCXTags.text); + serializer.endTag(NAMESPACE_NCX, NCXTags.navLabel); + serializer.startTag(NAMESPACE_NCX, NCXTags.content); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, NCXAttributes.src, tocReference.getCompleteHref()); + serializer.endTag(NAMESPACE_NCX, NCXTags.content); + } + 
/**
 * Functionality shared by the PackageDocumentReader and the PackageDocumentWriter.
 * Holds only constants: namespace URIs, prefixes and the element, attribute and
 * value names used in the package document.
 *
 * @author paul
 *
 */
public class PackageDocumentBase {
    public static final String BOOK_ID_ID = "BookId";
    public static final String NAMESPACE_OPF = "http://www.idpf.org/2007/opf";
    public static final String NAMESPACE_DUBLIN_CORE = "http://purl.org/dc/elements/1.1/";
    public static final String PREFIX_DUBLIN_CORE = "dc";
    public static final String PREFIX_OPF = "opf";
    // A date pattern string; NOTE(review): presumably in SimpleDateFormat syntax, confirm against its users.
    public static final String dateFormat = "yyyy-MM-dd";

    /** Local names of elements that are looked up in NAMESPACE_DUBLIN_CORE. */
    protected interface DCTags {
        String title = "title";
        String creator = "creator";
        String subject = "subject";
        String description = "description";
        String publisher = "publisher";
        String contributor = "contributor";
        String date = "date";
        String type = "type";
        String format = "format";
        String identifier = "identifier";
        String source = "source";
        String language = "language";
        String relation = "relation";
        String coverage = "coverage";
        String rights = "rights";
    }

    /** Attribute names; NOTE(review): presumably those used on the Dublin Core elements, confirm against callers. */
    protected interface DCAttributes {
        String scheme = "scheme";
        String id = "id";
    }

    /** Local names of elements that are looked up in NAMESPACE_OPF. */
    protected interface OPFTags {
        String metadata = "metadata";
        String meta = "meta";
        String manifest = "manifest";
        // "package" is a reserved word in Java, hence the different constant name.
        String packageTag = "package";
        String itemref = "itemref";
        String spine = "spine";
        String reference = "reference";
        String guide = "guide";
        String item = "item";
    }

    /** Attribute names; NOTE(review): presumably those used on the OPF elements, confirm against callers. */
    protected interface OPFAttributes {
        String uniqueIdentifier = "unique-identifier";
        String idref = "idref";
        String name = "name";
        String content = "content";
        String type = "type";
        String href = "href";
        String linear = "linear";
        String event = "event";
        String role = "role";
        String file_as = "file-as";
        String id = "id";
        String media_type = "media-type";
        String title = "title";
        String toc = "toc";
        String version = "version";
        String scheme = "scheme";
        String property = "property";
        String properties = "properties";
        String refines = "refines";
        String identifier_type = "identifier-type";
        String title_type = "title-type";
        String prefix = "prefix";
    }

    /** Fixed values; NOTE(review): presumably values of the attributes above, confirm against callers. */
    protected interface OPFValues {
        String meta_cover = "cover";
        String reference_cover = "cover";
        String no = "no";
        String generator = "generator";
        String nav = "nav";
        String svg = "svg";
        String scripted = "scripted";
    }
}
+ * + * @author paul + */ +// package +class PackageDocumentMetadataReader extends PackageDocumentBase { + + private static final Logger log = LoggerFactory.getLogger(PackageDocumentMetadataReader.class); + + public static Metadata readMetadata(Document packageDocument) { + Metadata result = new Metadata(); + Element metadataElement = DOMUtil.getFirstElementByTagNameNS(packageDocument.getDocumentElement(), NAMESPACE_OPF, OPFTags.metadata); + if (metadataElement == null) { + log.error("Package does not contain element " + OPFTags.metadata); + return result; + } + result.setTitles(readTitles(metadataElement)); + result.setPublishers(DOMUtil.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE, DCTags.publisher)); + result.setDescriptions(DOMUtil.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE, DCTags.description)); + result.setRights(DOMUtil.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE, DCTags.rights)); + result.setTypes(DOMUtil.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE, DCTags.type)); + result.setSubjects(DOMUtil.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE, DCTags.subject)); + result.setIdentifiers(readIdentifiers(metadataElement)); + result.setAuthors(readCreators(metadataElement)); + result.setContributors(readContributors(metadataElement)); + result.setDates(readDates(metadataElement)); + result.setOtherProperties(readOtherProperties(metadataElement)); + result.setMetaAttributes(readMetaProperties(metadataElement)); + Element languageTag = DOMUtil.getFirstElementByTagNameNS(metadataElement, NAMESPACE_DUBLIN_CORE, DCTags.language); + if (languageTag != null) { + result.setLanguage(DOMUtil.getTextChildrenContent(languageTag)); + } + + + return result; + } + + private static List<Title> readTitles(Element metadataElement) { + List<Title> result = new ArrayList<>(); + NodeList titleElements = metadataElement.getOwnerDocument().getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.title); + + for (int 
j = 0; j < titleElements.getLength(); j++) { + Node titleElement = titleElements.item(j); + result.add( + new Title( + titleElement.getTextContent(), + findTitleType(titleElement, metadataElement) + ) + ); + } + return result; + } + + private static String findTitleType(Node titleElement, Element metadataElement) { + // Try to find redefine as used in epub 3 + NodeList metaElements = metadataElement.getOwnerDocument().getElementsByTagNameNS("*", "meta"); + for (int j = 0; j < metaElements.getLength(); j++) { + Node metaElement = metaElements.item(j); + Node refines = metaElement.getAttributes().getNamedItem(OPFAttributes.refines); + Node property = metaElement.getAttributes().getNamedItem(OPFAttributes.property); + if ( + null != refines + && null != property + && refines.getNodeValue().equals( + "#" + DOMUtil.getAttribute( + titleElement, + EpubWriter.EMPTY_NAMESPACE_PREFIX, + OPFAttributes.id + ) + ) + ) { + return metaElement.getTextContent(); + } + } + return null; + } + + /** + * consumes meta tags that have a property attribute as defined in the standard. For example: + * <meta property="rendition:layout">pre-paginated</meta> + * + * @param metadataElement + * @return + */ + private static Map<QName, String> readOtherProperties(Element metadataElement) { + Map<QName, String> result = new HashMap<QName, String>(); + + NodeList metaTags = metadataElement.getElementsByTagName(OPFTags.meta); + for (int i = 0; i < metaTags.getLength(); i++) { + Node metaNode = metaTags.item(i); + Node property = metaNode.getAttributes().getNamedItem(OPFAttributes.property); + Node refines = metaNode.getAttributes().getNamedItem(OPFAttributes.refines); + if (property != null && refines == null) { + String name = property.getNodeValue(); + String value = metaNode.getTextContent(); + result.put(new QName(name), value); + } + } + + return result; + } + + /** + * consumes meta tags that have a property attribute as defined in the standard. 
For example: + * <meta property="rendition:layout">pre-paginated</meta> + * + * @param metadataElement + * @return + */ + private static Map<String, String> readMetaProperties(Element metadataElement) { + Map<String, String> result = new HashMap<String, String>(); + + NodeList metaTags = metadataElement.getElementsByTagName(OPFTags.meta); + for (int i = 0; i < metaTags.getLength(); i++) { + Element metaElement = (Element) metaTags.item(i); + String name = metaElement.getAttribute(OPFAttributes.name); + String value = metaElement.getAttribute(OPFAttributes.content); + result.put(name, value); + } + + return result; + } + + private static String getBookIdId(Document document) { + Element packageElement = DOMUtil.getFirstElementByTagNameNS(document.getDocumentElement(), NAMESPACE_OPF, OPFTags.packageTag); + if (packageElement == null) { + return null; + } + String result = packageElement.getAttributeNS(NAMESPACE_OPF, OPFAttributes.uniqueIdentifier); + return result; + } + + private static List<Author> readCreators(Element metadataElement) { + return readAuthors(DCTags.creator, metadataElement); + } + + private static List<Author> readContributors(Element metadataElement) { + return readAuthors(DCTags.contributor, metadataElement); + } + + private static List<Author> readAuthors(String authorTag, Element metadataElement) { + NodeList elements = metadataElement.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, authorTag); + List<Author> result = new ArrayList<Author>(elements.getLength()); + for (int i = 0; i < elements.getLength(); i++) { + Element authorElement = (Element) elements.item(i); + Author author = createAuthor(authorElement); + if (author != null) { + result.add(author); + } + } + return result; + + } + + private static List<Date> readDates(Element metadataElement) { + NodeList elements = metadataElement.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.date); + List<Date> result = new ArrayList<Date>(elements.getLength()); + for (int i = 0; i < 
elements.getLength(); i++) { + Element dateElement = (Element) elements.item(i); + Date date; + try { + date = new Date(DOMUtil.getTextChildrenContent(dateElement), dateElement.getAttributeNS(NAMESPACE_OPF, OPFAttributes.event)); + result.add(date); + } catch (IllegalArgumentException e) { + log.error(e.getMessage()); + } + } + return result; + + } + + private static Author createAuthor(Element authorElement) { + String authorString = DOMUtil.getTextChildrenContent(authorElement); + if (StringUtil.isBlank(authorString)) { + return null; + } + int spacePos = authorString.lastIndexOf(' '); + Author result; + if (spacePos < 0) { + result = new Author(authorString); + } else { + result = new Author(authorString.substring(0, spacePos), authorString.substring(spacePos + 1)); + } + + String role = DOMUtil.getAttribute(authorElement, NAMESPACE_OPF, OPFAttributes.role); + if (StringUtil.isNotBlank(role)) { + result.setRole(role); + } else { + // Try to find redefine as used in epub 3 + NodeList metaElements = authorElement.getOwnerDocument().getElementsByTagNameNS("*", "meta"); + for (int j = 0; j < metaElements.getLength(); j++) { + Node metaElement = metaElements.item(j); + Node refines = metaElement.getAttributes().getNamedItem(OPFAttributes.refines); + Node property = metaElement.getAttributes().getNamedItem(OPFAttributes.property); + Node schemeNode = metaElement.getAttributes().getNamedItem(OPFAttributes.scheme); + if ( + null != refines + && null != property + && null != schemeNode + && refines.getNodeValue().equals("#" + authorElement.getAttribute("id")) + && OPFAttributes.role.equals(property.getNodeValue()) + ) { + result.setRole(metaElement.getTextContent()); + result.setScheme(new Scheme(schemeNode.getNodeValue())); + } + } + } + return result; + } + + + private static List<Identifier> readIdentifiers(Element metadataElement) { + NodeList identifierElements = metadataElement.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + if 
(identifierElements.getLength() == 0) { + log.error("Package does not contain element " + DCTags.identifier); + return new ArrayList<Identifier>(); + } + String bookIdId = getBookIdId(metadataElement.getOwnerDocument()); + List<Identifier> result = new ArrayList<Identifier>(identifierElements.getLength()); + for (int i = 0; i < identifierElements.getLength(); i++) { + Element identifierElement = (Element) identifierElements.item(i); + Scheme scheme = new Scheme(identifierElement.getAttributeNS(NAMESPACE_OPF, DCAttributes.scheme)); + if (StringUtil.isBlank(scheme.getName())) { + //Try to find redefine meta element as used in opf version 3 + NodeList metaElements = identifierElement.getOwnerDocument().getElementsByTagNameNS("*", "meta"); + for (int j = 0; j < metaElements.getLength(); j++) { + Node metaElement = metaElements.item(j); + Node refines = metaElement.getAttributes().getNamedItem(OPFAttributes.refines); + Node property = metaElement.getAttributes().getNamedItem(OPFAttributes.property); + Node schemeNode = metaElement.getAttributes().getNamedItem(OPFAttributes.scheme); + if ( + null != refines + && null != property + && null != scheme + && refines.getNodeValue().equals("#" + identifierElement.getAttribute("id")) + && "identifier-type".equals(property.getNodeValue()) + ) { + scheme = new Scheme(schemeNode.getNodeValue(), metaElement.getTextContent()); + } + } + } + String identifierValue = DOMUtil.getTextChildrenContent(identifierElement); + if (StringUtil.isBlank(identifierValue)) { + continue; + } + Identifier identifier = new Identifier(scheme, identifierValue); + if (identifierElement.getAttribute("id").equals(bookIdId)) { + identifier.setBookId(true); + } + result.add(identifier); + } + return result; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentMetadataWriter.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentMetadataWriter.java new file mode 100644 index 00000000..dab5b97f --- /dev/null 
+++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentMetadataWriter.java @@ -0,0 +1,284 @@ +package nl.siegmann.epublib.epub; + +import java.io.IOException; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.xml.namespace.QName; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.*; +import nl.siegmann.epublib.util.StringUtil; + +import org.xmlpull.v1.XmlSerializer; + +public class PackageDocumentMetadataWriter extends PackageDocumentBase { + + + /** + * Writes the book's metadata. + * + * @param book + * @param serializer + * @throws IOException + * @throws IllegalStateException + * @throws IllegalArgumentException + */ + public static void writeMetaData(Book book, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { + serializer.setPrefix(PREFIX_DUBLIN_CORE, NAMESPACE_DUBLIN_CORE); + + serializer.startTag(NAMESPACE_OPF, OPFTags.metadata); + + serializer.setPrefix(PREFIX_DUBLIN_CORE, NAMESPACE_DUBLIN_CORE); + serializer.setPrefix(PREFIX_OPF, NAMESPACE_OPF); + + if(isEpub3(book)) { + writeIdentifiersEpub3(book.getMetadata().getIdentifiers(), serializer); + } else { + writeIdentifiersEpub2(book.getMetadata().getIdentifiers(), serializer); + } + writeTitles(book.getMetadata().getTitles(), serializer); + writeSimpleMetdataElements(DCTags.subject, book.getMetadata().getSubjects(), serializer); + writeSimpleMetdataElements(DCTags.description, book.getMetadata().getDescriptions(), serializer); + writeSimpleMetdataElements(DCTags.publisher, book.getMetadata().getPublishers(), serializer); + writeSimpleMetdataElements(DCTags.type, book.getMetadata().getTypes(), serializer); + writeSimpleMetdataElements(DCTags.rights, book.getMetadata().getRights(), serializer); + + // write authors + int countAuthors = 1; + for(Author author: book.getMetadata().getAuthors()) { + if(isEpub3(book)){ + writeAuthorEpub3Syntax(serializer, author, DCTags.creator, 
countAuthors++); + } else { + writeAuthorEpub2Syntax(serializer, author, DCTags.creator); + } + } + + // write contributors + countAuthors = 1; + for(Author author: book.getMetadata().getContributors()) { + if(isEpub3(book)){ + writeAuthorEpub3Syntax(serializer, author, DCTags.contributor, countAuthors++); + } else { + writeAuthorEpub2Syntax(serializer, author, DCTags.contributor); + } + } + + // write dates + for (Date date: book.getMetadata().getDates()) { + serializer.setPrefix(PREFIX_OPF, NAMESPACE_OPF); + serializer.startTag(NAMESPACE_DUBLIN_CORE, DCTags.date); + if (date.getEvent() != null) { + serializer.attribute(NAMESPACE_OPF, OPFAttributes.event, date.getEvent().toString()); + } + serializer.text(date.getValue()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, DCTags.date); + } + + // write language + if(StringUtil.isNotBlank(book.getMetadata().getLanguage())) { + serializer.startTag(NAMESPACE_DUBLIN_CORE, "language"); + serializer.text(book.getMetadata().getLanguage()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, "language"); + } + + // write other properties + if(book.getMetadata().getOtherProperties() != null) { + for(Map.Entry<QName, String> mapEntry: book.getMetadata().getOtherProperties().entrySet()) { + String namespaceURI = mapEntry.getKey().getNamespaceURI(); + serializer.startTag( + StringUtil.isNotBlank(namespaceURI) ? namespaceURI : NAMESPACE_OPF, + OPFTags.meta + ); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.property, mapEntry.getKey().getLocalPart()); + serializer.text(mapEntry.getValue()); + serializer.endTag( + StringUtil.isNotBlank(namespaceURI) ? 
namespaceURI : NAMESPACE_OPF, + OPFTags.meta + ); + + } + } + + // write coverimage + if(book.getCoverImage() != null) { // write the cover image + serializer.startTag(NAMESPACE_OPF, OPFTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.name, OPFValues.meta_cover); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.content, book.getCoverImage().getId()); + serializer.endTag(NAMESPACE_OPF, OPFTags.meta); + } + + // write generator + serializer.startTag(NAMESPACE_OPF, OPFTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.name, OPFValues.generator); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.content, Constants.EPUBLIB_GENERATOR_NAME); + serializer.endTag(NAMESPACE_OPF, OPFTags.meta); + + serializer.endTag(NAMESPACE_OPF, OPFTags.metadata); + } + + private static void writeTitles(List<Title> titles, final XmlSerializer serializer) throws IOException { + int counter = 0; + for (Title title : titles) { + writeTitle(title, serializer, counter++); + } + } + + private static void writeTitle(Title title, XmlSerializer serializer, int counter) throws IOException { + String titleId = DCTags.title + counter; + serializer.startTag(NAMESPACE_DUBLIN_CORE, DCTags.title); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.id, titleId); + serializer.text(title.getValue()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, DCTags.title); + if(StringUtil.isNotBlank(title.getType())){ + serializer.startTag(NAMESPACE_OPF, OPFTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, "refines", "#" + titleId); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.property, OPFAttributes.title_type); + serializer.text(title.getType()); + serializer.endTag(NAMESPACE_OPF, OPFTags.meta); + } + } + + private static boolean isEpub3(Book book) { + return null != book.getOpfResource() && book.getOpfResource().getVersion().equals("3.0"); + } 
+ + private static void writeAuthorEpub2Syntax(XmlSerializer serializer, Author author, String creator) throws IOException { + serializer.setPrefix(PREFIX_OPF, NAMESPACE_OPF); + serializer.startTag(NAMESPACE_DUBLIN_CORE, creator); + if(null != author.getRelator()) { + serializer.attribute(NAMESPACE_OPF, OPFAttributes.role, author.getRelator().getCode()); + } + serializer.attribute(NAMESPACE_OPF, OPFAttributes.file_as, author.getLastname() + ", " + author.getFirstname()); + serializer.text(author.getFirstname() + " " + author.getLastname()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, creator); + } + + private static void writeAuthorEpub3Syntax(XmlSerializer serializer, Author author, String creator, int countAuthors) throws IOException { + String authorId = creator + countAuthors; + serializer.startTag(NAMESPACE_DUBLIN_CORE, creator); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.id, authorId); + serializer.text(author.getFirstname() + " " + author.getLastname()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, creator); + + if(!( + null == author.getScheme() + && (null == author.getRelator() || Relator.AUTHOR.equals(author.getRelator().getCode())) + )){ + serializer.startTag(NAMESPACE_OPF, OPFTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, "refines", "#" + authorId); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.property, OPFAttributes.role); + if(null != author.getScheme()) { + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.scheme, author.getScheme().getName()); + } + serializer.text(author.getRelator().getCode()); + serializer.endTag(NAMESPACE_OPF, OPFTags.meta); + } + + + serializer.startTag(NAMESPACE_OPF, OPFTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, "refines", "#" + authorId); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.property, OPFAttributes.file_as); + serializer.text(author.getLastname() + ", " + 
author.getFirstname()); + serializer.endTag(NAMESPACE_OPF, OPFTags.meta); + + } + + private static void writeSimpleMetdataElements(String tagName, List<String> values, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { + for(String value: values) { + if (StringUtil.isBlank(value)) { + continue; + } + serializer.startTag(NAMESPACE_DUBLIN_CORE, tagName); + serializer.text(value); + serializer.endTag(NAMESPACE_DUBLIN_CORE, tagName); + } + } + + + /** + * Writes out the complete list of Identifiers to the package document. + * The first identifier for which the bookId is true is made the bookId identifier. + * If no identifier has bookId == true then the first bookId identifier is written as the primary. + * + * @param identifiers + * @param serializer + * @throws IOException + * @throws IllegalStateException + * @throws IllegalArgumentException + * @ + */ + private static void writeIdentifiersEpub2(List<Identifier> identifiers, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { + Identifier bookIdIdentifier = Identifier.getBookIdIdentifier(identifiers); + if(bookIdIdentifier == null) { + return; + } + + serializer.startTag(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, DCAttributes.id, BOOK_ID_ID); + serializer.attribute(NAMESPACE_OPF, OPFAttributes.scheme, bookIdIdentifier.getScheme().getName()); + serializer.text(bookIdIdentifier.getValue()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + + for(Identifier identifier: identifiers.subList(1, identifiers.size())) { + if(identifier == bookIdIdentifier) { + continue; + } + serializer.startTag(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + if(null != identifier.getScheme() && StringUtil.isNotBlank(identifier.getScheme().getName())) { + serializer.attribute(NAMESPACE_OPF, "scheme", identifier.getScheme().getName()); + } + serializer.text(identifier.getValue()); + 
serializer.endTag(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + } + } + + /** + * Writes out the complete list of Identifiers to the package document. + * The first identifier for which the bookId is true is made the bookId identifier. + * If no identifier has bookId == true then the first bookId identifier is written as the primary. + * + * @param identifiers + * @param serializer + * @throws IOException + * @throws IllegalStateException + * @throws IllegalArgumentException + * @ + */ + private static void writeIdentifiersEpub3(List<Identifier> identifiers, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException { + Identifier bookIdIdentifier = Identifier.getBookIdIdentifier(identifiers); + if(bookIdIdentifier == null) { + return; + } + + writeIdentifier(serializer, bookIdIdentifier, 0); + + int idCount = 1; + for(Identifier identifier: identifiers.subList(1, identifiers.size())) { + if(identifier == bookIdIdentifier) { + continue; + } + writeIdentifier(serializer, bookIdIdentifier, idCount++); + } + } + + private static void writeIdentifier(XmlSerializer serializer, Identifier bookIdIdentifier, int counter) throws IOException { + String bookId = (counter > 0) ? 
BOOK_ID_ID.concat(String.valueOf(counter)) : BOOK_ID_ID; + serializer.startTag(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, DCAttributes.id, bookId); + serializer.text(bookIdIdentifier.getValue()); + serializer.endTag(NAMESPACE_DUBLIN_CORE, DCTags.identifier); + + String schemeValue = bookIdIdentifier.getScheme().getValue(); + if(StringUtil.isNotBlank(schemeValue)) { + serializer.startTag(NAMESPACE_OPF, OPFTags.meta); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.refines, "#" + bookId); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.property, OPFAttributes.identifier_type); + serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.scheme, bookIdIdentifier.getScheme().getName()); + serializer.text(schemeValue); + serializer.endTag(NAMESPACE_OPF, OPFTags.meta); + } + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentReader.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentReader.java new file mode 100644 index 00000000..b5b2f6d8 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentReader.java @@ -0,0 +1,395 @@ +package nl.siegmann.epublib.epub; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.*; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.ResourceUtil; +import nl.siegmann.epublib.util.StringUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.util.*; + +/** + * Reads the opf package document as defined by namespace http://www.idpf.org/2007/opf + * + * 
@author paul
+ */
+public class PackageDocumentReader extends PackageDocumentBase {
+
+    private static final Logger log = LoggerFactory.getLogger(PackageDocumentReader.class);
+    private static final String[] POSSIBLE_NCX_ITEM_IDS = new String[]{"toc", "ncx", "ncxtoc"};
+
+
+    /**
+     * Fills the book from the package document held by the given package resource.
+     * <p>
+     * In order: resource hrefs are made relative to the package document, version and prefix
+     * are copied onto the package resource, then guide, manifest, cover, metadata and spine
+     * are read. When no cover page was found the first spine entry becomes the cover page.
+     *
+     * @param packageResource the resource holding the package document
+     * @param epubReader      passed through to the guide and manifest readers
+     * @param book            the book to fill
+     * @param resources       all resources loaded from the epub
+     */
+    public static void read(OpfResource packageResource, EpubReader epubReader, Book book, Resources resources) throws UnsupportedEncodingException, SAXException, IOException, ParserConfigurationException {
+        final Document opfDocument = ResourceUtil.getAsDocument(packageResource);
+        final String opfHref = packageResource.getHref();
+        final Resources packageRelativeResources = fixHrefs(opfHref, resources);
+
+        if (opfDocument != null) {
+            packageResource.setVersion(getOpfVersion(opfDocument));
+            packageResource.setPrefix(getOpfPrefix(opfDocument));
+        }
+
+        readGuide(opfDocument, epubReader, book, packageRelativeResources);
+
+        // Books sometimes use non-identifier ids. The manifest reader records here which id
+        // each of them ended up with, and the spine reader uses it to resolve its references.
+        final Map<String, String> manifestIdMapping = new HashMap<String, String>();
+        final Resources manifestResources = readManifest(opfDocument, opfHref, epubReader, packageRelativeResources, manifestIdMapping);
+        book.setResources(manifestResources);
+
+        readCover(opfDocument, book);
+        book.setMetadata(PackageDocumentMetadataReader.readMetadata(opfDocument));
+        book.setSpine(readSpine(opfDocument, book.getResources(), manifestIdMapping));
+
+        // No cover page found anywhere: promote the first page of the book, if there is one
+        if (book.getCoverPage() == null) {
+            if (book.getSpine().size() > 0) {
+                book.setCoverPage(book.getSpine().getResource(0));
+            }
+        }
+    }
+
+    private static String getOpfVersion(Document packageDocument) {
+        NodeList packageNodes = packageDocument.getElementsByTagNameNS("*", "package");
+        if (packageNodes.getLength() <= 0) return null;
+        Node packageNode = packageNodes.item(0);
+        if (!packageNode.hasAttributes()) return null;
+        Node versionNode = packageNode.getAttributes().getNamedItem("version");
+        if (null == versionNode) return null;
+        return
versionNode.getNodeValue();
+    }
+
+    /**
+     * Looks up the "prefix" attribute of the first "package" element, whatever its namespace.
+     *
+     * @param packageDocument the parsed package document
+     * @return the attribute value, or null when the element or the attribute is missing
+     */
+    private static String getOpfPrefix(Document packageDocument) {
+        NodeList candidates = packageDocument.getElementsByTagNameNS("*", "package");
+        if (candidates.getLength() > 0) {
+            Node packageNode = candidates.item(0);
+            if (packageNode.hasAttributes()) {
+                Node prefixAttribute = packageNode.getAttributes().getNamedItem("prefix");
+                if (prefixAttribute != null) {
+                    return prefixAttribute.getNodeValue();
+                }
+            }
+        }
+        return null;
+    }
+
+//    private static Resource readCoverImage(Element metadataElement, Resources resources) {
+//        String coverResourceId = DOMUtil.getFindAttributeValue(metadataElement.getOwnerDocument(), NAMESPACE_OPF, OPFTags.meta, OPFAttributes.name, OPFValues.meta_cover, OPFAttributes.content);
+//        if (StringUtil.isBlank(coverResourceId)) {
+//            return null;
+//        }
+//        Resource coverResource = resources.getByIdOrHref(coverResourceId);
+//        return coverResource;
+//    }
+
+
+    /**
+     * Reads the manifest containing the resource ids, hrefs and mediatypes.
+     *
+     * @param packageDocument the parsed package document
+     * @param packageHref     the href of the package document
+     * @param epubReader      the reader that drives the parse
+     * @param resources       the loaded resources; those named by the manifest are removed from it
+     * @param idMapping       receives one entry per manifest item that could be resolved
+     * @return the resources that the manifest refers to
+     */
+    private static Resources readManifest(Document packageDocument, String packageHref,
+                                          EpubReader epubReader, Resources resources, Map<String, String> idMapping) {
+        // Every manifest item whose (URL-decoded) href matches a loaded resource is moved from
+        // 'resources' into the result, after its id, media type and property flags are set.
+        Element manifestElement = DOMUtil.getFirstElementByTagNameNS(packageDocument.getDocumentElement(), NAMESPACE_OPF, OPFTags.manifest);
+        Resources result = new Resources();
+        if (manifestElement == null) {
+            log.error("Package document does not contain element " + OPFTags.manifest);
+            return result;
+        }
+        NodeList itemElements = manifestElement.getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.item);
+        for (int i = 0; i < itemElements.getLength(); i++) {
+            Element itemElement = (Element) itemElements.item(i);
+            String id = DOMUtil.getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.id);
+            String href = DOMUtil.getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.href);
+            String properties = DOMUtil.getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.properties);
+            try {
+                href = URLDecoder.decode(href, Constants.CHARACTER_ENCODING);
+            } catch (UnsupportedEncodingException e) {
+                // keep the undecoded href, but do not lose the stack trace
+                log.error(e.getMessage(), e);
+            }
+            String mediaTypeName = DOMUtil.getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.media_type);
+            Resource resource = resources.remove(href);
+            if (resource == null) {
+                log.error("resource with href '" + href + "' not found");
+                continue;
+            }
+            resource.setId(id);
+            // The properties attribute is a space-separated list, so each flag is looked up
+            // as one token of the value rather than compared against the whole value.
+            boolean nav = hasManifestProperty(properties, OPFValues.nav);
+            resource.setNav(nav);
+            if (nav) {
+                result.setNavResource(resource);
+            }
+            resource.setContainingSvg(hasManifestProperty(properties, OPFValues.svg));
+            resource.setScripted(hasManifestProperty(properties, OPFValues.scripted));
+            MediaType mediaType = MediatypeService.getMediaTypeByName(mediaTypeName);
+            if (mediaType != null) {
+                resource.setMediaType(mediaType);
+            }
+            result.add(resource);
+            idMapping.put(id, resource.getId());
+        }
+        return result;
+    }
+
+    /**
+     * Whether the space-separated properties value of a manifest item contains the wanted property.
+     *
+     * @param properties the raw attribute value, may be null or blank
+     * @param wanted     the property to look for
+     * @return true when one of the whitespace-separated tokens equals the wanted property
+     */
+    private static boolean hasManifestProperty(String properties, String wanted) {
+        if (StringUtil.isBlank(properties)) {
+            return false;
+        }
+        for (String property : properties.trim().split("\\s+")) {
+            if (StringUtil.equals(property, wanted)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Reads the book's guide.
+     * Here some more attempts are made at finding the cover page.
+     *
+     * @param packageDocument the parsed package document
+     * @param epubReader      the reader that drives the parse
+     * @param book            the book whose guide receives the references
+     * @param resources       the resources the guide hrefs are resolved against
+     */
+    private static void readGuide(Document packageDocument,
+                                  EpubReader epubReader, Book book, Resources resources) {
+        Element guideElement = DOMUtil.getFirstElementByTagNameNS(packageDocument.getDocumentElement(), NAMESPACE_OPF, OPFTags.guide);
+        if (guideElement == null) {
+            return;
+        }
+        Guide guide = book.getGuide();
+        NodeList referenceNodes = guideElement.getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.reference);
+        for (int index = 0; index < referenceNodes.getLength(); index++) {
+            Element referenceElement = (Element) referenceNodes.item(index);
+
+            // references without an href cannot point anywhere
+            String completeHref = DOMUtil.getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.href);
+            if (StringUtil.isBlank(completeHref)) {
+                continue;
+            }
+
+            // the resource is looked up by the part of the href in front of the fragment separator
+            String hrefWithoutFragment = StringUtil.substringBefore(completeHref, Constants.FRAGMENT_SEPARATOR_CHAR);
+            Resource target = resources.getByHref(hrefWithoutFragment);
+            if (target == null) {
+                log.error("Guide is referencing resource with href " + completeHref + " which could not be found");
+                continue;
+            }
+
+            String referenceType = DOMUtil.getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.type);
+            if (StringUtil.isBlank(referenceType)) {
+                log.error("Guide is referencing resource with href " + completeHref + " which is missing the 'type' attribute");
+                continue;
+            }
+
+            // cover is handled elsewhere
+            if (GuideReference.COVER.equalsIgnoreCase(referenceType)) {
+                continue;
+            }
+
+            String title = DOMUtil.getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.title);
+            String fragmentId = StringUtil.substringAfter(completeHref, Constants.FRAGMENT_SEPARATOR_CHAR);
+            guide.addReference(new GuideReference(target, referenceType, title, fragmentId));
+        }
+    }
+
+
+    /**
+     * Strips off the package prefixes up to the href of the packageHref.
+     * <p>
+     * Example:
+     * If the packageHref is "OEBPS/content.opf" then a resource href like "OEBPS/foo/bar.html" will be turned into "foo/bar.html"
+     * <p>
+     * Only hrefs that actually start with the directory of the package document are shortened.
+     * An href outside that directory (for instance "META-INF/container.xml") is left unchanged
+     * instead of losing its first characters.
+     *
+     * @param packageHref     the href of the package document
+     * @param resourcesByHref the resources whose hrefs are made relative; they are modified in place
+     * @return the given resources when the packageHref contains no '/', otherwise a new
+     *         Resources holding the same, adjusted, resource objects
+     */
+    static Resources fixHrefs(String packageHref,
+                              Resources resourcesByHref) {
+        int lastSlashPos = packageHref.lastIndexOf('/');
+        if (lastSlashPos < 0) {
+            return resourcesByHref;
+        }
+        // directory of the package document including the trailing slash, e.g. "OEBPS/"
+        String packageDirPrefix = packageHref.substring(0, lastSlashPos + 1);
+        Resources result = new Resources();
+        for (Resource resource : resourcesByHref.getAll()) {
+            String href = resource.getHref();
+            // startsWith also guarantees that the href is long enough for the substring
+            if (StringUtil.isNotBlank(href) && href.startsWith(packageDirPrefix)) {
+                resource.setHref(href.substring(packageDirPrefix.length()));
+            }
+            result.add(resource);
+        }
+        return result;
+    }
+
+    /**
+     * Reads the document's spine, containing all sections in reading order.
+     *
+     * @param packageDocument
+     * @param epubReader
+     * @param book
+     * @param resourcesById
+     * @return the document's spine, containing all sections in reading order.
+     */
+    private static Spine readSpine(Document packageDocument, Resources resources, Map<String, String> idMapping) {
+
+        Element spineElement = DOMUtil.getFirstElementByTagNameNS(packageDocument.getDocumentElement(), NAMESPACE_OPF, OPFTags.spine);
+        if (spineElement == null) {
+            log.error("Element " + OPFTags.spine + " not found in package document, generating one automatically");
+            return generateSpineFromResources(resources);
+        }
+        Spine result = new Spine();
+        String tocResourceId = DOMUtil.getAttribute(spineElement, NAMESPACE_OPF, OPFAttributes.toc);
+        result.setTocResource(findTableOfContentsResource(tocResourceId, resources));
+        NodeList spineNodes = packageDocument.getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.itemref);
+        List<SpineReference> spineReferences = new ArrayList<SpineReference>(spineNodes.getLength());
+        for (int i = 0; i < spineNodes.getLength(); i++) {
+            Element spineItem = (Element) spineNodes.item(i);
+            String itemref = DOMUtil.getAttribute(spineItem,
NAMESPACE_OPF, OPFAttributes.idref);
+            if (StringUtil.isBlank(itemref)) {
+                log.error("itemref with missing or empty idref"); // XXX
+                continue;
+            }
+            String id = idMapping.get(itemref);
+            if (id == null) {
+                id = itemref;
+            }
+            Resource resource = resources.getByIdOrHref(id);
+            if (resource == null) {
+                log.error("resource with id \'" + id + "\' not found");
+                continue;
+            }
+
+            SpineReference spineReference = new SpineReference(resource);
+            if (OPFValues.no.equalsIgnoreCase(DOMUtil.getAttribute(spineItem, NAMESPACE_OPF, OPFAttributes.linear))) {
+                spineReference.setLinear(false);
+            }
+            spineReferences.add(spineReference);
+        }
+        result.setSpineReferences(spineReferences);
+        return result;
+    }
+
+    /**
+     * Builds a spine for a package document that has none.
+     * <p>
+     * The hrefs of all resources are visited in case-insensitive order. Each XHTML resource
+     * becomes a spine reference and an NCX resource becomes the table of contents resource
+     * (when there are several, the last one visited is kept).
+     *
+     * @param resources the resources to build the spine from
+     * @return the generated spine
+     */
+    private static Spine generateSpineFromResources(Resources resources) {
+        List<String> sortedHrefs = new ArrayList<String>(resources.getAllHrefs());
+        Collections.sort(sortedHrefs, String.CASE_INSENSITIVE_ORDER);
+
+        Spine spine = new Spine();
+        for (String href : sortedHrefs) {
+            Resource candidate = resources.getByHref(href);
+            MediaType mediaType = candidate.getMediaType();
+            if (mediaType == MediatypeService.NCX) {
+                spine.setTocResource(candidate);
+                continue;
+            }
+            if (mediaType == MediatypeService.XHTML) {
+                spine.addSpineReference(new SpineReference(candidate));
+            }
+        }
+        return spine;
+    }
+
+
+    /**
+     * The spine tag should contain a 'toc' attribute with as value the resource id of the table of contents resource.
+     * <p>
+     * Here we try several ways of finding this table of contents resource.
+     * We try the given attribute value, some often-used ones and finally look through all resources for the first resource with the table of contents mimetype.
+     *
+     * @param tocResourceId the value of the spine's 'toc' attribute, may be blank
+     * @param resources     the resources to search
+     * @return the Resource containing the table of contents, null when none could be found
+     */
+    static Resource findTableOfContentsResource(String tocResourceId, Resources resources) {
+        // first choice: the resource the spine itself points at
+        if (StringUtil.isNotBlank(tocResourceId)) {
+            Resource declared = resources.getByIdOrHref(tocResourceId);
+            if (declared != null) {
+                return declared;
+            }
+        }
+
+        // get the first resource with the NCX mediatype
+        Resource found = resources.findFirstResourceByMediaType(MediatypeService.NCX);
+
+        // last resort: the often-used ids, each one as is and then in upper case
+        for (int i = 0; found == null && i < POSSIBLE_NCX_ITEM_IDS.length; i++) {
+            String candidateId = POSSIBLE_NCX_ITEM_IDS[i];
+            found = resources.getByIdOrHref(candidateId);
+            if (found == null) {
+                found = resources.getByIdOrHref(candidateId.toUpperCase());
+            }
+        }
+
+        if (found == null) {
+            log.error("Could not find table of contents resource. Tried resource with id '" + tocResourceId + "', " + Constants.DEFAULT_TOC_ID + ", " + Constants.DEFAULT_TOC_ID.toUpperCase() + " and any NCX resource.");
+        }
+        return found;
+    }
+
+
+    /**
+     * Find all resources that have something to do with the coverpage and the cover image.
+     * Search the meta tags and the guide references
+     *
+     * @param packageDocument
+     * @return all resources that have something to do with the coverpage and the cover image.
+     */
+    // package
+    static Set<String> findCoverHrefs(Document packageDocument) {
+        Set<String> coverHrefs = new HashSet<String>();
+
+        // try and find a meta tag with name = 'cover' and a non-blank id
+        String metaCoverId = DOMUtil.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
+                OPFTags.meta, OPFAttributes.name, OPFValues.meta_cover,
+                OPFAttributes.content);
+        if (StringUtil.isNotBlank(metaCoverId)) {
+            // translate that id into the href of the manifest item carrying it
+            String manifestHref = DOMUtil.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
+                    OPFTags.item, OPFAttributes.id, metaCoverId,
+                    OPFAttributes.href);
+            // no such item: maybe there was a cover href put in the cover id attribute
+            coverHrefs.add(StringUtil.isNotBlank(manifestHref) ? manifestHref : metaCoverId);
+        }
+
+        // try and find a reference tag with type is 'cover' and reference is not blank
+        String guideCoverHref = DOMUtil.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
+                OPFTags.reference, OPFAttributes.type, OPFValues.reference_cover,
+                OPFAttributes.href);
+        if (StringUtil.isNotBlank(guideCoverHref)) {
+            coverHrefs.add(guideCoverHref);
+        }
+        return coverHrefs;
+    }
+
+    /**
+     * Finds the cover resource in the packageDocument and adds it to the book if found.
+     * Keeps the cover resource in the resources map
+     * <p>
+     * An XHTML resource becomes the cover page, a bitmap image becomes the cover image;
+     * hrefs that resolve to no resource are logged and skipped.
+     *
+     * @param packageDocument the parsed package document
+     * @param book            the book that receives the cover page and cover image
+     */
+    private static void readCover(Document packageDocument, Book book) {
+        for (String candidateHref : findCoverHrefs(packageDocument)) {
+            Resource candidate = book.getResources().getByHref(candidateHref);
+            if (candidate == null) {
+                log.error("Cover resource " + candidateHref + " not found");
+                continue;
+            }
+            MediaType mediaType = candidate.getMediaType();
+            if (mediaType == MediatypeService.XHTML) {
+                book.setCoverPage(candidate);
+            } else if (MediatypeService.isBitmapImage(mediaType)) {
+                book.setCoverImage(candidate);
+            }
+        }
+    }
+
+
+}
diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentWriter.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentWriter.java
new file mode 100644
index 00000000..5fee7b18
--- /dev/null
+++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/PackageDocumentWriter.java
@@ -0,0 +1,225 @@
+package nl.siegmann.epublib.epub;
+
+import nl.siegmann.epublib.Constants;
+import nl.siegmann.epublib.domain.*;
+import nl.siegmann.epublib.service.MediatypeService;
+import nl.siegmann.epublib.util.StringUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xmlpull.v1.XmlSerializer;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import static nl.siegmann.epublib.domain.OpfResource.DEFAULT_VERSION;
+
+
+/**
+ * Writes the opf package document as defined by namespace http://www.idpf.org/2007/opf
+ *
+ * @author paul
+ */
+public class PackageDocumentWriter extends PackageDocumentBase {
+
+    private static final Logger log = LoggerFactory.getLogger(PackageDocumentWriter.class);
+
+    public static void write(EpubWriter epubWriter, XmlSerializer serializer, Book book) throws IOException {
+        try {
+            
serializer.startDocument(Constants.CHARACTER_ENCODING, false);
+            serializer.setPrefix(EpubWriter.EMPTY_NAMESPACE_PREFIX, NAMESPACE_OPF);
+//            serializer.setPrefix(PREFIX_DUBLIN_CORE, NAMESPACE_DUBLIN_CORE);
+            serializer.startTag(NAMESPACE_OPF, OPFTags.packageTag);
+            // Version: the one stored on the book's opf resource, the default when that is missing or blank
+            String version = DEFAULT_VERSION;
+            if (null != book.getOpfResource()) {
+                version = book.getOpfResource().getVersion();
+            }
+            serializer.attribute(
+                    EpubWriter.EMPTY_NAMESPACE_PREFIX,
+                    OPFAttributes.version, StringUtil.isNotBlank(version) ? version : DEFAULT_VERSION
+            );
+            // The prefix attribute is only written when the opf resource carries a non-blank one
+            if (null != book.getOpfResource() && StringUtil.isNotBlank(book.getOpfResource().getPrefix())) {
+                serializer.attribute(
+                        EpubWriter.EMPTY_NAMESPACE_PREFIX,
+                        OPFAttributes.prefix, book.getOpfResource().getPrefix()
+                );
+            }
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.uniqueIdentifier, BOOK_ID_ID);
+
+            PackageDocumentMetadataWriter.writeMetaData(book, serializer);
+
+            writeManifest(book, epubWriter, serializer);
+            writeSpine(book, epubWriter, serializer);
+            writeGuide(book, epubWriter, serializer);
+
+            serializer.endTag(NAMESPACE_OPF, OPFTags.packageTag);
+            serializer.endDocument();
+            serializer.flush();
+        } catch (IOException e) {
+            // Log and rethrow: the method declares IOException, and swallowing it here
+            // would leave a truncated package document behind without the caller knowing.
+            log.error("Could not write the package document", e);
+            throw e;
+        }
+    }
+
+
+    /**
+     * Writes the package's spine.
+     * <p>
+     * The spine element gets a toc attribute when the spine has a table of contents resource.
+     * A cover page that is not part of the spine is written first, as a non-linear itemref,
+     * followed by the regular spine items.
+     *
+     * @param book       the book whose spine is written
+     * @param epubWriter the writer that drives the output
+     * @param serializer the serializer to write to
+     * @throws IOException              if the serializer fails to write
+     * @throws IllegalStateException    if the serializer is in the wrong state
+     * @throws IllegalArgumentException if the serializer rejects an argument
+     */
+    private static void writeSpine(Book book, EpubWriter epubWriter, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        serializer.startTag(NAMESPACE_OPF, OPFTags.spine);
+
+        Resource tocResource = book.getSpine().getTocResource();
+        if (tocResource != null) {
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.toc, tocResource.getId());
+        }
+
+        Resource coverPage = book.getCoverPage();
+        // there is a cover page and it is not already in the spine
+        boolean coverMissingFromSpine = coverPage != null
+                && book.getSpine().findFirstResourceById(coverPage.getId()) < 0;
+        if (coverMissingFromSpine) {
+            // write the cover html file
+            serializer.startTag(NAMESPACE_OPF, OPFTags.itemref);
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.idref, coverPage.getId());
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.linear, "no");
+            serializer.endTag(NAMESPACE_OPF, OPFTags.itemref);
+        }
+
+        writeSpineItems(book.getSpine(), serializer);
+        serializer.endTag(NAMESPACE_OPF, OPFTags.spine);
+    }
+
+
+    private static void writeManifest(Book book, EpubWriter epubWriter, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        serializer.startTag(NAMESPACE_OPF, OPFTags.manifest);
+
+        if (null != book.getSpine().getTocResource()) {
+            serializer.startTag(NAMESPACE_OPF, OPFTags.item);
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.id, epubWriter.getNcxId());
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.href, epubWriter.getNcxHref());
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.media_type, epubWriter.getNcxMediaType());
+            serializer.endTag(NAMESPACE_OPF, OPFTags.item);
+        }
+
+//        writeCoverResources(book, serializer);
+
+        for (Resource resource :
getAllResourcesSortById(book)) {
+            writeItem(book, resource, serializer);
+        }
+
+        serializer.endTag(NAMESPACE_OPF, OPFTags.manifest);
+    }
+
+    /**
+     * Returns a copy of the book's resources, sorted case-insensitively by id.
+     * <p>
+     * Resources without an id sort first instead of making the sort fail; writeItem
+     * reports and skips them afterwards.
+     *
+     * @param book the book whose resources are listed
+     * @return a new, sorted list of all resources of the book
+     */
+    private static List<Resource> getAllResourcesSortById(Book book) {
+        List<Resource> allResources = new ArrayList<Resource>(book.getResources().getAll());
+        Collections.sort(allResources, new Comparator<Resource>() {
+
+            @Override
+            public int compare(Resource resource1, Resource resource2) {
+                String id1 = resource1.getId();
+                String id2 = resource2.getId();
+                if (id1 == null) {
+                    return (id2 == null) ? 0 : -1;
+                }
+                if (id2 == null) {
+                    return 1;
+                }
+                return id1.compareToIgnoreCase(id2);
+            }
+        });
+        return allResources;
+    }
+
+    /**
+     * Writes a resource as an item element.
+     * <p>
+     * Nothing is written for a null resource, for an NCX resource while the spine has a table
+     * of contents resource, or for a resource lacking id, href or media type (the latter three
+     * are logged). The nav, svg and scripted flags are combined into one space-separated
+     * properties attribute, because an element may carry each attribute only once.
+     *
+     * @param book       the book the resource belongs to
+     * @param resource   the resource to write, may be null
+     * @param serializer the serializer to write to
+     * @throws IOException              if the serializer fails to write
+     * @throws IllegalStateException    if the serializer is in the wrong state
+     * @throws IllegalArgumentException if the serializer rejects an argument
+     */
+    private static void writeItem(Book book, Resource resource, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        if (resource == null ||
+                (resource.getMediaType() == MediatypeService.NCX
+                        && book.getSpine().getTocResource() != null)) {
+            return;
+        }
+        if (StringUtil.isBlank(resource.getId())) {
+            log.error("resource id must not be empty (href: " + resource.getHref() + ", mediatype:" + resource.getMediaType() + ")");
+            return;
+        }
+        if (StringUtil.isBlank(resource.getHref())) {
+            log.error("resource href must not be empty (id: " + resource.getId() + ", mediatype:" + resource.getMediaType() + ")");
+            return;
+        }
+        if (resource.getMediaType() == null) {
+            log.error("resource mediatype must not be empty (id: " + resource.getId() + ", href:" + resource.getHref() + ")");
+            return;
+        }
+        serializer.startTag(NAMESPACE_OPF, OPFTags.item);
+        serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.id, resource.getId());
+        serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.href, resource.getHref());
+        serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.media_type, resource.getMediaType().getName());
+
+        // Collect all flags first; with a single flag the output is the same as before
+        StringBuilder properties = new StringBuilder();
+        if (resource.isNav()) {
+            appendProperty(properties, OPFValues.nav);
+        }
+        if (resource.isContainingSvg()) {
+            appendProperty(properties, OPFValues.svg);
+        }
+        if (resource.isScripted()) {
+            appendProperty(properties, OPFValues.scripted);
+        }
+        if (properties.length() > 0) {
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.properties, properties.toString());
+        }
+        serializer.endTag(NAMESPACE_OPF, OPFTags.item);
+    }
+
+    /**
+     * Appends a property to a space-separated property list.
+     */
+    private static void appendProperty(StringBuilder properties, String property) {
+        if (properties.length() > 0) {
+            properties.append(' ');
+        }
+        properties.append(property);
+    }
+
+    /**
+     * Writes one itemref element per spine reference; non-linear references get a linear attribute.
+     *
+     * @param spine      the spine whose references are written
+     * @param serializer the serializer to write to
+     * @throws IOException              if the serializer fails to write
+     * @throws IllegalStateException    if the serializer is in the wrong state
+     * @throws IllegalArgumentException if the serializer rejects an argument
+     */
+    private static void writeSpineItems(Spine spine, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        for (SpineReference spineReference : spine.getSpineReferences()) {
+            serializer.startTag(NAMESPACE_OPF, OPFTags.itemref);
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.idref, spineReference.getResourceId());
+            if (!spineReference.isLinear()) {
+                serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.linear, OPFValues.no);
+            }
+            serializer.endTag(NAMESPACE_OPF, OPFTags.itemref);
+        }
+    }
+
+    /**
+     * Writes the guide element: a cover reference first when the guide lists none, then all guide references.
+     */
+    private static void writeGuide(Book book, EpubWriter epubWriter, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        serializer.startTag(NAMESPACE_OPF, OPFTags.guide);
+        ensureCoverPageGuideReferenceWritten(book.getGuide(), epubWriter, serializer);
+        for (GuideReference reference : book.getGuide().getReferences()) {
+            writeGuideReference(reference, serializer);
+        }
+        serializer.endTag(NAMESPACE_OPF, OPFTags.guide);
+    }
+
+    private static void ensureCoverPageGuideReferenceWritten(Guide guide,
+                                                             EpubWriter epubWriter, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        if (!(guide.getGuideReferencesByType(GuideReference.COVER).isEmpty())) {
+            return;
+        }
+        Resource coverPage = guide.getCoverPage();
+        if (coverPage != null) {
+            
writeGuideReference(new GuideReference(guide.getCoverPage(), GuideReference.COVER, GuideReference.COVER), serializer);
+        }
+    }
+
+
+    /**
+     * Writes one guide reference as a reference element with type and href attributes;
+     * the title attribute is added only when the title is not blank.
+     *
+     * @param reference  the reference to write; null is silently ignored
+     * @param serializer the serializer to write to
+     */
+    private static void writeGuideReference(GuideReference reference, XmlSerializer serializer) throws IllegalArgumentException, IllegalStateException, IOException {
+        if (reference != null) {
+            serializer.startTag(NAMESPACE_OPF, OPFTags.reference);
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.type, reference.getType());
+            serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.href, reference.getCompleteHref());
+            boolean hasTitle = StringUtil.isNotBlank(reference.getTitle());
+            if (hasTitle) {
+                serializer.attribute(EpubWriter.EMPTY_NAMESPACE_PREFIX, OPFAttributes.title, reference.getTitle());
+            }
+            serializer.endTag(NAMESPACE_OPF, OPFTags.reference);
+        }
+    }
+}
diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/epub/ResourcesLoader.java b/epublib-core/src/main/java/nl/siegmann/epublib/epub/ResourcesLoader.java
new file mode 100644
index 00000000..19fb3e15
--- /dev/null
+++ b/epublib-core/src/main/java/nl/siegmann/epublib/epub/ResourcesLoader.java
@@ -0,0 +1,154 @@
+package nl.siegmann.epublib.epub;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.List;
+
+import net.sf.jazzlib.ZipEntry;
+import net.sf.jazzlib.ZipException;
+import net.sf.jazzlib.ZipFile;
+import net.sf.jazzlib.ZipInputStream;
+import nl.siegmann.epublib.domain.LazyResource;
+import nl.siegmann.epublib.domain.MediaType;
+import nl.siegmann.epublib.domain.Resource;
+import nl.siegmann.epublib.domain.Resources;
+import nl.siegmann.epublib.service.MediatypeService;
+import nl.siegmann.epublib.util.CollectionUtil;
+import nl.siegmann.epublib.util.ResourceUtil;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Loads Resources from inputStreams, ZipFiles, etc
+ *
+ * @author paul
+ *
+ */
+public class
ResourcesLoader {
+    private static final Logger LOG = LoggerFactory.getLogger(ResourcesLoader.class);
+
+    /**
+     * Turns every non-directory entry of the zipFile into a resource.
+     * <p>
+     * Entries whose media type (derived from the entry name) is listed in lazyLoadedTypes are
+     * stored as LazyResource, holding only the zip file name, entry size and href. All other
+     * entries are created from the entry's input stream right away. XHTML resources get the
+     * given default encoding.
+     *
+     * @param zipFile             the zip file to read the entries from
+     * @param defaultHtmlEncoding the input encoding set on XHTML resources
+     * @param lazyLoadedTypes     the media types that are loaded lazily, may be null or empty
+     * @return the resources created from the zip entries
+     * @throws IOException if reading an entry fails
+     */
+    public static Resources loadResources(ZipFile zipFile, String defaultHtmlEncoding,
+                                          List<MediaType> lazyLoadedTypes) throws IOException {
+        Resources loaded = new Resources();
+
+        for (Enumeration<? extends ZipEntry> entries = zipFile.entries(); entries.hasMoreElements(); ) {
+            ZipEntry entry = entries.nextElement();
+            if (entry == null || entry.isDirectory()) {
+                continue;
+            }
+
+            String href = entry.getName();
+            Resource resource = shouldLoadLazy(href, lazyLoadedTypes)
+                    ? new LazyResource(zipFile.getName(), entry.getSize(), href)
+                    : ResourceUtil.createResource(entry, zipFile.getInputStream(entry));
+
+            if (resource.getMediaType() == MediatypeService.XHTML) {
+                resource.setInputEncoding(defaultHtmlEncoding);
+            }
+            loaded.add(resource);
+        }
+
+        return loaded;
+    }
+
+    /**
+     * Decides whether the entry with the given href is loaded lazily.
+     *
+     * @param href                   the entry name; its media type is determined by MediatypeService
+     * @param lazilyLoadedMediaTypes the media types that are loaded lazily, may be null or empty
+     * @return true when the collection is not empty and contains the media type determined for the href
+     */
+    private static boolean shouldLoadLazy(String href, Collection<MediaType> lazilyLoadedMediaTypes) {
+        return !CollectionUtil.isEmpty(lazilyLoadedMediaTypes)
+                && lazilyLoadedMediaTypes.contains(MediatypeService.determineMediaType(href));
+    }
+
+    /**
+     * Loads all entries from the ZipInputStream as Resources.
+     *
+     * Every non-directory entry is read into memory straight away.
+     * That is fast, but may lead to memory problems when reading large books on devices with small amounts of memory.
+     *
+     * @param zipInputStream      the stream to read the zip entries from
+     * @param defaultHtmlEncoding the input encoding set on XHTML resources
+     * @return the resources created from the zip entries
+     * @throws IOException if reading an entry fails
+     */
+    public static Resources loadResources(ZipInputStream zipInputStream, String defaultHtmlEncoding) throws IOException {
+        Resources loaded = new Resources();
+
+        // a null entry marks the end of the archive
+        ZipEntry entry;
+        while ((entry = getNextZipEntry(zipInputStream)) != null) {
+            if (entry.isDirectory()) {
+                continue;
+            }
+
+            // store resource
+            Resource resource = ResourceUtil.createResource(entry, zipInputStream);
+            if (resource.getMediaType() == MediatypeService.XHTML) {
+                resource.setInputEncoding(defaultHtmlEncoding);
+            }
+            loaded.add(resource);
+        }
+
+        return loaded;
+    }
+
+
+    /**
+     * Fetches the next entry of the stream. On a ZipException the error is logged, the
+     * current entry is closed on a best-effort basis and the exception is rethrown.
+     */
+    private static ZipEntry getNextZipEntry(ZipInputStream zipInputStream) throws IOException {
+        try {
+            return zipInputStream.getNextEntry();
+        } catch (ZipException zipException) {
+            //see <a href="https://github.com/psiegman/epublib/issues/122">Issue #122 Infinite loop</a>.
+            //when reading a file that is not a real zip archive or a zero length file, zipInputStream.getNextEntry()
+            //throws an exception and does not advance, so loadResources enters an infinite loop
+            LOG.error("Invalid or damaged zip file.", zipException);
+            try {
+                zipInputStream.closeEntry();
+            } catch (Exception ignored) {
+                // closing is only an attempt to clean up; the original exception is what matters
+            }
+            throw zipException;
+        }
+    }
+
+    /**
+     * Loads all entries from the ZipInputStream as Resources.
+ * + * @param zipFile + * @param defaultHtmlEncoding + * @return + * @throws IOException + */ + public static Resources loadResources(ZipFile zipFile, String defaultHtmlEncoding) throws IOException { + return loadResources(zipFile, defaultHtmlEncoding, Collections.<MediaType>emptyList()); + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/service/MediatypeService.java b/epublib-core/src/main/java/nl/siegmann/epublib/service/MediatypeService.java new file mode 100644 index 00000000..be689b6f --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/service/MediatypeService.java @@ -0,0 +1,84 @@ +package nl.siegmann.epublib.service; + +import java.util.HashMap; +import java.util.Map; + +import nl.siegmann.epublib.domain.MediaType; +import nl.siegmann.epublib.util.StringUtil; + + +/** + * Manages mediatypes that are used by epubs + * + * @author paul + * + */ +public class MediatypeService { + + public static final MediaType XHTML = new MediaType("application/xhtml+xml", ".xhtml", new String[] {".htm", ".html", ".xhtml"}); + public static final MediaType EPUB = new MediaType("application/epub+zip", ".epub"); + public static final MediaType NCX = new MediaType("application/x-dtbncx+xml", ".ncx"); + + public static final MediaType JAVASCRIPT = new MediaType("text/javascript", ".js"); + public static final MediaType CSS = new MediaType("text/css", ".css"); + + // images + public static final MediaType JPG = new MediaType("image/jpeg", ".jpg", new String[] {".jpg", ".jpeg"}); + public static final MediaType PNG = new MediaType("image/png", ".png"); + public static final MediaType GIF = new MediaType("image/gif", ".gif"); + + public static final MediaType SVG = new MediaType("image/svg+xml", ".svg"); + + // fonts + public static final MediaType TTF = new MediaType("application/x-truetype-font", ".ttf"); + public static final MediaType OPENTYPE = new MediaType("application/vnd.ms-opentype", ".otf"); + public static final MediaType WOFF = new 
MediaType("application/font-woff", ".woff");
+
+    // audio
+    public static final MediaType MP3 = new MediaType("audio/mpeg", ".mp3");
+    public static final MediaType OGG = new MediaType("audio/ogg", ".ogg");
+
+    // video
+    public static final MediaType MP4 = new MediaType("video/mp4", ".mp4");
+
+    public static final MediaType SMIL = new MediaType("application/smil+xml", ".smil");
+    public static final MediaType XPGT = new MediaType("application/adobe-page-template+xml", ".xpgt");
+    public static final MediaType PLS = new MediaType("application/pls+xml", ".pls");
+
+    // all media types known to this service
+    public static MediaType[] mediatypes = new MediaType[] {
+            XHTML, EPUB, JPG, PNG, GIF, CSS, SVG, TTF, NCX, XPGT, OPENTYPE, WOFF, SMIL, PLS, JAVASCRIPT, MP3, MP4, OGG
+    };
+
+    // lookup table from media type name to media type, filled once from the array above
+    public static Map<String, MediaType> mediaTypesByName = new HashMap<String, MediaType>();
+    static {
+        for (MediaType knownType : mediatypes) {
+            mediaTypesByName.put(knownType.getName(), knownType);
+        }
+    }
+
+    /**
+     * Whether the given media type is one of the bitmap image types JPG, PNG or GIF.
+     * The comparison is by identity, so only the constants of this class match.
+     */
+    public static boolean isBitmapImage(MediaType mediaType) {
+        if (mediaType == JPG) {
+            return true;
+        }
+        if (mediaType == PNG) {
+            return true;
+        }
+        return mediaType == GIF;
+    }
+
+    /**
+     * Gets the MediaType based on the file extension.
+     * Null if no matching extension found.
+     *
+     * @param filename
+     * @return the MediaType based on the file extension.
+     */
+    public static MediaType determineMediaType(String filename) {
+        // first media type with an extension the filename ends with (ignoring case) wins
+        for (MediaType candidate : mediaTypesByName.values()) {
+            for (String knownExtension : candidate.getExtensions()) {
+                if (!StringUtil.endsWithIgnoreCase(filename, knownExtension)) {
+                    continue;
+                }
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Looks up a media type by its name.
+     *
+     * @param mediaTypeName the name of the media type
+     * @return the media type registered under that name, null when there is none
+     */
+    public static MediaType getMediaTypeByName(String mediaTypeName) {
+        MediaType registered = mediaTypesByName.get(mediaTypeName);
+        return registered;
+    }
+}
diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/CollectionUtil.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/CollectionUtil.java
new file mode 100644
index 00000000..f780cb68
--- /dev/null
+++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/CollectionUtil.java
@@ -0,0 +1,68 @@
+package nl.siegmann.epublib.util;
+
+import java.util.Collection;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.List;
+
+public class CollectionUtil {
+
+    /**
+     * Presents an Iterator through the Enumeration interface.
+     * @author paul.siegmann
+     *
+     * @param <T> the element type
+     */
+    private static class IteratorEnumerationAdapter<T> implements Enumeration<T> {
+        private Iterator<T> iterator;
+
+        public IteratorEnumerationAdapter(Iterator<T> iter) {
+            iterator = iter;
+        }
+
+        @Override
+        public boolean hasMoreElements() {
+            return this.iterator.hasNext();
+        }
+
+        @Override
+        public T nextElement() {
+            return this.iterator.next();
+        }
+    }
+
+    /**
+     * Creates an Enumeration that reads from the given Iterator.
+     * @param <T> the element type
+     * @param it the iterator to wrap
+     * @return an Enumeration backed by the given Iterator.
+     */
+    public static <T> Enumeration<T> createEnumerationFromIterator(Iterator<T> it) {
+        Enumeration<T> adapter = new IteratorEnumerationAdapter<T>(it);
+        return adapter;
+    }
+
+
+    /**
+     * Returns the first element of the list, null if the list is null or empty.
+     *
+     * @param <T>
+     * @param list
+     * @return the first element of the list, null if the list is null or empty.
+ */ + public static <T> T first(List<T> list) { + if(list == null || list.isEmpty()) { + return null; + } + return list.get(0); + } + + /** + * Whether the given collection is null or has no elements. + * + * @param collection + * @return Whether the given collection is null or has no elements. + */ + public static boolean isEmpty(Collection<?> collection) { + return collection == null || collection.isEmpty(); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/IOUtil.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/IOUtil.java new file mode 100644 index 00000000..4d2dd804 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/IOUtil.java @@ -0,0 +1,132 @@ +package nl.siegmann.epublib.util; + +import java.io.*; + +/** + * Most of the functions herein are re-implementations of the ones in apache io IOUtils. + * The reason for re-implementing this is that the functions are fairly simple and using my own implementation saves the inclusion of a 200Kb jar file. + */ +public class IOUtil { + + public static final int IO_COPY_BUFFER_SIZE = 1024 * 4; + + /** + * Gets the contents of the Reader as a byte[], with the given character encoding. + * + * @param in + * @param encoding + * @return the contents of the Reader as a byte[], with the given character encoding. 
+ * @throws IOException + */ + public static byte[] toByteArray(Reader in, String encoding) throws IOException { + StringWriter out = new StringWriter(); + copy(in, out); + out.flush(); + return out.toString().getBytes(encoding); + } + + /** + * Returns the contents of the InputStream as a byte[] + * + * @param in + * @return the contents of the InputStream as a byte[] + * @throws IOException + */ + public static byte[] toByteArray(InputStream in) throws IOException { + ByteArrayOutputStream result = new ByteArrayOutputStream(); + copy(in, result); + result.flush(); + return result.toByteArray(); + } + + /** + * Reads data from the InputStream, using the specified buffer size. + * + * This is meant for situations where memory is tight, since + * it prevents buffer expansion. + * + * @param in the stream to read data from + * @param size the size of the array to create + * @return the array, or null + * @throws IOException + */ + public static byte[] toByteArray( InputStream in, int size ) throws IOException { + + try { + ByteArrayOutputStream result; + + if ( size > 0 ) { + result = new ByteArrayOutputStream(size); + } else { + result = new ByteArrayOutputStream(); + } + + copy(in, result); + result.flush(); + return result.toByteArray(); + } catch ( OutOfMemoryError error ) { + //Return null so it gets loaded lazily. + return null; + } + + } + + + /** + * if totalNrRead < 0 then totalNrRead is returned, if (nrRead + totalNrRead) < Integer.MAX_VALUE then nrRead + totalNrRead is returned, -1 otherwise. + * @param nrRead + * @param totalNrNread + * @return if totalNrRead < 0 then totalNrRead is returned, if (nrRead + totalNrRead) < Integer.MAX_VALUE then nrRead + totalNrRead is returned, -1 otherwise. 
+ */ + protected static int calcNewNrReadSize(int nrRead, int totalNrNread) { + if (totalNrNread < 0) { + return totalNrNread; + } + if (totalNrNread > (Integer.MAX_VALUE - nrRead)) { + return -1; + } else { + return (totalNrNread + nrRead); + } + } + + /** + * Copies the contents of the InputStream to the OutputStream. + * + * @param in + * @param out + * @return the nr of bytes read, or -1 if the amount > Integer.MAX_VALUE + * @throws IOException + */ + public static int copy(InputStream in, OutputStream out) + throws IOException { + byte[] buffer = new byte[IO_COPY_BUFFER_SIZE]; + int readSize = -1; + int result = 0; + while ((readSize = in.read(buffer)) >= 0) { + out.write(buffer, 0, readSize); + result = calcNewNrReadSize(readSize, result); + } + out.flush(); + return result; + } + + /** + * Copies the contents of the Reader to the Writer. + * + * @param in + * @param out + * @return the nr of characters read, or -1 if the amount > Integer.MAX_VALUE + * @throws IOException + */ + public static int copy(Reader in, Writer out) throws IOException { + char[] buffer = new char[IO_COPY_BUFFER_SIZE]; + int readSize = -1; + int result = 0; + while ((readSize = in.read(buffer)) >= 0) { + out.write(buffer, 0, readSize); + result = calcNewNrReadSize(readSize, result); + } + out.flush(); + return result; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/NoCloseOutputStream.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/NoCloseOutputStream.java new file mode 100644 index 00000000..4161eba1 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/NoCloseOutputStream.java @@ -0,0 +1,33 @@ +package nl.siegmann.epublib.util; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * OutputStream with the close() disabled. + * We write multiple documents to a ZipOutputStream. + * Some of the formatters call a close() after writing their data. 
+ * We don't want them to do that, so we wrap regular OutputStreams in this NoCloseOutputStream. + * + * @author paul + * + */ +public class NoCloseOutputStream extends OutputStream { + + private OutputStream outputStream; + + public NoCloseOutputStream(OutputStream outputStream) { + this.outputStream = outputStream; + } + + @Override + public void write(int b) throws IOException { + outputStream.write(b); + } + + /** + * A close() that does not call it's parent's close() + */ + public void close() { + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/NoCloseWriter.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/NoCloseWriter.java new file mode 100644 index 00000000..ba2512c6 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/NoCloseWriter.java @@ -0,0 +1,36 @@ +package nl.siegmann.epublib.util; + +import java.io.IOException; +import java.io.Writer; + +/** + * Writer with the close() disabled. + * We write multiple documents to a ZipOutputStream. + * Some of the formatters call a close() after writing their data. + * We don't want them to do that, so we wrap regular Writers in this NoCloseWriter. 
+ * + * @author paul + * + */ +public class NoCloseWriter extends Writer { + + private Writer writer; + + public NoCloseWriter(Writer writer) { + this.writer = writer; + } + + @Override + public void close() throws IOException { + } + + @Override + public void flush() throws IOException { + writer.flush(); + } + + @Override + public void write(char[] cbuf, int off, int len) throws IOException { + writer.write(cbuf, off, len); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/ResourceUtil.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/ResourceUtil.java new file mode 100644 index 00000000..066f33e4 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/ResourceUtil.java @@ -0,0 +1,131 @@ +package nl.siegmann.epublib.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.UnsupportedEncodingException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.ParserConfigurationException; + +import net.sf.jazzlib.ZipEntry; +import net.sf.jazzlib.ZipInputStream; +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.MediaType; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.epub.EpubProcessorSupport; +import nl.siegmann.epublib.service.MediatypeService; + +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * Various resource utility methods + * + * @author paul + * + */ +public class ResourceUtil { + + public static Resource createResource(File file) throws IOException { + if (file == null) { + return null; + } + MediaType mediaType = MediatypeService.determineMediaType(file.getName()); + byte[] data = IOUtil.toByteArray(new FileInputStream(file)); + Resource result = new Resource(data, mediaType); + return result; + } + + + /** + * Creates a resource with as contents a html page with the given title. 
+ * + * @param title + * @param href + * @return a resource with as contents a html page with the given title. + */ + public static Resource createResource(String title, String href) { + String content = "<html><head><title>" + title + "

" + title + "

"; + return new Resource(null, content.getBytes(), href, MediatypeService.XHTML, Constants.CHARACTER_ENCODING); + } + + /** + * Creates a resource out of the given zipEntry and zipInputStream. + * + * @param zipEntry + * @param zipInputStream + * @return a resource created out of the given zipEntry and zipInputStream. + * @throws IOException + */ + public static Resource createResource(ZipEntry zipEntry, ZipInputStream zipInputStream) throws IOException { + return new Resource(zipInputStream, zipEntry.getName()); + + } + + public static Resource createResource(ZipEntry zipEntry, InputStream zipInputStream) throws IOException { + return new Resource(zipInputStream, zipEntry.getName()); + + } + + /** + * Converts a given string from given input character encoding to the requested output character encoding. + * + * @param inputEncoding + * @param outputEncoding + * @param input + * @return the string from given input character encoding converted to the requested output character encoding. + * @throws UnsupportedEncodingException + */ + public static byte[] recode(String inputEncoding, String outputEncoding, byte[] input) throws UnsupportedEncodingException { + return new String(input, inputEncoding).getBytes(outputEncoding); + } + + /** + * Gets the contents of the Resource as an InputSource in a null-safe manner. 
+ * + */ + public static InputSource getInputSource(Resource resource) throws IOException { + if (resource == null) { + return null; + } + Reader reader = resource.getReader(); + if (reader == null) { + return null; + } + InputSource inputSource = new InputSource(reader); + return inputSource; + } + + + /** + * Reads parses the xml therein and returns the result as a Document + */ + public static Document getAsDocument(Resource resource) throws UnsupportedEncodingException, SAXException, IOException, ParserConfigurationException { + return getAsDocument(resource, EpubProcessorSupport.createDocumentBuilder()); + } + + + /** + * Reads the given resources inputstream, parses the xml therein and returns the result as a Document + * + * @param resource + * @param documentBuilder + * @return the document created from the given resource + * @throws UnsupportedEncodingException + * @throws SAXException + * @throws IOException + * @throws ParserConfigurationException + */ + public static Document getAsDocument(Resource resource, DocumentBuilder documentBuilder) throws UnsupportedEncodingException, SAXException, IOException, ParserConfigurationException { + InputSource inputSource = getInputSource(resource); + if (inputSource == null) { + return null; + } + Document result = documentBuilder.parse(inputSource); + return result; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/StringUtil.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/StringUtil.java new file mode 100644 index 00000000..0f60b923 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/StringUtil.java @@ -0,0 +1,275 @@ +package nl.siegmann.epublib.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Various String utility functions. + * + * Most of the functions herein are re-implementations of the ones in apache + * commons StringUtils. 
The reason for re-implementing this is that the + * functions are fairly simple and using my own implementation saves the + * inclusion of a 200Kb jar file. + * + * @author paul.siegmann + * + */ +public class StringUtil { + + /** + * Changes a path containing '..', '.' and empty dirs into a path that + * doesn't. X/foo/../Y is changed into 'X/Y', etc. Does not handle invalid + * paths like "../". + * + * @param path + * @return the normalized path + */ + public static String collapsePathDots(String path) { + String[] stringParts = path.split("/"); + List parts = new ArrayList(Arrays.asList(stringParts)); + for (int i = 0; i < parts.size() - 1; i++) { + String currentDir = parts.get(i); + if (currentDir.length() == 0 || currentDir.equals(".")) { + parts.remove(i); + i--; + } else if (currentDir.equals("..")) { + parts.remove(i - 1); + parts.remove(i - 1); + i -= 2; + } + } + StringBuilder result = new StringBuilder(); + if (path.startsWith("/")) { + result.append('/'); + } + for (int i = 0; i < parts.size(); i++) { + result.append(parts.get(i)); + if (i < (parts.size() - 1)) { + result.append('/'); + } + } + return result.toString(); + } + + /** + * Whether the String is not null, not zero-length and does not contain of + * only whitespace. + * + * @param text + * @return Whether the String is not null, not zero-length and does not contain of + */ + public static boolean isNotBlank(String text) { + return !isBlank(text); + } + + /** + * Whether the String is null, zero-length and does contain only whitespace. + * + * @return Whether the String is null, zero-length and does contain only whitespace. + */ + public static boolean isBlank(String text) { + if (isEmpty(text)) { + return true; + } + for (int i = 0; i < text.length(); i++) { + if (!Character.isWhitespace(text.charAt(i))) { + return false; + } + } + return true; + } + + /** + * Whether the given string is null or zero-length. 
+ * + * @param text the input for this method + * @return Whether the given string is null or zero-length. + */ + public static boolean isEmpty(String text) { + return (text == null) || (text.length() == 0); + } + + /** + * Whether the given source string ends with the given suffix, ignoring + * case. + * + * @param source + * @param suffix + * @return Whether the given source string ends with the given suffix, ignoring case. + */ + public static boolean endsWithIgnoreCase(String source, String suffix) { + if (isEmpty(suffix)) { + return true; + } + if (isEmpty(source)) { + return false; + } + if (suffix.length() > source.length()) { + return false; + } + return source.substring(source.length() - suffix.length()) + .toLowerCase().endsWith(suffix.toLowerCase()); + } + + /** + * If the given text is null return "", the original text otherwise. + * + * @param text + * @return If the given text is null "", the original text otherwise. + */ + public static String defaultIfNull(String text) { + return defaultIfNull(text, ""); + } + + /** + * If the given text is null return "", the given defaultValue otherwise. + * + * @param text + * @param defaultValue + * @return If the given text is null "", the given defaultValue otherwise. + */ + public static String defaultIfNull(String text, String defaultValue) { + if (text == null) { + return defaultValue; + } + return text; + } + + /** + * Null-safe string comparator + * + * @param text1 + * @param text2 + * @return whether the two strings are equal + */ + public static boolean equals(String text1, String text2) { + if (text1 == null) { + return (text2 == null); + } + return text1.equals(text2); + } + + /** + * Pretty toString printer. + * + * @param keyValues + * @return a string representation of the input values + */ + public static String toString(Object... 
keyValues) { + StringBuilder result = new StringBuilder(); + result.append('['); + for (int i = 0; i < keyValues.length; i += 2) { + if (i > 0) { + result.append(", "); + } + result.append(keyValues[i]); + result.append(": "); + Object value = null; + if ((i + 1) < keyValues.length) { + value = keyValues[i + 1]; + } + if (value == null) { + result.append(""); + } else { + result.append('\''); + result.append(value); + result.append('\''); + } + } + result.append(']'); + return result.toString(); + } + + public static int hashCode(String... values) { + int result = 31; + for (int i = 0; i < values.length; i++) { + result ^= String.valueOf(values[i]).hashCode(); + } + return result; + } + + /** + * Gives the substring of the given text before the given separator. + * + * If the text does not contain the given separator then the given text is + * returned. + * + * @param text + * @param separator + * @return the substring of the given text before the given separator. + */ + public static String substringBefore(String text, char separator) { + if (isEmpty(text)) { + return text; + } + int sepPos = text.indexOf(separator); + if (sepPos < 0) { + return text; + } + return text.substring(0, sepPos); + } + + /** + * Gives the substring of the given text before the last occurrence of the + * given separator. + * + * If the text does not contain the given separator then the given text is + * returned. + * + * @param text + * @param separator + * @return the substring of the given text before the last occurrence of the given separator. + */ + public static String substringBeforeLast(String text, char separator) { + if (isEmpty(text)) { + return text; + } + int cPos = text.lastIndexOf(separator); + if (cPos < 0) { + return text; + } + return text.substring(0, cPos); + } + + /** + * Gives the substring of the given text after the last occurrence of the + * given separator. + * + * If the text does not contain the given separator then "" is returned. 
+ * + * @param text + * @param separator + * @return the substring of the given text after the last occurrence of the given separator. + */ + public static String substringAfterLast(String text, char separator) { + if (isEmpty(text)) { + return text; + } + int cPos = text.lastIndexOf(separator); + if (cPos < 0) { + return ""; + } + return text.substring(cPos + 1); + } + + /** + * Gives the substring of the given text after the given separator. + * + * If the text does not contain the given separator then "" is returned. + * + * @param text the input text + * @param c the separator char + * @return the substring of the given text after the given separator. + */ + public static String substringAfter(String text, char c) { + if (isEmpty(text)) { + return text; + } + int cPos = text.indexOf(c); + if (cPos < 0) { + return ""; + } + return text.substring(cPos + 1); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/BOMInputStream.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/BOMInputStream.java new file mode 100644 index 00000000..367a9127 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/BOMInputStream.java @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.siegmann.epublib.util.commons.io; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +/** + * This class is used to wrap a stream that includes an encoded + * {@link ByteOrderMark} as its first bytes. + * + * This class detects these bytes and, if required, can automatically skip them + * and return the subsequent byte as the first byte in the stream. + * + * The {@link ByteOrderMark} implementation has the following pre-defined BOMs: + *
    + *
  • UTF-8 - {@link ByteOrderMark#UTF_8}
  • + *
  • UTF-16BE - {@link ByteOrderMark#UTF_16BE}
  • + *
  • UTF-16LE - {@link ByteOrderMark#UTF_16LE}
  • + *
+ * + * + *

Example 1 - Detect and exclude a UTF-8 BOM

+ *
+ *      BOMInputStream bomIn = new BOMInputStream(in);
+ *      if (bomIn.hasBOM()) {
+ *          // has a UTF-8 BOM
+ *      }
+ * 
+ * + *

Example 2 - Detect a UTF-8 BOM (but don't exclude it)

+ *
+ *      boolean include = true;
+ *      BOMInputStream bomIn = new BOMInputStream(in, include);
+ *      if (bomIn.hasBOM()) {
+ *          // has a UTF-8 BOM
+ *      }
+ * 
+ * + *

Example 3 - Detect Multiple BOMs

+ *
+ *      BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
+ *      if (bomIn.hasBOM() == false) {
+ *          // No BOM found
+ *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
+ *          // has a UTF-16LE BOM
+ *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
+ *          // has a UTF-16BE BOM
+ *      }
+ * 
+ * + * @see ByteOrderMark + * @see Wikipedia - Byte Order Mark + * @version $Revision: 1052095 $ $Date: 2010-12-22 23:03:20 +0000 (Wed, 22 Dec 2010) $ + * @since Commons IO 2.0 + */ +public class BOMInputStream extends ProxyInputStream { + private final boolean include; + private final List boms; + private ByteOrderMark byteOrderMark; + private int[] firstBytes; + private int fbLength; + private int fbIndex; + private int markFbIndex; + private boolean markedAtStart; + + /** + * Constructs a new BOM InputStream that excludes + * a {@link ByteOrderMark#UTF_8} BOM. + * @param delegate the InputStream to delegate to + */ + public BOMInputStream(InputStream delegate) { + this(delegate, false, ByteOrderMark.UTF_8); + } + + /** + * Constructs a new BOM InputStream that detects a + * a {@link ByteOrderMark#UTF_8} and optionally includes it. + * @param delegate the InputStream to delegate to + * @param include true to include the UTF-8 BOM or + * false to exclude it + */ + public BOMInputStream(InputStream delegate, boolean include) { + this(delegate, include, ByteOrderMark.UTF_8); + } + + /** + * Constructs a new BOM InputStream that excludes + * the specified BOMs. + * @param delegate the InputStream to delegate to + * @param boms The BOMs to detect and exclude + */ + public BOMInputStream(InputStream delegate, ByteOrderMark... boms) { + this(delegate, false, boms); + } + + /** + * Constructs a new BOM InputStream that detects the + * specified BOMs and optionally includes them. + * @param delegate the InputStream to delegate to + * @param include true to include the specified BOMs or + * false to exclude them + * @param boms The BOMs to detect and optionally exclude + */ + public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... 
boms) { + super(delegate); + if (boms == null || boms.length == 0) { + throw new IllegalArgumentException("No BOMs specified"); + } + this.include = include; + this.boms = Arrays.asList(boms); + } + + /** + * Indicates whether the stream contains one of the specified BOMs. + * + * @return true if the stream has one of the specified BOMs, otherwise false + * if it does not + * @throws IOException if an error reading the first bytes of the stream occurs + */ + public boolean hasBOM() throws IOException { + return (getBOM() != null); + } + + /** + * Indicates whether the stream contains the specified BOM. + * + * @param bom The BOM to check for + * @return true if the stream has the specified BOM, otherwise false + * if it does not + * @throws IllegalArgumentException if the BOM is not one the stream + * is configured to detect + * @throws IOException if an error reading the first bytes of the stream occurs + */ + public boolean hasBOM(ByteOrderMark bom) throws IOException { + if (!boms.contains(bom)) { + throw new IllegalArgumentException("Stream not configure to detect " + bom); + } + return (byteOrderMark != null && getBOM().equals(bom)); + } + + /** + * Return the BOM (Byte Order Mark). + * + * @return The BOM or null if none + * @throws IOException if an error reading the first bytes of the stream occurs + */ + public ByteOrderMark getBOM() throws IOException { + if (firstBytes == null) { + int max = 0; + for (ByteOrderMark bom : boms) { + max = Math.max(max, bom.length()); + } + firstBytes = new int[max]; + for (int i = 0; i < firstBytes.length; i++) { + firstBytes[i] = in.read(); + fbLength++; + if (firstBytes[i] < 0) { + break; + } + + byteOrderMark = find(); + if (byteOrderMark != null) { + if (!include) { + fbLength = 0; + } + break; + } + } + } + return byteOrderMark; + } + + /** + * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}. 
+ * + * @return The BOM charset Name or null if no BOM found + * @throws IOException if an error reading the first bytes of the stream occurs + * + */ + public String getBOMCharsetName() throws IOException { + getBOM(); + return (byteOrderMark == null ? null : byteOrderMark.getCharsetName()); + } + + /** + * This method reads and either preserves or skips the first bytes in the + * stream. It behaves like the single-byte read() method, + * either returning a valid byte or -1 to indicate that the initial bytes + * have been processed already. + * @return the byte read (excluding BOM) or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + private int readFirstBytes() throws IOException { + getBOM(); + return (fbIndex < fbLength) ? firstBytes[fbIndex++] : -1; + } + + /** + * Find a BOM with the specified bytes. + * + * @return The matched BOM or null if none matched + */ + private ByteOrderMark find() { + for (ByteOrderMark bom : boms) { + if (matches(bom)) { + return bom; + } + } + return null; + } + + /** + * Check if the bytes match a BOM. + * + * @param bom The BOM + * @return true if the bytes match the bom, otherwise false + */ + private boolean matches(ByteOrderMark bom) { + if (bom.length() != fbLength) { + return false; + } + for (int i = 0; i < bom.length(); i++) { + if (bom.get(i) != firstBytes[i]) { + return false; + } + } + return true; + } + + //---------------------------------------------------------------------------- + // Implementation of InputStream + //---------------------------------------------------------------------------- + + /** + * Invokes the delegate's read() method, detecting and + * optionally skipping BOM. + * @return the byte read (excluding BOM) or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read() throws IOException { + int b = readFirstBytes(); + return (b >= 0) ? 
b : in.read(); + } + + /** + * Invokes the delegate's read(byte[], int, int) method, detecting + * and optionally skipping BOM. + * @param buf the buffer to read the bytes into + * @param off The start offset + * @param len The number of bytes to read (excluding BOM) + * @return the number of bytes read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(byte[] buf, int off, int len) throws IOException { + int firstCount = 0; + int b = 0; + while ((len > 0) && (b >= 0)) { + b = readFirstBytes(); + if (b >= 0) { + buf[off++] = (byte) (b & 0xFF); + len--; + firstCount++; + } + } + int secondCount = in.read(buf, off, len); + return (secondCount < 0) ? (firstCount > 0 ? firstCount : -1) : firstCount + secondCount; + } + + /** + * Invokes the delegate's read(byte[]) method, detecting and + * optionally skipping BOM. + * @param buf the buffer to read the bytes into + * @return the number of bytes read (excluding BOM) + * or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(byte[] buf) throws IOException { + return read(buf, 0, buf.length); + } + + /** + * Invokes the delegate's mark(int) method. + * @param readlimit read ahead limit + */ + @Override + public synchronized void mark(int readlimit) { + markFbIndex = fbIndex; + markedAtStart = (firstBytes == null); + in.mark(readlimit); + } + + /** + * Invokes the delegate's reset() method. + * @throws IOException if an I/O error occurs + */ + @Override + public synchronized void reset() throws IOException { + fbIndex = markFbIndex; + if (markedAtStart) { + firstBytes = null; + } + + in.reset(); + } + + /** + * Invokes the delegate's skip(long) method, detecting + * and optionallyskipping BOM. 
+ * @param n the number of bytes to skip + * @return the number of bytes to skipped or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public long skip(long n) throws IOException { + while ((n > 0) && (readFirstBytes() >= 0)) { + n--; + } + return in.skip(n); + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/ByteOrderMark.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/ByteOrderMark.java new file mode 100644 index 00000000..55ceeea8 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/ByteOrderMark.java @@ -0,0 +1,170 @@ +package nl.siegmann.epublib.util.commons.io; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +/** + * Byte Order Mark (BOM) representation - + * see {@link BOMInputStream}. 
+ * + * @see BOMInputStream + * @see Wikipedia - Byte Order Mark + * @version $Id: ByteOrderMark.java 1005099 2010-10-06 16:13:01Z niallp $ + * @since Commons IO 2.0 + */ +public class ByteOrderMark implements Serializable { + + private static final long serialVersionUID = 1L; + + /** UTF-8 BOM */ + public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF); + /** UTF-16BE BOM (Big Endian) */ + public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF); + /** UTF-16LE BOM (Little Endian) */ + public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE); + + private final String charsetName; + private final int[] bytes; + + /** + * Construct a new BOM; the given bytes are copied into a private array. + * + * @param charsetName The name of the charset the BOM represents + * @param bytes The BOM's bytes + * @throws IllegalArgumentException if the charsetName is null or + * zero length + * @throws IllegalArgumentException if the bytes are null or zero + * length + */ + public ByteOrderMark(String charsetName, int... bytes) { + if (charsetName == null || charsetName.length() == 0) { + throw new IllegalArgumentException("No charsetName specified"); + } + if (bytes == null || bytes.length == 0) { + throw new IllegalArgumentException("No bytes specified"); + } + this.charsetName = charsetName; + this.bytes = new int[bytes.length]; + System.arraycopy(bytes, 0, this.bytes, 0, bytes.length); + } + + /** + * Return the name of the {@link java.nio.charset.Charset} the BOM represents. + * + * @return the character set name + */ + public String getCharsetName() { + return charsetName; + } + + /** + * Return the length of the BOM's bytes. + * + * @return the length of the BOM's bytes + */ + public int length() { + return bytes.length; + } + + /** + * The byte at the specified position.
+ * + * @param pos The position + * @return The specified byte + */ + public int get(int pos) { + return bytes[pos]; + } + + /** + * Return a copy of the BOM's bytes, each value narrowed from int to byte. + * + * @return a copy of the BOM's bytes + */ + public byte[] getBytes() { + byte[] copy = new byte[bytes.length]; + for (int i = 0; i < bytes.length; i++) { + copy[i] = (byte)bytes[i]; + } + return copy; + } + + /** + * Indicates if this BOM's bytes equal another's; the charset name is not compared. + * + * @param obj The object to compare to + * @return true if the bom's bytes are equal, otherwise + * false + */ + @Override + public boolean equals(Object obj) { + if (!(obj instanceof ByteOrderMark)) { + return false; + } + ByteOrderMark bom = (ByteOrderMark)obj; + if (bytes.length != bom.length()) { + return false; + } + for (int i = 0; i < bytes.length; i++) { + if (bytes[i] != bom.get(i)) { + return false; + } + } + return true; + } + + /** + * Return the hashcode for this BOM, built from the class hash plus the sum of the bytes. + * + * @return the hashcode for this BOM. + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + int hashCode = getClass().hashCode(); + for (int b : bytes) { + hashCode += b; + } + return hashCode; + } + + /** + * Provide a String representation of the BOM.
+ * + * @return a String of the form SimpleClassName[charsetName: 0xNN,0xNN] with upper-case hex bytes + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append(getClass().getSimpleName()); + builder.append('['); + builder.append(charsetName); + builder.append(": "); + for (int i = 0; i < bytes.length; i++) { + if (i > 0) { + builder.append(","); + } + builder.append("0x"); + builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase()); + } + builder.append(']'); + return builder.toString(); + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/ProxyInputStream.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/ProxyInputStream.java new file mode 100644 index 00000000..d8d58230 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/ProxyInputStream.java @@ -0,0 +1,238 @@ +package nl.siegmann.epublib.util.commons.io; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A Proxy stream which acts as expected, that is it passes the method + * calls on to the proxied stream and doesn't change which methods are + * being called. + *

+ * It is an alternative base class to FilterInputStream + * to increase reusability, because FilterInputStream changes the + * methods being called, such as read(byte[]) to read(byte[], int, int). + *

+ * See the protected methods for ways in which a subclass can easily decorate + * a stream with custom pre-, post- or error processing functionality. + * + * @author Stephen Colebourne + * @version $Id: ProxyInputStream.java 934041 2010-04-14 17:37:24Z jukka $ + */ +public abstract class ProxyInputStream extends FilterInputStream { + + /** + * Constructs a new ProxyInputStream. + * + * @param proxy the InputStream to delegate to + */ + public ProxyInputStream(InputStream proxy) { + super(proxy); + // the proxy is stored in a protected superclass variable named 'in' + } + + /** + * Invokes the delegate's read() method, bracketed by the beforeRead(1) and afterRead hooks. + * @return the byte read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read() throws IOException { + try { + beforeRead(1); + int b = in.read(); + afterRead(b != -1 ? 1 : -1); + return b; + } catch (IOException e) { + handleIOException(e); + return -1; + } + } + + /** + * Invokes the delegate's read(byte[]) method, bracketed by the beforeRead and afterRead hooks. + * @param bts the buffer to read the bytes into + * @return the number of bytes read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(byte[] bts) throws IOException { + try { + beforeRead(bts != null ? bts.length : 0); + int n = in.read(bts); + afterRead(n); + return n; + } catch (IOException e) { + handleIOException(e); + return -1; + } + } + + /** + * Invokes the delegate's read(byte[], int, int) method.
+ * @param bts the buffer to read the bytes into + * @param off The start offset + * @param len The number of bytes to read + * @return the number of bytes read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(byte[] bts, int off, int len) throws IOException { + try { + beforeRead(len); + int n = in.read(bts, off, len); + afterRead(n); + return n; + } catch (IOException e) { + handleIOException(e); + return -1; + } + } + + /** + * Invokes the delegate's skip(long) method; the read hooks are not called. + * @param ln the number of bytes to skip + * @return the actual number of bytes skipped, or 0 if handleIOException swallowed an error + * @throws IOException if an I/O error occurs + */ + @Override + public long skip(long ln) throws IOException { + try { + return in.skip(ln); + } catch (IOException e) { + handleIOException(e); + return 0; + } + } + + /** + * Invokes the delegate's available() method (through super.available()). + * @return the number of available bytes, or 0 if handleIOException swallowed an error + * @throws IOException if an I/O error occurs + */ + @Override + public int available() throws IOException { + try { + return super.available(); + } catch (IOException e) { + handleIOException(e); + return 0; + } + } + + /** + * Invokes the delegate's close() method. + * @throws IOException if an I/O error occurs + */ + @Override + public void close() throws IOException { + try { + in.close(); + } catch (IOException e) { + handleIOException(e); + } + } + + /** + * Invokes the delegate's mark(int) method. + * @param readlimit read ahead limit + */ + @Override + public synchronized void mark(int readlimit) { + in.mark(readlimit); + } + + /** + * Invokes the delegate's reset() method. + * @throws IOException if an I/O error occurs + */ + @Override + public synchronized void reset() throws IOException { + try { + in.reset(); + } catch (IOException e) { + handleIOException(e); + } + } + + /** + * Invokes the delegate's markSupported() method.
+ * @return true if mark is supported, otherwise false + */ + @Override + public boolean markSupported() { + return in.markSupported(); + } + + /** + * Invoked by the read methods before the call is proxied. The number + * of bytes that the caller wanted to read (1 for the {@link #read()} + * method, buffer length for {@link #read(byte[])}, etc.) is given as + * an argument. + *

+ * Subclasses can override this method to add common pre-processing + * functionality without having to override all the read methods. + * The default implementation does nothing. + *

+ * Note this method is not called from {@link #skip(long)} or + * {@link #reset()}. You need to explicitly override those methods if + * you want to add pre-processing steps also to them. + * + * @since Commons IO 2.0 + * @param n number of bytes that the caller asked to be read + * @throws IOException if the pre-processing fails + */ + protected void beforeRead(int n) throws IOException { + } + + /** + * Invoked by the read methods after the proxied call has returned + * successfully. The number of bytes returned to the caller (or -1 if + * the end of stream was reached) is given as an argument. + *

+ * Subclasses can override this method to add common post-processing + * functionality without having to override all the read methods. + * The default implementation does nothing. + *

+ * Note this method is not called from {@link #skip(long)} or + * {@link #reset()}. You need to explicitly override those methods if + * you want to add post-processing steps also to them. + * + * @since Commons IO 2.0 + * @param n number of bytes read, or -1 if the end of stream was reached + * @throws IOException if the post-processing fails + */ + protected void afterRead(int n) throws IOException { + } + + /** + * Handle any IOExceptions thrown. + *

+ * This method provides a point to implement custom exception + * handling. The default behaviour is to re-throw the exception. + * @param e The IOException thrown + * @throws IOException if an I/O error occurs + * @since Commons IO 2.0 + */ + protected void handleIOException(IOException e) throws IOException { + throw e; + } + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/XmlStreamReader.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/XmlStreamReader.java new file mode 100644 index 00000000..1a5f18c9 --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/XmlStreamReader.java @@ -0,0 +1,752 @@ +package nl.siegmann.epublib.util.commons.io; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; +import java.text.MessageFormat; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * Character stream that handles all the necessary Voodoo to figure out the + * charset encoding of the XML document within the stream. + *

+ * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. + * This one IS a character stream. + *

+ * All this has to be done without consuming characters from the stream, if not + * the XML parser will not recognize the document as a valid XML. This is not + * 100% true, but it's close enough (UTF-8 BOM is not handled by all parsers + * right now, XmlStreamReader handles it and things work in all parsers). + *

+ * The XmlStreamReader class handles the charset encoding of XML documents in + * Files, raw streams and HTTP streams by offering a wide set of constructors. + *

+ * By default the charset encoding detection is lenient, the constructor with + * the lenient flag can be used for strict detection (following HTTP MIME and XML + * specifications). All this is nicely explained by Mark Pilgrim in his blog, + * Determining the character encoding of a feed. + *

+ * Originally developed for ROME under + * Apache License 2.0. + * + * @author Alejandro Abdelnur + * @version $Id: XmlStreamReader.java 1052161 2010-12-23 03:12:09Z niallp $ + * @see org.apache.commons.io.output.XmlStreamWriter + * @since Commons IO 2.0 + */ +public class XmlStreamReader extends Reader { + private static final int BUFFER_SIZE = 4096; + + private static final String UTF_8 = "UTF-8"; + + private static final String US_ASCII = "US-ASCII"; + + private static final String UTF_16BE = "UTF-16BE"; + + private static final String UTF_16LE = "UTF-16LE"; + + private static final String UTF_16 = "UTF-16"; + + private static final String EBCDIC = "CP1047"; + + private static final ByteOrderMark[] BOMS = new ByteOrderMark[] { + ByteOrderMark.UTF_8, + ByteOrderMark.UTF_16BE, + ByteOrderMark.UTF_16LE + }; + private static final ByteOrderMark[] XML_GUESS_BYTES = new ByteOrderMark[] { + new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D), + new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F), + new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00), + new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94) + }; + + + private final Reader reader; + + private final String encoding; + + private final String defaultEncoding; + + /** + * Returns the default encoding to use if none is set in HTTP content-type, + * XML prolog and the rules based on content-type are not adequate. + *

+ * If it is NULL the content-type based rules are used. + * + * @return the default encoding to use. + */ + public String getDefaultEncoding() { + return defaultEncoding; + } + + /** + * Creates a Reader for a File. + *

+ * It looks for a UTF-8 or UTF-16 BOM first, if none sniffs the XML prolog charset, + * if this is also missing defaults to UTF-8. + *

+ * It does a lenient charset encoding detection, check the constructor with + * the lenient parameter for details. + * + * @param file File to create a Reader from. + * @throws IOException thrown if there is a problem reading the file. NOTE(review): the FileInputStream opened here is not closed when detection throws. + */ + public XmlStreamReader(File file) throws IOException { + this(new FileInputStream(file)); + } + + /** + * Creates a Reader for a raw InputStream. + *

+ * It follows the same logic used for files. + *

+ * It does a lenient charset encoding detection, check the constructor with + * the lenient parameter for details. + * + * @param is InputStream to create a Reader from. + * @throws IOException thrown if there is a problem reading the stream. + */ + public XmlStreamReader(InputStream is) throws IOException { + this(is, true); + } + + /** + * Creates a Reader for a raw InputStream. + *

+ * It follows the same logic used for files. + *

+ * If lenient detection is indicated and the detection above fails as per + * specifications it then attempts the following: + *

+ * If the content type was 'text/html' it replaces it with 'text/xml' and + * tries the detection again. + *

+ * Else if the XML prolog had a charset encoding that encoding is used. + *

+ * Else if the content type had a charset encoding that encoding is used. + *

+ * Else 'UTF-8' is used. + *

+ * If lenient detection is indicated an XmlStreamReaderException is never + * thrown. + * + * @param is InputStream to create a Reader from. + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @throws IOException thrown if there is a problem reading the stream. + * @throws XmlStreamReaderException thrown if the charset encoding could not + * be determined according to the specs. + */ + public XmlStreamReader(InputStream is, boolean lenient) throws IOException { + this(is, lenient, null); + } + + /** + * Creates a Reader for a raw InputStream. + *

+ * It follows the same logic used for files. + *

+ * If lenient detection is indicated and the detection above fails as per + * specifications it then attempts the following: + *

+ * If the content type was 'text/html' it replaces it with 'text/xml' and + * tries the detection again. + *

+ * Else if the XML prolog had a charset encoding that encoding is used. + *

+ * Else if the content type had a charset encoding that encoding is used. + *

+ * Else 'UTF-8' is used. + *

+ * If lenient detection is indicated an XmlStreamReaderException is never + * thrown. + * + * @param is InputStream to create a Reader from. + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @param defaultEncoding The default encoding; may be null, in which case UTF-8 is the fallback + * @throws IOException thrown if there is a problem reading the stream. + * @throws XmlStreamReaderException thrown if the charset encoding could not + * be determined according to the specs. + */ + public XmlStreamReader(InputStream is, boolean lenient, String defaultEncoding) throws IOException { + this.defaultEncoding = defaultEncoding; + BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); + BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + this.encoding = doRawStream(bom, pis, lenient); + this.reader = new InputStreamReader(pis, encoding); + } + + /** + * Creates a Reader using the InputStream of a URL. + *

+ * If the URL is not of type HTTP and there is no 'content-type' header in + * the fetched data it uses the same logic used for Files. + *

+ * If the URL is a HTTP Url or there is a 'content-type' header in the + * fetched data it uses the same logic used for an InputStream with + * content-type. + *

+ * It does a lenient charset encoding detection, check the constructor with + * the lenient parameter for details. + * + * @param url URL to create a Reader from. + * @throws IOException thrown if there is a problem reading the stream of + * the URL. + */ + public XmlStreamReader(URL url) throws IOException { + this(url.openConnection(), null); + } + + /** + * Creates a Reader using the InputStream of a URLConnection. + *

+ * If the URLConnection is not of type HttpURLConnection and there is not + * 'content-type' header in the fetched data it uses the same logic used for + * files. + *

+ * If the URLConnection is a HTTP Url or there is a 'content-type' header in + * the fetched data it uses the same logic used for an InputStream with + * content-type. + *

+ * It does a lenient charset encoding detection, check the constructor with + * the lenient parameter for details. + * + * @param conn URLConnection to create a Reader from. + * @param defaultEncoding The default encoding + * @throws IOException thrown if there is a problem reading the stream of + * the URLConnection. + */ + public XmlStreamReader(URLConnection conn, String defaultEncoding) throws IOException { + this.defaultEncoding = defaultEncoding; + boolean lenient = true; + String contentType = conn.getContentType(); + InputStream is = conn.getInputStream(); + BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); + BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + if (conn instanceof HttpURLConnection || contentType != null) { + this.encoding = doHttpStream(bom, pis, contentType, lenient); + } else { + this.encoding = doRawStream(bom, pis, lenient); + } + this.reader = new InputStreamReader(pis, encoding); + } + + /** + * Creates a Reader using an InputStream and the associated content-type + * header. + *

+ * First it checks if the stream has BOM. If there is no BOM it checks the + * content-type encoding. If there is no content-type encoding it checks the + * XML prolog encoding. If there is no XML prolog encoding it uses the default + * encoding mandated by the content-type MIME type. + *

+ * It does a lenient charset encoding detection, check the constructor with + * the lenient parameter for details. + * + * @param is InputStream to create the reader from. + * @param httpContentType content-type header to use for the resolution of + * the charset encoding. + * @throws IOException thrown if there is a problem reading the file. + */ + public XmlStreamReader(InputStream is, String httpContentType) + throws IOException { + this(is, httpContentType, true); + } + + /** + * Creates a Reader using an InputStream and the associated content-type + * header. Leniency of the encoding detection is controlled by the lenient flag. + *

+ * First it checks if the stream has BOM. If there is no BOM it checks the + * content-type encoding. If there is no content-type encoding it checks the + * XML prolog encoding. If there is no XML prolog encoding it uses the default + * encoding mandated by the content-type MIME type. + *

+ * If lenient detection is indicated and the detection above fails as per + * specifications it then attempts the following: + *

+ * If the content type was 'text/html' it replaces it with 'text/xml' and + * tries the detection again. + *

+ * Else if the XML prolog had a charset encoding that encoding is used. + *

+ * Else if the content type had a charset encoding that encoding is used. + *

+ * Else 'UTF-8' is used. + *

+ * If lenient detection is indicated an XmlStreamReaderException is never + * thrown. + * + * @param is InputStream to create the reader from. + * @param httpContentType content-type header to use for the resolution of + * the charset encoding. + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @param defaultEncoding The default encoding; may be null + * @throws IOException thrown if there is a problem reading the file. + * @throws XmlStreamReaderException thrown if the charset encoding could not + * be determined according to the specs. + */ + public XmlStreamReader(InputStream is, String httpContentType, + boolean lenient, String defaultEncoding) throws IOException { + this.defaultEncoding = defaultEncoding; + BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); + BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + this.encoding = doHttpStream(bom, pis, httpContentType, lenient); + this.reader = new InputStreamReader(pis, encoding); + } + + /** + * Creates a Reader using an InputStream and the associated content-type + * header. Leniency of the encoding detection is controlled by the lenient flag. + *

+ * First it checks if the stream has BOM. If there is no BOM it checks the + * content-type encoding. If there is no content-type encoding it checks the + * XML prolog encoding. If there is no XML prolog encoding it uses the default + * encoding mandated by the content-type MIME type. + *

+ * If lenient detection is indicated and the detection above fails as per + * specifications it then attempts the following: + *

+ * If the content type was 'text/html' it replaces it with 'text/xml' and + * tries the detection again. + *

+ * Else if the XML prolog had a charset encoding that encoding is used. + *

+ * Else if the content type had a charset encoding that encoding is used. + *

+ * Else 'UTF-8' is used. + *

+ * If lenient detection is indicated an XmlStreamReaderException is never + * thrown. + * + * @param is InputStream to create the reader from. + * @param httpContentType content-type header to use for the resolution of + * the charset encoding. + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @throws IOException thrown if there is a problem reading the file. + * @throws XmlStreamReaderException thrown if the charset encoding could not + * be determined according to the specs. + */ + public XmlStreamReader(InputStream is, String httpContentType, + boolean lenient) throws IOException { + this(is, httpContentType, lenient, null); + } + + /** + * Returns the charset encoding of the XmlStreamReader. + * + * @return charset encoding. + */ + public String getEncoding() { + return encoding; + } + + /** + * Invokes the underlying reader's read(char[], int, int) method. + * @param buf the buffer to read the characters into + * @param offset The start offset + * @param len The maximum number of characters to read + * @return the number of characters read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(char[] buf, int offset, int len) throws IOException { + return reader.read(buf, offset, len); + } + + /** + * Closes the XmlStreamReader stream by closing the underlying reader. + * + * @throws IOException thrown if there was a problem closing the stream. + */ + @Override + public void close() throws IOException { + reader.close(); + } + + /** + * Process the raw stream. + * + * @param bom BOMInputStream to detect byte order marks + * @param pis BOMInputStream to guess XML encoding + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @return the encoding to be used + * @throws IOException thrown if there is a problem reading the stream.
+ */ + private String doRawStream(BOMInputStream bom, BOMInputStream pis, boolean lenient) + throws IOException { + String bomEnc = bom.getBOMCharsetName(); + String xmlGuessEnc = pis.getBOMCharsetName(); + String xmlEnc = getXmlProlog(pis, xmlGuessEnc); + try { + return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); + } catch (XmlStreamReaderException ex) { + if (lenient) { + return doLenientDetection(null, ex); + } else { + throw ex; + } + } + } + + /** + * Process a HTTP stream: resolve the encoding from the content type, the BOM, the guessed and the prolog encodings, using lenient detection on failure if allowed. + * + * @param bom BOMInputStream to detect byte order marks + * @param pis BOMInputStream to guess XML encoding + * @param httpContentType The HTTP content type + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @return the encoding to be used + * @throws IOException thrown if there is a problem reading the stream, or the strict (non-lenient) detection fails. + */ + private String doHttpStream(BOMInputStream bom, BOMInputStream pis, String httpContentType, + boolean lenient) throws IOException { + String bomEnc = bom.getBOMCharsetName(); + String xmlGuessEnc = pis.getBOMCharsetName(); + String xmlEnc = getXmlProlog(pis, xmlGuessEnc); + try { + return calculateHttpEncoding(httpContentType, bomEnc, + xmlGuessEnc, xmlEnc, lenient); + } catch (XmlStreamReaderException ex) { + if (lenient) { + return doLenientDetection(httpContentType, ex); + } else { + throw ex; + } + } + } + + /** + * Do lenient detection. + * + * @param httpContentType content-type header to use for the resolution of + * the charset encoding. + * @param ex The thrown exception + * @return the encoding + * @throws IOException thrown if there is a problem reading the stream.
+ */ + private String doLenientDetection(String httpContentType, + XmlStreamReaderException ex) throws IOException { + if (httpContentType != null && httpContentType.startsWith("text/html")) { + httpContentType = httpContentType.substring("text/html".length()); + httpContentType = "text/xml" + httpContentType; + try { + return calculateHttpEncoding(httpContentType, ex.getBomEncoding(), + ex.getXmlGuessEncoding(), ex.getXmlEncoding(), true); + } catch (XmlStreamReaderException ex2) { + ex = ex2; + } + } + String encoding = ex.getXmlEncoding(); + if (encoding == null) { + encoding = ex.getContentTypeEncoding(); + } + if (encoding == null) { + encoding = (defaultEncoding == null) ? UTF_8 : defaultEncoding; + } + return encoding; + } + + /** + * Calculate the raw encoding from the BOM, the guessed encoding and the XML prolog encoding. + * + * @param bomEnc BOM encoding + * @param xmlGuessEnc XML Guess encoding + * @param xmlEnc XML encoding + * @return the raw encoding; without a BOM and without a guess or a prolog encoding this is the default encoding, or UTF-8 if that is null + * @throws IOException an XmlStreamReaderException is thrown if the three encodings contradict each other or the BOM is unknown. + */ + String calculateRawEncoding(String bomEnc, String xmlGuessEnc, + String xmlEnc) throws IOException { + + // BOM is Null + if (bomEnc == null) { + if (xmlGuessEnc == null || xmlEnc == null) { + return (defaultEncoding == null ?
UTF_8 : defaultEncoding); + } + if (xmlEnc.equals(UTF_16) && + (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) { + return xmlGuessEnc; + } + return xmlEnc; + } + + // BOM is UTF-8 + if (bomEnc.equals(UTF_8)) { + if (xmlGuessEnc != null && !xmlGuessEnc.equals(UTF_8)) { + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc }); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); + } + if (xmlEnc != null && !xmlEnc.equals(UTF_8)) { + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc }); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); + } + return bomEnc; + } + + // BOM is UTF-16BE or UTF-16LE + if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) { + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) { + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc }); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); + } + if (xmlEnc != null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) { + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc }); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); + } + return bomEnc; + } + + // BOM is something else + String msg = MessageFormat.format(RAW_EX_2, new Object[] { bomEnc, xmlGuessEnc, xmlEnc }); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); + } + + + /** + * Calculate the HTTP encoding. + * + * @param httpContentType The HTTP content type + * @param bomEnc BOM encoding + * @param xmlGuessEnc XML Guess encoding + * @param xmlEnc XML encoding + * @param lenient indicates if the charset encoding detection should be + * relaxed. + * @return the HTTP encoding + * @throws IOException thrown if there is a problem reading the stream.
+ */ + String calculateHttpEncoding(String httpContentType, + String bomEnc, String xmlGuessEnc, String xmlEnc, + boolean lenient) throws IOException { + + // Lenient and has XML encoding: the prolog encoding wins outright + if (lenient && xmlEnc != null) { + return xmlEnc; + } + + // Determine mime/encoding content types from HTTP Content Type + String cTMime = getContentTypeMime(httpContentType); + String cTEnc = getContentTypeEncoding(httpContentType); + boolean appXml = isAppXml(cTMime); + boolean textXml = isTextXml(cTMime); + + // Mime type NOT "application/xml" or "text/xml" + if (!appXml && !textXml) { + String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + } + + // No content type encoding: application XML types use raw detection, text XML types the default encoding or US-ASCII + if (cTEnc == null) { + if (appXml) { + return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); + } else { + return (defaultEncoding == null) ? US_ASCII : defaultEncoding; + } + } + + // UTF-16BE or UTF-16LE content type encoding: a BOM must not be present + if (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE)) { + if (bomEnc != null) { + String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + } + return cTEnc; + } + + // UTF-16 content type encoding: a UTF-16 BOM is required and decides the byte order + if (cTEnc.equals(UTF_16)) { + if (bomEnc != null && bomEnc.startsWith(UTF_16)) { + return bomEnc; + } + String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + } + + return cTEnc; + } + + /** + * Returns MIME type or NULL if httpContentType is NULL.
+ * + * @param httpContentType the HTTP content type + * @return The mime content type + */ + static String getContentTypeMime(String httpContentType) { + String mime = null; + if (httpContentType != null) { + int i = httpContentType.indexOf(";"); + if (i >= 0) { + mime = httpContentType.substring(0, i); + } else { + mime = httpContentType; + } + mime = mime.trim(); + } + return mime; + } + + private static final Pattern CHARSET_PATTERN = Pattern + .compile("charset=[\"']?([.[^; \"']]*)[\"']?"); + + /** + * Returns charset parameter value, NULL if not present, NULL if + * httpContentType is NULL. + * + * @param httpContentType the HTTP content type + * @return The content type encoding in upper case, or NULL. NOTE(review): toUpperCase() here uses the default locale. + */ + static String getContentTypeEncoding(String httpContentType) { + String encoding = null; + if (httpContentType != null) { + int i = httpContentType.indexOf(";"); + if (i > -1) { + String postMime = httpContentType.substring(i + 1); + Matcher m = CHARSET_PATTERN.matcher(postMime); + encoding = (m.find()) ? m.group(1) : null; + encoding = (encoding != null) ? encoding.toUpperCase() : null; + } + } + return encoding; + } + + public static final Pattern ENCODING_PATTERN = Pattern.compile( + "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", + Pattern.MULTILINE); + + /** + * Returns the encoding declared in the XML prolog, NULL if none or if guessedEnc is NULL. + * + * @param is InputStream to sniff; it is marked before reading and reset afterwards. + * @param guessedEnc guessed encoding, used to decode the first bytes of the stream + * @return the encoding declared in the XML prolog, upper-cased (default locale) and stripped of its quotes + * @throws IOException thrown if there is a problem reading the stream or no closing angle bracket is found in the bytes read.
+ */ + private static String getXmlProlog(InputStream is, String guessedEnc) + throws IOException { + String encoding = null; + if (guessedEnc != null) { + byte[] bytes = new byte[BUFFER_SIZE]; + is.mark(BUFFER_SIZE); + int offset = 0; + int max = BUFFER_SIZE; + int c = is.read(bytes, offset, max); + int firstGT = -1; + String xmlProlog = null; + while (c != -1 && firstGT == -1 && offset < BUFFER_SIZE) { + offset += c; + max -= c; + c = is.read(bytes, offset, max); + xmlProlog = new String(bytes, 0, offset, guessedEnc); + firstGT = xmlProlog.indexOf('>'); + } + if (firstGT == -1) { + if (c == -1) { + throw new IOException("Unexpected end of XML stream"); + } else { + throw new IOException( + "XML prolog or ROOT element not found on first " + + offset + " bytes"); + } + } + int bytesRead = offset; + if (bytesRead > 0) { + is.reset(); + BufferedReader bReader = new BufferedReader(new StringReader( + xmlProlog.substring(0, firstGT + 1))); + StringBuilder prolog = new StringBuilder(); + String line = bReader.readLine(); + while (line != null) { + prolog.append(line); + line = bReader.readLine(); + } + Matcher m = ENCODING_PATTERN.matcher(prolog); + if (m.find()) { + encoding = m.group(1).toUpperCase(); + encoding = encoding.substring(1, encoding.length() - 1); + } + } + } + return encoding; + } + + /** + * Indicates if the MIME type belongs to the APPLICATION XML family. + * + * @param mime The mime type + * @return true if the mime type belongs to the APPLICATION XML family, + * otherwise false + */ + static boolean isAppXml(String mime) { + return mime != null && + (mime.equals("application/xml") || + mime.equals("application/xml-dtd") || + mime.equals("application/xml-external-parsed-entity") || + (mime.startsWith("application/") && mime.endsWith("+xml"))); + } + + /** + * Indicates if the MIME type belongs to the TEXT XML family; a NULL mime type does not.
+ * + * @param mime The mime type + * @return true if the mime type belongs to the TEXT XML family, + * otherwise false + */ + static boolean isTextXml(String mime) { + return mime != null && + (mime.equals("text/xml") || + mime.equals("text/xml-external-parsed-entity") || + (mime.startsWith("text/") && mime.endsWith("+xml"))); + } + + private static final String RAW_EX_1 = + "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch"; + + private static final String RAW_EX_2 = + "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM"; + + private static final String HTTP_EX_1 = + "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL"; + + private static final String HTTP_EX_2 = + "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch"; + + private static final String HTTP_EX_3 = + "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME"; + +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/XmlStreamReaderException.java b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/XmlStreamReaderException.java new file mode 100644 index 00000000..1ff2505f --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/util/commons/io/XmlStreamReaderException.java @@ -0,0 +1,138 @@ +package nl.siegmann.epublib.util.commons.io; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * The XmlStreamReaderException is thrown by the XmlStreamReader constructors if + * the charset encoding can not be determined according to the XML 1.0 + * specification and RFC 3023. + *

+ * The exception returns the unconsumed InputStream to allow the application to + * do an alternate processing with the stream. Note that the original + * InputStream given to the XmlStreamReader cannot be used as that one has been + * already read. + * + * @author Alejandro Abdelnur + * @version $Id: XmlStreamReaderException.java 1004112 2010-10-04 04:48:25Z niallp $ + * @since Commons IO 2.0 + */ +public class XmlStreamReaderException extends IOException { + + private static final long serialVersionUID = 1L; + + private final String bomEncoding; + + private final String xmlGuessEncoding; + + private final String xmlEncoding; + + private final String contentTypeMime; + + private final String contentTypeEncoding; + + /** + * Creates an exception instance if the charset encoding could not be + * determined. + *

+ * Instances of this exception are thrown by the XmlStreamReader. + * + * @param msg message describing the reason for the exception. + * @param bomEnc BOM encoding. + * @param xmlGuessEnc XML guess encoding. + * @param xmlEnc XML prolog encoding. + */ + public XmlStreamReaderException(String msg, String bomEnc, + String xmlGuessEnc, String xmlEnc) { + this(msg, null, null, bomEnc, xmlGuessEnc, xmlEnc); + } + + /** + * Creates an exception instance if the charset encoding could not be + * determined. + *

+ * Instances of this exception are thrown by the XmlStreamReader. + * + * @param msg message describing the reason for the exception. + * @param ctMime MIME type in the content-type. + * @param ctEnc encoding in the content-type. + * @param bomEnc BOM encoding. + * @param xmlGuessEnc XML guess encoding. + * @param xmlEnc XML prolog encoding. + */ + public XmlStreamReaderException(String msg, String ctMime, String ctEnc, + String bomEnc, String xmlGuessEnc, String xmlEnc) { + super(msg); + contentTypeMime = ctMime; + contentTypeEncoding = ctEnc; + bomEncoding = bomEnc; + xmlGuessEncoding = xmlGuessEnc; + xmlEncoding = xmlEnc; + } + + /** + * Returns the BOM encoding found in the InputStream. + * + * @return the BOM encoding, null if none. + */ + public String getBomEncoding() { + return bomEncoding; + } + + /** + * Returns the encoding guess based on the first bytes of the InputStream. + * + * @return the encoding guess, null if it couldn't be guessed. + */ + public String getXmlGuessEncoding() { + return xmlGuessEncoding; + } + + /** + * Returns the encoding found in the XML prolog of the InputStream. + * + * @return the encoding of the XML prolog, null if none. + */ + public String getXmlEncoding() { + return xmlEncoding; + } + + /** + * Returns the MIME type in the content-type used to attempt determining the + * encoding. + * + * @return the MIME type in the content-type, null if there was not + * content-type or the encoding detection did not involve HTTP. + */ + public String getContentTypeMime() { + return contentTypeMime; + } + + /** + * Returns the encoding in the content-type used to attempt determining the + * encoding. + * + * @return the encoding in the content-type, null if there was not + * content-type, no encoding in it or the encoding detection did not + * involve HTTP. 
+ */ + public String getContentTypeEncoding() { + return contentTypeEncoding; + } +} diff --git a/epublib-core/src/main/java/nl/siegmann/epublib/utilities/StreamWriterDelegate.java b/epublib-core/src/main/java/nl/siegmann/epublib/utilities/StreamWriterDelegate.java new file mode 100644 index 00000000..9313b0fc --- /dev/null +++ b/epublib-core/src/main/java/nl/siegmann/epublib/utilities/StreamWriterDelegate.java @@ -0,0 +1,202 @@ +package nl.siegmann.epublib.utilities; +/* + * Copyright (c) 2006, John Kristian + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of StAX-Utils nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +import javax.xml.namespace.NamespaceContext; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +/** + * Abstract class for writing filtered XML streams. This class provides methods + * that merely delegate to the contained stream. Subclasses should override some + * of these methods, and may also provide additional methods and fields. + * + * @author John Kristian + */ +public abstract class StreamWriterDelegate implements XMLStreamWriter { + + protected StreamWriterDelegate(XMLStreamWriter out) { + this .out = out; + } + + protected XMLStreamWriter out; + + public Object getProperty(String name) + throws IllegalArgumentException { + return out.getProperty(name); + } + + public NamespaceContext getNamespaceContext() { + return out.getNamespaceContext(); + } + + public void setNamespaceContext(NamespaceContext context) + throws XMLStreamException { + out.setNamespaceContext(context); + } + + public void setDefaultNamespace(String uri) + throws XMLStreamException { + out.setDefaultNamespace(uri); + } + + public void writeStartDocument() throws XMLStreamException { + out.writeStartDocument(); + } + + public void writeStartDocument(String version) + throws XMLStreamException { + out.writeStartDocument(version); + } + + public void writeStartDocument(String encoding, String version) + throws XMLStreamException { + out.writeStartDocument(encoding, version); + } + + public void 
writeDTD(String dtd) throws XMLStreamException { + out.writeDTD(dtd); + } + + public void writeProcessingInstruction(String target) + throws XMLStreamException { + out.writeProcessingInstruction(target); + } + + public void writeProcessingInstruction(String target, String data) + throws XMLStreamException { + out.writeProcessingInstruction(target, data); + } + + public void writeComment(String data) throws XMLStreamException { + out.writeComment(data); + } + + public void writeEmptyElement(String localName) + throws XMLStreamException { + out.writeEmptyElement(localName); + } + + public void writeEmptyElement(String namespaceURI, String localName) + throws XMLStreamException { + out.writeEmptyElement(namespaceURI, localName); + } + + public void writeEmptyElement(String prefix, String localName, + String namespaceURI) throws XMLStreamException { + out.writeEmptyElement(prefix, localName, namespaceURI); + } + + public void writeStartElement(String localName) + throws XMLStreamException { + out.writeStartElement(localName); + } + + public void writeStartElement(String namespaceURI, String localName) + throws XMLStreamException { + out.writeStartElement(namespaceURI, localName); + } + + public void writeStartElement(String prefix, String localName, + String namespaceURI) throws XMLStreamException { + out.writeStartElement(prefix, localName, namespaceURI); + } + + public void writeDefaultNamespace(String namespaceURI) + throws XMLStreamException { + out.writeDefaultNamespace(namespaceURI); + } + + public void writeNamespace(String prefix, String namespaceURI) + throws XMLStreamException { + out.writeNamespace(prefix, namespaceURI); + } + + public String getPrefix(String uri) throws XMLStreamException { + return out.getPrefix(uri); + } + + public void setPrefix(String prefix, String uri) + throws XMLStreamException { + out.setPrefix(prefix, uri); + } + + public void writeAttribute(String localName, String value) + throws XMLStreamException { + 
out.writeAttribute(localName, value); + } + + public void writeAttribute(String namespaceURI, String localName, + String value) throws XMLStreamException { + out.writeAttribute(namespaceURI, localName, value); + } + + public void writeAttribute(String prefix, String namespaceURI, + String localName, String value) throws XMLStreamException { + out.writeAttribute(prefix, namespaceURI, localName, value); + } + + public void writeCharacters(String text) throws XMLStreamException { + out.writeCharacters(text); + } + + public void writeCharacters(char[] text, int start, int len) + throws XMLStreamException { + out.writeCharacters(text, start, len); + } + + public void writeCData(String data) throws XMLStreamException { + out.writeCData(data); + } + + public void writeEntityRef(String name) throws XMLStreamException { + out.writeEntityRef(name); + } + + public void writeEndElement() throws XMLStreamException { + out.writeEndElement(); + } + + public void writeEndDocument() throws XMLStreamException { + out.writeEndDocument(); + } + + public void flush() throws XMLStreamException { + out.flush(); + } + + public void close() throws XMLStreamException { + out.close(); + } + +} + diff --git a/epublib-core/src/main/resources/dtd/openebook.org/dtds/oeb-1.2/oeb12.ent b/epublib-core/src/main/resources/dtd/openebook.org/dtds/oeb-1.2/oeb12.ent new file mode 100644 index 00000000..f7b58d25 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/openebook.org/dtds/oeb-1.2/oeb12.ent @@ -0,0 +1,1135 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/openebook.org/dtds/oeb-1.2/oebpkg12.dtd b/epublib-core/src/main/resources/dtd/openebook.org/dtds/oeb-1.2/oebpkg12.dtd new file mode 100644 index 00000000..34cc2b10 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/openebook.org/dtds/oeb-1.2/oebpkg12.dtd @@ -0,0 +1,390 @@ + + + + + + + + + +%OEBEntities; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.daisy.org/z3986/2005/ncx-2005-1.dtd b/epublib-core/src/main/resources/dtd/www.daisy.org/z3986/2005/ncx-2005-1.dtd new file mode 100644 index 00000000..b889c41a --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.daisy.org/z3986/2005/ncx-2005-1.dtd @@ -0,0 +1,269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/ruby/xhtml-ruby-1.mod 
b/epublib-core/src/main/resources/dtd/www.w3.org/TR/ruby/xhtml-ruby-1.mod new file mode 100644 index 00000000..a44bb3fa --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/ruby/xhtml-ruby-1.mod @@ -0,0 +1,242 @@ + + + + + + + + + + + + + + + + + + + + + + + +]]> + +]]> + + + +]]> + + + + + + + + + + + + + +]]> + + + + + + +]]> + + + + + + +]]> +]]> + + + + + + + +]]> + + + + + + + + +]]> + + + + +]]> +]]> + + + + + + +]]> +]]> + + + + + + + + + + +]]> + + + + + +]]> + + + + + +]]> +]]> + + + + + +]]> + + + + + +]]> + + + + + +]]> +]]> +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-arch-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-arch-1.mod new file mode 100644 index 00000000..4a4fa6ca --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-arch-1.mod @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-attribs-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-attribs-1.mod new file mode 100644 index 00000000..104e5700 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-attribs-1.mod @@ -0,0 +1,142 @@ + + + + + + + + + +]]> + + + + +]]> + + + + +]]> + + + + + + + + +]]> + + + + + + + + + + + +]]> + + +]]> + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-base-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-base-1.mod new file mode 100644 index 00000000..dca21ca0 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-base-1.mod @@ -0,0 +1,53 @@ + + + + + + + + + + + + +]]> + + + +]]> + + + + diff --git 
a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-bdo-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-bdo-1.mod new file mode 100644 index 00000000..fcd67bf6 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-bdo-1.mod @@ -0,0 +1,47 @@ + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkphras-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkphras-1.mod new file mode 100644 index 00000000..0eeb1641 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkphras-1.mod @@ -0,0 +1,164 @@ + + + + + + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + +]]> + + + +]]> + + + + +]]> + + + +]]> + + + + +]]> + + + +]]> + + + + +]]> + + + +]]> + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkpres-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkpres-1.mod new file mode 100644 index 00000000..30968bb7 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkpres-1.mod @@ -0,0 +1,40 @@ + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkstruct-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkstruct-1.mod new file mode 100644 index 00000000..ab37c73c --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-blkstruct-1.mod @@ -0,0 +1,57 @@ + + + + + + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + diff --git 
a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-charent-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-charent-1.mod new file mode 100644 index 00000000..b1faf15c --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-charent-1.mod @@ -0,0 +1,39 @@ + + + + + + + +%xhtml-lat1; + + +%xhtml-symbol; + + +%xhtml-special; + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-csismap-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-csismap-1.mod new file mode 100644 index 00000000..5977f038 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-csismap-1.mod @@ -0,0 +1,114 @@ + + + + + + + + + + +]]> + + + + + + +]]> + + + + + + + + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-datatypes-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-datatypes-1.mod new file mode 100644 index 00000000..a2ea3ae8 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-datatypes-1.mod @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-datatypes-1.mod.1 b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-datatypes-1.mod.1 new file mode 100644 index 00000000..a2ea3ae8 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-datatypes-1.mod.1 @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-edit-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-edit-1.mod new file mode 100644 index 00000000..2d3d43f1 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-edit-1.mod @@ -0,0 +1,66 @@ + + + + + + + + + + + + +]]> + + + +]]> + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-events-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-events-1.mod new file mode 100644 index 00000000..ad8a798c --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-events-1.mod @@ -0,0 +1,135 @@ + + + + + + + + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-form-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-form-1.mod new file mode 100644 index 00000000..98b0b926 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-form-1.mod @@ -0,0 +1,292 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-framework-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-framework-1.mod new file mode 100644 index 00000000..f37976a6 --- /dev/null +++ 
b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-framework-1.mod @@ -0,0 +1,97 @@ + + + + + + + + +%xhtml-arch.mod;]]> + + + +%xhtml-notations.mod;]]> + + + +%xhtml-datatypes.mod;]]> + + + +%xhtml-xlink.mod; + + + +%xhtml-qname.mod;]]> + + + +%xhtml-events.mod;]]> + + + +%xhtml-attribs.mod;]]> + + + +%xhtml-model.redecl; + + + +%xhtml-model.mod;]]> + + + +%xhtml-charent.mod;]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-hypertext-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-hypertext-1.mod new file mode 100644 index 00000000..85d8348f --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-hypertext-1.mod @@ -0,0 +1,54 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-image-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-image-1.mod new file mode 100644 index 00000000..7eea4f9a --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-image-1.mod @@ -0,0 +1,51 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlphras-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlphras-1.mod new file mode 100644 index 00000000..ebada109 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlphras-1.mod @@ -0,0 +1,203 @@ + + + + + + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + diff --git 
a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlpres-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlpres-1.mod new file mode 100644 index 00000000..3e41322c --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlpres-1.mod @@ -0,0 +1,138 @@ + + + + + + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlstruct-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlstruct-1.mod new file mode 100644 index 00000000..4d6bd01a --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlstruct-1.mod @@ -0,0 +1,62 @@ + + + + + + + + + + + + + +]]> + + + +]]> + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlstyle-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlstyle-1.mod new file mode 100644 index 00000000..6d526cd1 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-inlstyle-1.mod @@ -0,0 +1,34 @@ + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-lat1.ent b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-lat1.ent new file mode 100644 index 00000000..ffee223e --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-lat1.ent @@ -0,0 +1,196 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-link-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-link-1.mod new file mode 100644 index 00000000..4a15f1dd --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-link-1.mod @@ -0,0 +1,59 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-list-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-list-1.mod new file mode 100644 index 00000000..72bdb25c --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-list-1.mod @@ -0,0 +1,129 @@ + + + + + + + + + + + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-meta-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-meta-1.mod new file mode 100644 index 00000000..d2f6d2c6 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-meta-1.mod @@ -0,0 +1,47 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-notations-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-notations-1.mod new file mode 100644 index 00000000..2da12d02 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-notations-1.mod @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-object-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-object-1.mod new file mode 100644 index 00000000..bee7aeb0 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-object-1.mod @@ -0,0 +1,60 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-param-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-param-1.mod new file mode 100644 index 00000000..4ba07916 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-param-1.mod @@ -0,0 +1,48 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-pres-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-pres-1.mod new file mode 100644 index 00000000..42a0d6df --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-pres-1.mod @@ -0,0 +1,38 @@ + + + + + + + + +%xhtml-inlpres.mod;]]> + + + +%xhtml-blkpres.mod;]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-qname-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-qname-1.mod new file mode 100644 index 00000000..35c180a6 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-qname-1.mod @@ -0,0 +1,318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + +%xhtml-qname-extra.mod; + + + + + + + + + +]]> + + + + + + + + + + + + + + + + + + + +]]> + + + + + + + +]]> + + + + +%xhtml-qname.redecl; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-script-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-script-1.mod new file mode 100644 index 00000000..0152ab02 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-script-1.mod @@ -0,0 +1,67 @@ + + + + + + + + + + + + +]]> + + + +]]> + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-special.ent b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-special.ent new file mode 100644 index 00000000..ca358b2f --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-special.ent @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-ssismap-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-ssismap-1.mod new file mode 100644 index 00000000..45da878f --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-ssismap-1.mod @@ -0,0 +1,32 @@ + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-struct-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-struct-1.mod new file mode 100644 index 00000000..c826f0f0 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-struct-1.mod @@ -0,0 +1,136 @@ + + + + + + + + + + + + + + +]]> + + + +]]> + + + + + + + +]]> + + + + + + +]]> + + + + + + + +]]> + + + +]]> + + + + + + + 
+]]> + + + +]]> + + + + + + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-style-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-style-1.mod new file mode 100644 index 00000000..dc85a9e6 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-style-1.mod @@ -0,0 +1,48 @@ + + + + + + + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-symbol.ent b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-symbol.ent new file mode 100644 index 00000000..63c2abfa --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-symbol.ent @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-symbol.ent.1 b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-symbol.ent.1 new file mode 100644 index 00000000..63c2abfa --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-symbol.ent.1 @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-table-1.mod 
b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-table-1.mod new file mode 100644 index 00000000..540b7346 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-table-1.mod @@ -0,0 +1,333 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-text-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-text-1.mod new file mode 100644 index 00000000..a461e1e1 --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml-text-1.mod @@ -0,0 +1,52 @@ + + + + + + + + +%xhtml-inlstruct.mod;]]> + + + +%xhtml-inlphras.mod;]]> + + + +%xhtml-blkstruct.mod;]]> + + + +%xhtml-blkphras.mod;]]> + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml11-model-1.mod b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml11-model-1.mod new file mode 100644 index 00000000..eb834f3d --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml-modularization/DTD/xhtml11-model-1.mod @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent new file mode 100644 index 00000000..ffee223e --- /dev/null +++ 
b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent @@ -0,0 +1,196 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-special.ent b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-special.ent new file mode 100644 index 00000000..ca358b2f --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-special.ent @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent new file mode 100644 index 00000000..63c2abfa --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd new file mode 100644 index 00000000..2927b9ec --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd @@ -0,0 +1,978 @@ + + + + + +%HTMLlat1; + + +%HTMLsymbol; + + +%HTMLspecial; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd new file mode 100644 index 00000000..628f27ac --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd @@ -0,0 +1,1201 @@ + + + + + +%HTMLlat1; + + +%HTMLsymbol; + + +%HTMLspecial; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml11/DTD/xhtml11.dtd b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml11/DTD/xhtml11.dtd new file mode 100644 index 00000000..2a999b5b --- /dev/null +++ b/epublib-core/src/main/resources/dtd/www.w3.org/TR/xhtml11/DTD/xhtml11.dtd @@ -0,0 +1,294 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + +%xhtml-inlstyle.mod;]]> + + + + + + + +%xhtml-framework.mod;]]> + + + + +]]> + + + + +%xhtml-text.mod;]]> + + + + +%xhtml-hypertext.mod;]]> + + + + +%xhtml-list.mod;]]> + + + + + + +%xhtml-edit.mod;]]> + + + + +%xhtml-bdo.mod;]]> + + + + + + +%xhtml-ruby.mod;]]> + + + + +%xhtml-pres.mod;]]> + + + + +%xhtml-link.mod;]]> + + + + +%xhtml-meta.mod;]]> + + + + +%xhtml-base.mod;]]> + + + + +%xhtml-script.mod;]]> + + + + +%xhtml-style.mod;]]> + + + + +%xhtml-image.mod;]]> + + + + +%xhtml-csismap.mod;]]> + + + + +%xhtml-ssismap.mod;]]> + + + + +%xhtml-param.mod;]]> + + + + +%xhtml-object.mod;]]> + + + + +%xhtml-table.mod;]]> + + + + +%xhtml-form.mod;]]> + + + + +%xhtml-legacy.mod;]]> + + + + +%xhtml-struct.mod;]]> + + + diff --git a/epublib-core/src/main/resources/log4j.properties b/epublib-core/src/main/resources/log4j.properties new file mode 100644 index 00000000..bdfcdfe7 --- /dev/null +++ b/epublib-core/src/main/resources/log4j.properties @@ -0,0 +1,55 @@ +#------------------------------------------------------------------------------ +# +# The following properties set the logging levels and log appender. The +# log4j.rootCategory variable defines the default log level and one or more +# appenders. For the console, use 'S'. For the daily rolling file, use 'R'. +# For an HTML formatted log, use 'H'. 
+# +# To override the default (rootCategory) log level, define a property of the +# form (see below for available values): +# +# log4j.logger.LOGGER_NAME = LEVEL +# +# Available logger names: +# TODO +# +# Possible Log Levels: +# FATAL, ERROR, WARN, INFO, DEBUG +# +#------------------------------------------------------------------------------ +log4j.rootCategory=INFO, S + +#------------------------------------------------------------------------------ +# +# The following properties configure the console (stdout) appender. +# See http://logging.apache.org/log4j/docs/api/index.html for details. +# +#------------------------------------------------------------------------------ +log4j.appender.S = org.apache.log4j.ConsoleAppender +log4j.appender.S.layout = org.apache.log4j.PatternLayout +log4j.appender.S.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} [%p] %l %m%n + +#------------------------------------------------------------------------------ +# +# The following properties configure the Daily Rolling File appender. +# See http://logging.apache.org/log4j/docs/api/index.html for details. +# +#------------------------------------------------------------------------------ +log4j.appender.R = org.apache.log4j.DailyRollingFileAppender +log4j.appender.R.File = logs/epublib.log +log4j.appender.R.Append = true +log4j.appender.R.DatePattern = '.'yyy-MM-dd +log4j.appender.R.layout = org.apache.log4j.PatternLayout +log4j.appender.R.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n + +#------------------------------------------------------------------------------ +# +# The following properties configure the Rolling File appender in HTML. +# See http://logging.apache.org/log4j/docs/api/index.html for details. 
+# +#------------------------------------------------------------------------------ +log4j.appender.H = org.apache.log4j.RollingFileAppender +log4j.appender.H.File = logs/epublib_log.html +log4j.appender.H.MaxFileSize = 100KB +log4j.appender.H.Append = false +log4j.appender.H.layout = org.apache.log4j.HTMLLayout \ No newline at end of file diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/browsersupport/NavigationHistoryTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/browsersupport/NavigationHistoryTest.java new file mode 100644 index 00000000..f0c75a7a --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/browsersupport/NavigationHistoryTest.java @@ -0,0 +1,213 @@ +package nl.siegmann.epublib.browsersupport; + +import static org.junit.Assert.assertEquals; + +import java.util.HashMap; +import java.util.Map; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; + +import org.junit.Test; + +public class NavigationHistoryTest { + + private static final Resource mockResource = new Resource("mockResource.html"); + + private static class MockBook extends Book { + public Resource getCoverPage() { + return mockResource; + } + } + + + private static class MockSectionWalker extends Navigator { + + private Map<String, Resource> resourcesByHref = new HashMap<String, Resource>(); + + public MockSectionWalker(Book book) { + super(book); + resourcesByHref.put(mockResource.getHref(), mockResource); + } + + public int gotoFirstSpineSection(Object source) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public int gotoPreviousSpineSection(Object source) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public boolean hasNextSpineSection() { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public boolean hasPreviousSpineSection() { + throw new UnsupportedOperationException("Method not supported in mock 
implementation"); + } + public int gotoNextSpineSection(Object source) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public int gotoResource(String resourceHref, Object source) { + return -1; + } + + public int gotoResource(Resource resource, Object source) { + return -1; + } + public boolean equals(Object obj) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + + public int gotoResourceId(String resourceId, Object source) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public int gotoSpineSection(int newIndex, Object source) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public int gotoLastSpineSection(Object source) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public int getCurrentSpinePos() { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public Resource getCurrentResource() { + return resourcesByHref.values().iterator().next(); + } + public void setCurrentSpinePos(int currentIndex) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + + public int setCurrentResource(Resource currentResource) { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + public String toString() { + throw new UnsupportedOperationException("Method not supported in mock implementation"); + } + + public Resource getMockResource() { + return mockResource; + } + } + + @Test + public void test1() { + MockSectionWalker navigator = new MockSectionWalker(new MockBook()); + NavigationHistory browserHistory = new NavigationHistory(navigator); + + assertEquals(navigator.getCurrentResource().getHref(), browserHistory.getCurrentHref()); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(1, 
browserHistory.getCurrentSize()); + + browserHistory.addLocation(navigator.getMockResource().getHref()); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(1, browserHistory.getCurrentSize()); + + browserHistory.addLocation("bar"); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.addLocation("bar"); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.move(1); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.addLocation("bar"); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.move(0); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.move(1); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.move(1); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + } + + @Test + public void test2() { + MockSectionWalker navigator = new MockSectionWalker(new MockBook()); + NavigationHistory browserHistory = new NavigationHistory(navigator); + + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(1, browserHistory.getCurrentSize()); + + browserHistory.addLocation("green"); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + + browserHistory.addLocation("blue"); + assertEquals(2, browserHistory.getCurrentPos()); + assertEquals(3, browserHistory.getCurrentSize()); + + 
browserHistory.addLocation("yellow"); + assertEquals(3, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + browserHistory.addLocation("orange"); + assertEquals(4, browserHistory.getCurrentPos()); + assertEquals(5, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(3, browserHistory.getCurrentPos()); + assertEquals(5, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(2, browserHistory.getCurrentPos()); + assertEquals(5, browserHistory.getCurrentSize()); + + browserHistory.addLocation("taupe"); + assertEquals(3, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + } + + @Test + public void test3() { + MockSectionWalker navigator = new MockSectionWalker(new MockBook()); + NavigationHistory browserHistory = new NavigationHistory(navigator); + + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(1, browserHistory.getCurrentSize()); + + browserHistory.addLocation("red"); + browserHistory.addLocation("green"); + browserHistory.addLocation("blue"); + + assertEquals(3, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(2, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + browserHistory.move(-1); + assertEquals(0, browserHistory.getCurrentPos()); + assertEquals(4, browserHistory.getCurrentSize()); + + browserHistory.addLocation("taupe"); + assertEquals(1, browserHistory.getCurrentPos()); + assertEquals(2, browserHistory.getCurrentSize()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/domain/BookTest.java 
b/epublib-core/src/test/java/nl/siegmann/epublib/domain/BookTest.java new file mode 100644 index 00000000..6ddf8684 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/domain/BookTest.java @@ -0,0 +1,55 @@ +package nl.siegmann.epublib.domain; + +import nl.siegmann.epublib.service.MediatypeService; + +import org.junit.Assert; +import org.junit.Test; + +public class BookTest { + + @Test + public void testGetContents1() { + Book book = new Book(); + Resource resource1 = new Resource("id1", "Hello, world !".getBytes(), "chapter1.html", MediatypeService.XHTML); + book.getSpine().addResource(resource1); + book.getTableOfContents().addSection(resource1, "My first chapter"); + Assert.assertEquals(1, book.getContents().size()); + } + + @Test + public void testGetContents2() { + Book book = new Book(); + Resource resource1 = new Resource("id1", "Hello, world !".getBytes(), "chapter1.html", MediatypeService.XHTML); + book.getSpine().addResource(resource1); + Resource resource2 = new Resource("id1", "Hello, world !".getBytes(), "chapter2.html", MediatypeService.XHTML); + book.getTableOfContents().addSection(resource2, "My first chapter"); + Assert.assertEquals(2, book.getContents().size()); + } + + @Test + public void testGetContents3() { + Book book = new Book(); + Resource resource1 = new Resource("id1", "Hello, world !".getBytes(), "chapter1.html", MediatypeService.XHTML); + book.getSpine().addResource(resource1); + Resource resource2 = new Resource("id1", "Hello, world !".getBytes(), "chapter2.html", MediatypeService.XHTML); + book.getTableOfContents().addSection(resource2, "My first chapter"); + book.getGuide().addReference(new GuideReference(resource2, GuideReference.FOREWORD, "The Foreword")); + Assert.assertEquals(2, book.getContents().size()); + } + + @Test + public void testGetContents4() { + Book book = new Book(); + + Resource resource1 = new Resource("id1", "Hello, world !".getBytes(), "chapter1.html", MediatypeService.XHTML); + 
book.getSpine().addResource(resource1); + + Resource resource2 = new Resource("id1", "Hello, world !".getBytes(), "chapter2.html", MediatypeService.XHTML); + book.getTableOfContents().addSection(resource2, "My first chapter"); + + Resource resource3 = new Resource("id1", "Hello, world !".getBytes(), "foreword.html", MediatypeService.XHTML); + book.getGuide().addReference(new GuideReference(resource3, GuideReference.FOREWORD, "The Foreword")); + + Assert.assertEquals(3, book.getContents().size()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/domain/ResourcesTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/domain/ResourcesTest.java new file mode 100644 index 00000000..ea852644 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/domain/ResourcesTest.java @@ -0,0 +1,32 @@ +package nl.siegmann.epublib.domain; + +import nl.siegmann.epublib.service.MediatypeService; + +import org.junit.Assert; +import org.junit.Test; + +public class ResourcesTest { + + @Test + public void testGetResourcesByMediaType1() { + Resources resources = new Resources(); + resources.add(new Resource("foo".getBytes(), MediatypeService.XHTML)); + resources.add(new Resource("bar".getBytes(), MediatypeService.XHTML)); + Assert.assertEquals(0, resources.getResourcesByMediaType(MediatypeService.PNG).size()); + Assert.assertEquals(2, resources.getResourcesByMediaType(MediatypeService.XHTML).size()); + Assert.assertEquals(2, resources.getResourcesByMediaTypes(new MediaType[] {MediatypeService.XHTML}).size()); + } + + @Test + public void testGetResourcesByMediaType2() { + Resources resources = new Resources(); + resources.add(new Resource("foo".getBytes(), MediatypeService.XHTML)); + resources.add(new Resource("bar".getBytes(), MediatypeService.PNG)); + resources.add(new Resource("baz".getBytes(), MediatypeService.PNG)); + Assert.assertEquals(2, resources.getResourcesByMediaType(MediatypeService.PNG).size()); + Assert.assertEquals(1, 
resources.getResourcesByMediaType(MediatypeService.XHTML).size()); + Assert.assertEquals(1, resources.getResourcesByMediaTypes(new MediaType[] {MediatypeService.XHTML}).size()); + Assert.assertEquals(3, resources.getResourcesByMediaTypes(new MediaType[] {MediatypeService.XHTML, MediatypeService.PNG}).size()); + Assert.assertEquals(3, resources.getResourcesByMediaTypes(new MediaType[] {MediatypeService.CSS, MediatypeService.XHTML, MediatypeService.PNG}).size()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/domain/TableOfContentsTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/domain/TableOfContentsTest.java new file mode 100644 index 00000000..9b058d2d --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/domain/TableOfContentsTest.java @@ -0,0 +1,100 @@ +package nl.siegmann.epublib.domain; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import org.junit.Test; + +public class TableOfContentsTest{ + + @Test + public void testCalculateDepth_simple1() { + TableOfContents tableOfContents = new TableOfContents(); + assertEquals(0, tableOfContents.calculateDepth()); + } + + @Test + public void testCalculateDepth_simple2() { + TableOfContents tableOfContents = new TableOfContents(); + tableOfContents.addTOCReference(new TOCReference()); + assertEquals(1, tableOfContents.calculateDepth()); + } + + @Test + public void testCalculateDepth_simple3() { + TableOfContents tableOfContents = new TableOfContents(); + tableOfContents.addTOCReference(new TOCReference()); + TOCReference childTOCReference = tableOfContents.addTOCReference(new TOCReference()); + childTOCReference.addChildSection(new TOCReference()); + tableOfContents.addTOCReference(new TOCReference()); + + assertEquals(2, tableOfContents.calculateDepth()); + } + + @Test + public void testAddResource1() { + Resource resource = new Resource("foo"); + TableOfContents toc = new TableOfContents(); + TOCReference tocReference = 
toc.addSection(resource, "apple/pear", "/"); + assertNotNull(tocReference); + assertNotNull(tocReference.getResource()); + assertEquals(2, toc.size()); + assertEquals("pear", tocReference.getTitle()); + } + + @Test + public void testAddResource2() { + Resource resource = new Resource("foo"); + TableOfContents toc = new TableOfContents(); + TOCReference tocReference = toc.addSection(resource, "apple/pear", "/"); + assertNotNull(tocReference); + assertNotNull(tocReference.getResource()); + assertEquals(2, toc.size()); + assertEquals("pear", tocReference.getTitle()); + + TOCReference tocReference2 = toc.addSection(resource, "apple/banana", "/"); + assertNotNull(tocReference2); + assertNotNull(tocReference2.getResource()); + assertEquals(3, toc.size()); + assertEquals("banana", tocReference2.getTitle()); + + TOCReference tocReference3 = toc.addSection(resource, "apple", "/"); + assertNotNull(tocReference3); + assertNotNull(tocReference3.getResource()); + assertEquals(3, toc.size()); + assertEquals("apple", tocReference3.getTitle()); + } + + @Test + public void testAddResource3() { + Resource resource = new Resource("foo"); + TableOfContents toc = new TableOfContents(); + TOCReference tocReference = toc.addSection(resource, "apple/pear"); + assertNotNull(tocReference); + assertNotNull(tocReference.getResource()); + assertEquals(1, toc.getTocReferences().size()); + assertEquals(1, toc.getTocReferences().get(0).getChildren().size()); + assertEquals(2, toc.size()); + assertEquals("pear", tocReference.getTitle()); + } + + @Test + public void testAddResourceWithIndexes() { + Resource resource = new Resource("foo"); + TableOfContents toc = new TableOfContents(); + TOCReference tocReference = toc.addSection(resource, new int[] {0, 0}, "Section ", "."); + + // check newly created TOCReference + assertNotNull(tocReference); + assertNotNull(tocReference.getResource()); + assertEquals("Section 1.1", tocReference.getTitle()); + + // check table of contents + assertEquals(1, 
toc.getTocReferences().size()); + assertEquals(1, toc.getTocReferences().get(0).getChildren().size()); + assertEquals(2, toc.size()); + assertEquals("Section 1", toc.getTocReferences().get(0).getTitle()); + assertEquals("Section 1.1", toc.getTocReferences().get(0).getChildren().get(0).getTitle()); + assertEquals(1, toc.getTocReferences().get(0).getChildren().size()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/DOMUtilTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/DOMUtilTest.java new file mode 100644 index 00000000..5d2c9565 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/DOMUtilTest.java @@ -0,0 +1,54 @@ +package nl.siegmann.epublib.epub; + +import java.io.StringReader; + +import org.junit.Test; +import static org.junit.Assert.*; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; + + +@RunWith(Enclosed.class) +public class DOMUtilTest { + + public static class GetAttribute { + + @Test + public void test_simple_foo() { + // given + String input = ""; + + try { + Document document = EpubProcessorSupport.createDocumentBuilder().parse(new InputSource(new StringReader(input))); + + // when + String actualResult = DOMUtil.getAttribute(document.getDocumentElement(), "foo", "myattr"); + + // then + assertEquals("red", actualResult); + } catch (Exception e) { + fail(e.getMessage()); + } + } + + @Test + public void test_simple_bar() { + // given + String input = ""; + + try { + Document document = EpubProcessorSupport.createDocumentBuilder().parse(new InputSource(new StringReader(input))); + + // when + String actualResult = DOMUtil.getAttribute(document.getDocumentElement(), "bar", "myattr"); + + // then + assertEquals("green", actualResult); + } catch (Exception e) { + fail(e.getMessage()); + } + } + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/EpubReaderTest.java 
b/epublib-core/src/test/java/nl/siegmann/epublib/epub/EpubReaderTest.java new file mode 100644 index 00000000..523e5d3b --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/EpubReaderTest.java @@ -0,0 +1,134 @@ +package nl.siegmann.epublib.epub; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import nl.siegmann.epublib.domain.OpfResource; +import org.junit.Test; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; + +public class EpubReaderTest { + + @Test + public void testCover_only_cover() throws IOException { + Book book = new Book(); + + book.setCoverImage(new Resource(this.getClass().getResourceAsStream( + "/book1/cover.png"), "cover.png")); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + (new EpubWriter()).write(book, out); + byte[] epubData = out.toByteArray(); + Book readBook = new EpubReader().readEpub(new ByteArrayInputStream( + epubData)); + assertNotNull(readBook.getCoverImage()); + } + + @Test + public void testCover_cover_one_section() throws IOException { + Book book = new Book(); + + book.setCoverImage(new Resource(this.getClass().getResourceAsStream( + "/book1/cover.png"), "cover.png")); + book.addSection("Introduction", new Resource(this.getClass() + .getResourceAsStream("/book1/chapter1.html"), "chapter1.html")); + book.generateSpineFromTableOfContents(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + (new EpubWriter()).write(book, out); + byte[] epubData = out.toByteArray(); + Book readBook = new EpubReader().readEpub(new ByteArrayInputStream( + epubData)); + assertNotNull(readBook.getCoverPage()); + assertEquals(1, readBook.getSpine().size()); + assertEquals(1, readBook.getTableOfContents().size()); + } + + @Test + public void 
testReadEpub_opf_ncx_docs() throws IOException { + Book book = new Book(); + + book.setCoverImage(new Resource(this.getClass().getResourceAsStream( + "/book1/cover.png"), "cover.png")); + book.addSection("Introduction", new Resource(this.getClass() + .getResourceAsStream("/book1/chapter1.html"), "chapter1.html")); + book.generateSpineFromTableOfContents(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + (new EpubWriter()).write(book, out); + byte[] epubData = out.toByteArray(); + Book readBook = new EpubReader().readEpub(new ByteArrayInputStream( + epubData)); + assertNotNull(readBook.getCoverPage()); + assertEquals(1, readBook.getSpine().size()); + assertEquals(1, readBook.getTableOfContents().size()); + assertNotNull(readBook.getOpfResource()); + assertNotNull(readBook.getNcxResource()); + assertEquals(MediatypeService.NCX, readBook.getNcxResource() + .getMediaType()); + } + + @Test + public void testReadEpub_opf_ncx_version() throws IOException { + Book book = new Book(); + + book.setOpfResource(new OpfResource(new Resource(this.getClass().getResourceAsStream( + "/opf/test3.opf"), "content.opf"))); + book.getOpfResource().setVersion("3.0"); + book.setCoverImage(new Resource(this.getClass().getResourceAsStream( + "/book1/cover.png"), "cover.png")); + book.addSection("Introduction", new Resource(this.getClass() + .getResourceAsStream("/book1/chapter1.html"), "chapter1.html")); + book.generateSpineFromTableOfContents(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + (new EpubWriter()).write(book, out); + byte[] epubData = out.toByteArray(); + Book readBook = new EpubReader().readEpub(new ByteArrayInputStream( + epubData)); + assertNotNull(readBook.getCoverPage()); + assertEquals(1, readBook.getSpine().size()); + assertEquals(1, readBook.getTableOfContents().size()); + assertNotNull(readBook.getOpfResource()); + assertEquals("3.0", readBook.getOpfResource().getVersion()); + assertNotNull(readBook.getNcxResource()); + 
assertEquals(MediatypeService.NCX, readBook.getNcxResource() + .getMediaType()); + } + + @Test + public void testReadEpub_opf_prefix() throws IOException { + Book book = new Book(); + + book.setOpfResource(new OpfResource(new Resource(this.getClass().getResourceAsStream( + "/opf/test3.opf"), "content.opf"))); + book.getOpfResource().setVersion("3.0"); + book.getOpfResource().setPrefix("test_prefix"); + book.setCoverImage(new Resource(this.getClass().getResourceAsStream( + "/book1/cover.png"), "cover.png")); + book.addSection("Introduction", new Resource(this.getClass() + .getResourceAsStream("/book1/chapter1.html"), "chapter1.html")); + book.generateSpineFromTableOfContents(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + (new EpubWriter()).write(book, out); + byte[] epubData = out.toByteArray(); + Book readBook = new EpubReader().readEpub(new ByteArrayInputStream( + epubData)); + assertNotNull(readBook.getCoverPage()); + assertEquals(1, readBook.getSpine().size()); + assertEquals(1, readBook.getTableOfContents().size()); + assertNotNull(readBook.getOpfResource()); + assertEquals("test_prefix", readBook.getOpfResource().getPrefix()); + assertNotNull(readBook.getNcxResource()); + assertEquals(MediatypeService.NCX, readBook.getNcxResource() + .getMediaType()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/EpubWriterTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/EpubWriterTest.java new file mode 100644 index 00000000..aedb7b18 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/EpubWriterTest.java @@ -0,0 +1,141 @@ +package nl.siegmann.epublib.epub; + +import nl.siegmann.epublib.domain.*; +import nl.siegmann.epublib.util.CollectionUtil; +import org.junit.Assert; +import org.junit.Test; + +import java.io.*; + +public class EpubWriterTest { + + @Test + public void testBook1() throws IOException { + // create test book + Book book = createTestBook(); + + // write book to byte[] + byte[] 
bookData = writeBookToByteArray(book); + // try-with-resources closes the file even if the write throws + try (FileOutputStream fileOutputStream = new FileOutputStream("foo.zip")) { + fileOutputStream.write(bookData); + fileOutputStream.flush(); + } + Assert.assertNotNull(bookData); + Assert.assertTrue(bookData.length > 0); + + // read book from byte[] + Book readBook = new EpubReader().readEpub(new ByteArrayInputStream(bookData)); + + // assert book values are correct + Assert.assertEquals(book.getMetadata().getTitles(), readBook.getMetadata().getTitles()); + Assert.assertEquals(Scheme.ISBN.getName(), CollectionUtil.first(readBook.getMetadata().getIdentifiers()).getScheme().getName()); + Assert.assertEquals(CollectionUtil.first(book.getMetadata().getIdentifiers()).getValue(), CollectionUtil.first(readBook.getMetadata().getIdentifiers()).getValue()); + Assert.assertEquals(CollectionUtil.first(book.getMetadata().getAuthors()), CollectionUtil.first(readBook.getMetadata().getAuthors())); + Assert.assertEquals(1, readBook.getGuide().getGuideReferencesByType(GuideReference.COVER).size()); + Assert.assertEquals(5, readBook.getSpine().size()); + // NOTE(review): the next two checks inspect the source book, not readBook - confirm that is intended + Assert.assertNotNull(book.getCoverPage()); + Assert.assertNotNull(book.getCoverImage()); + Assert.assertEquals(4, readBook.getTableOfContents().size()); + + } + + /** + * Test for a very old bug where epublib would throw a NullPointerException when writing a book with a cover that has no id. 
+ * + * @throws IOException + * @throws FileNotFoundException + */ + @Test + public void testWritingBookWithCoverWithNullId() throws FileNotFoundException, IOException { + Book book = new Book(); + book.getMetadata().addTitle(new Title("Epub test book 1")); + book.getMetadata().addAuthor(new Author("Joe", "Tester")); + InputStream is = this.getClass().getResourceAsStream("/book1/cover.png"); + book.setCoverImage(new Resource(is, "cover.png")); + // Add Chapter 1 + InputStream is1 = this.getClass().getResourceAsStream("/book1/chapter1.html"); + book.addSection("Introduction", new Resource(is1, "chapter1.html")); + + EpubWriter epubWriter = new EpubWriter(); + // Own the output stream here so it is closed even if writing fails + try (FileOutputStream out = new FileOutputStream("test1_book1.epub")) { + epubWriter.write(book, out); + } + } + + /** + * Builds the in-memory fixture book: titles, identifier, author, cover, stylesheet, image and chapter sections. + */ + private Book createTestBook() throws IOException { + Book book = new Book(); + + book.getMetadata().addTitle(new Title("Epublib test book 1")); + book.getMetadata().addTitle(new Title("test2")); + + book.getMetadata().addIdentifier(new Identifier(Scheme.ISBN, "987654321")); + book.getMetadata().addAuthor(new Author("Joe", "Tester")); + book.setCoverPage(new Resource(this.getClass().getResourceAsStream("/book1/cover.html"), "cover.html")); + book.setCoverImage(new Resource(this.getClass().getResourceAsStream("/book1/cover.png"), "cover.png")); + book.addSection("Chapter 1", new Resource(this.getClass().getResourceAsStream("/book1/chapter1.html"), "chapter1.html")); + book.addResource(new Resource(this.getClass().getResourceAsStream("/book1/book1.css"), "book1.css")); + TOCReference chapter2 = book.addSection("Second chapter", new Resource(this.getClass().getResourceAsStream("/book1/chapter2.html"), "chapter2.html")); + book.addResource(new Resource(this.getClass().getResourceAsStream("/book1/flowers_320x240.jpg"), "flowers.jpg")); + book.addSection(chapter2, "Chapter 2 section 1", new Resource(this.getClass().getResourceAsStream("/book1/chapter2_1.html"), "chapter2_1.html")); + book.addSection("Chapter 3", new 
Resource(this.getClass().getResourceAsStream("/book1/chapter3.html"), "chapter3.html")); + return book; + } + + + private byte[] writeBookToByteArray(Book book) throws IOException { + EpubWriter epubWriter = new EpubWriter(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + epubWriter.write(book, out); + return out.toByteArray(); + } +// +// public static void writeEpub(BookDTO dto) throws IOException{ +// Book book = new Book(); +// +// Resource coverImg = new Resource(new FileInputStream(ResourceBundle.getBundle("info.pxdev.pfi.webclient.resources.Config").getString("COVER_DIR")+dto.getCoverFileName()),dto.getCoverFileName()); +// +// book.getMetadata().addTitle(dto.getTitle()); +// +// if(dto.getIdentifier().getType().getName().equals("ISBN")) +// book.getMetadata().addIdentifier(new Identifier(Identifier.Scheme.ISBN, dto.getIdentifier().getIdentifier())); +// else +// book.getMetadata().addIdentifier(new Identifier(Identifier.Scheme.UUID, dto.getIdentifier().getIdentifier())); +// +// book.getMetadata().addAuthor(new Author(dto.getCreator().getName(), dto.getCreator().getLastName())); +// book.getMetadata().addPublisher(dto.getPublisher()); +// book.getMetadata().addDate(new Date(dto.getLastModified())); +// book.getMetadata().addDescription(dto.getDescription()); +// book.getMetadata().addType("TEXT"); +// book.getMetadata().setLanguage(dto.getLanguage()); +// book.getMetadata().setCoverImage(coverImg); +// book.getMetadata().setFormat(MediatypeService.EPUB.getName()); +// +// for(BookSubCategoryDTO subject : dto.getSubjects()){ +// book.getMetadata().getSubjects().add(subject.getName()); +// } +// for(BookContributorDTO contrib : dto.getContributors()){ +// Author contributor = new Author(contrib.getName(), contrib.getLastName()); +// contributor.setRelator(Relator.byCode(contrib.getType().getShortName())); +// book.getMetadata().addContributor(contributor); +// } +// +// +// book.setCoverImage(coverImg); +// for(BookChapterDTO chapter : 
dto.getChapters()){ +// Resource aux = new Resource(HTMLGenerator.generateChapterHtmlStream(dto,chapter), "chapter"+chapter.getNumber()+".html"); +// book.addSection(chapter.getTitle(), aux ); +// } +// +// EpubWriter writer = new EpubWriter(); +// FileOutputStream output = new FileOutputStream(ResourceBundle.getBundle("info.pxdev.pfi.webclient.resources.Config").getString("HTML_CHAPTERS")+dto.getId_book()+"\\test.epub"); +// +// try { +// writer.write(book, output); +// } catch (XMLStreamException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (FactoryConfigurationError e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } +// } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/NCXDocumentTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/NCXDocumentTest.java new file mode 100644 index 00000000..a64f9c2c --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/NCXDocumentTest.java @@ -0,0 +1,70 @@ +package nl.siegmann.epublib.epub; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.IOUtil; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NCXDocumentTest { + + byte[] ncxData; + + public NCXDocumentTest() { + } + + @BeforeClass + public static void setUpClass() { + } + + @AfterClass + public static void tearDownClass() { + } + + @Before + public void setUp() throws IOException { + ncxData = IOUtil.toByteArray(new FileInputStream(new File("src/test/resources/toc.xml"))); + } + + @After + public void tearDown() { + } + + private void addResource(Book book, String filename) { + Resource chapterResource = new 
Resource("id1", "Hello, world !".getBytes(), filename, MediatypeService.XHTML); + book.addResource(chapterResource); + book.getSpine().addResource(chapterResource); + } + + /** + * Reads an NCX stored at xhtml/toc.ncx and expects the first TOC reference to resolve to xhtml/chapter1.html. + */ + @Test + public void testReadWithNonRootLevelTOC() { + + // If the toc.ncx file is not in the root, the hrefs it refers to need to preserve its path. + Book book = new Book(); + Resource ncxResource = new Resource(ncxData, "xhtml/toc.ncx"); + addResource(book, "xhtml/chapter1.html"); + addResource(book, "xhtml/chapter2.html"); + addResource(book, "xhtml/chapter2_1.html"); + addResource(book, "xhtml/chapter3.html"); + + book.setNcxResource(ncxResource); + book.getSpine().setTocResource(ncxResource); + + NCXDocument.read(book, new EpubReader()); + assertEquals("xhtml/chapter1.html", book.getTableOfContents().getTocReferences().get(0).getCompleteHref()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/PackageDocumentMetadataReaderTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/PackageDocumentMetadataReaderTest.java new file mode 100644 index 00000000..155ec281 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/PackageDocumentMetadataReaderTest.java @@ -0,0 +1,99 @@ +package nl.siegmann.epublib.epub; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.io.StringReader; + +import nl.siegmann.epublib.domain.Identifier; +import nl.siegmann.epublib.domain.Metadata; + +import org.junit.Assert; +import org.junit.Test; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +public class PackageDocumentMetadataReaderTest { + + @Test + public void test1() { + try { + Document document = EpubProcessorSupport.createDocumentBuilder().parse(PackageDocumentMetadataReader.class.getResourceAsStream("/opf/test2.opf")); + Metadata metadata = 
PackageDocumentMetadataReader.readMetadata(document); + assertEquals(1, metadata.getAuthors().size()); + } catch (Exception e) { + // Fail with the original exception attached instead of printing it and asserting false + throw new AssertionError("Could not read metadata from /opf/test2.opf", e); + } + } + + @Test + public void testReadsLanguage() { + Metadata metadata = getMetadata("/opf/test_language.opf"); + assertEquals("fi", metadata.getLanguage()); + } + + @Test + public void testDefaultsToEnglish() { + Metadata metadata = getMetadata("/opf/test_default_language.opf"); + assertEquals("en", metadata.getLanguage()); + } + + /** + * Parses the given classpath OPF resource and returns the metadata read from it. + * + * @param file classpath location of the OPF document + * @return the metadata read from the parsed document + * @throws AssertionError carrying the original exception if parsing or reading fails + */ + private Metadata getMetadata(String file) { + try { + Document document = EpubProcessorSupport.createDocumentBuilder().parse(PackageDocumentMetadataReader.class.getResourceAsStream(file)); + + return PackageDocumentMetadataReader.readMetadata(document); + } catch (Exception e) { + throw new AssertionError("Could not read metadata from " + file, e); + } + } + + @Test + public void test2() throws SAXException, IOException { + // given + String input = "" + + "" + + "Three Men in a Boat" + + "Jerome K. Jerome" + + "A. Frederics" + + "en" + + "1889" + + "2009-05-17" + + "zelda@mobileread.com:2010040720" + + "zelda pinwheel" + + "zelda pinwheel" + + "Public Domain" + + "Text" + + "Image" + + "Travel" + + "Humour" + + "Three Men in a Boat (To Say Nothing of the Dog), published in 1889, is a humorous account by Jerome K. Jerome of a boating holiday on the Thames between Kingston and Oxford. The book was initially intended to be a serious travel guide, with accounts of local history along the route, but the humorous elements took over to the point where the serious and somewhat sentimental passages seem a distraction to the comic novel. One of the most praised things about Three Men in a Boat is how undated it appears to modern readers, the jokes seem fresh and witty even today." 
+ + "" + + "" + + "" + + ""; + + // when + Document metadataDocument = EpubProcessorSupport.createDocumentBuilder().parse(new InputSource(new StringReader(input))); + Metadata metadata = PackageDocumentMetadataReader.readMetadata(metadataDocument); + + // then + Assert.assertEquals("Three Men in a Boat", metadata.getFirstTitle().getValue()); + + // test identifier + Assert.assertNotNull(metadata.getIdentifiers()); + Assert.assertEquals(1, metadata.getIdentifiers().size()); + Identifier identifier = metadata.getIdentifiers().get(0); + Assert.assertEquals("URI", identifier.getScheme().getName()); + Assert.assertEquals("zelda@mobileread.com:2010040720", identifier.getValue()); + + Assert.assertEquals("8", metadata.getMetaAttribute("calibre:rating")); + Assert.assertEquals("cover_pic", metadata.getMetaAttribute("cover")); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/PackageDocumentReaderTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/PackageDocumentReaderTest.java new file mode 100644 index 00000000..ea6a7a0c --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/PackageDocumentReaderTest.java @@ -0,0 +1,152 @@ +package nl.siegmann.epublib.epub; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.Resources; +import nl.siegmann.epublib.service.MediatypeService; + +public class PackageDocumentReaderTest { + + @Test + public void testFindCoverHref_content1() throws SAXException, IOException { + Document packageDocument; + packageDocument = 
EpubProcessorSupport.createDocumentBuilder().parse(PackageDocumentReaderTest.class.getResourceAsStream("/opf/test1.opf")); + Collection coverHrefs = PackageDocumentReader.findCoverHrefs(packageDocument); + assertEquals(1, coverHrefs.size()); + assertEquals("cover.html", coverHrefs.iterator().next()); + } + + @Test + public void testFindTableOfContentsResource_simple_correct_toc_id() { + // given + String tocResourceId = "foo"; + Resources resources = mock(Resources.class); + Resource resource = mock(Resource.class); + when(resources.getByIdOrHref(tocResourceId)).thenReturn(resource); + + // when + Resource actualResult = PackageDocumentReader.findTableOfContentsResource("foo", resources); + + // then + Assert.assertEquals(resource, actualResult); + Mockito.verify(resources).getByIdOrHref(tocResourceId); + Mockito.verifyNoMoreInteractions(resources); + } + + @Test + public void testFindTableOfContentsResource_NCX_media_resource() { + // given + String tocResourceId = "foo"; + Resources resources = mock(Resources.class); + Resource resource = mock(Resource.class); + when(resources.getByIdOrHref(tocResourceId)).thenReturn(null); + when(resources.findFirstResourceByMediaType(MediatypeService.NCX)).thenReturn(resource); + + // when + Resource actualResult = PackageDocumentReader.findTableOfContentsResource("foo", resources); + + // then + Assert.assertEquals(resource, actualResult); + Mockito.verify(resources).getByIdOrHref(tocResourceId); + Mockito.verify(resources).findFirstResourceByMediaType(MediatypeService.NCX); + Mockito.verifyNoMoreInteractions(resources); + } + + @Test + public void testFindTableOfContentsResource_by_possible_id() { + // given + String tocResourceId = "foo"; + Resources resources = mock(Resources.class); + Resource resource = mock(Resource.class); + when(resources.getByIdOrHref(tocResourceId)).thenReturn(null); + when(resources.findFirstResourceByMediaType(MediatypeService.NCX)).thenReturn(null); + 
when(resources.getByIdOrHref("NCX")).thenReturn(resource); + + // when + Resource actualResult = PackageDocumentReader.findTableOfContentsResource("foo", resources); + + // then + Assert.assertEquals(resource, actualResult); + Mockito.verify(resources).getByIdOrHref(tocResourceId); + Mockito.verify(resources).getByIdOrHref("toc"); + Mockito.verify(resources).getByIdOrHref("TOC"); + Mockito.verify(resources).getByIdOrHref("ncx"); + Mockito.verify(resources).getByIdOrHref("NCX"); + Mockito.verify(resources).findFirstResourceByMediaType(MediatypeService.NCX); + Mockito.verifyNoMoreInteractions(resources); + } + + @Test + public void testFindTableOfContentsResource_nothing_found() { + // given + String tocResourceId = "foo"; + Resources resources = mock(Resources.class); + Resource resource = mock(Resource.class); + when(resources.getByIdOrHref(Mockito.anyString())).thenReturn(null); + when(resources.findFirstResourceByMediaType(MediatypeService.NCX)).thenReturn(null); + + // when + Resource actualResult = PackageDocumentReader.findTableOfContentsResource("foo", resources); + + // then + Assert.assertNull(actualResult); + Mockito.verify(resources).getByIdOrHref(tocResourceId); + Mockito.verify(resources).getByIdOrHref("toc"); + Mockito.verify(resources).getByIdOrHref("TOC"); + Mockito.verify(resources).getByIdOrHref("ncx"); + Mockito.verify(resources).getByIdOrHref("NCX"); + Mockito.verify(resources).getByIdOrHref("ncxtoc"); + Mockito.verify(resources).getByIdOrHref("NCXTOC"); + Mockito.verify(resources).findFirstResourceByMediaType(MediatypeService.NCX); + Mockito.verifyNoMoreInteractions(resources); + } + + @Test + public void testFixHrefs_simple_correct() { + // given + String packageHref = "OEBPS/content.opf"; + String resourceHref = "OEBPS/foo/bar.html"; + Resources resources = mock(Resources.class); + Resource resource = mock(Resource.class); + when(resources.getAll()).thenReturn(Arrays.asList(resource)); + when(resource.getHref()).thenReturn(resourceHref); + + 
// when + PackageDocumentReader.fixHrefs(packageHref, resources); + + // then + Mockito.verify(resource).setHref("foo/bar.html"); + } + + + @Test + public void testFixHrefs_invalid_prefix() { + // given + String packageHref = "123456789/"; + String resourceHref = "1/2.html"; + Resources resources = mock(Resources.class); + Resource resource = mock(Resource.class); + when(resources.getAll()).thenReturn(Arrays.asList(resource)); + when(resource.getHref()).thenReturn(resourceHref); + + // when + PackageDocumentReader.fixHrefs(packageHref, resources); + + // then + Assert.assertTrue(true); + } + +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/ResourcesLoaderTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/ResourcesLoaderTest.java new file mode 100644 index 00000000..a4ea4da9 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/ResourcesLoaderTest.java @@ -0,0 +1,169 @@ +package nl.siegmann.epublib.epub; + +import net.sf.jazzlib.ZipException; +import net.sf.jazzlib.ZipFile; +import net.sf.jazzlib.ZipInputStream; +import nl.siegmann.epublib.domain.LazyResource; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.Resources; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.IOUtil; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class ResourcesLoaderTest { + + private static final String encoding = "UTF-8"; + private static String testBookFilename; + + @BeforeClass + public static void setUpClass() throws IOException { + File testBook = File.createTempFile("testBook", ".epub"); + OutputStream out = new FileOutputStream(testBook); + IOUtil.copy(ResourcesLoaderTest.class.getResourceAsStream("/testbook1.epub"), out); + out.close(); + + 
ResourcesLoaderTest.testBookFilename = testBook.getAbsolutePath(); + } + + @AfterClass + public static void tearDownClass() { + //noinspection ResultOfMethodCallIgnored + new File(testBookFilename).delete(); + } + + /** + * Loads the Resources from a ZipInputStream + */ + @Test + public void testLoadResources_ZipInputStream() throws IOException { + // given + ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(new File(testBookFilename))); + + // when + Resources resources = ResourcesLoader.loadResources(zipInputStream, encoding); + + // then + verifyResources(resources); + } + + /** + * Loads the Resources from a zero length file, using ZipInputStream
+ * See Issue #122 Infinite loop. + */ + @Test(expected = ZipException.class) + public void testLoadResources_ZipInputStream_WithZeroLengthFile() throws IOException { + // given + ZipInputStream zipInputStream = new ZipInputStream(this.getClass().getResourceAsStream("/zero_length_file.epub")); + + // when + ResourcesLoader.loadResources(zipInputStream, encoding); + } + + /** + * Loads the Resources from a file that is not a valid zip, using ZipInputStream
+ * See Issue #122 Infinite loop. + */ + @Test(expected = ZipException.class) + public void testLoadResources_ZipInputStream_WithInvalidFile() throws IOException { + // given + ZipInputStream zipInputStream = new ZipInputStream(this.getClass().getResourceAsStream("/not_a_zip.epub")); + + // when + ResourcesLoader.loadResources(zipInputStream, encoding); + } + + /** + * Loads the Resources from a ZipFile + */ + @Test + public void testLoadResources_ZipFile() throws IOException { + // given + ZipFile zipFile = new ZipFile(testBookFilename); + + // when + Resources resources = ResourcesLoader.loadResources(zipFile, encoding); + + // then + verifyResources(resources); + } + + /** + * Loads all Resources lazily from a ZipFile + */ + @Test + public void testLoadResources_ZipFile_lazy_all() throws IOException { + // given + ZipFile zipFile = new ZipFile(testBookFilename); + + // when + Resources resources = ResourcesLoader.loadResources(zipFile, encoding, Arrays.asList(MediatypeService.mediatypes)); + + // then + verifyResources(resources); + Assert.assertEquals(Resource.class, resources.getById("container").getClass()); + Assert.assertEquals(LazyResource.class, resources.getById("book1").getClass()); + } + + /** + * Loads the Resources from a ZipFile, some of them lazily. 
+ */ + @Test + public void testLoadResources_ZipFile_partial_lazy() throws IOException { + // given + ZipFile zipFile = new ZipFile(testBookFilename); + + // when + Resources resources = ResourcesLoader.loadResources(zipFile, encoding, Collections.singletonList(MediatypeService.CSS)); + + // then + verifyResources(resources); + Assert.assertEquals(Resource.class, resources.getById("container").getClass()); + Assert.assertEquals(LazyResource.class, resources.getById("book1").getClass()); + Assert.assertEquals(Resource.class, resources.getById("chapter1").getClass()); + } + + /** + * Checks the resource count and then the id, href, media type and data of the first three resources in href order. + */ + private void verifyResources(Resources resources) throws IOException { + Assert.assertNotNull(resources); + Assert.assertEquals(12, resources.getAll().size()); + // Typed list so that get(i) yields a String href rather than Object + List<String> allHrefs = new ArrayList<>(resources.getAllHrefs()); + Collections.sort(allHrefs); + + Resource resource; + byte[] expectedData; + + // container + resource = resources.getByHref(allHrefs.get(0)); + Assert.assertEquals("container", resource.getId()); + Assert.assertEquals("META-INF/container.xml", resource.getHref()); + Assert.assertNull(resource.getMediaType()); + Assert.assertEquals(230, resource.getData().length); + + // book1.css + resource = resources.getByHref(allHrefs.get(1)); + Assert.assertEquals("book1", resource.getId()); + Assert.assertEquals("OEBPS/book1.css", resource.getHref()); + Assert.assertEquals(MediatypeService.CSS, resource.getMediaType()); + Assert.assertEquals(65, resource.getData().length); + expectedData = IOUtil.toByteArray(this.getClass().getResourceAsStream("/book1/book1.css")); + Assert.assertArrayEquals(expectedData, resource.getData()); + + + // chapter1 + resource = resources.getByHref(allHrefs.get(2)); + Assert.assertEquals("chapter1", resource.getId()); + Assert.assertEquals("OEBPS/chapter1.html", resource.getHref()); + Assert.assertEquals(MediatypeService.XHTML, resource.getMediaType()); + Assert.assertEquals(247, resource.getData().length); + expectedData = 
IOUtil.toByteArray(this.getClass().getResourceAsStream("/book1/chapter1.html")); + Assert.assertArrayEquals(expectedData, resource.getData()); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/epub/Simple1.java b/epublib-core/src/test/java/nl/siegmann/epublib/epub/Simple1.java new file mode 100644 index 00000000..0cdc52fc --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/epub/Simple1.java @@ -0,0 +1,49 @@ +package nl.siegmann.epublib.epub; + +import java.io.FileOutputStream; + +import nl.siegmann.epublib.domain.*; + +public class Simple1 { + public static void main(String[] args) { + try { + // Create new Book + Book book = new Book(); + + // Set the title + book.getMetadata().addTitle(new Title("Epublib test book 1")); + + // Add an Author + book.getMetadata().addAuthor(new Author("Joe", "Tester")); + + // Set cover image + book.setCoverImage(new Resource(Simple1.class.getResourceAsStream("/book1/test_cover.png"), "cover.png")); + + // Add Chapter 1 + book.addSection("Introduction", new Resource(Simple1.class.getResourceAsStream("/book1/chapter1.html"), "chapter1.html")); + + // Add css file + book.getResources().add(new Resource(Simple1.class.getResourceAsStream("/book1/book1.css"), "book1.css")); + + // Add Chapter 2 + TOCReference chapter2 = book.addSection("Second Chapter", new Resource(Simple1.class.getResourceAsStream("/book1/chapter2.html"), "chapter2.html")); + + // Add image used by Chapter 2 + book.getResources().add(new Resource(Simple1.class.getResourceAsStream("/book1/flowers_320x240.jpg"), "flowers.jpg")); + + // Add Chapter2, Section 1 + book.addSection(chapter2, "Chapter 2, section 1", new Resource(Simple1.class.getResourceAsStream("/book1/chapter2_1.html"), "chapter2_1.html")); + + // Add Chapter 3 + book.addSection("Conclusion", new Resource(Simple1.class.getResourceAsStream("/book1/chapter3.html"), "chapter3.html")); + + // Create EpubWriter + EpubWriter epubWriter = new EpubWriter(); + + // Write the Book as 
Epub + // Own the output stream here so it is closed even if writing fails + try (FileOutputStream out = new FileOutputStream("test1_book1.epub")) { + epubWriter.write(book, out); + } + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/util/CollectionUtilTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/util/CollectionUtilTest.java new file mode 100644 index 00000000..ef9e7c26 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/util/CollectionUtilTest.java @@ -0,0 +1,25 @@ +package nl.siegmann.epublib.util; + +import java.util.ArrayList; +import java.util.Arrays; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests CollectionUtil.isEmpty with a null, an empty and a non-empty collection. + */ +public class CollectionUtilTest { + + @Test + public void testIsEmpty_null() { + Assert.assertTrue(CollectionUtil.isEmpty(null)); + } + + @Test + public void testIsEmpty_empty() { + Assert.assertTrue(CollectionUtil.isEmpty(new ArrayList<String>())); + } + + @Test + public void testIsEmpty_elements() { + Assert.assertFalse(CollectionUtil.isEmpty(Arrays.asList("foo"))); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/util/IOUtilTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/util/IOUtilTest.java new file mode 100644 index 00000000..19ed7282 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/util/IOUtilTest.java @@ -0,0 +1,77 @@ +package nl.siegmann.epublib.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; + +import org.junit.Test; + +public class IOUtilTest { + + @Test + public void testToByteArray1() { + byte[] testArray = new byte[Byte.MAX_VALUE - Byte.MIN_VALUE]; + for (int i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++) { + testArray[i - Byte.MIN_VALUE] = (byte) i; + } + try { + byte[] result = IOUtil.toByteArray(new ByteArrayInputStream(testArray)); + assertTrue(Arrays.equals(testArray, result)); + } catch (IOException e) { + 
throw new AssertionError("Unexpected IOException while reading an in-memory stream", e); + } + } + + @Test + public void testToByteArray2() { + byte[] testArray = new byte[IOUtil.IO_COPY_BUFFER_SIZE + 1]; + Random random = new Random(); + random.nextBytes(testArray); + try { + byte[] result = IOUtil.toByteArray(new ByteArrayInputStream(testArray)); + assertTrue(Arrays.equals(testArray, result)); + } catch (IOException e) { + // Keep the cause attached so the failure report shows what went wrong + throw new AssertionError("Unexpected IOException while reading an in-memory stream", e); + } + } + + @Test + public void testCopyInputStream1() { + byte[] testArray = new byte[(IOUtil.IO_COPY_BUFFER_SIZE * 3) + 10]; + Random random = new Random(); + random.nextBytes(testArray); + try { + ByteArrayOutputStream result = new ByteArrayOutputStream(); + int copySize = IOUtil.copy(new ByteArrayInputStream(testArray), result); + assertTrue(Arrays.equals(testArray, result.toByteArray())); + assertEquals(testArray.length, copySize); + } catch (IOException e) { + throw new AssertionError("Unexpected IOException while copying an in-memory stream", e); + } + } + + @Test + public void testCalcNrRead() { + Integer[] testData = new Integer[] { + // nrRead, totalNrRead, result + 0, 0, 0, + 1, 1, 2, + 10, Integer.MAX_VALUE - 10, Integer.MAX_VALUE, + 1, Integer.MAX_VALUE - 1, Integer.MAX_VALUE, + 10, Integer.MAX_VALUE - 9, -1 + }; + for (int i = 0; i < testData.length; i += 3) { + int actualResult = IOUtil.calcNewNrReadSize(testData[i], testData[i + 1]); + int expectedResult = testData[i + 2]; + assertEquals((i / 3) + " : " + testData[i] + ", " + testData[i + 1], expectedResult, actualResult); + } + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/util/NoCloseOutputStreamTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/util/NoCloseOutputStreamTest.java new file mode 100644 index 00000000..3d48ecd8 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/util/NoCloseOutputStreamTest.java @@ -0,0 +1,60 @@ +package nl.siegmann.epublib.util; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import 
org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.io.OutputStream; + +public class NoCloseOutputStreamTest { + + @Mock + private OutputStream outputStream; + + private NoCloseOutputStream noCloseOutputStream; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + this.noCloseOutputStream = new NoCloseOutputStream(outputStream); + } + + @Test + public void testWrite() throws IOException { + // given + + // when + noCloseOutputStream.write(17); + + // then + Mockito.verify(outputStream).write(17); + Mockito.verifyNoMoreInteractions(outputStream); + } + + @Test + public void testClose() throws IOException { + // given + + // when + noCloseOutputStream.close(); + + // then + Mockito.verifyNoMoreInteractions(outputStream); + } + + @Test + public void testWriteClose() throws IOException { + // given + + // when + noCloseOutputStream.write(17); + noCloseOutputStream.close(); + + // then + Mockito.verify(outputStream).write(17); + Mockito.verifyNoMoreInteractions(outputStream); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/util/NoCloseWriterTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/util/NoCloseWriterTest.java new file mode 100644 index 00000000..3e35d349 --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/util/NoCloseWriterTest.java @@ -0,0 +1,72 @@ +package nl.siegmann.epublib.util; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.io.Writer; + +public class NoCloseWriterTest { + + @Mock + private Writer delegateWriter; + + private NoCloseWriter noCloseWriter; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + this.noCloseWriter = new NoCloseWriter(delegateWriter); + } + + @Test + public void testWrite() throws IOException { + // given + + // when + noCloseWriter.write(new char[]{'e','f','g'},2,1); + + // then 
+ Mockito.verify(delegateWriter).write(new char[]{'e','f','g'},2,1); + Mockito.verifyNoMoreInteractions(delegateWriter); + } + + @Test + public void testFlush() throws IOException { + // given + + // when + noCloseWriter.flush(); + + // then + Mockito.verify(delegateWriter).flush(); + Mockito.verifyNoMoreInteractions(delegateWriter); + } + + @Test + public void testClose() throws IOException { + // given + + // when + noCloseWriter.close(); + + // then + Mockito.verifyNoMoreInteractions(delegateWriter); + } + + @Test + public void testWriteClose() throws IOException { + // given + + // when + noCloseWriter.write(new char[]{'e','f','g'},2,1); + noCloseWriter.close(); + + // then + Mockito.verify(delegateWriter).write(new char[]{'e','f','g'},2,1); + Mockito.verifyNoMoreInteractions(delegateWriter); + } +} diff --git a/epublib-core/src/test/java/nl/siegmann/epublib/util/StringUtilTest.java b/epublib-core/src/test/java/nl/siegmann/epublib/util/StringUtilTest.java new file mode 100644 index 00000000..2d2b277c --- /dev/null +++ b/epublib-core/src/test/java/nl/siegmann/epublib/util/StringUtilTest.java @@ -0,0 +1,200 @@ +package nl.siegmann.epublib.util; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.junit.Test; + +public class StringUtilTest { + + @Test + public void testDefaultIfNull() { + Object[] testData = new Object[] { null, "", "", "", " ", " ", "foo", + "foo" }; + for (int i = 0; i < testData.length; i += 2) { + String actualResult = StringUtil + .defaultIfNull((String) testData[i]); + String expectedResult = (String) testData[i + 1]; + assertEquals((i / 2) + " : " + testData[i], expectedResult, + actualResult); + } + } + + @Test + public void testDefaultIfNull_with_default() { + Object[] testData = new Object[] { null, null, null, "", null, "", + null, "", "", "foo", "", "foo", "", "foo", "", " ", " ", " ", + null, "foo", "foo", }; + for (int i = 0; i < testData.length; i += 3) { + String actualResult = 
StringUtil.defaultIfNull( + (String) testData[i], (String) testData[i + 1]); + String expectedResult = (String) testData[i + 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public void testIsEmpty() { + Object[] testData = new Object[] { null, true, "", true, " ", false, + "asdfasfd", false }; + for (int i = 0; i < testData.length; i += 2) { + boolean actualResult = StringUtil.isEmpty((String) testData[i]); + boolean expectedResult = (Boolean) testData[i + 1]; + assertEquals(expectedResult, actualResult); + } + } + + @Test + public void testIsBlank() { + Object[] testData = new Object[] { null, true, "", true, " ", true, + "\t\t \n\n", true, "asdfasfd", false }; + for (int i = 0; i < testData.length; i += 2) { + boolean actualResult = StringUtil.isBlank((String) testData[i]); + boolean expectedResult = (Boolean) testData[i + 1]; + assertEquals(expectedResult, actualResult); + } + } + + @Test + public void testIsNotBlank() { + Object[] testData = new Object[] { null, !true, "", !true, " ", !true, + "\t\t \n\n", !true, "asdfasfd", !false }; + for (int i = 0; i < testData.length; i += 2) { + boolean actualResult = StringUtil.isNotBlank((String) testData[i]); + boolean expectedResult = (Boolean) testData[i + 1]; + assertEquals((i / 2) + " : " + testData[i], expectedResult, + actualResult); + } + } + + @Test + public void testEquals() { + Object[] testData = new Object[] { null, null, true, "", "", true, + null, "", false, "", null, false, null, "foo", false, "foo", + null, false, "", "foo", false, "foo", "", false, "foo", "bar", + false, "foo", "foo", true }; + for (int i = 0; i < testData.length; i += 3) { + boolean actualResult = StringUtil.equals((String) testData[i], + (String) testData[i + 1]); + boolean expectedResult = (Boolean) testData[i + 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public 
void testEndWithIgnoreCase() { + Object[] testData = new Object[] { null, null, true, "", "", true, "", + "foo", false, "foo", "foo", true, "foo.bar", "bar", true, + "foo.bar", "barX", false, "foo.barX", "bar", false, "foo", + "bar", false, "foo.BAR", "bar", true, "foo.bar", "BaR", true }; + for (int i = 0; i < testData.length; i += 3) { + boolean actualResult = StringUtil.endsWithIgnoreCase( + (String) testData[i], (String) testData[i + 1]); + boolean expectedResult = (Boolean) testData[i + 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public void testSubstringBefore() { + Object[] testData = new Object[] { "", ' ', "", "", 'X', "", "fox", + 'x', "fo", "foo.bar", 'b', "foo.", "aXbXc", 'X', "a", }; + for (int i = 0; i < testData.length; i += 3) { + String actualResult = StringUtil.substringBefore( + (String) testData[i], (Character) testData[i + 1]); + String expectedResult = (String) testData[i + 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public void testSubstringBeforeLast() { + Object[] testData = new Object[] { "", ' ', "", "", 'X', "", "fox", + 'x', "fo", "foo.bar", 'b', "foo.", "aXbXc", 'X', "aXb", }; + for (int i = 0; i < testData.length; i += 3) { + String actualResult = StringUtil.substringBeforeLast( + (String) testData[i], (Character) testData[i + 1]); + String expectedResult = (String) testData[i + 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public void testSubstringAfter() { + Object[] testData = new Object[] { "", ' ', "", "", 'X', "", "fox", + 'f', "ox", "foo.bar", 'b', "ar", "aXbXc", 'X', "bXc", }; + for (int i = 0; i < testData.length; i += 3) { + String actualResult = StringUtil.substringAfter( + (String) testData[i], (Character) testData[i + 1]); + String expectedResult = (String) testData[i 
+ 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public void testSubstringAfterLast() { + Object[] testData = new Object[] { "", ' ', "", "", 'X', "", "fox", + 'f', "ox", "foo.bar", 'b', "ar", "aXbXc", 'X', "c", }; + for (int i = 0; i < testData.length; i += 3) { + String actualResult = StringUtil.substringAfterLast( + (String) testData[i], (Character) testData[i + 1]); + String expectedResult = (String) testData[i + 2]; + assertEquals( + (i / 3) + " : " + testData[i] + ", " + testData[i + 1], + expectedResult, actualResult); + } + } + + @Test + public void testToString() { + assertEquals("[name: 'paul']", StringUtil.toString("name", "paul")); + assertEquals("[name: 'paul', address: 'a street']", + StringUtil.toString("name", "paul", "address", "a street")); + assertEquals("[name: ]", StringUtil.toString("name", null)); + assertEquals("[name: 'paul', address: ]", + StringUtil.toString("name", "paul", "address")); + } + + @Test + public void testHashCode() { + assertEquals(2522795, StringUtil.hashCode("isbn", "1234")); + assertEquals(3499691, StringUtil.hashCode("ISBN", "1234")); + } + + @Test + public void testReplacementForCollapsePathDots() throws IOException { + // This used to test StringUtil.collapsePathDots(String path). + // I have left it to confirm that the Apache commons + // FilenameUtils.normalize + // is a suitable replacement, but works where for "/a/b/../../c", which + // the old method did not. 
+ String[] testData = new String[] { // + "/foo/bar.html", "/foo/bar.html", + "/foo/../bar.html", "/bar.html", // + "/foo/moo/../../bar.html", // + "/bar.html", "/foo//bar.html", // + "/foo/bar.html", "/foo/./bar.html", // + "/foo/bar.html", // + "/a/b/../../c", "/c", // + "/foo/../sub/bar.html", "/sub/bar.html" // + }; + for (int i = 0; i < testData.length; i += 2) { + String actualResult = StringUtil.collapsePathDots(testData[i]); + assertEquals(testData[i], testData[i + 1], actualResult); + } + } + +} diff --git a/epublib-core/src/test/resources/book1/book1.css b/epublib-core/src/test/resources/book1/book1.css new file mode 100644 index 00000000..d59e76d1 --- /dev/null +++ b/epublib-core/src/test/resources/book1/book1.css @@ -0,0 +1,5 @@ +@CHARSET "UTF-8"; + +body { + font: New Century Schoolbook, serif; +} \ No newline at end of file diff --git a/epublib-core/src/test/resources/book1/chapter1.html b/epublib-core/src/test/resources/book1/chapter1.html new file mode 100644 index 00000000..2970e934 --- /dev/null +++ b/epublib-core/src/test/resources/book1/chapter1.html @@ -0,0 +1,14 @@ + + + Chapter 1 + + + + +

Introduction

+

+Welcome to Chapter 1 of the epublib book1 test book.
+We hope you enjoy the test. +

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/book1/chapter2.html b/epublib-core/src/test/resources/book1/chapter2.html new file mode 100644 index 00000000..73ab75ed --- /dev/null +++ b/epublib-core/src/test/resources/book1/chapter2.html @@ -0,0 +1,15 @@ + + + Chapter 2 + + + +

Second chapter

+

+Welcome to Chapter 2 of the epublib book1 test book.
+Pretty flowers:
+flowers
+We hope you are still enjoying the test. +

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/book1/chapter2_1.html b/epublib-core/src/test/resources/book1/chapter2_1.html new file mode 100644 index 00000000..91f2974a --- /dev/null +++ b/epublib-core/src/test/resources/book1/chapter2_1.html @@ -0,0 +1,27 @@ + + + Chapter 2.1 + + + +

Second chapter, first subsection

+

+A subsection of the second chapter. +

+

+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec eleifend ligula et odio malesuada luctus. Proin tristique blandit interdum. In a lorem augue, non iaculis ante. In hac habitasse platea dictumst. Suspendisse sed dolor in lacus dictum imperdiet quis id enim. Duis mattis, ante at posuere pretium, tortor nisl placerat ligula, quis vulputate lorem turpis id augue. Quisque tempus elementum leo, mattis vestibulum quam pulvinar tincidunt. Sed eu nulla mi, sed venenatis purus. Suspendisse potenti. Mauris feugiat mollis commodo. Donec ipsum ante, aliquam et imperdiet quis, posuere in nibh. Mauris non felis eget nunc auctor pharetra. Mauris sagittis malesuada pellentesque. Phasellus accumsan semper turpis eu pretium. Duis iaculis convallis viverra. Aliquam eu turpis ac elit euismod mollis. Duis velit velit, venenatis quis porta ut, adipiscing sit amet elit. Ut vehicula lacinia facilisis. Cras at turpis ac quam cursus accumsan sed quis nunc. Phasellus neque tortor, dapibus in aliquet non, sollicitudin quis libero. +

+

+Ut vulputate ultrices nunc, in suscipit lorem porta quis. Nulla sit amet odio libero. Donec et felis diam. Phasellus ut libero non metus pulvinar tristique ut sit amet dui. Praesent a sapien libero, eget imperdiet enim. Aenean accumsan, elit facilisis tincidunt cursus, massa erat volutpat ante, non rhoncus ante neque eget neque. Cras id faucibus eros. In eleifend imperdiet magna lobortis viverra. Nunc at quam sed leo lobortis malesuada. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Aliquam erat volutpat. Nam risus ante, rhoncus ac condimentum non, accumsan nec quam. Quisque vitae nulla eget sem viverra condimentum. Ut iaculis neque eget orci tincidunt venenatis. Nunc ac tellus sit amet nibh tristique dignissim eget ac libero. Mauris tincidunt orci vitae turpis rhoncus pellentesque. Proin scelerisque ultricies placerat. Suspendisse vel consectetur libero. +

+

N +am ornare convallis tortor, semper convallis velit semper non. Nulla velit tortor, cursus bibendum cursus sit amet, placerat vel arcu. Nullam vel ipsum quis mauris gravida bibendum at id risus. Suspendisse massa nisl, luctus at tempor sed, tristique vel risus. Vestibulum erat nisl, porttitor sit amet tincidunt sit amet, sodales vel odio. Vivamus vitae pharetra nisi. Praesent a turpis quis lectus malesuada vehicula a in quam. Quisque consectetur imperdiet urna et convallis. Phasellus malesuada, neque non aliquet dictum, purus arcu volutpat odio, nec sodales justo urna vel justo. Phasellus venenatis leo id sapien tempor hendrerit. Nullam ac elit sodales velit dapibus tempor eu at risus. Sed quis nibh velit. Fusce sapien lacus, dapibus eu convallis luctus, molestie vel est. Proin pellentesque blandit felis nec dapibus. Sed vel felis eu libero viverra porttitor et nec diam. Aenean ac cursus quam. Sed ut tortor nisi. Nullam viverra velit ac velit interdum eu porta justo iaculis. Aliquam egestas fermentum auctor. Fusce viverra lorem augue. +

+

+Integer quis dolor et quam hendrerit consectetur sit amet sed neque. Praesent vel vulputate arcu. Integer vestibulum congue mauris, sit amet tincidunt mauris fermentum sit amet. Etiam quam felis, tempus at laoreet at, hendrerit et urna. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque ut mollis nibh. Integer quis est mi, eget aliquam nunc. Quisque hendrerit pulvinar lacus, nec ullamcorper sapien gravida nec. Morbi eleifend interdum magna, ultrices euismod sapien ultricies et. In adipiscing est vitae ligula tristique porta. Sed enim lectus, sodales ac cursus vel, suscipit id erat. Praesent tristique congue massa, ac sagittis neque ullamcorper vestibulum. Fusce vel elit quis quam convallis blandit. Duis nibh massa, porttitor sit amet sodales sit amet, varius at sem. Maecenas consequat ultrices dolor nec tincidunt. Cras id tellus urna. Etiam ut odio tellus, in ornare quam. Curabitur vel est nulla. +

+

+In aliquet dolor ut elit tempor nec tincidunt tortor porttitor. Etiam consequat tincidunt consectetur. Morbi erat elit, rutrum at molestie a, posuere pretium nisl. Nam at vestibulum nunc. In sed nisl ante, ac molestie nibh. Donec eu neque eget lectus dignissim faucibus sit amet nec quam. Pellentesque tincidunt porttitor vestibulum. Aliquam ut ligula diam, eget egestas augue. Proin ac venenatis purus. Morbi malesuada luctus libero sed laoreet. Curabitur molestie dui ac nunc molestie hendrerit. In congue luctus faucibus. Morbi elit turpis, feugiat nec venenatis vel, tempor cursus nibh. Pellentesque sagittis consectetur ante, eu luctus quam hendrerit in. +

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/book1/chapter3.html b/epublib-core/src/test/resources/book1/chapter3.html new file mode 100644 index 00000000..c6d258bf --- /dev/null +++ b/epublib-core/src/test/resources/book1/chapter3.html @@ -0,0 +1,13 @@ + + + Chapter 3 + + + +

Final chapter

+

+Welcome to Chapter 3 of the epublib book1 test book.
+We hope you enjoyed the test. +

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/book1/cover.html b/epublib-core/src/test/resources/book1/cover.html new file mode 100644 index 00000000..fba37680 --- /dev/null +++ b/epublib-core/src/test/resources/book1/cover.html @@ -0,0 +1,8 @@ + + + Cover + + + + + \ No newline at end of file diff --git a/epublib-core/src/test/resources/book1/cover.png b/epublib-core/src/test/resources/book1/cover.png new file mode 100644 index 00000000..a2c37d16 Binary files /dev/null and b/epublib-core/src/test/resources/book1/cover.png differ diff --git a/epublib-core/src/test/resources/book1/flowers_320x240.jpg b/epublib-core/src/test/resources/book1/flowers_320x240.jpg new file mode 100644 index 00000000..88c152ab Binary files /dev/null and b/epublib-core/src/test/resources/book1/flowers_320x240.jpg differ diff --git a/epublib-core/src/test/resources/chm1/#IDXHDR b/epublib-core/src/test/resources/chm1/#IDXHDR new file mode 100644 index 00000000..9dc95b8b Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#IDXHDR differ diff --git a/epublib-core/src/test/resources/chm1/#IVB b/epublib-core/src/test/resources/chm1/#IVB new file mode 100644 index 00000000..4691d0d0 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#IVB differ diff --git a/epublib-core/src/test/resources/chm1/#STRINGS b/epublib-core/src/test/resources/chm1/#STRINGS new file mode 100644 index 00000000..07ecca06 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#STRINGS differ diff --git a/epublib-core/src/test/resources/chm1/#SYSTEM b/epublib-core/src/test/resources/chm1/#SYSTEM new file mode 100644 index 00000000..f9b54b07 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#SYSTEM differ diff --git a/epublib-core/src/test/resources/chm1/#TOPICS b/epublib-core/src/test/resources/chm1/#TOPICS new file mode 100644 index 00000000..71c22a07 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#TOPICS differ diff 
--git a/epublib-core/src/test/resources/chm1/#URLSTR b/epublib-core/src/test/resources/chm1/#URLSTR new file mode 100644 index 00000000..8ca19868 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#URLSTR differ diff --git a/epublib-core/src/test/resources/chm1/#URLTBL b/epublib-core/src/test/resources/chm1/#URLTBL new file mode 100644 index 00000000..3792848b Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#URLTBL differ diff --git a/epublib-core/src/test/resources/chm1/#WINDOWS b/epublib-core/src/test/resources/chm1/#WINDOWS new file mode 100644 index 00000000..56f0f01b Binary files /dev/null and b/epublib-core/src/test/resources/chm1/#WINDOWS differ diff --git a/epublib-core/src/test/resources/chm1/$FIftiMain b/epublib-core/src/test/resources/chm1/$FIftiMain new file mode 100644 index 00000000..39973cd6 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$FIftiMain differ diff --git a/epublib-core/src/test/resources/chm1/$OBJINST b/epublib-core/src/test/resources/chm1/$OBJINST new file mode 100644 index 00000000..22d7b26c Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$OBJINST differ diff --git a/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/BTree b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/BTree new file mode 100644 index 00000000..ae5bc4df Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/BTree differ diff --git a/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Data b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Data new file mode 100644 index 00000000..433135b4 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Data differ diff --git a/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Map b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Map new file mode 100644 index 00000000..d45cdf3e Binary files /dev/null and 
b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Map differ diff --git a/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Property b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Property new file mode 100644 index 00000000..69d161d3 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWAssociativeLinks/Property differ diff --git a/epublib-core/src/test/resources/chm1/$WWKeywordLinks/BTree b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/BTree new file mode 100644 index 00000000..ad58448c Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/BTree differ diff --git a/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Data b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Data new file mode 100644 index 00000000..6cf94bdd Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Data differ diff --git a/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Map b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Map new file mode 100644 index 00000000..8f07274c Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Map differ diff --git a/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Property b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Property new file mode 100644 index 00000000..69d161d3 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/$WWKeywordLinks/Property differ diff --git a/epublib-core/src/test/resources/chm1/CHM-example.hhc b/epublib-core/src/test/resources/chm1/CHM-example.hhc new file mode 100644 index 00000000..2a2fc7b8 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/CHM-example.hhc @@ -0,0 +1,108 @@ + + + + + + + + + +
    +
  • + + + +
  • + + +
      +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    +
  • + + + +
      +
    • + + + +
    • + + + +
    +
  • + + +
      +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    • + + + +
    • + + + + +
    • + + + + +
    • + + + + + +
    +
+ diff --git a/epublib-core/src/test/resources/chm1/CHM-example.hhk b/epublib-core/src/test/resources/chm1/CHM-example.hhk new file mode 100644 index 00000000..f2ee57c6 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/CHM-example.hhk @@ -0,0 +1,458 @@ + + + + + + + + + + +
    +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + + + + + + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + +
  • +
  • + + + + +
  • +
  • + + + + + + +
  • +
  • + + + + +
  • +
+ + diff --git a/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-10000.htm b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-10000.htm new file mode 100644 index 00000000..825aef71 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-10000.htm @@ -0,0 +1,64 @@ + + + + +Context sensitive help topic 10000 + + + + + + + + + + + +
+ +

Context sensitive help topic 10000

+

This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 10000.

+

+

Open your project (.hhp) file in notepad and add the following sections:

+

[MAP]

+

Add a [MAP] section and define the IDs you require.

+

#define IDH_frmMainControl1 10000
+ #define IDH_frmMainControl2 10010
+ #define IDH_frmChildControl1 20000
+ #define IDH_frmChildControl2 20010
+

+

[ALIAS]

+

Add an [ALIAS] section and define the mapping between each ID and a help topic.

+

[ALIAS]
+ IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
+ IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
+ IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
+ IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

+

Alternatively you can do this:

+

In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in the same folder as your help project file.

+
;---------------------------------------------------
; alias.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; last edited: 2006-07-09
;---------------------------------------------------
IDH_90000=index.htm
IDH_10000=Context-sensitive_example\contextID-10000.htm
IDH_10010=Context-sensitive_example\contextID-10010.htm
IDH_20000=Context-sensitive_example\contextID-20000.htm
IDH_20010=Context-sensitive_example\contextID-20010.htm
+

In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in the same folder as your help project file.

+
;--------------------------------------------------
; map.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; ;comment at end of line
;--------------------------------------------------
#define IDH_90000 90000;frmMain
#define IDH_10000 10000;frmAddressDataContextID-1
#define IDH_10010 10010;frmAddressDataContextID-2
#define IDH_20000 20000;frmAddressDataContextID-3
#define IDH_20010 20010;frmAddressDataContextID-4
+

Open your .hhp file in a text editor and add these sections

+

[ALIAS]
+ #include alias.h

+

[MAP]
+ #include map.h

+

Recompile your .HHP file. Now your application can call help using context + help IDs instead of topic file names.

+

 

+

 

+ + + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-10010.htm b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-10010.htm new file mode 100644 index 00000000..8c9b1389 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-10010.htm @@ -0,0 +1,63 @@ + + + + +Context sensitive help topic 10010 + + + + + + + + + + + +
+ +

Context sensitive help topic 10010

+

This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 10010.

+

+

Open your project (.hhp) file in notepad and add the following sections:

+

[MAP]

+

Add a [MAP] section and define the IDs you require.

+

#define IDH_frmMainControl1 10000
+ #define IDH_frmMainControl2 10010
+ #define IDH_frmChildControl1 20000
+ #define IDH_frmChildControl2 20010
+

+

[ALIAS]

+

Add an [ALIAS] section and define the mapping between each ID and a help topic.

+

[ALIAS]
+ IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
+ IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
+ IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
+ IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

+

Alternatively you can do this:

+

In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in the same folder as your help project file.

+
;---------------------------------------------------
; alias.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; last edited: 2006-07-09
;---------------------------------------------------
IDH_90000=index.htm
IDH_10000=Context-sensitive_example\contextID-10000.htm
IDH_10010=Context-sensitive_example\contextID-10010.htm
IDH_20000=Context-sensitive_example\contextID-20000.htm
IDH_20010=Context-sensitive_example\contextID-20010.htm
+

In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in the same folder as your help project file.

+
;--------------------------------------------------
; map.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; ;comment at end of line
;--------------------------------------------------
#define IDH_90000 90000;frmMain
#define IDH_10000 10000;frmAddressDataContextID-1
#define IDH_10010 10010;frmAddressDataContextID-2
#define IDH_20000 20000;frmAddressDataContextID-3
#define IDH_20010 20010;frmAddressDataContextID-4
+

Open your .hhp file in a text editor and add these sections

+

[ALIAS]
+ #include alias.h

+

[MAP]
+ #include map.h

+

Recompile your .HHP file. Now your application can call help using context + help IDs instead of topic file names.

+

 

+

+

 

+ + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-20000.htm b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-20000.htm new file mode 100644 index 00000000..d2121050 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-20000.htm @@ -0,0 +1,66 @@ + + + + +Context sensitive help topic 20000 + + + + + + + + + + + +
+ +

Context sensitive help topic 20000

+

This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 20000.

+

+

Open your project (.hhp) file in notepad and add the following sections:

+

[MAP]

+

Add a [MAP] section and define the IDs you require.

+

#define IDH_frmMainControl1 10000
+ #define IDH_frmMainControl2 10010
+ #define IDH_frmChildControl1 20000
+ #define IDH_frmChildControl2 20010
+

+

[ALIAS]

+

Add an [ALIAS] section and define the mapping between each ID and a help topic.

+

[ALIAS]
+ IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
+ IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
+ IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
+ IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

+

Alternatively you can do this:

+

In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in the same folder as your help project file.

+
;---------------------------------------------------
; alias.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; last edited: 2006-07-09
;---------------------------------------------------
IDH_90000=index.htm
IDH_10000=Context-sensitive_example\contextID-10000.htm
IDH_10010=Context-sensitive_example\contextID-10010.htm
IDH_20000=Context-sensitive_example\contextID-20000.htm
IDH_20010=Context-sensitive_example\contextID-20010.htm
+

In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in the same folder as your help project file.

+
;--------------------------------------------------
; map.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; ;comment at end of line
;--------------------------------------------------
#define IDH_90000 90000;frmMain
#define IDH_10000 10000;frmAddressDataContextID-1
#define IDH_10010 10010;frmAddressDataContextID-2
#define IDH_20000 20000;frmAddressDataContextID-3
#define IDH_20010 20010;frmAddressDataContextID-4
+

Open your .hhp file in a text editor and add these sections

+

[ALIAS]
+ #include alias.h

+

[MAP]
+ #include map.h

+

Recompile your .HHP file. Now your application can call help using context + help IDs instead of topic file names.

+

 

+

+

 

+

 

+ + + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-20010.htm b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-20010.htm new file mode 100644 index 00000000..f44a6016 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Context-sensitive_example/contextID-20010.htm @@ -0,0 +1,66 @@ + + + + +Context sensitive help topic 20010 + + + + + + + + + + + +
+ +

Context sensitive help topic 20010

+

This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 20010.

+

+

Open your project (.hhp) file in notepad and add the following sections:

+

[MAP]

+

Add a [MAP] section and define the IDs you require.

+

#define IDH_frmMainControl1 10000
+ #define IDH_frmMainControl2 10010
+ #define IDH_frmChildControl1 20000
+ #define IDH_frmChildControl2 20010
+

+

[ALIAS]

+

Add an [ALIAS] section and define the mapping between each ID and a help topic.

+

[ALIAS]
+ IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
+ IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
+ IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
+ IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

+

Alternatively you can do this:

+

In a text editor, enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in the same folder as your help project file.

+
;---------------------------------------------------
; alias.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; last edited: 2006-07-09
;---------------------------------------------------
IDH_90000=index.htm
IDH_10000=Context-sensitive_example\contextID-10000.htm
IDH_10010=Context-sensitive_example\contextID-10010.htm
IDH_20000=Context-sensitive_example\contextID-20000.htm
IDH_20010=Context-sensitive_example\contextID-20010.htm
+

In a text editor, enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in the same folder as your help project file.

+
;--------------------------------------------------
; map.h file example for HTMLHelp (CHM)
; www.help-info.de
;
; All IDH's > 10000 for better format
; ;comment at end of line
;--------------------------------------------------
#define IDH_90000 90000;frmMain
#define IDH_10000 10000;frmAddressDataContextID-1
#define IDH_10010 10010;frmAddressDataContextID-2
#define IDH_20000 20000;frmAddressDataContextID-3
#define IDH_20010 20010;frmAddressDataContextID-4
+

Open your .hhp file in a text editor and add these sections

+

[ALIAS]
+ #include alias.h

+

[MAP]
+ #include map.h

+

Recompile your .HHP file. Now your application can call help using context + help ID's instead of topic file names.

+

 

+

+

 

+

 

+ + + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/Garden/flowers.htm b/epublib-core/src/test/resources/chm1/Garden/flowers.htm new file mode 100644 index 00000000..8a7900c6 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Garden/flowers.htm @@ -0,0 +1,51 @@ + + + + +Flowers + + + + + + + + + + + + + + +
+ +

Flowers

+

You can cultivate flowers in your garden. It is beautiful if one can give his + wife a bunch of self-cultivated flowers.

+ + + + + + + + + + + + + +
+

 

+ +

 

+ + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/Garden/garden.htm b/epublib-core/src/test/resources/chm1/Garden/garden.htm new file mode 100644 index 00000000..86792d5a --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Garden/garden.htm @@ -0,0 +1,59 @@ + + + + +Garden + + + + + + + + + + + + + + + + + + +
+ +

Own Garden

+

It is nice to have a garden near your home.

+

You can plant trees of your own, lay out a pond with fish and cultivate flowers. + For the children a play lawn can be laid out. You can learn much about botany.

+

 

+ + + + + + + + + + + + + +
A garden is good for your health and you can relax + while gardening.
+

 

+

 

+

 

+

 

+ + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/Garden/tree.htm b/epublib-core/src/test/resources/chm1/Garden/tree.htm new file mode 100644 index 00000000..10e34f7b --- /dev/null +++ b/epublib-core/src/test/resources/chm1/Garden/tree.htm @@ -0,0 +1,43 @@ + + + + +How one grows trees + + + + + + + + + + + + + + + + +
+ +

How one grows trees

+

You must dig a big hole first.

+

Think carefully about which kind of tree you want to plant.

+

(oak, beech, alder)

+

A newly planted tree must always be given sufficient water.

+

+

 

+ +

 

+ + + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/CloseWindowAutomatically.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/CloseWindowAutomatically.htm new file mode 100644 index 00000000..2655be2d --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/CloseWindowAutomatically.htm @@ -0,0 +1,58 @@ + + + + +Attention (!) - Close Window automatically + + + + + + + + + + + + + + + + + +
go to home ...
+ +

Close Window automatically

+

One can close HTML Help window without getting a click from user by the following + code. Use "Close" ActiveX Control and Javascript as shown below.

+

Code

+

 

+

<OBJECT id=hhctrl type="application/x-oleobject"
+ classid="clsid:adb880a6-d8ff-11cf-9377-00aa003b7a11"
+ codebase="hhctrl.ocx#Version=5,2,3790,233">
+ <PARAM name="Command" value="Close">
+ </OBJECT>
+ <script type="text/javascript" language="JavaScript">
+ <!--
+ window.setTimeout('hhctrl.Click();',1000);
+ // -->
+ </script>

+

 

+

 

+

 

+

 

+ + + + + +
back to top ...
+
+

 

+ + diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/Jump_to_anchor.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/Jump_to_anchor.htm new file mode 100644 index 00000000..f74f191c --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/Jump_to_anchor.htm @@ -0,0 +1,73 @@ + + + + +How to jump to a anchor + + + + + + + + + + + + + +
+ +

How to jump to an anchor

+

This topic shows how to jump to bookmarks in your HTML code like:

+

<a name="AnchorSample" id="AnchorSample"></a>

+ +

 

+

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+ + +

AnchorSample InnerText Headline

+

1. Example for use with Visual Basic 2003

+

This topic is used to show providing help for controls with a single HTML file + downloaded from a server (if internet connection is available) and jump to 'AnchorSample'.

+

2. Example for use with Compiled Help Module (CHM)

+

This topic is used to show how to jump to bookmarks AnchorSample.

+

 

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+

 

+ + +

Sample headline after anchor 'SecondAnchor'

+

Here is coded:

+

<a name="SecondAnchor" id="SecondAnchor"></a>

+

Example for use with Compiled Help Module (CHM)

+

This topic is used to show how to jump to bookmarks SecondAnchor.

+

 

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+ + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/LinkPDFfromCHM.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/LinkPDFfromCHM.htm new file mode 100644 index 00000000..03098bb3 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/LinkPDFfromCHM.htm @@ -0,0 +1,39 @@ + + + + +Linking to PDF from CHM + + + + + + + + + + +
+ +

Linking to PDF from CHM

+

This topic is only used to show linking from a compiled CHM to other files + and places. Open/Save dialog is used.

+

PDF

+

Link relative to PDF

+
+<p><a href="../embedded_files/example-embedded.pdf">Link relative to PDF</a></p>
+
+

 

+

 

+

 

+

 

+ + + + + +
back to top ...
+
+

 

+ + diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/Simple_link_example.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/Simple_link_example.htm new file mode 100644 index 00000000..7b3e1288 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/Simple_link_example.htm @@ -0,0 +1,112 @@ + + + + +Linking from CHM with standard HTML + + + + + + + + + + + + + + + + +
+ +

Linking from CHM with standard HTML

+

This is a simple example of how to link from a compiled CHM to HTML files. Some + files are on a web server, some are local and relative to the CHM file.

+

 

+

Link relative to a HTML file that isn't compiled into the CHM

+ + +

The following technique of linking is useful if one permanently must update + some files on the PC of the customer without compiling the CHM again. The external + file must reside in the CHM folder or a subfolder.

+

Link relative to a external HTML file (external_files/external_topic.htm) +

+ +

Link code:

+
+<p>
+<SCRIPT Language="JScript">
+function parser(fn) {
+ var X, Y, sl, a, ra, link;
+ ra = /:/;
+ a = location.href.search(ra);
+ if (a == 2)
+  X = 14;
+ else
+  X = 7;
+  sl = "\\";
+  Y = location.href.lastIndexOf(sl) + 1;
+  link = 'file:///' + location.href.substring(X, Y) + fn;
+  location.href = link;
+ }
+</SCRIPT>
+</p>
+
+<p>
+  <a onclick="parser('./external_files/external_topic.htm')"
+  style="text-decoration: underline;
+  color: green; cursor: hand">Link relative to a external HTML file (external_files/external_topic.htm)</a>
+</p>
+
+

Links to HTML pages on the web

+ + + + + + + + + + + + + +
Windmill, Germany - Ditzum
+

In the past, energy was generated with windmills in Germany.

+

See more information about + mills (click the link).

+
+

These are modern wind energy converters today.

+

Open technical information on a web server with iframe inside your content window.

+
Enercon, Germany
+

 

+ +

 

+ + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/example-external-pdf.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/example-external-pdf.htm new file mode 100644 index 00000000..9d9d6361 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/example-external-pdf.htm @@ -0,0 +1,23 @@ + + +Example load PDF from TOC + + + + + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/pop-up_example.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/pop-up_example.htm new file mode 100644 index 00000000..1f28dcf6 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/pop-up_example.htm @@ -0,0 +1,99 @@ + + + + +How to create PopUp + + + + + + + + + + + + + + + +
+ +

PopUp Example

+

Code see below!

+

(not working in all browsers/browser versions - see your system's security + updates).

+

+ Click here to see example information (PopUp).

+ +

+

+

To change the flower picture, hover over it with your mouse pointer!

+
+

Click + here to change the background color (PopUp).

+ + +

+

+

To change the flower picture, hover over it with your mouse pointer!

+
+

Another example to enlarge a screenshot (hover with mouse pointer):

+

See what happens .. +

+

To enlarge the screenshot, hover over it with your mouse pointer!

+
+

Another example to enlarge a screenshot (click to screenshot):

+

+ +

+
+

This is the code for the second text link:

+
<p>
+<a class=popupspot
href="JavaScript:hhctrl.TextPopup
('This is a standard HTMLHelp text-only popup. + See the nice flowers below.','Verdana,8',10,10,00000000,0x66ffff)">
Click here to change the background color.</a> +</p> +
+

This is the code to change the flower picture:

+
+<p>
+<img
+ onmouseover="(src='../images/wintertree.jpg')"
+ onmouseout="(src='../images/insekt.jpg')"
+ src="../images/insekt.jpg" alt="" border="0"> 
</p> +
+

This is the code to enlarge the screenshot (hover):

+
<p>
<img + src="../images/screenshot_small.png" alt="" border="0" + onmouseover="(src='../images/screenshot_big.png')" + onmouseout="(src='../images/screenshot_small.png')"> +</p>
+

This is the code to enlarge the screenshot (click):

+
<p>
<img src="../images/screenshot_small.png" alt="" + onclick="this.src='../images/screenshot_big.png'" />
</p>
+

 

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+ + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/shortcut_link.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/shortcut_link.htm new file mode 100644 index 00000000..01d1992e --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/shortcut_link.htm @@ -0,0 +1,61 @@ + + + + +Using CHM shortcut links + + + + + + + + + + + + + + + + + + + + + +
+ +

Using CHM shortcut links

+

This is a simple example of how to use shortcut links from a CHM file and jump + to a URL with the user's default browser.

+

Example:

+

Click me to go to www-help-info.de

+

Note:

+
    +
  • Won't work on the web
  • +
  • Only works in compressed CHM file.
  • +
  • Doesn't work with "Open dialog". You have to save to local disk.
  • +
  • MyUniqueID must be a unique name for each shortcut you create in a HTML + file.
  • +
+

Put this code in your <head> section:

+

<OBJECT id=MyUniqueID type="application/x-oleobject"
+ classid="clsid:adb880a6-d8ff-11cf-9377-00aa003b7a11">
+ <PARAM name="Command" value="ShortCut">
+ <PARAM name="Item1" value=",http://www.help-info.de/index_e.htm,">
+ </OBJECT>

+

Put this code in your <body> section:

+

<p><a href="javascript:MyUniqueID.Click()">Click me to + go to www-help-info.de</a></p>

+ + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-02.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-02.htm new file mode 100644 index 00000000..e6fb4530 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-02.htm @@ -0,0 +1,41 @@ + + + + +Topic 2 + + + + + + + + + + +
+

To do so, insert the following code into the HTML file at this place:

+
  <object type="application/x-oleobject"
+     classid="clsid:1e2a7bd0-dab9-1­1d0-b93a-00c04fc99f9
+     <param name="New HTML file" value="topic-02.htm">
+     <param name="New HTML title" value="Topic 2">
+  </object>
+

Split example - Topic 2

+

This example is used to show how the SPLIT function is working for generating + sub-topics from one HTML file to the table of contents. This is the topic + 2 of one HTML file.

+

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+ + + + +
back to top ...
+
+ + + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-03.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-03.htm new file mode 100644 index 00000000..bdd34b32 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-03.htm @@ -0,0 +1,41 @@ + + + + +Topic 3 + + + + + + + + + + +
+

To do so, insert the following code into the HTML file at this place:

+
  <object type="application/x-oleobject"
+     classid="clsid:1e2a7bd0-dab9-1­1d0-b93a-00c04fc99f9
+     <param name="New HTML file" value="topic-03.htm">
+     <param name="New HTML title" value="Topic 3">
+  </object>
+

Split example - Topic 3

+

This example is used to show how the SPLIT function is working for generating + sub-topics from one HTML file to the table of contents. This is the topic + 3 of one HTML file.

+

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+

 

+ + + + +
back to top ...
+
+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-04.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-04.htm new file mode 100644 index 00000000..59297630 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic-04.htm @@ -0,0 +1,23 @@ + + + + +Topic 4 + + + + + + + + + +
+

Split example - Topic 4

+

This is a short example text for Topic 4 for a small pop-up window.

+

See link at Topic 1.

+

 

+

 

+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic_split_example.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic_split_example.htm new file mode 100644 index 00000000..d623572e --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/topic_split_example.htm @@ -0,0 +1,67 @@ + + + + +Topic split example + + + + + + + + + + + + + + +
+ +

Split example - Main Topic 1

+

It's possible to have one mega HTML file split into several files by using + an HHCTRL.OCX split file object tag in your HTML. This instructs the HTML Help + compiler to split the HTML file at the specific points where it finds this tag. + The object tag has the following format:

+
  <object type="application/x-oleobject"
+     classid="clsid:1e2a7bd0-dab9-1­1d0-b93a-00c04fc99f9
+     <param name="New HTML file" value="a_new_file.htm">       e.g "topic-04.htm"
+     <param name="New HTML title" value="My new topic title">  e.g. "Topic 4"
+  </object>
+

The first value - "file" - specifies the name you want to give to + the file that would be created for this topic. The second value - "title" + - specifies what you would want in the <TITLE> tag for the document. You + shouldn't change any details apart from the value parameter. +

+

The file then gets created within the .chm file at compile time, though you'll + never see it on disk. A pretty neat feature.

+

The trick of course is that if you have links in your .chm file, whether from + the contents/index or from topic to topic, you'll need to reference the file + name that you specify in the tag above.

+

If you are using HTML Help Workshop, you can use the Split File command on + the Edit menu to insert the <object> tags.

+

The following hyperlink displays a topic file in popup-type window:

+

Link from this main to topic 4 (only working in the compiled help CHM + and for a locally saved CHM)

+
<a href="#"
+ onClick="window.open('topic-04.htm','Sample',
+ 'toolbar=no,width=200,height=200,left=500,top=400,
+ status=no,scrollbars=no,resize=no');return false">
+ Link from this main to topic 4</a>
+

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

+ + + + +
back to top ...
+
+ + + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/using_window_open.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/using_window_open.htm new file mode 100644 index 00000000..dbca0d8f --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/using_window_open.htm @@ -0,0 +1,62 @@ + + + + +Using window.open + + + + + + + + + + + + + + + + +
+ +

Using window.open

+

This is a simple example of how to use the "window.open" command

+

Click here to open a HTML file

+

 

+

Neues Fenster +

+

<script type="text/javascript">
+ function NeuFenster () {
+ MeinFenster = window.open("datei2.htm", "Zweitfenster", + "width=300,height=200,scrollbars");
+ MeinFenster.focus();
+ }
+ </script>
+

+

 

+

Put this code in your <body> section:

+

<A HREF= "#" onClick="window.open('/external_files/external.htm',
+ 'Window Open Sample','toolbar=no,width=850,height=630,left=300,top=200,
+ status=no,scrollbars=no,resize=no');return false"> Click here to open + a HTML file</A>

+ + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm new file mode 100644 index 00000000..44dbbbc2 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm @@ -0,0 +1,75 @@ + + + + +XP Style for RadioButton and Check Boxes + + + + + + + + + + + + + +
+ +

XP Style for RadioButton and Check Boxes

+

This is a simple example of how to use XP Style for RadioButton and Check Boxes

+

 

+ +

Click to select a special pizza

+ +
+

+ + Salami
+ + Pilze
+ + Sardellen

+

 

+
+ +

Your manner of payment:

+ +
+

+ + Mastercard
+ + Visa
+ + American Express

+
+

 

+

Select also another favorite

+ +
+

+ +

+
+ + + + + + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/design.css b/epublib-core/src/test/resources/chm1/design.css new file mode 100644 index 00000000..572fd425 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/design.css @@ -0,0 +1,177 @@ +/* Formatvorlage*/ +/* (c) Ulrich Kulle Hannover*/ +/*---------------------------------------------*/ +/* Die Formatierungen gelten fr alle Dateien,*/ +/* die im Hauptframe angezeigt werden*/ + +/*mgliche Einstellung Rollbalken MS IE 5.5*/ +/*scrollbar-3d-light-color : red*/ +/*scrollbar-arrow-color : yellow*/ +/*scrollbar-base-color : green*/ +/*scrollbar-dark-shadow-color : orange*/ +/*scrollbar-face-color : purple */ +/*scrollbar-highloight-color : black*/ +/*scrollbar-shadow-color : blue */ + +/*BODY-tag Steuermglichkeit */ +/*margin-top:0px; margin-left=0px; */ + +body +{ + background: #ffffff; + scrollbar-base-color: #A88000; + scrollbar-arrow-color: yellow; + margin-left : 0px; + margin-top: 0px; + margin-right: 0px; +} + +hr { +color: #FFCC00; +margin-left : 10px; +margin-right: 10px; +} + +hr.simple { +margin-left : 10px; +margin-right: 10px; +} +h1 { +background-image: url(images/verlauf-gelb.jpg); +font-family: Verdana; +font-size: 10pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h2 { +background-image: url(images/verlauf-blau.jpg); +font-family: Verdana; +font-size: 10pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h3 { +background-image: url(images/verlauf-rot.jpg); +color:white; +font-family: Verdana; +font-size: 10pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h4 { +background-image: url(images/verlauf-gelb.jpg); +font-family: Verdana; +font-size: 8pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h5{ +background-image: url(images/verlauf-blau.jpg); +font-family: Verdana; +font-size: 8pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h6 { +background-image: 
url(images/verlauf-rot.jpg); +color:white; +font-family: Verdana; +font-size: 8pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +li { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +margin-left : 10px; +} +p { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +margin-left : 10px; +margin-right: 10px; +} +/* note box */ +p.note { + background-color : #FFFFCC; + border : 1px solid #FFCC00; + clear : both; + color : black; + margin : 12px 30px 12px 20px; + padding : 6px 6px; +} +/* used in tutorial */ +p.tip { + background-color : #FFFFCC; + border : 1px solid black; + clear : both; + color : black; + margin-left : 10%; + padding : 6px 6px; + width : 90%; +} +/* pre note box */ +pre { + background-color : #FFFFCC; + border : 1px solid #FFCC00; + clear : both; + color : black; + margin : 12px 30px 12px 20px; + padding : 6px 6px; +} +table.sitemap { +margin-left: 10px; +} + +table.code { +margin-left:10px; +} + +table.top { +background-image: url(images/site/help-info_logo_3px.jpg); +margin-left:0px; +margin-top:0px; +} + +td.siteheader { + background-color:#E10033; + COLOR:white; + padding-left:3px; +} + +td { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +} +tr { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +} + +ul{ + list-style-image : url(images/list_arrow.gif); + list-style-position : outside; +} +ul.extlinklist { + list-style-image : url(images/extlink.gif); +} + +A:visited { + color: Blue; + text-decoration: none; + font-weight: bold; + font-size: 10pt +} +A:link {color: #800080;text-decoration: none;font-weight: bold;font-size: 10pt} +A:hover {color: #FF0000;text-decoration: underline;font-weight: bold;font-size: 10pt} +A:active {color: #FF0000;text-decoration: none;font-weight: bold;font-size: 10pt} diff --git a/epublib-core/src/test/resources/chm1/embedded_files/example-embedded.pdf 
b/epublib-core/src/test/resources/chm1/embedded_files/example-embedded.pdf new file mode 100644 index 00000000..53e936b8 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/embedded_files/example-embedded.pdf differ diff --git a/epublib-core/src/test/resources/chm1/external_files/external_topic.htm b/epublib-core/src/test/resources/chm1/external_files/external_topic.htm new file mode 100644 index 00000000..a1388261 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/external_files/external_topic.htm @@ -0,0 +1,47 @@ + + + + +External Topic + + + + + + + + + + + + +
+ +

External Topic

+

 

+

This is an external topic that resides relative to the CHM files and isn't compiled + into the CHM file. Here it's used to show how to link to external files in a + CHM topic window.

+

Delete links in all HTML files of your project - otherwise the external file + is compiled to the CHM file.

+

Make a copy of the external file and delete the file in your project structure + before the last compile runs. So the file isn't compiled into the CHM file. + But you have to install the external file on the customers PC.

+

To try this example you must download the complete + project example to a local folder, delete all files except "CHM-example.chm" + and the folder "external_files".

+

Edit the following date in the external HTML file "external_topic.htm" + to check that you can update the HTML file without recompiling the CHM file:

+

 

+

2005-05-17

+

 

+
+ + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/filelist.txt b/epublib-core/src/test/resources/chm1/filelist.txt new file mode 100644 index 00000000..9582bc44 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/filelist.txt @@ -0,0 +1,64 @@ +#IDXHDR +#IVB +#STRINGS +#SYSTEM +#TOPICS +#URLSTR +#URLTBL +#WINDOWS +$FIftiMain +$OBJINST +$WWAssociativeLinks/BTree +$WWAssociativeLinks/Data +$WWAssociativeLinks/Map +$WWAssociativeLinks/Property +$WWKeywordLinks/BTree +$WWKeywordLinks/Data +$WWKeywordLinks/Map +$WWKeywordLinks/Property +CHM-example.hhc +CHM-example.hhk +Context-sensitive_example/contextID-10000.htm +Context-sensitive_example/contextID-10010.htm +Context-sensitive_example/contextID-20000.htm +Context-sensitive_example/contextID-20010.htm +design.css +embedded_files/example-embedded.pdf +external_files/external_topic.htm +Garden/flowers.htm +Garden/garden.htm +Garden/tree.htm +HTMLHelp_Examples/CloseWindowAutomatically.htm +HTMLHelp_Examples/example-external-pdf.htm +HTMLHelp_Examples/Jump_to_anchor.htm +HTMLHelp_Examples/LinkPDFfromCHM.htm +HTMLHelp_Examples/pop-up_example.htm +HTMLHelp_Examples/shortcut_link.htm +HTMLHelp_Examples/Simple_link_example.htm +HTMLHelp_Examples/topic-02.htm +HTMLHelp_Examples/topic-03.htm +HTMLHelp_Examples/topic-04.htm +HTMLHelp_Examples/topic_split_example.htm +HTMLHelp_Examples/using_window_open.htm +HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm +images/blume.jpg +images/ditzum.jpg +images/eiche.jpg +images/extlink.gif +images/insekt.jpg +images/list_arrow.gif +images/lupine.jpg +images/riffel_40px.jpg +images/riffel_helpinformation.jpg +images/riffel_home.jpg +images/rotor_enercon.jpg +images/screenshot_big.png +images/screenshot_small.png +images/up_rectangle.png +images/verlauf-blau.jpg +images/verlauf-gelb.jpg +images/verlauf-rot.jpg +images/welcome_small_big-en.gif +images/wintertree.jpg +index.htm +topic.txt \ No newline at end of file diff --git 
a/epublib-core/src/test/resources/chm1/images/blume.jpg b/epublib-core/src/test/resources/chm1/images/blume.jpg new file mode 100644 index 00000000..b3735fb9 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/blume.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/ditzum.jpg b/epublib-core/src/test/resources/chm1/images/ditzum.jpg new file mode 100644 index 00000000..d461951e Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/ditzum.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/eiche.jpg b/epublib-core/src/test/resources/chm1/images/eiche.jpg new file mode 100644 index 00000000..507bfa1d Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/eiche.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/extlink.gif b/epublib-core/src/test/resources/chm1/images/extlink.gif new file mode 100644 index 00000000..5f37645d Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/extlink.gif differ diff --git a/epublib-core/src/test/resources/chm1/images/insekt.jpg b/epublib-core/src/test/resources/chm1/images/insekt.jpg new file mode 100644 index 00000000..09f8d5f9 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/insekt.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/list_arrow.gif b/epublib-core/src/test/resources/chm1/images/list_arrow.gif new file mode 100644 index 00000000..9d0d3607 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/list_arrow.gif differ diff --git a/epublib-core/src/test/resources/chm1/images/lupine.jpg b/epublib-core/src/test/resources/chm1/images/lupine.jpg new file mode 100644 index 00000000..0e0ea94f Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/lupine.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/riffel_40px.jpg b/epublib-core/src/test/resources/chm1/images/riffel_40px.jpg new file mode 100644 index 
00000000..70ee391a Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/riffel_40px.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/riffel_helpinformation.jpg b/epublib-core/src/test/resources/chm1/images/riffel_helpinformation.jpg new file mode 100644 index 00000000..2e9843f8 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/riffel_helpinformation.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/riffel_home.jpg b/epublib-core/src/test/resources/chm1/images/riffel_home.jpg new file mode 100644 index 00000000..e0d0ba30 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/riffel_home.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/rotor_enercon.jpg b/epublib-core/src/test/resources/chm1/images/rotor_enercon.jpg new file mode 100644 index 00000000..844539ea Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/rotor_enercon.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/screenshot_big.png b/epublib-core/src/test/resources/chm1/images/screenshot_big.png new file mode 100644 index 00000000..e5aa0f0e Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/screenshot_big.png differ diff --git a/epublib-core/src/test/resources/chm1/images/screenshot_small.png b/epublib-core/src/test/resources/chm1/images/screenshot_small.png new file mode 100644 index 00000000..a4398f4f Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/screenshot_small.png differ diff --git a/epublib-core/src/test/resources/chm1/images/up_rectangle.png b/epublib-core/src/test/resources/chm1/images/up_rectangle.png new file mode 100644 index 00000000..68c1999c Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/up_rectangle.png differ diff --git a/epublib-core/src/test/resources/chm1/images/verlauf-blau.jpg b/epublib-core/src/test/resources/chm1/images/verlauf-blau.jpg new file mode 
100644 index 00000000..7622d8f4 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/verlauf-blau.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/verlauf-gelb.jpg b/epublib-core/src/test/resources/chm1/images/verlauf-gelb.jpg new file mode 100644 index 00000000..3c2d6921 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/verlauf-gelb.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/verlauf-rot.jpg b/epublib-core/src/test/resources/chm1/images/verlauf-rot.jpg new file mode 100644 index 00000000..c30b3fc7 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/verlauf-rot.jpg differ diff --git a/epublib-core/src/test/resources/chm1/images/welcome_small_big-en.gif b/epublib-core/src/test/resources/chm1/images/welcome_small_big-en.gif new file mode 100644 index 00000000..70427cba Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/welcome_small_big-en.gif differ diff --git a/epublib-core/src/test/resources/chm1/images/wintertree.jpg b/epublib-core/src/test/resources/chm1/images/wintertree.jpg new file mode 100644 index 00000000..006e1836 Binary files /dev/null and b/epublib-core/src/test/resources/chm1/images/wintertree.jpg differ diff --git a/epublib-core/src/test/resources/chm1/index.htm b/epublib-core/src/test/resources/chm1/index.htm new file mode 100644 index 00000000..9d9514f4 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/index.htm @@ -0,0 +1,43 @@ + + + + +Welcome + + + + + + + + + + + + +
+ +

Welcome

+

+

.. to CHM examples!

+

HTMLHelp is the current help system for Microsoft Windows. This file includes + some examples of how to use Microsoft HTMLHelp and is used to show how to work + with HTMLHelp 1.x CHM files in Visual Basic Applications.

+

This "Welcome" page is the default page of the compiled help module + (CHM).

+

 

+

 

+

Version Information:

+

Release: 2005-07-17

+

(c) help-info.de

+

 

+
+ + + + +
back to top ...
+
+

 

+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/chm1/topic.txt b/epublib-core/src/test/resources/chm1/topic.txt new file mode 100644 index 00000000..1193ad86 --- /dev/null +++ b/epublib-core/src/test/resources/chm1/topic.txt @@ -0,0 +1,18 @@ +;------------------------------------------------- +; topic.h file example for HTMLHelp (CHM) +; www.help-info.de +; +; +; This is a file including the ID and PopUp text +;------------------------------------------------- +.topic 900;nohelp +Sorry, no help available! + +.topic 100;PopUp_AddressData_btnOK +This is context sensitive help text for a button (ID: IDH_100). + +.topic 110;PopUp_AddressData_txtFirstName +This is context sensitive help text for a text box (ID: IDH_110). + +.topic 120;PopUp_AddressData_txtLastName +This is context sensitive help text for a text box (ID: IDH_120). diff --git a/epublib-core/src/test/resources/holmes_scandal_bohemia.html b/epublib-core/src/test/resources/holmes_scandal_bohemia.html new file mode 100644 index 00000000..99f7ef7c --- /dev/null +++ b/epublib-core/src/test/resources/holmes_scandal_bohemia.html @@ -0,0 +1,942 @@ + + + + + +The Project Gutenberg eBook of The Adventures of Sherlock +Holmes, by Sir Arthur Conan Doyle + + + + + + +
+
+
+

THE ADVENTURES OF
+ +SHERLOCK HOLMES

+
+

BY

+
+

SIR ARTHUR CONAN DOYLE

+
+
+
+ +


+To Sherlock Holmes she is always the + +woman. I have seldom heard him mention her under any other name. In his +eyes she eclipses and predominates the whole of her sex. It was not that +he felt any emotion akin to love for Irene Adler. All emotions, and that +one particularly, were abhorrent to his cold, precise but admirably +balanced mind. He was, I take it, the most perfect reasoning and +observing machine that the world has seen, but as a lover he would have +placed himself in a false position. He never spoke of the softer +passions, save with a gibe and a sneer. They were admirable things for +the observer—excellent for drawing the veil from men’s motives and +actions. But for the trained reasoner to admit such intrusions into his +own delicate and finely adjusted temperament was to introduce a +distracting factor which might throw a doubt upon all his mental +results. Grit in a sensitive instrument, or a crack in one of his own +high-power lenses, would not be more disturbing than a strong emotion in +a nature such as his. And yet there was but one woman to him, and that +woman was the late Irene Adler, of dubious and questionable memory.

+

I had seen little of Holmes lately. My marriage had drifted us +away from each other. My own complete happiness, and the home-centred +interests which rise up around the man who first finds himself master of +his own establishment, were sufficient to absorb all my attention, while +Holmes, who loathed every form of society with his whole Bohemian soul, +remained in our lodgings in Baker Street, buried among his old books, +and alternating from week to week between cocaine and ambition, the +drowsiness of the drug, and the fierce energy of his own keen nature. He +was still, as ever, deeply attracted by the study of crime, and occupied +his immense faculties and extraordinary powers of observation in +following out those clues, and clearing up those mysteries which had +been abandoned as hopeless by the official police. From time to time I +heard some vague account of his doings: of his summons to Odessa in the +case of the Trepoff murder, of his clearing up of the singular tragedy +of the Atkinson brothers at Trincomalee, and finally of the mission +which he had accomplished so delicately and successfully for the +reigning family of Holland. Beyond these signs of his activity, however, +which I merely shared with all the readers of the daily press, I knew +little of my former friend and companion.

+

One night—it was on the twentieth of March, 1888—I was returning +from a journey to a patient (for I had now returned to civil practice), +when my way led me through Baker Street. As I passed the well-remembered +door, which must always be associated in my mind with my wooing, and +with the dark incidents of the Study in Scarlet, I was seized with a +keen desire to see Holmes again, and to know how he was employing his +extraordinary powers. His rooms were brilliantly lit, and, even as I +looked up, I saw his tall, spare figure pass twice in a dark silhouette +against the blind. He was pacing the room swiftly, eagerly, with his +head sunk upon his chest and his hands clasped behind him. To me, who +knew his every mood and habit, his attitude and manner told their own +story. He was at work again. He had risen out of his drug-created dreams +and was hot upon the scent of some new problem. I rang the bell and was +shown up to the chamber which had formerly been in part my own.

+

His manner was not effusive. It seldom was; but he was glad, I +think, to see me. With hardly a word spoken, but with a kindly eye, he +waved me to an armchair, threw across his case of cigars, and indicated +a spirit case and a gasogene in the corner. Then he stood before the +fire and looked me over in his singular introspective fashion.

+

“Wedlock suits you,” he remarked. “I think, Watson, that you have +put on seven and a half pounds since I saw you.”

+

“Seven!” I answered.

+

“Indeed, I should have thought a little more. Just a trifle more, +I fancy, Watson. And in practice again, I observe. You did not tell me +that you intended to go into harness.”

+

“Then, how do you know?”

+

“I see it, I deduce it. How do I know that you have been getting +yourself very wet lately, and that you have a most clumsy and careless +servant girl?”

+ +

“My dear Holmes,” said I, “this is too much. You would certainly +have been burned, had you lived a few centuries ago. It is true that I +had a country walk on Thursday and came home in a dreadful mess, but as +I have changed my clothes I can’t imagine how you deduce it. As to Mary +Jane, she is incorrigible, and my wife has given her notice, but there, +again, I fail to see how you work it out.”

+

He chuckled to himself and rubbed his long, nervous hands +together.

+

“It is simplicity itself,” said he; “my eyes tell me that on the +inside of your left shoe, just where the firelight strikes it, the +leather is scored by six almost parallel cuts. Obviously they have been +caused by someone who has very carelessly scraped round the edges of the +sole in order to remove crusted mud from it. Hence, you see, my double +deduction that you had been out in vile weather, and that you had a +particularly malignant boot-slitting specimen of the London slavey. As +to your practice, if a gentleman walks into my rooms smelling of +iodoform, with a black mark of nitrate of silver upon his right +forefinger, and a bulge on the right side of his top-hat to show where +he has secreted his stethoscope, I must be dull, indeed, if I do not +pronounce him to be an active member of the medical profession.”

+

I could not help laughing at the ease with which he explained his +process of deduction. “When I hear you give your reasons,” I remarked, +“the thing always appears to me to be so ridiculously simple that I +could easily do it myself, though at each successive instance of your +reasoning I am baffled until you explain your process. And yet I believe +that my eyes are as good as yours.”

+

“Quite so,” he answered, lighting a cigarette, and throwing +himself down into an armchair. “You see, but you do not observe. The +distinction is clear. For example, you have frequently seen the steps +which lead up from the hall to this room.”

+

“Frequently.”

+

“How often?”

+

“Well, some hundreds of times.”

+

“Then how many are there?”

+ +

“How many? I don’t know.”

+

“Quite so! You have not observed. And yet you have seen. That is +just my point. Now, I know that there are seventeen steps, because I +have both seen and observed. By the way, since you are interested in +these little problems, and since you are good enough to chronicle one or +two of my trifling experiences, you may be interested in this.” He threw +over a sheet of thick, pink-tinted notepaper which had been lying open +upon the table. “It came by the last post,” said he. “Read it aloud.”

+

The note was undated, and without either signature or address.

+

“There will call upon you to-night, at a quarter to eight +o’clock,” it said, “a gentleman who desires to consult you upon a matter +of the very deepest moment. Your recent services to one of the royal +houses of Europe have shown that you are one who may safely be trusted +with matters which are of an importance which can hardly be exaggerated. +This account of you we have from all quarters received. Be in your +chamber then at that hour, and do not take it amiss if your visitor wear +a mask.”

+

“This is indeed a mystery,” I remarked. “What do you imagine that +it means?”

+

“I have no data yet. It is a capital mistake to theorise before +one has data. Insensibly one begins to twist facts to suit theories, +instead of theories to suit facts. But the note itself. What do you +deduce from it?”

+

I carefully examined the writing, and the paper upon which it was +written.

+

“The man who wrote it was presumably well to do,” I remarked, +endeavouring to imitate my companion’s processes. “Such paper could not +be bought under half a crown a packet. It is peculiarly strong and +stiff.”

+

“Peculiar—that is the very word,” said Holmes. “It is not an +English paper at all. Hold it up to the light.”

+ +

I did so, and saw a large “E” with a small “g,” a “P,” and a +large “G” with a small “t” woven into the texture of the paper.

+

“What do you make of that?” asked Holmes.

+

“The name of the maker, no doubt; or his monogram, rather.”

+

“Not at all. The ‘G’ with the small ‘t’ stands for +‘Gesellschaft,’ which is the German for ‘Company.’ It is a customary +contraction like our ‘Co.’ ‘P,’ of course, stands for ‘Papier.’ Now for +the ‘Eg.’ Let us glance at our Continental Gazetteer.” He took down a +heavy brown volume from his shelves. “Eglow, Eglonitz—here we are, +Egria. It is in a German-speaking country—in Bohemia, not far from +Carlsbad. ‘Remarkable as being the scene of the death of Wallenstein, +and for its numerous glass-factories and paper-mills.’ Ha, ha, my boy, +what do you make of that?” His eyes sparkled, and he sent up a great +blue triumphant cloud from his cigarette.

+

“The paper was made in Bohemia,” I said.

+

“Precisely. And the man who wrote the note is a German. Do you +note the peculiar construction of the sentence—‘This account of you we +have from all quarters received.’ A Frenchman or Russian could not have +written that. It is the German who is so uncourteous to his verbs. It +only remains, therefore, to discover what is wanted by this German who +writes upon Bohemian paper and prefers wearing a mask to showing his +face. And here he comes, if I am not mistaken, to resolve all our +doubts.”

+

As he spoke there was the sharp sound of horses’ hoofs and +grating wheels against the curb, followed by a sharp pull at the bell. +Holmes whistled.

+

“A pair, by the sound,” said he. “Yes,” he continued, glancing +out of the window. “A nice little brougham and a pair of beauties. A +hundred and fifty guineas apiece. There’s money in this case, Watson, if +there is nothing else.”

+

“I think that I had better go, Holmes.”

+ +

“Not a bit, Doctor. Stay where you are. I am lost without my +Boswell. And this promises to be interesting. It would be a pity to miss +it.”

+

“But your client—”

+

“Never mind him. I may want your help, and so may he. Here he +comes. Sit down in that armchair, Doctor, and give us your best +attention.”

+

A slow and heavy step, which had been heard upon the stairs and +in the passage, paused immediately outside the door. Then there was a +loud and authoritative tap.

+

“Come in!” said Holmes.

+

A man entered who could hardly have been less than six feet six +inches in height, with the chest and limbs of a Hercules. His dress was +rich with a richness which would, in England, be looked upon as akin to +bad taste. Heavy bands of astrakhan were slashed across the sleeves and +fronts of his double-breasted coat, while the deep blue cloak which was +thrown over his shoulders was lined with flame-coloured silk and secured +at the neck with a brooch which consisted of a single flaming beryl. +Boots which extended halfway up his calves, and which were trimmed at +the tops with rich brown fur, completed the impression of barbaric +opulence which was suggested by his whole appearance. He carried a +broad-brimmed hat in his hand, while he wore across the upper part of +his face, extending down past the cheekbones, a black vizard mask, which +he had apparently adjusted that very moment, for his hand was still +raised to it as he entered. From the lower part of the face he appeared +to be a man of strong character, with a thick, hanging lip, and a long, +straight chin suggestive of resolution pushed to the length of +obstinacy.

+

“You had my note?” he asked with a deep harsh voice and a +strongly marked German accent. “I told you that I would call.” He looked +from one to the other of us, as if uncertain which to address.

+

“Pray take a seat,” said Holmes. “This is my friend and +colleague, Dr. Watson, who is occasionally good enough to help me in my +cases. Whom have I the honour to address?”

+

“You may address me as the Count Von Kramm, a Bohemian nobleman. +I understand that this gentleman, your friend, is a man of honour and +discretion, whom I may trust with a matter of the most extreme +importance. If not, I should much prefer to communicate with you alone.”

+ +

I rose to go, but Holmes caught me by the wrist and pushed me +back into my chair. “It is both, or none,” said he. “You may say before +this gentleman anything which you may say to me.”

+

The Count shrugged his broad shoulders. “Then I must begin,” said +he, “by binding you both to absolute secrecy for two years; at the end +of that time the matter will be of no importance. At present it is not +too much to say that it is of such weight it may have an influence upon +European history.”

+

“I promise,” said Holmes.

+

“And I.”

+

“You will excuse this mask,” continued our strange visitor. “The +august person who employs me wishes his agent to be unknown to you, and +I may confess at once that the title by which I have just called myself +is not exactly my own.”

+

“I was aware of it,” said Holmes dryly.

+

“The circumstances are of great delicacy, and every precaution +has to be taken to quench what might grow to be an immense scandal and +seriously compromise one of the reigning families of Europe. To speak +plainly, the matter implicates the great House of Ormstein, hereditary +kings of Bohemia.”

+

“I was also aware of that,” murmured Holmes, settling himself +down in his armchair and closing his eyes.

+

Our visitor glanced with some apparent surprise at the languid, +lounging figure of the man who had been no doubt depicted to him as the +most incisive reasoner and most energetic agent in Europe. Holmes slowly +reopened his eyes and looked impatiently at his gigantic client.

+ +

“If your Majesty would condescend to state your case,” he +remarked, “I should be better able to advise you.”

+

The man sprang from his chair and paced up and down the room in +uncontrollable agitation. Then, with a gesture of desperation, he tore +the mask from his face and hurled it upon the ground. “You are right,” +he cried; “I am the King. Why should I attempt to conceal it?”

+

“Why, indeed?” murmured Holmes. “Your Majesty had not spoken +before I was aware that I was addressing Wilhelm Gottsreich Sigismond +von Ormstein, Grand Duke of Cassel-Felstein, and hereditary King of +Bohemia.”

+

“But you can understand,” said our strange visitor, sitting down +once more and passing his hand over his high white forehead, “you can +understand that I am not accustomed to doing such business in my own +person. Yet the matter was so delicate that I could not confide it to an +agent without putting myself in his power. I have come incognito +from Prague for the purpose of consulting you.”

+

“Then, pray consult,” said Holmes, shutting his eyes once more.

+

“The facts are briefly these: Some five years ago, during a +lengthy visit to Warsaw, I made the acquaintance of the well-known +adventuress, Irene Adler. The name is no doubt familiar to you.”

+

“Kindly look her up in my index, Doctor,” murmured Holmes without +opening his eyes. For many years he had adopted a system of docketing +all paragraphs concerning men and things, so that it was difficult to +name a subject or a person on which he could not at once furnish +information. In this case I found her biography sandwiched in between +that of a Hebrew rabbi and that of a staff-commander who had written a +monograph upon the deep-sea fishes.

+ +

“Let me see!” said Holmes. “Hum! Born in New Jersey in the year +1858. Contralto—hum! La Scala, hum! Prima donna Imperial Opera of +Warsaw—yes! Retired from operatic stage—ha! Living in London—quite so! +Your Majesty, as I understand, became entangled with this young person, +wrote her some compromising letters, and is now desirous of getting +those letters back.”

+

“Precisely so. But how—”

+

“Was there a secret marriage?”

+

“None.”

+

“No legal papers or certificates?”

+

“None.”

+

“Then I fail to follow your Majesty. If this young person should +produce her letters for blackmailing or other purposes, how is she to +prove their authenticity?”

+

“There is the writing.”

+

“Pooh, pooh! Forgery.”

+ +

“My private note-paper.”

+

“Stolen.”

+

“My own seal.”

+

“Imitated.”

+

“My photograph.”

+

“Bought.”

+

“We were both in the photograph.”

+

“Oh, dear! That is very bad! Your Majesty has indeed committed an +indiscretion.”

+

“I was mad—insane.”

+ +

“You have compromised yourself seriously.”

+

“I was only Crown Prince then. I was young. I am but thirty now.”

+

“It must be recovered.”

+

“We have tried and failed.”

+

“Your Majesty must pay. It must be bought.”

+

“She will not sell.”

+

“Stolen, then.”

+

“Five attempts have been made. Twice burglars in my pay ransacked +her house. Once we diverted her luggage when she travelled. Twice she +has been waylaid. There has been no result.”

+

“No sign of it?”

+ +

“Absolutely none.”

+

Holmes laughed. “It is quite a pretty little problem,” said he.

+

“But a very serious one to me,” returned the King reproachfully.

+

“Very, indeed. And what does she propose to do with the +photograph?”

+

“To ruin me.”

+

“But how?”

+

“I am about to be married.”

+

“So I have heard.”

+

“To Clotilde Lothman von Saxe-Meningen, second daughter of the +King of Scandinavia. You may know the strict principles of her family. +She is herself the very soul of delicacy. A shadow of a doubt as to my +conduct would bring the matter to an end.”

+ +

“And Irene Adler?”

+

“Threatens to send them the photograph. And she will do it. I +know that she will do it. You do not know her, but she has a soul of +steel. She has the face of the most beautiful of women, and the mind of +the most resolute of men. Rather than I should marry another woman, +there are no lengths to which she would not go—none.”

+

“You are sure that she has not sent it yet?”

+

“I am sure.”

+

“And why?”

+

“Because she has said that she would send it on the day when the +betrothal was publicly proclaimed. That will be next Monday.”

+

“Oh, then we have three days yet,” said Holmes with a yawn. “That +is very fortunate, as I have one or two matters of importance to look +into just at present. Your Majesty will, of course, stay in London for +the present?”

+

“Certainly. You will find me at the Langham under the name of the +Count Von Kramm.”

+

“Then I shall drop you a line to let you know how we progress.”

+ +

“Pray do so. I shall be all anxiety.”

+

“Then, as to money?”

+

“You have carte blanche.”

+

“Absolutely?”

+

“I tell you that I would give one of the provinces of my kingdom +to have that photograph.”

+

“And for present expenses?”

+

The King took a heavy chamois leather bag from under his cloak +and laid it on the table.

+

“There are three hundred pounds in gold and seven hundred in +notes,” he said.

+ +

Holmes scribbled a receipt upon a sheet of his note-book and +handed it to him.

+

“And Mademoiselle’s address?” he asked.

+

“Is Briony Lodge, Serpentine Avenue, St. John’s Wood.”

+

Holmes took a note of it. “One other question,” said he. “Was the +photograph a cabinet?”

+

“It was.”

+

“Then, good-night, your Majesty, and I trust that we shall soon +have some good news for you. And good-night, Watson,” he added, as the +wheels of the royal brougham rolled down the street. “If you will be +good enough to call to-morrow afternoon at three o’clock I should like +to chat this little matter over with you.”
+
+

+
II.
+


+At three o’clock precisely I was at Baker Street, but Holmes had not yet +returned. The landlady informed me that he had left the house shortly +after eight o’clock in the morning. I sat down beside the fire, however, +with the intention of awaiting him, however long he might be. I was +already deeply interested in his inquiry, for, though it was surrounded +by none of the grim and strange features which were associated with the +two crimes which I have already recorded, still, the nature of the case +and the exalted station of his client gave it a character of its own. +Indeed, apart from the nature of the investigation which my friend had +on hand, there was something in his masterly grasp of a situation, and +his keen, incisive reasoning, which made it a pleasure to me to study +his system of work, and to follow the quick, subtle methods by which he +disentangled the most inextricable mysteries. So accustomed was I to his +invariable success that the very possibility of his failing had ceased +to enter into my head.

+ +

It was close upon four before the door opened, and a +drunken-looking groom, ill-kempt and side-whiskered, with an inflamed +face and disreputable clothes, walked into the room. Accustomed as I was +to my friend’s amazing powers in the use of disguises, I had to look +three times before I was certain that it was indeed he. With a nod he +vanished into the bedroom, whence he emerged in five minutes +tweed-suited and respectable, as of old. Putting his hands into his +pockets, he stretched out his legs in front of the fire and laughed +heartily for some minutes.

+

“Well, really!” he cried, and then he choked and laughed again +until he was obliged to lie back, limp and helpless, in the chair.

+

“What is it?”

+

“It’s quite too funny. I am sure you could never guess how I +employed my morning, or what I ended by doing.”

+

“I can’t imagine. I suppose that you have been watching the +habits, and perhaps the house, of Miss Irene Adler.”

+

“Quite so; but the sequel was rather unusual. I will tell you, +however. I left the house a little after eight o’clock this morning in +the character of a groom out of work. There is a wonderful sympathy and +freemasonry among horsey men. Be one of them, and you will know all that +there is to know. I soon found Briony Lodge. It is a bijou villa, +with a garden at the back, but built out in front right up to the road, +two stories. Chubb lock to the door. Large sitting-room on the right +side, well furnished, with long windows almost to the floor, and those +preposterous English window fasteners which a child could open. Behind +there was nothing remarkable, save that the passage window could be +reached from the top of the coach-house. I walked round it and examined +it closely from every point of view, but without noting anything else of +interest.

+

“I then lounged down the street and found, as I expected, that +there was a mews in a lane which runs down by one wall of the garden. I +lent the ostlers a hand in rubbing down their horses, and received in +exchange twopence, a glass of half-and-half, two fills of shag tobacco, +and as much information as I could desire about Miss Adler, to say +nothing of half a dozen other people in the neighbourhood in whom I was +not in the least interested, but whose biographies I was compelled to +listen to.”

+ +

“And what of Irene Adler?” I asked.

+

“Oh, she has turned all the men’s heads down in that part. She is +the daintiest thing under a bonnet on this planet. So say the +Serpentine-mews, to a man. She lives quietly, sings at concerts, drives +out at five every day, and returns at seven sharp for dinner. Seldom +goes out at other times, except when she sings. Has only one male +visitor, but a good deal of him. He is dark, handsome, and dashing, +never calls less than once a day, and often twice. He is a Mr. Godfrey +Norton, of the Inner Temple. See the advantages of a cabman as a +confidant. They had driven him home a dozen times from Serpentine-mews, +and knew all about him. When I had listened to all they had to tell, I +began to walk up and down near Briony Lodge once more, and to think over +my plan of campaign.

+

“This Godfrey Norton was evidently an important factor in the +matter. He was a lawyer. That sounded ominous. What was the relation +between them, and what the object of his repeated visits? Was she his +client, his friend, or his mistress? If the former, she had probably +transferred the photograph to his keeping. If the latter, it was less +likely. On the issue of this question depended whether I should continue +my work at Briony Lodge, or turn my attention to the gentleman’s +chambers in the Temple. It was a delicate point, and it widened the +field of my inquiry. I fear that I bore you with these details, but I +have to let you see my little difficulties, if you are to understand the +situation.”

+

“I am following you closely,” I answered.

+

“I was still balancing the matter in my mind when a hansom cab +drove up to Briony Lodge, and a gentleman sprang out. He was a +remarkably handsome man, dark, aquiline, and moustached—evidently the +man of whom I had heard. He appeared to be in a great hurry, shouted to +the cabman to wait, and brushed past the maid who opened the door with +the air of a man who was thoroughly at home.

+

“He was in the house about half an hour, and I could catch +glimpses of him in the windows of the sitting-room, pacing up and down, +talking excitedly, and waving his arms. Of her I could see nothing. +Presently he emerged, looking even more flurried than before. As he +stepped up to the cab, he pulled a gold watch from his pocket and looked +at it earnestly, ‘Drive like the devil,’ he shouted, ‘first to Gross +& Hankey’s in Regent Street, and then to the Church of St. Monica in +the Edgeware Road. Half a guinea if you do it in twenty minutes!’

+

“Away they went, and I was just wondering whether I should not do +well to follow them when up the lane came a neat little landau, the +coachman with his coat only half-buttoned, and his tie under his ear, +while all the tags of his harness were sticking out of the buckles. It +hadn’t pulled up before she shot out of the hall door and into it. I +only caught a glimpse of her at the moment, but she was a lovely woman, +with a face that a man might die for.

+

“ ‘The Church of St. Monica, John,’ she cried, ‘and half a +sovereign if you reach it in twenty minutes.’

+ +

“This was quite too good to lose, Watson. I was just balancing +whether I should run for it, or whether I should perch behind her landau +when a cab came through the street. The driver looked twice at such a +shabby fare, but I jumped in before he could object. ‘The Church of St. +Monica,’ said I, ‘and half a sovereign if you reach it in twenty +minutes.’ It was twenty-five minutes to twelve, and of course it was +clear enough what was in the wind.

+

“My cabby drove fast. I don’t think I ever drove faster, but the +others were there before us. The cab and the landau with their steaming +horses were in front of the door when I arrived. I paid the man and +hurried into the church. There was not a soul there save the two whom I +had followed and a surpliced clergyman, who seemed to be expostulating +with them. They were all three standing in a knot in front of the altar. +I lounged up the side aisle like any other idler who has dropped into a +church. Suddenly, to my surprise, the three at the altar faced round to +me, and Godfrey Norton came running as hard as he could towards me.

+

“ ‘Thank God,’ he cried. ‘You’ll do. Come! Come!’

+

“ ‘What then?’ I asked.

+

“ ‘Come, man, come, only three minutes, or it won’t be legal.’

+

“I was half-dragged up to the altar, and before I knew where I +was I found myself mumbling responses which were whispered in my ear, +and vouching for things of which I knew nothing, and generally assisting +in the secure tying up of Irene Adler, spinster, to Godfrey Norton, +bachelor. It was all done in an instant, and there was the gentleman +thanking me on the one side and the lady on the other, while the +clergyman beamed on me in front. It was the most preposterous position +in which I ever found myself in my life, and it was the thought of it +that started me laughing just now. It seems that there had been some +informality about their license, that the clergyman absolutely refused +to marry them without a witness of some sort, and that my lucky +appearance saved the bridegroom from having to sally out into the +streets in search of a best man. The bride gave me a sovereign, and I +mean to wear it on my watch chain in memory of the occasion.”

+

“This is a very unexpected turn of affairs,” said I; “and what +then?”

+

“Well, I found my plans very seriously menaced. It looked as if +the pair might take an immediate departure, and so necessitate very +prompt and energetic measures on my part. At the church door, however, +they separated, he driving back to the Temple, and she to her own house. +‘I shall drive out in the park at five as usual,’ she said as she left +him. I heard no more. They drove away in different directions, and I +went off to make my own arrangements.”

+

“Which are?”

+ +

“Some cold beef and a glass of beer,” he answered, ringing the +bell. “I have been too busy to think of food, and I am likely to be +busier still this evening. By the way, Doctor, I shall want your +co-operation.”

+

“I shall be delighted.”

+

“You don’t mind breaking the law?”

+

“Not in the least.”

+

“Nor running a chance of arrest?”

+

“Not in a good cause.”

+

“Oh, the cause is excellent!”

+

“Then I am your man.”

+

“I was sure that I might rely on you.”

+ +

“But what is it you wish?”

+

“When Mrs. Turner has brought in the tray I will make it clear to +you. Now,” he said as he turned hungrily on the simple fare that our +landlady had provided, “I must discuss it while I eat, for I have not +much time. It is nearly five now. In two hours we must be on the scene +of action. Miss Irene, or Madame, rather, returns from her drive at +seven. We must be at Briony Lodge to meet her.”

+

“And what then?”

+

“You must leave that to me. I have already arranged what is to +occur. There is only one point on which I must insist. You must not +interfere, come what may. You understand?”

+

“I am to be neutral?”

+

“To do nothing whatever. There will probably be some small +unpleasantness. Do not join in it. It will end in my being conveyed into +the house. Four or five minutes afterwards the sitting-room window will +open. You are to station yourself close to that open window.”

+

“Yes.”

+

“You are to watch me, for I will be visible to you.”

+

“Yes.”

+ +

“And when I raise my hand—so—you will throw into the room what I +give you to throw, and will, at the same time, raise the cry of fire. +You quite follow me?”

+

“Entirely.”

+

“It is nothing very formidable,” he said, taking a long +cigar-shaped roll from his pocket. “It is an ordinary plumber’s +smoke-rocket, fitted with a cap at either end to make it self-lighting. +Your task is confined to that. When you raise your cry of fire, it will +be taken up by quite a number of people. You may then walk to the end of +the street, and I will rejoin you in ten minutes. I hope that I have +made myself clear?”

+

“I am to remain neutral, to get near the window, to watch you, +and at the signal to throw in this object, then to raise the cry of +fire, and to wait you at the corner of the street.”

+

“Precisely.”

+

“Then you may entirely rely on me.”

+

“That is excellent. I think, perhaps, it is almost time that I +prepare for the new role I have to play.”

+

He disappeared into his bedroom and returned in a few minutes in +the character of an amiable and simple-minded Nonconformist clergyman. +His broad black hat, his baggy trousers, his white tie, his sympathetic +smile, and general look of peering and benevolent curiosity were such as +Mr. John Hare alone could have equalled. It was not merely that Holmes +changed his costume. His expression, his manner, his very soul seemed to +vary with every fresh part that he assumed. The stage lost a fine actor, +even as science lost an acute reasoner, when he became a specialist in +crime.

+

It was a quarter past six when we left Baker Street, and it still +wanted ten minutes to the hour when we found ourselves in Serpentine +Avenue. It was already dusk, and the lamps were just being lighted as we +paced up and down in front of Briony Lodge, waiting for the coming of +its occupant. The house was just such as I had pictured it from Sherlock +Holmes’ succinct description, but the locality appeared to be less +private than I expected. On the contrary, for a small street in a quiet +neighbourhood, it was remarkably animated. There was a group of shabbily +dressed men smoking and laughing in a corner, a scissors-grinder with +his wheel, two guardsmen who were flirting with a nurse-girl, and +several well-dressed young men who were lounging up and down with cigars +in their mouths.

+ +

“You see,” remarked Holmes, as we paced to and fro in front of +the house, “this marriage rather simplifies matters. The photograph +becomes a double-edged weapon now. The chances are that she would be as +averse to its being seen by Mr. Godfrey Norton, as our client is to its +coming to the eyes of his princess. Now the question is, Where are we to +find the photograph?”

+

“Where, indeed?”

+

“It is most unlikely that she carries it about with her. It is +cabinet size. Too large for easy concealment about a woman’s dress. She +knows that the King is capable of having her waylaid and searched. Two +attempts of the sort have already been made. We may take it, then, that +she does not carry it about with her.”

+

“Where, then?”

+

“Her banker or her lawyer. There is that double possibility. But +I am inclined to think neither. Women are naturally secretive, and they +like to do their own secreting. Why should she hand it over to anyone +else? She could trust her own guardianship, but she could not tell what +indirect or political influence might be brought to bear upon a business +man. Besides, remember that she had resolved to use it within a few +days. It must be where she can lay her hands upon it. It must be in her +own house.”

+

“But it has twice been burgled.”

+

“Pshaw! They did not know how to look.”

+

“But how will you look?”

+

“I will not look.”

+ +

“What then?”

+

“I will get her to show me.”

+

“But she will refuse.”

+

“She will not be able to. But I hear the rumble of wheels. It is +her carriage. Now carry out my orders to the letter.”

+

As he spoke the gleam of the sidelights of a carriage came round +the curve of the avenue. It was a smart little landau which rattled up +to the door of Briony Lodge. As it pulled up, one of the loafing men at +the corner dashed forward to open the door in the hope of earning a +copper, but was elbowed away by another loafer, who had rushed up with +the same intention. A fierce quarrel broke out, which was increased by +the two guardsmen, who took sides with one of the loungers, and by the +scissors-grinder, who was equally hot upon the other side. A blow was +struck, and in an instant the lady, who had stepped from her carriage, +was the centre of a little knot of flushed and struggling men, who +struck savagely at each other with their fists and sticks. Holmes dashed +into the crowd to protect the lady; but, just as he reached her, he gave +a cry and dropped to the ground, with the blood running freely down his +face. At his fall the guardsmen took to their heels in one direction and +the loungers in the other, while a number of better dressed people, who +had watched the scuffle without taking part in it, crowded in to help +the lady and to attend to the injured man. Irene Adler, as I will still +call her, had hurried up the steps; but she stood at the top with her +superb figure outlined against the lights of the hall, looking back into +the street.

+

“Is the poor gentleman much hurt?” she asked.

+

“He is dead,” cried several voices.

+

“No, no, there’s life in him!” shouted another. “But he’ll be +gone before you can get him to hospital.”

+

“He’s a brave fellow,” said a woman. “They would have had the +lady’s purse and watch if it hadn’t been for him. They were a gang, and +a rough one, too. Ah, he’s breathing now.”

+ +

“He can’t lie in the street. May we bring him in, marm?”

+

“Surely. Bring him into the sitting-room. There is a comfortable +sofa. This way, please!”

+

Slowly and solemnly he was borne into Briony Lodge and laid out +in the principal room, while I still observed the proceedings from my +post by the window. The lamps had been lit, but the blinds had not been +drawn, so that I could see Holmes as he lay upon the couch. I do not +know whether he was seized with compunction at that moment for the part +he was playing, but I know that I never felt more heartily ashamed of +myself in my life than when I saw the beautiful creature against whom I +was conspiring, or the grace and kindliness with which she waited upon +the injured man. And yet it would be the blackest treachery to Holmes to +draw back now from the part which he had intrusted to me. I hardened my +heart, and took the smoke-rocket from under my ulster. After all, I +thought, we are not injuring her. We are but preventing her from +injuring another.

+

Holmes had sat up upon the couch, and I saw him motion like a man +who is in need of air. A maid rushed across and threw open the window. +At the same instant I saw him raise his hand and at the signal I tossed +my rocket into the room with a cry of “Fire!” The word was no sooner out +of my mouth than the whole crowd of spectators, well dressed and +ill—gentlemen, ostlers, and servant maids—joined in a general shriek of +“Fire!” Thick clouds of smoke curled through the room and out at the +open window. I caught a glimpse of rushing figures, and a moment later +the voice of Holmes from within assuring them that it was a false alarm. +Slipping through the shouting crowd I made my way to the corner of the +street, and in ten minutes was rejoiced to find my friend’s arm in mine, +and to get away from the scene of uproar. He walked swiftly and in +silence for some few minutes until we had turned down one of the quiet +streets which lead towards the Edgeware Road.

+

“You did it very nicely, Doctor,” he remarked. “Nothing could +have been better. It is all right.”

+

“You have the photograph?”

+

“I know where it is.”

+

“And how did you find out?”

+

“She showed me, as I told you she would.”

+ +

“I am still in the dark.”

+

“I do not wish to make a mystery,” said he, laughing. “The matter +was perfectly simple. You, of course, saw that everyone in the street +was an accomplice. They were all engaged for the evening.”

+

“I guessed as much.”

+

“Then, when the row broke out, I had a little moist red paint in +the palm of my hand. I rushed forward, fell down, clapped my hand to my +face, and became a piteous spectacle. It is an old trick.”

+

“That also I could fathom.”

+

“Then they carried me in. She was bound to have me in. What else +could she do? And into her sitting-room, which was the very room which I +suspected. It lay between that and her bedroom, and I was determined to +see which. They laid me on a couch, I motioned for air, they were +compelled to open the window, and you had your chance.”

+

“How did that help you?”

+

“It was all-important. When a woman thinks that her house is on +fire, her instinct is at once to rush to the thing which she values +most. It is a perfectly overpowering impulse, and I have more than once +taken advantage of it. In the case of the Darlington Substitution +Scandal it was of use to me, and also in the Arnsworth Castle business. +A married woman grabs at her baby; an unmarried one reaches for her +jewel-box. Now it was clear to me that our lady of to-day had nothing in +the house more precious to her than what we are in quest of. She would +rush to secure it. The alarm of fire was admirably done. The smoke and +shouting were enough to shake nerves of steel. She responded +beautifully. The photograph is in a recess behind a sliding panel just +above the right bell-pull. She was there in an instant, and I caught a +glimpse of it as she half drew it out. When I cried out that it was a +false alarm, she replaced it, glanced at the rocket, rushed from the +room, and I have not seen her since. I rose, and, making my excuses, +escaped from the house. I hesitated whether to attempt to secure the +photograph at once; but the coachman had come in, and as he was watching +me narrowly, it seemed safer to wait. A little over-precipitance may +ruin all.”

+

“And now?” I asked.

+ +

“Our quest is practically finished. I shall call with the King +to-morrow, and with you, if you care to come with us. We will be shown +into the sitting-room to wait for the lady, but it is probable that when +she comes she may find neither us nor the photograph. It might be a +satisfaction to his Majesty to regain it with his own hands.”

+

“And when will you call?”

+

“At eight in the morning. She will not be up, so that we shall +have a clear field. Besides, we must be prompt, for this marriage may +mean a complete change in her life and habits. I must wire to the King +without delay.”

+

We had reached Baker Street and had stopped at the door. He was +searching his pockets for the key when someone passing said:

+

“Good-night, Mister Sherlock Holmes.”

+

There were several people on the pavement at the time, but the +greeting appeared to come from a slim youth in an ulster who had hurried +by.

+

“I’ve heard that voice before,” said Holmes, staring down the +dimly lit street. “Now, I wonder who the deuce that could have been.”
+
+

+
III.
+ +


+I slept at Baker Street that night, and we were engaged upon our toast +and coffee in the morning when the King of Bohemia rushed into the room.

+

“You have really got it!” he cried, grasping Sherlock Holmes by +either shoulder and looking eagerly into his face.

+

“Not yet.”

+

“But you have hopes?”

+

“I have hopes.”

+

“Then, come. I am all impatience to be gone.”

+

“We must have a cab.”

+

“No, my brougham is waiting.”

+ +

“Then that will simplify matters.” We descended and started off +once more for Briony Lodge.

+

“Irene Adler is married,” remarked Holmes.

+

“Married! When?”

+

“Yesterday.”

+

“But to whom?”

+

“To an English lawyer named Norton.”

+

“But she could not love him.”

+

“I am in hopes that she does.”

+

“And why in hopes?”

+ +

“Because it would spare your Majesty all fear of future +annoyance. If the lady loves her husband, she does not love your +Majesty. If she does not love your Majesty, there is no reason why she +should interfere with your Majesty’s plan.”

+

“It is true. And yet—! Well! I wish she had been of my own +station! What a queen she would have made!” He relapsed into a moody +silence, which was not broken until we drew up in Serpentine Avenue.

+

The door of Briony Lodge was open, and an elderly woman stood +upon the steps. She watched us with a sardonic eye as we stepped from +the brougham.

+

“Mr. Sherlock Holmes, I believe?” said she.

+

“I am Mr. Holmes,” answered my companion, looking at her with a +questioning and rather startled gaze.

+

“Indeed! My mistress told me that you were likely to call. She +left this morning with her husband by the 5:15 train from Charing Cross +for the Continent.”

+

“What!” Sherlock Holmes staggered back, white with chagrin and +surprise. “Do you mean that she has left England?”

+

“Never to return.”

+

“And the papers?” asked the King hoarsely. “All is lost.”

+ +

“We shall see.” He pushed past the servant and rushed into the +drawing-room, followed by the King and myself. The furniture was +scattered about in every direction, with dismantled shelves and open +drawers, as if the lady had hurriedly ransacked them before her flight. +Holmes rushed at the bell-pull, tore back a small sliding shutter, and, +plunging in his hand, pulled out a photograph and a letter. The +photograph was of Irene Adler herself in evening dress, the letter was +superscribed to “Sherlock Holmes, Esq. To be left till called for.” My +friend tore it open, and we all three read it together. It was dated at +midnight of the preceding night and ran in this way:
+
+

+

“MY DEAR MR. SHERLOCK HOLMES,—You really did it very well. You +took me in completely. Until after the alarm of fire, I had not a +suspicion. But then, when I found how I had betrayed myself, I began to +think. I had been warned against you months ago. I had been told that, +if the King employed an agent, it would certainly be you. And your +address had been given me. Yet, with all this, you made me reveal what +you wanted to know. Even after I became suspicious, I found it hard to +think evil of such a dear, kind old clergyman. But, you know, I have +been trained as an actress myself. Male costume is nothing new to me. I +often take advantage of the freedom which it gives. I sent John, the +coachman, to watch you, ran upstairs, got into my walking clothes, as I +call them, and came down just as you departed.

+

“Well, I followed you to your door, and so made sure that I was +really an object of interest to the celebrated Mr. Sherlock Holmes. Then +I, rather imprudently, wished you good-night, and started for the Temple +to see my husband.

+

“We both thought the best resource was flight, when pursued by so +formidable an antagonist; so you will find the nest empty when you call +to-morrow. As to the photograph, your client may rest in peace. I love +and am loved by a better man than he. The King may do what he will +without hindrance from one whom he has cruelly wronged. I keep it only +to safeguard myself, and to preserve a weapon which will always secure +me from any steps which he might take in the future. I leave a +photograph which he might care to possess; and I remain, dear Mr. +Sherlock Holmes,

+


+“Very truly yours,
+“IRENE NORTON, née ADLER.”
+ +
+

+

“What a woman—oh, what a woman!” cried the King of Bohemia, when +we had all three read this epistle. “Did I not tell you how quick and +resolute she was? Would she not have made an admirable queen? Is it not +a pity that she was not on my level?”

+

“From what I have seen of the lady, she seems, indeed, to be on a +very different level to your Majesty,” said Holmes coldly. “I am sorry +that I have not been able to bring your Majesty’s business to a more +successful conclusion.”

+

“On the contrary, my dear sir,” cried the King; “nothing could be +more successful. I know that her word is inviolate. The photograph is +now as safe as if it were in the fire.”

+

“I am glad to hear your Majesty say so.”

+

“I am immensely indebted to you. Pray tell me in what way I can +reward you. This ring—” He slipped an emerald snake ring from his finger +and held it out upon the palm of his hand.

+

“Your Majesty has something which I should value even more +highly,” said Holmes.

+

“You have but to name it.”

+

“This photograph!”

+ +

The King stared at him in amazement.

+

“Irene’s photograph!” he cried. “Certainly, if you wish it.”

+

“I thank your Majesty. Then there is no more to be done in the +matter. I have the honour to wish you a very good morning.” He bowed, +and, turning away without observing the hand which the King had +stretched out to him, he set off in my company for his chambers.
+
+

+

And that was how a great scandal threatened to affect the kingdom +of Bohemia, and how the best plans of Mr. Sherlock Holmes were beaten by +a woman’s wit. He used to make merry over the cleverness of women, but I +have not heard him do it of late. And when he speaks of Irene Adler, or +when he refers to her photograph, it is always under the honourable +title of the woman.
+
+

+
+ + \ No newline at end of file diff --git a/epublib-core/src/test/resources/not_a_zip.epub b/epublib-core/src/test/resources/not_a_zip.epub new file mode 100644 index 00000000..a977c666 --- /dev/null +++ b/epublib-core/src/test/resources/not_a_zip.epub @@ -0,0 +1,2 @@ +This is not a valid zip file. +Used for testing LoadResources. \ No newline at end of file diff --git a/epublib-core/src/test/resources/opf/test1.opf b/epublib-core/src/test/resources/opf/test1.opf new file mode 100644 index 00000000..6d3bacf0 --- /dev/null +++ b/epublib-core/src/test/resources/opf/test1.opf @@ -0,0 +1,32 @@ + + + + Epublib test book 1 + Joe Tester + 2010-05-27 + en + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/epublib-core/src/test/resources/opf/test2.opf b/epublib-core/src/test/resources/opf/test2.opf new file mode 100644 index 00000000..fdfb1688 --- /dev/null +++ b/epublib-core/src/test/resources/opf/test2.opf @@ -0,0 +1,23 @@ + + + + This Dynamic Earth + this_dynamic_earth-AAH813 + en + W. Jacquelyne Kious, Robert I. Tilling + + Infogrid Pacific + + + + 22-01-2009 + + + + + + + + + + diff --git a/epublib-core/src/test/resources/opf/test3.opf b/epublib-core/src/test/resources/opf/test3.opf new file mode 100644 index 00000000..fe58f592 --- /dev/null +++ b/epublib-core/src/test/resources/opf/test3.opf @@ -0,0 +1,17 @@ + + + + Epublib test book 1 + Joe Tester + 2010-05-27 + en + + + + + + + + + + diff --git a/epublib-core/src/test/resources/opf/test_default_language.opf b/epublib-core/src/test/resources/opf/test_default_language.opf new file mode 100644 index 00000000..bbbe94d6 --- /dev/null +++ b/epublib-core/src/test/resources/opf/test_default_language.opf @@ -0,0 +1,22 @@ + + + + This Dynamic Earth + this_dynamic_earth-AAH813 + W. Jacquelyne Kious, Robert I. 
Tilling + + Infogrid Pacific + + + + 22-01-2009 + + + + + + + + + + diff --git a/epublib-core/src/test/resources/opf/test_language.opf b/epublib-core/src/test/resources/opf/test_language.opf new file mode 100644 index 00000000..79423ac1 --- /dev/null +++ b/epublib-core/src/test/resources/opf/test_language.opf @@ -0,0 +1,23 @@ + + + + This Dynamic Earth + this_dynamic_earth-AAH813 + fi + W. Jacquelyne Kious, Robert I. Tilling + + Infogrid Pacific + + + + 22-01-2009 + + + + + + + + + + diff --git a/epublib-core/src/test/resources/testbook1.epub b/epublib-core/src/test/resources/testbook1.epub new file mode 100644 index 00000000..25992d8d Binary files /dev/null and b/epublib-core/src/test/resources/testbook1.epub differ diff --git a/epublib-core/src/test/resources/toc.xml b/epublib-core/src/test/resources/toc.xml new file mode 100644 index 00000000..5875b1fd --- /dev/null +++ b/epublib-core/src/test/resources/toc.xml @@ -0,0 +1,41 @@ + + + + + + + + + + Epublib test book 1 + + + Tester, Joe + + + + + Introduction + + + + + + Second Chapter + + + + + Chapter 2, section 1 + + + + + + + Conclusion + + + + + diff --git a/epublib-core/src/test/resources/zero_length_file.epub b/epublib-core/src/test/resources/zero_length_file.epub new file mode 100644 index 00000000..e69de29b diff --git a/epublib-parent/pom.xml b/epublib-parent/pom.xml new file mode 100644 index 00000000..d75547a7 --- /dev/null +++ b/epublib-parent/pom.xml @@ -0,0 +1,186 @@ + + + + + 4.0.0 + + nl.siegmann.epublib + epublib-parent + epublib-parent + pom + 4.0.1-EPUB3-SNAPSHOT + A java library for reading/writing/manipulating epub files + http://www.siegmann.nl/epublib + 2009 + + + 4.0 + UTF-8 + 1.6.1 + 3.8.1 + 3.8.2 + 3.2.1 + 3.2.0 + 3.1.1 + 1.7 + 1.7 + + + + ../epublib-core + ../epublib-tools + + + + + LGPL + http://www.gnu.org/licenses/lgpl.html + repo + + + + + + paul + Paul Siegmann + paul.siegmann+epublib@gmail.com + http://www.siegmann.nl/ + +1 + + + + + github + 
http://github.com/psiegman/epublib/issues + + + + + + net.sf.kxml + kxml2 + 2.3.0 + + + xmlpull + xmlpull + 1.1.3.4d_b4_min + + + net.sourceforge.htmlcleaner + htmlcleaner + 2.2 + + + commons-io + commons-io + 2.0.1 + + + commons-lang + commons-lang + 2.4 + + + net.sf.kxml + kxml2 + 2.3.0 + + + xmlpull + xmlpull + 1.1.3.4d_b4_min + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.slf4j + slf4j-simple + ${slf4j.version} + + + commons-vfs + commons-vfs + 1.0 + + + junit + junit + 4.10 + + + org.mockito + mockito-all + 1.10.19 + + + + + + + thirdparty + https://tools.pageplace.de/nexus/content/repositories/thirdparty + + + snapshot_repo + https://tools.pageplace.de/nexus/content/repositories/snapshots/ + + + + + http://github.com/psiegman/epublib + scm:git:https://psiegman@github.com/psiegman/epublib.git + scm:git:https://psiegman@github.com/psiegman/epublib.git + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + ${source.version} + ${target.version} + + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + attach-sources + + jar + + + + + + + + + + org.apache.maven.plugins + maven-site-plugin + ${maven-site-plugin.version} + + + + + + maven + http://repo1.maven.org/maven2/ + + + jboss + https://repository.jboss.org/nexus/ + + + + diff --git a/epublib-tools/.gitignore b/epublib-tools/.gitignore new file mode 100644 index 00000000..ea8c4bf7 --- /dev/null +++ b/epublib-tools/.gitignore @@ -0,0 +1 @@ +/target diff --git a/epublib-tools/README.md b/epublib-tools/README.md new file mode 100644 index 00000000..a0cd99fb --- /dev/null +++ b/epublib-tools/README.md @@ -0,0 +1,28 @@ +## Epub Viewer + +A simple epub viewer built with java Swing. + +### Startup + + java nl.siegmann.epublib.viewer.Viewer + +## Fileset2epub + +A tool to generate an epub from a windows help / chm file or from a set of html files. 
+ + java nl.siegmann.epublib.Fileset2Epub + +Arguments: + + --author [lastname,firstname] + --cover-image [image to use as cover] + --input-encoding [text encoding] # The encoding of the input html files. If funny characters show + # up in the result try 'iso-8859-1', 'windows-1252' or 'utf-8' + # If that doesn't work try to find an appropriate one from + # this list: http://en.wikipedia.org/wiki/Character_encoding + --in [input directory] + --isbn [isbn number] + --out [output epub file] + --title [book title] + --type [input type, can be 'epub', 'chm' or empty] + --xsl [html post processing file] diff --git a/epublib-tools/pom.xml b/epublib-tools/pom.xml new file mode 100644 index 00000000..22535c47 --- /dev/null +++ b/epublib-tools/pom.xml @@ -0,0 +1,149 @@ + + + + + 4.0.0 + + nl.siegmann.epublib + epublib-tools + epublib-tools + A java library for reading/writing/manipulating epub files + http://www.siegmann.nl/epublib + 2009 + + + nl.siegmann.epublib + epublib-parent + 4.0.1-EPUB3-SNAPSHOT + ../epublib-parent/pom.xml + + + + + nl.siegmann.epublib + epublib-core + ${epublib.version} + + + net.sourceforge.htmlcleaner + htmlcleaner + + + org.jdom + jdom + + + org.apache.ant + ant + + + + + commons-lang + commons-lang + + + net.sf.kxml + kxml2 + + + xmlpull + xmlpull + + + commons-io + commons-io + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-simple + + + commons-vfs + commons-vfs + + + junit + junit + test + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + true + complete + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + ${source.version} + ${target.version} + + + + com.jolira + onejar-maven-plugin + 1.4.4 + + + epublib commandline + + nl.siegmann.epublib.Fileset2Epub + ${project.name}-commandline-${project.version}.jar + + + one-jar + + + + epublib viewer + + nl.siegmann.epublib.viewer.Viewer + ${project.name}-viewer-${project.version}.jar + + + one-jar + + + + + + 
+ + + + org.apache.maven.plugins + maven-site-plugin + ${maven-site-plugin.version} + + + + + + onejar-maven-plugin.googlecode.com + http://onejar-maven-plugin.googlecode.com/svn/mavenrepo + + + diff --git a/src/main/groovy/nl/siegmann/epublib/docbook2epub.groovy b/epublib-tools/src/main/groovy/nl/siegmann/epublib/docbook2epub.groovy similarity index 99% rename from src/main/groovy/nl/siegmann/epublib/docbook2epub.groovy rename to epublib-tools/src/main/groovy/nl/siegmann/epublib/docbook2epub.groovy index b1c70aed..7a82c623 100644 --- a/src/main/groovy/nl/siegmann/epublib/docbook2epub.groovy +++ b/epublib-tools/src/main/groovy/nl/siegmann/epublib/docbook2epub.groovy @@ -19,6 +19,7 @@ import groovy.xml.* import org.apache.commons.io.FileUtils import java.util.zip.* import nl.siegmann.epublib.* +import nl.siegmann.epublib.domain.* // the directory where the userguide xml files are located: inputXmlDir = '/home/paul/project/private/gradledoc/foo/gradle-0.8/src/docs/userguide' diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/Fileset2Epub.java b/epublib-tools/src/main/java/nl/siegmann/epublib/Fileset2Epub.java new file mode 100644 index 00000000..5bae7e80 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/Fileset2Epub.java @@ -0,0 +1,164 @@ +package nl.siegmann.epublib; + +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import nl.siegmann.epublib.bookprocessor.CoverpageBookProcessor; +import nl.siegmann.epublib.bookprocessor.DefaultBookProcessorPipeline; +import nl.siegmann.epublib.bookprocessor.XslBookProcessor; +import nl.siegmann.epublib.chm.ChmParser; +import nl.siegmann.epublib.domain.Author; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Identifier; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.epub.BookProcessorPipeline; +import 
nl.siegmann.epublib.epub.EpubReader; +import nl.siegmann.epublib.epub.EpubWriter; +import nl.siegmann.epublib.fileset.FilesetBookCreator; +import nl.siegmann.epublib.util.VFSUtil; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.vfs.FileSystemException; +import org.apache.commons.vfs.VFS; + +public class Fileset2Epub { + + public static void main(String[] args) throws Exception { + String inputLocation = ""; + String outLocation = ""; + String xslFile = ""; + String coverImage = ""; + String title = ""; + List authorNames = new ArrayList(); + String type = ""; + String isbn = ""; + String inputEncoding = Constants.CHARACTER_ENCODING; + List bookProcessorClassNames = new ArrayList(); + + for(int i = 0; i < args.length; i++) { + if(args[i].equalsIgnoreCase("--in")) { + inputLocation = args[++i]; + } else if(args[i].equalsIgnoreCase("--out")) { + outLocation = args[++i]; + } else if(args[i].equalsIgnoreCase("--input-encoding")) { + inputEncoding = args[++i]; + } else if(args[i].equalsIgnoreCase("--xsl")) { + xslFile = args[++i]; + } else if(args[i].equalsIgnoreCase("--book-processor-class")) { + bookProcessorClassNames.add(args[++i]); + } else if(args[i].equalsIgnoreCase("--cover-image")) { + coverImage = args[++i]; + } else if(args[i].equalsIgnoreCase("--author")) { + authorNames.add(args[++i]); + } else if(args[i].equalsIgnoreCase("--title")) { + title = args[++i]; + } else if(args[i].equalsIgnoreCase("--isbn")) { + isbn = args[++i]; + } else if(args[i].equalsIgnoreCase("--type")) { + type = args[++i]; + } + } + if(StringUtils.isBlank(inputLocation) || StringUtils.isBlank(outLocation)) { + usage(); + } + BookProcessorPipeline epubCleaner = new DefaultBookProcessorPipeline(); + epubCleaner.addBookProcessors(createBookProcessors(bookProcessorClassNames)); + EpubWriter epubWriter = new EpubWriter(epubCleaner); + if(! 
StringUtils.isBlank(xslFile)) { + epubCleaner.addBookProcessor(new XslBookProcessor(xslFile)); + } + + if (StringUtils.isBlank(inputEncoding)) { + inputEncoding = Constants.CHARACTER_ENCODING; + } + + Book book; + if("chm".equals(type)) { + book = ChmParser.parseChm(VFSUtil.resolveFileObject(inputLocation), inputEncoding); + } else if ("epub".equals(type)) { + book = new EpubReader().readEpub(VFSUtil.resolveInputStream(inputLocation), inputEncoding); + } else { + book = FilesetBookCreator.createBookFromDirectory(VFSUtil.resolveFileObject(inputLocation), inputEncoding); + } + + if(StringUtils.isNotBlank(coverImage)) { +// book.getResourceByHref(book.getCoverImage()); + book.setCoverImage(new Resource(VFSUtil.resolveInputStream(coverImage), coverImage)); + epubCleaner.getBookProcessors().add(new CoverpageBookProcessor()); + } + + if(StringUtils.isNotBlank(title)) { + List titles = new ArrayList(); + titles.add(title); + book.getMetadata().setTitles(titles); + } + + if(StringUtils.isNotBlank(isbn)) { + book.getMetadata().addIdentifier(new Identifier(Identifier.Scheme.ISBN, isbn)); + } + + initAuthors(authorNames, book); + + OutputStream result; + try { + result = VFS.getManager().resolveFile(outLocation).getContent().getOutputStream(); + } catch(FileSystemException e) { + result = new FileOutputStream(outLocation); + } + epubWriter.write(book, result); + } + + private static void initAuthors(List authorNames, Book book) { + if(authorNames == null || authorNames.isEmpty()) { + return; + } + List authorObjects = new ArrayList(); + for(String authorName: authorNames) { + String[] authorNameParts = authorName.split(","); + Author authorObject = null; + if(authorNameParts.length > 1) { + authorObject = new Author(authorNameParts[1], authorNameParts[0]); + } else if(authorNameParts.length > 0) { + authorObject = new Author(authorNameParts[0]); + } + authorObjects.add(authorObject); + } + book.getMetadata().setAuthors(authorObjects); + } + + + private static List 
createBookProcessors(List bookProcessorNames) { + List result = new ArrayList(bookProcessorNames.size()); + for (String bookProcessorName: bookProcessorNames) { + BookProcessor bookProcessor = null; + try { + bookProcessor = (BookProcessor) Class.forName(bookProcessorName).newInstance(); + result.add(bookProcessor); + } catch (Exception e) { + e.printStackTrace(); + } + } + return result; + } + + private static void usage() { + System.out.println("usage: " + Fileset2Epub.class.getName() + + "\n --author [lastname,firstname]" + + "\n --cover-image [image to use as cover]" + + "\n --input-ecoding [text encoding] # The encoding of the input html files. If funny characters show" + + "\n # up in the result try 'iso-8859-1', 'windows-1252' or 'utf-8'" + + "\n # If that doesn't work try to find an appropriate one from" + + "\n # this list: http://en.wikipedia.org/wiki/Character_encoding" + + "\n --in [input directory]" + + "\n --isbn [isbn number]" + + "\n --out [output epub file]" + + "\n --title [book title]" + + "\n --type [input type, can be 'epub', 'chm' or empty]" + + "\n --xsl [html post processing file]" + ); + System.exit(0); + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/CoverpageBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/CoverpageBookProcessor.java new file mode 100644 index 00000000..0268635f --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/CoverpageBookProcessor.java @@ -0,0 +1,210 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.awt.AlphaComposite; +import java.awt.Dimension; +import java.awt.Graphics2D; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import javax.imageio.ImageIO; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Metadata; +import nl.siegmann.epublib.domain.Resource; 
+import nl.siegmann.epublib.domain.Resources; +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.CollectionUtil; +import nl.siegmann.epublib.util.ResourceUtil; +import org.apache.commons.io.FilenameUtils; + +import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; + +/** + * If the book contains a cover image then this will add a cover page to the book. + * If the book contains a cover html page it will set that page's first image as the book's cover image. + * + * FIXME: + * will overwrite any "cover.jpg" or "cover.html" that are already there. + * + * @author paul + * + */ +public class CoverpageBookProcessor implements BookProcessor { + + public static int MAX_COVER_IMAGE_SIZE = 999; + private static final Logger log = LoggerFactory.getLogger(CoverpageBookProcessor.class); + public static final String DEFAULT_COVER_PAGE_ID = "cover"; + public static final String DEFAULT_COVER_PAGE_HREF = "cover.html"; + public static final String DEFAULT_COVER_IMAGE_ID = "cover-image"; + public static final String DEFAULT_COVER_IMAGE_HREF = "images/cover.png"; + + @Override + public Book processBook(Book book) { + Metadata metadata = book.getMetadata(); + if(book.getCoverPage() == null && book.getCoverImage() == null) { + return book; + } + Resource coverPage = book.getCoverPage(); + if (coverPage == null) { + coverPage = findCoverPage(book); + book.setCoverPage(coverPage); + } + Resource coverImage = book.getCoverImage(); + if(coverPage == null) { + if(coverImage == null) { + // give up + } else { // coverImage != null + if(StringUtils.isBlank(coverImage.getHref())) { + coverImage.setHref(getCoverImageHref(coverImage, book)); + } + String coverPageHtml = 
createCoverpageHtml(CollectionUtil.first(metadata.getTitles()), coverImage.getHref()); + coverPage = new Resource(null, coverPageHtml.getBytes(), getCoverPageHref(book), MediatypeService.XHTML); + fixCoverResourceId(book, coverPage, DEFAULT_COVER_PAGE_ID); + } + } else { // coverPage != null + if(book.getCoverImage() == null) { + coverImage = getFirstImageSource(coverPage, book.getResources()); + book.setCoverImage(coverImage); + if (coverImage != null) { + book.getResources().remove(coverImage.getHref()); + } + } else { // coverImage != null + + } + } + + book.setCoverImage(coverImage); + book.setCoverPage(coverPage); + setCoverResourceIds(book); + return book; + } + +// private String getCoverImageHref(Resource coverImageResource) { +// return "cover" + coverImageResource.getMediaType().getDefaultExtension(); +// } + + private Resource findCoverPage(Book book) { + if (book.getCoverPage() != null) { + return book.getCoverPage(); + } + if (! (book.getSpine().isEmpty())) { + return book.getSpine().getResource(0); + } + return null; + } + + private void setCoverResourceIds(Book book) { + if(book.getCoverImage() != null) { + fixCoverResourceId(book, book.getCoverImage(), DEFAULT_COVER_IMAGE_ID); + } + if(book.getCoverPage() != null) { + fixCoverResourceId(book, book.getCoverPage(), DEFAULT_COVER_PAGE_ID); + } + } + + + private void fixCoverResourceId(Book book, Resource resource, String defaultId) { + if (StringUtils.isBlank(resource.getId())) { + resource.setId(defaultId); + } + book.getResources().fixResourceId(resource); + } + + private String getCoverPageHref(Book book) { + return DEFAULT_COVER_PAGE_HREF; + } + + + private String getCoverImageHref(Resource imageResource, Book book) { + return DEFAULT_COVER_IMAGE_HREF; + } + + private Resource getFirstImageSource(Resource titlePageResource, Resources resources) { + try { + Document titlePageDocument = ResourceUtil.getAsDocument(titlePageResource); + NodeList imageElements = 
titlePageDocument.getElementsByTagName("img"); + for (int i = 0; i < imageElements.getLength(); i++) { + String relativeImageHref = ((Element) imageElements.item(i)).getAttribute("src"); + String absoluteImageHref = calculateAbsoluteImageHref(relativeImageHref, titlePageResource.getHref()); + Resource imageResource = resources.getByHref(absoluteImageHref); + if (imageResource != null) { + return imageResource; + } + } + } catch (Exception e) { + log.error(e.getMessage(), e); + } + return null; + } + + + + // package + static String calculateAbsoluteImageHref(String relativeImageHref, + String baseHref) { + if (relativeImageHref.startsWith("/")) { + return relativeImageHref; + } + String result = FilenameUtils.normalize(baseHref.substring(0, baseHref.lastIndexOf('/') + 1) + relativeImageHref, true); + return result; + } + + private String createCoverpageHtml(String title, String imageHref) { + return "" + + "\n" + + "\n" + + "\t\n" + + "\t\tCover\n" + + "\t\t\n" + + "\t\n" + + "\t\n" + + "\t\t
\n" + + "\t\t\t\""\n" + + "\t\t
\n" + + "\t\n" + + "\n"; + } + + private Dimension calculateResizeSize(BufferedImage image) { + Dimension result; + if (image.getWidth() > image.getHeight()) { + result = new Dimension(MAX_COVER_IMAGE_SIZE, (int) (((double) MAX_COVER_IMAGE_SIZE / (double) image.getWidth()) * (double) image.getHeight())); + } else { + result = new Dimension((int) (((double) MAX_COVER_IMAGE_SIZE / (double) image.getHeight()) * (double) image.getWidth()), MAX_COVER_IMAGE_SIZE); + } + return result; + } + + + @SuppressWarnings("unused") + private byte[] createThumbnail(byte[] imageData) throws IOException { + BufferedImage originalImage = ImageIO.read(new ByteArrayInputStream(imageData)); + Dimension thumbDimension = calculateResizeSize(originalImage); + BufferedImage thumbnailImage = createResizedCopy(originalImage, (int) thumbDimension.getWidth(), (int) thumbDimension.getHeight(), false); + ByteArrayOutputStream result = new ByteArrayOutputStream(); + ImageIO.write(thumbnailImage, "png", result); + return result.toByteArray(); + + } + + private BufferedImage createResizedCopy(java.awt.Image originalImage, int scaledWidth, int scaledHeight, boolean preserveAlpha) { + int imageType = preserveAlpha ? 
BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB; + BufferedImage scaledBI = new BufferedImage(scaledWidth, scaledHeight, imageType); + Graphics2D g = scaledBI.createGraphics(); + if (preserveAlpha) { + g.setComposite(AlphaComposite.Src); + } + g.drawImage(originalImage, 0, 0, scaledWidth, scaledHeight, null); + g.dispose(); + return scaledBI; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/DefaultBookProcessorPipeline.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/DefaultBookProcessorPipeline.java new file mode 100644 index 00000000..38f6c4e6 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/DefaultBookProcessorPipeline.java @@ -0,0 +1,40 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.epub.BookProcessorPipeline; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A book processor that combines several other bookprocessors + * + * Fixes coverpage/coverimage. + * Cleans up the XHTML. 
+ * + * @author paul.siegmann + * + */ +public class DefaultBookProcessorPipeline extends BookProcessorPipeline { + + private Logger log = LoggerFactory.getLogger(DefaultBookProcessorPipeline.class); + + public DefaultBookProcessorPipeline() { + super(createDefaultBookProcessors()); + } + + private static List createDefaultBookProcessors() { + List result = new ArrayList(); + result.addAll(Arrays.asList(new BookProcessor[] { + new SectionHrefSanityCheckBookProcessor(), + new HtmlCleanerBookProcessor(), + new CoverpageBookProcessor(), + new FixIdentifierBookProcessor() + })); + return result; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/FixIdentifierBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/FixIdentifierBookProcessor.java new file mode 100644 index 00000000..725a69b0 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/FixIdentifierBookProcessor.java @@ -0,0 +1,22 @@ +package nl.siegmann.epublib.bookprocessor; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Identifier; +import nl.siegmann.epublib.epub.BookProcessor; + +/** + * If the book has no identifier it adds a generated UUID as identifier. 
+ * + * @author paul + * + */ +public class FixIdentifierBookProcessor implements BookProcessor { + + @Override + public Book processBook(Book book) { + if(book.getMetadata().getIdentifiers().isEmpty()) { + book.getMetadata().addIdentifier(new Identifier()); + } + return book; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/FixMissingResourceBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/FixMissingResourceBookProcessor.java new file mode 100644 index 00000000..2d7fd599 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/FixMissingResourceBookProcessor.java @@ -0,0 +1,23 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.util.Collection; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.TOCReference; +import nl.siegmann.epublib.epub.BookProcessor; + +public class FixMissingResourceBookProcessor implements BookProcessor { + + @Override + public Book processBook(Book book) { + return book; + } + + private void fixMissingResources(Collection tocReferences, Book book) { + for (TOCReference tocReference: tocReferences) { + if (tocReference.getResource() == null) { + + } + } + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlBookProcessor.java new file mode 100644 index 00000000..4b3f131b --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlBookProcessor.java @@ -0,0 +1,50 @@ +package nl.siegmann.epublib.bookprocessor; + + +import java.io.IOException; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.service.MediatypeService; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Helper class for BookProcessors that 
only manipulate html type resources. + * + * @author paul + * + */ +public abstract class HtmlBookProcessor implements BookProcessor { + + private final static Logger log = LoggerFactory.getLogger(HtmlBookProcessor.class); + public static final String OUTPUT_ENCODING = "UTF-8"; + + public HtmlBookProcessor() { + } + + @Override + public Book processBook(Book book) { + for(Resource resource: book.getResources().getAll()) { + try { + cleanupResource(resource, book); + } catch (IOException e) { + log.error(e.getMessage(), e); + } + } + return book; + } + + private void cleanupResource(Resource resource, Book book) throws IOException { + if(resource.getMediaType() == MediatypeService.XHTML) { + byte[] cleanedHtml = processHtml(resource, book, Constants.CHARACTER_ENCODING); + resource.setData(cleanedHtml); + resource.setInputEncoding(Constants.CHARACTER_ENCODING); + } + } + + protected abstract byte[] processHtml(Resource resource, Book book, String encoding) throws IOException; +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlCleanerBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlCleanerBookProcessor.java new file mode 100644 index 00000000..a662a207 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlCleanerBookProcessor.java @@ -0,0 +1,75 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.util.NoCloseWriter; + +import org.htmlcleaner.CleanerProperties; +import org.htmlcleaner.DoctypeToken; +import org.htmlcleaner.EpublibXmlSerializer; +import org.htmlcleaner.HtmlCleaner; +import org.htmlcleaner.TagNode; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +/** + * Cleans up regular html into xhtml. Uses HtmlCleaner to do this. + * + * @author paul + * + */ +public class HtmlCleanerBookProcessor extends HtmlBookProcessor implements + BookProcessor { + + @SuppressWarnings("unused") + private final static Logger log = LoggerFactory.getLogger(HtmlCleanerBookProcessor.class); + + private HtmlCleaner htmlCleaner; + + public HtmlCleanerBookProcessor() { + this.htmlCleaner = createHtmlCleaner(); + } + + private static HtmlCleaner createHtmlCleaner() { + HtmlCleaner result = new HtmlCleaner(); + CleanerProperties cleanerProperties = result.getProperties(); + cleanerProperties.setOmitXmlDeclaration(true); + cleanerProperties.setOmitDoctypeDeclaration(false); + cleanerProperties.setRecognizeUnicodeChars(true); + cleanerProperties.setTranslateSpecialEntities(false); + cleanerProperties.setIgnoreQuestAndExclam(true); + cleanerProperties.setUseEmptyElementTags(false); + return result; + } + + public byte[] processHtml(Resource resource, Book book, String outputEncoding) throws IOException { + + // clean html + TagNode node = htmlCleaner.clean(resource.getReader()); + + // post-process cleaned html + node.setAttribute("xmlns", Constants.NAMESPACE_XHTML); + node.setDocType(createXHTMLDoctypeToken()); + + // write result to output + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Writer writer = new OutputStreamWriter(out, outputEncoding); + writer = new NoCloseWriter(writer); + EpublibXmlSerializer xmlSerializer = new EpublibXmlSerializer(htmlCleaner.getProperties(), outputEncoding); + xmlSerializer.write(node, writer, outputEncoding); + writer.flush(); + + return out.toByteArray(); + } + + private DoctypeToken createXHTMLDoctypeToken(){ + return new DoctypeToken("html", "PUBLIC", "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlSplitterBookProcessor.java 
b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlSplitterBookProcessor.java new file mode 100644 index 00000000..0acb69e6 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/HtmlSplitterBookProcessor.java @@ -0,0 +1,19 @@ +package nl.siegmann.epublib.bookprocessor; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.epub.BookProcessor; + +/** + * In the future this will split up too large html documents into smaller ones. + * + * @author paul + * + */ +public class HtmlSplitterBookProcessor implements BookProcessor { + + @Override + public Book processBook(Book book) { + return book; + } + +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/SectionHrefSanityCheckBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/SectionHrefSanityCheckBookProcessor.java new file mode 100644 index 00000000..17b06102 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/SectionHrefSanityCheckBookProcessor.java @@ -0,0 +1,45 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.util.ArrayList; +import java.util.List; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.Spine; +import nl.siegmann.epublib.domain.SpineReference; +import nl.siegmann.epublib.epub.BookProcessor; + +import org.apache.commons.lang.StringUtils; + +/** + * Removes Sections from the page flow that differ only from the previous section's href by the '#' in the url. 
+ * + * @author paul + * + */ +public class SectionHrefSanityCheckBookProcessor implements BookProcessor { + + @Override + public Book processBook(Book book) { + book.getSpine().setSpineReferences(checkSpineReferences(book.getSpine())); + return book; + } + + private static List checkSpineReferences(Spine spine) { + List result = new ArrayList(spine.size()); + Resource previousResource = null; + for(SpineReference spineReference: spine.getSpineReferences()) { + if(spineReference.getResource() == null + || StringUtils.isBlank(spineReference.getResource().getHref())) { + continue; + } + if(previousResource == null + || spineReference.getResource() == null + || ( ! (spineReference.getResource().getHref().equals(previousResource.getHref())))) { + result.add(spineReference); + } + previousResource = spineReference.getResource(); + } + return result; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/SectionTitleBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/SectionTitleBookProcessor.java new file mode 100644 index 00000000..3f59c8f7 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/SectionTitleBookProcessor.java @@ -0,0 +1,60 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.io.IOException; +import java.util.List; + +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.TOCReference; +import nl.siegmann.epublib.epub.BookProcessor; + +import org.apache.commons.lang.StringUtils; +import org.xml.sax.InputSource; + +public class SectionTitleBookProcessor implements BookProcessor { + + @Override + public Book processBook(Book book) { + XPath xpath = createXPathExpression(); + processSections(book.getTableOfContents().getTocReferences(), book, xpath); + return book; + } + + private 
void processSections(List tocReferences, Book book, XPath xpath) { + for(TOCReference tocReference: tocReferences) { + if(! StringUtils.isBlank(tocReference.getTitle())) { + continue; + } + try { + String title = getTitle(tocReference, book, xpath); + tocReference.setTitle(title); + } catch (XPathExpressionException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } + + + private String getTitle(TOCReference tocReference, Book book, XPath xpath) throws IOException, XPathExpressionException { + Resource resource = tocReference.getResource(); + if(resource == null) { + return null; + } + InputSource inputSource = new InputSource(resource.getInputStream()); + String title = xpath.evaluate("/html/head/title", inputSource); + return title; + } + + + private XPath createXPathExpression() { + return XPathFactory.newInstance().newXPath(); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/TextReplaceBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/TextReplaceBookProcessor.java new file mode 100644 index 00000000..5bd46edf --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/TextReplaceBookProcessor.java @@ -0,0 +1,47 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Reader; +import java.io.Writer; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.epub.BookProcessor; + +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Cleans up regular html into xhtml. + * Uses HtmlCleaner to do this. 
+ * + * @author paul + * + */ +public class TextReplaceBookProcessor extends HtmlBookProcessor implements BookProcessor { + + @SuppressWarnings("unused") + private final static Logger log = LoggerFactory.getLogger(TextReplaceBookProcessor.class); + + public TextReplaceBookProcessor() { + } + + public byte[] processHtml(Resource resource, Book book, String outputEncoding) throws IOException { + Reader reader = resource.getReader(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Writer writer = new OutputStreamWriter(out, Constants.CHARACTER_ENCODING); + for(String line: IOUtils.readLines(reader)) { + writer.write(processLine(line)); + writer.flush(); + } + return out.toByteArray(); + } + + private String processLine(String line) { + return line.replace("'", "'"); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/XslBookProcessor.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/XslBookProcessor.java new file mode 100644 index 00000000..45ed504c --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/XslBookProcessor.java @@ -0,0 +1,79 @@ +package nl.siegmann.epublib.bookprocessor; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import 
nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.epub.EpubProcessorSupport; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + + +/** + * Uses the given xslFile to process all html resources of a Book. + * + * @author paul + * + */ +public class XslBookProcessor extends HtmlBookProcessor implements BookProcessor { + + private final static Logger log = LoggerFactory.getLogger(XslBookProcessor.class); + + private Transformer transformer; + + public XslBookProcessor(String xslFileName) throws TransformerConfigurationException { + File xslFile = new File(xslFileName); + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + transformer = transformerFactory.newTransformer(new StreamSource(xslFile)); + } + + @Override + public byte[] processHtml(Resource resource, Book book, String encoding) throws IOException { + byte[] result = null; + try { + DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbFactory.newDocumentBuilder(); + db.setEntityResolver(EpubProcessorSupport.getEntityResolver()); + + Document doc = db.parse(new InputSource(resource.getReader())); + + Source htmlSource = new DOMSource(doc.getDocumentElement()); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Writer writer = new OutputStreamWriter(out, "UTF-8"); + Result streamResult = new StreamResult(writer); + try { + transformer.transform(htmlSource, streamResult); + } catch (TransformerException e) { + log.error(e.getMessage(), e); + throw new IOException(e); + } + result = out.toByteArray(); + return result; + } catch (Exception e) { + throw new IOException(e); + } + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/package-info.java b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/package-info.java new file mode 100644 
index 00000000..89108ccf --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/bookprocessor/package-info.java @@ -0,0 +1,5 @@ +/** + * The classes in this package are used for post-processing Books. + * Things like cleaning up the html, adding a cover page, etc. + */ +package nl.siegmann.epublib.bookprocessor; \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/chm/ChmParser.java b/epublib-tools/src/main/java/nl/siegmann/epublib/chm/ChmParser.java new file mode 100644 index 00000000..501a9f5b --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/chm/ChmParser.java @@ -0,0 +1,128 @@ +package nl.siegmann.epublib.chm; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.MediaType; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.Resources; +import nl.siegmann.epublib.domain.TOCReference; +import nl.siegmann.epublib.domain.TableOfContents; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.ResourceUtil; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.vfs.AllFileSelector; +import org.apache.commons.vfs.FileObject; +import org.apache.commons.vfs.FileSystemException; +import org.apache.commons.vfs.FileType; + +/** + * Reads the files that are extracted from a windows help ('.chm') file and creates a epublib Book out of it. 
+ * + * @author paul + * + */ +public class ChmParser { + + public static final String DEFAULT_CHM_HTML_INPUT_ENCODING = "windows-1252"; + public static final int MINIMAL_SYSTEM_TITLE_LENGTH = 4; + + public static Book parseChm(FileObject chmRootDir) throws XPathExpressionException, IOException, ParserConfigurationException { + return parseChm(chmRootDir, DEFAULT_CHM_HTML_INPUT_ENCODING); + } + + public static Book parseChm(FileObject chmRootDir, String inputHtmlEncoding) + throws IOException, ParserConfigurationException, + XPathExpressionException { + Book result = new Book(); + result.getMetadata().addTitle(findTitle(chmRootDir)); + FileObject hhcFileObject = findHhcFileObject(chmRootDir); + if(hhcFileObject == null) { + throw new IllegalArgumentException("No index file found in directory " + chmRootDir + ". (Looked for file ending with extension '.hhc'"); + } + if(inputHtmlEncoding == null) { + inputHtmlEncoding = DEFAULT_CHM_HTML_INPUT_ENCODING; + } + Resources resources = findResources(chmRootDir, inputHtmlEncoding); + List tocReferences = HHCParser.parseHhc(hhcFileObject.getContent().getInputStream(), resources); + result.setTableOfContents(new TableOfContents(tocReferences)); + result.setResources(resources); + result.generateSpineFromTableOfContents(); + return result; + } + + + /** + * Finds in the '#SYSTEM' file the 3rd set of characters that have ascii value >= 32 and >= 126 and is more than 3 characters long. + * Assumes that that is then the title of the book. + * + * @param chmRootDir + * @return Finds in the '#SYSTEM' file the 3rd set of characters that have ascii value >= 32 and >= 126 and is more than 3 characters long. 
+ * @throws IOException + */ + protected static String findTitle(FileObject chmRootDir) throws IOException { + FileObject systemFileObject = chmRootDir.resolveFile("#SYSTEM"); + InputStream in = systemFileObject.getContent().getInputStream(); + boolean inText = false; + int lineCounter = 0; + StringBuilder line = new StringBuilder(); + for(int c = in.read(); c >= 0; c = in.read()) { + if(c >= 32 && c <= 126) { + line.append((char) c); + inText = true; + } else { + if(inText) { + if(line.length() >= 3) { + lineCounter++; + if(lineCounter >= MINIMAL_SYSTEM_TITLE_LENGTH) { + return line.toString(); + } + } + line = new StringBuilder(); + } + inText = false; + } + } + return ""; + } + + private static FileObject findHhcFileObject(FileObject chmRootDir) throws FileSystemException { + FileObject[] files = chmRootDir.getChildren(); + for(int i = 0; i < files.length; i++) { + if("hhc".equalsIgnoreCase(files[i].getName().getExtension())) { + return files[i]; + } + } + return null; + } + + + private static Resources findResources(FileObject rootDir, String inputEncoding) throws IOException { + Resources result = new Resources(); + FileObject[] allFiles = rootDir.findFiles(new AllFileSelector()); + for(int i = 0; i < allFiles.length; i++) { + FileObject file = allFiles[i]; + if (file.getType() == FileType.FOLDER) { + continue; + } + MediaType mediaType = MediatypeService.determineMediaType(file.getName().getBaseName()); + if(mediaType == null) { + continue; + } + String href = file.getName().toString().substring(rootDir.getName().toString().length() + 1); + byte[] resourceData = IOUtils.toByteArray(file.getContent().getInputStream()); + if(mediaType == MediatypeService.XHTML && ! 
nl.siegmann.epublib.Constants.CHARACTER_ENCODING.equalsIgnoreCase(inputEncoding)) { + resourceData = ResourceUtil.recode(inputEncoding, nl.siegmann.epublib.Constants.CHARACTER_ENCODING, resourceData); + } + Resource fileResource = new Resource(null, resourceData, href, mediaType); + result.add(fileResource); + } + return result; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/chm/HHCParser.java b/epublib-tools/src/main/java/nl/siegmann/epublib/chm/HHCParser.java new file mode 100644 index 00000000..7ee8b6b7 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/chm/HHCParser.java @@ -0,0 +1,151 @@ +package nl.siegmann.epublib.chm; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.Resources; +import nl.siegmann.epublib.domain.TOCReference; +import nl.siegmann.epublib.util.ResourceUtil; + +import org.apache.commons.lang.StringUtils; +import org.htmlcleaner.CleanerProperties; +import org.htmlcleaner.DomSerializer; +import org.htmlcleaner.HtmlCleaner; +import org.htmlcleaner.TagNode; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Parses the windows help index (.hhc) file. 
+ * + * @author paul + * + */ +public class HHCParser { + + public static final String DEFAULT_HTML_INPUT_ENCODING = "Windows-1251"; + + public static List parseHhc(InputStream hhcFile, Resources resources) throws IOException, ParserConfigurationException, XPathExpressionException { + HtmlCleaner htmlCleaner = new HtmlCleaner(); + CleanerProperties props = htmlCleaner.getProperties(); + TagNode node = htmlCleaner.clean(hhcFile); + Document hhcDocument = new DomSerializer(props).createDOM(node); + XPath xpath = XPathFactory.newInstance().newXPath(); + Node ulNode = (Node) xpath.evaluate("body/ul", hhcDocument + .getDocumentElement(), XPathConstants.NODE); + List sections = processUlNode(ulNode, resources); + return sections; + } + + /* + * Sometimes the structure is: + *
 <li> + * <object> ... </object> + * <ul> ... </ul> + * </li> + * + * And sometimes: + * <li> + * <object> ... </object> + * </li> + * <ul> ... </ul>
    + */ + private static List processUlNode(Node ulNode, Resources resources) { + List result = new ArrayList(); + NodeList children = ulNode.getChildNodes(); + for(int i = 0; i < children.getLength(); i++) { + Node node = children.item(i); + if(node.getNodeName().equals("li")) { + List section = processLiNode(node, resources); + result.addAll(section); + } else if(node.getNodeName().equals("ul")) { + List childTOCReferences = processUlNode(node, resources); + if(result.isEmpty()) { + result = childTOCReferences; + } else { + result.get(result.size() - 1).getChildren().addAll(childTOCReferences); + } + } + } + return result; + } + + + private static List processLiNode(Node liNode, Resources resources) { + List result = new ArrayList(); + NodeList children = liNode.getChildNodes(); + for(int i = 0; i < children.getLength(); i++) { + Node node = children.item(i); + if(node.getNodeName().equals("object")) { + TOCReference section = processObjectNode(node, resources); + if(section != null) { + result.add(section); + } + } else if(node.getNodeName().equals("ul")) { + List childTOCReferences = processUlNode(node, resources); + if(result.isEmpty()) { + result = childTOCReferences; + } else { + result.get(result.size() - 1).getChildren().addAll(childTOCReferences); + } + } + } + return result; + } + + + /** + * Processes a CHM object node into a TOCReference + * If the local name is empty then a TOCReference node is made with a null href value. 
+ * + * + * + * + * + * + * + * @param objectNode + * + * @return A TOCReference of the object has a non-blank param child with name 'Name' and a non-blank param name 'Local' + */ + private static TOCReference processObjectNode(Node objectNode, Resources resources) { + TOCReference result = null; + NodeList children = objectNode.getChildNodes(); + String name = null; + String href = null; + for(int i = 0; i < children.getLength(); i++) { + Node node = children.item(i); + if(node.getNodeName().equals("param")) { + String paramName = ((Element) node).getAttribute("name"); + if("Name".equals(paramName)) { + name = ((Element) node).getAttribute("value"); + } else if("Local".equals(paramName)) { + href = ((Element) node).getAttribute("value"); + } + } + } + if((! StringUtils.isBlank(href)) && href.startsWith("http://")) { + return result; + } + if(! StringUtils.isBlank(name)) { + Resource resource = resources.getByHref(href); + if (resource == null) { + resource = ResourceUtil.createResource(name, href); + resources.add(resource); + } + result = new TOCReference(name, resource); + } + return result; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/chm/package-info.java b/epublib-tools/src/main/java/nl/siegmann/epublib/chm/package-info.java new file mode 100644 index 00000000..5f28853c --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/chm/package-info.java @@ -0,0 +1,4 @@ +/** + * Classes related to making a Book out of a set of .chm (windows help) files. 
+ */ +package nl.siegmann.epublib.chm; \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/fileset/FilesetBookCreator.java b/epublib-tools/src/main/java/nl/siegmann/epublib/fileset/FilesetBookCreator.java new file mode 100644 index 00000000..66f41280 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/fileset/FilesetBookCreator.java @@ -0,0 +1,120 @@ +package nl.siegmann.epublib.fileset; + + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.bookprocessor.DefaultBookProcessorPipeline; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.Resources; +import nl.siegmann.epublib.domain.Spine; +import nl.siegmann.epublib.domain.TOCReference; +import nl.siegmann.epublib.domain.TableOfContents; +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.ResourceUtil; +import nl.siegmann.epublib.util.VFSUtil; + +import org.apache.commons.vfs.FileObject; +import org.apache.commons.vfs.FileType; +import org.apache.commons.vfs.VFS; + +/** + * Creates a Book from a collection of html and image files. 
+ * + * @author paul + * + */ +public class FilesetBookCreator { + + private static Comparator fileComparator = new Comparator(){ + @Override + public int compare(FileObject o1, FileObject o2) { + return o1.getName().getBaseName().compareToIgnoreCase(o2.getName().getBaseName()); + } + }; + + private static final BookProcessor bookProcessor = new DefaultBookProcessorPipeline(); + + public static Book createBookFromDirectory(File rootDirectory) throws IOException { + return createBookFromDirectory(rootDirectory, Constants.CHARACTER_ENCODING); + } + + + public static Book createBookFromDirectory(File rootDirectory, String encoding) throws IOException { + FileObject rootFileObject = VFS.getManager().resolveFile("file:" + rootDirectory.getCanonicalPath()); + return createBookFromDirectory(rootFileObject, encoding); + } + + public static Book createBookFromDirectory(FileObject rootDirectory) throws IOException { + return createBookFromDirectory(rootDirectory, Constants.CHARACTER_ENCODING); + } + + /** + * Recursively adds all files that are allowed to be part of an epub to the Book. 
+ * + * @see nl.siegmann.epublib.domain.MediaTypeService + * @param rootDirectory + * @return the newly created Book + * @throws IOException + */ + public static Book createBookFromDirectory(FileObject rootDirectory, String encoding) throws IOException { + Book result = new Book(); + List sections = new ArrayList(); + Resources resources = new Resources(); + processDirectory(rootDirectory, rootDirectory, sections, resources, encoding); + result.setResources(resources); + TableOfContents tableOfContents = new TableOfContents(sections); + result.setTableOfContents(tableOfContents); + result.setSpine(new Spine(tableOfContents)); + + result = bookProcessor.processBook(result); + + return result; + } + + private static void processDirectory(FileObject rootDir, FileObject directory, List sections, Resources resources, String inputEncoding) throws IOException { + FileObject[] files = directory.getChildren(); + Arrays.sort(files, fileComparator); + for(int i = 0; i < files.length; i++) { + FileObject file = files[i]; + if(file.getType() == FileType.FOLDER) { + processSubdirectory(rootDir, file, sections, resources, inputEncoding); + } else if (MediatypeService.determineMediaType(file.getName().getBaseName()) == null) { + continue; + } else { + Resource resource = VFSUtil.createResource(rootDir, file, inputEncoding); + if(resource == null) { + continue; + } + resources.add(resource); + if(MediatypeService.XHTML == resource.getMediaType()) { + TOCReference section = new TOCReference(file.getName().getBaseName(), resource); + sections.add(section); + } + } + } + } + + private static void processSubdirectory(FileObject rootDir, FileObject file, + List sections, Resources resources, String inputEncoding) + throws IOException { + List childTOCReferences = new ArrayList(); + processDirectory(rootDir, file, childTOCReferences, resources, inputEncoding); + if(! 
childTOCReferences.isEmpty()) { + String sectionName = file.getName().getBaseName(); + Resource sectionResource = ResourceUtil.createResource(sectionName, VFSUtil.calculateHref(rootDir,file)); + resources.add(sectionResource); + TOCReference section = new TOCReference(sectionName, sectionResource); + section.setChildren(childTOCReferences); + sections.add(section); + } + } + +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/html/htmlcleaner/XmlEventSerializer.java b/epublib-tools/src/main/java/nl/siegmann/epublib/html/htmlcleaner/XmlEventSerializer.java new file mode 100644 index 00000000..1e2c6b73 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/html/htmlcleaner/XmlEventSerializer.java @@ -0,0 +1,180 @@ +package nl.siegmann.epublib.html.htmlcleaner; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import javax.xml.stream.events.XMLEvent; + +import org.htmlcleaner.CleanerProperties; +import org.htmlcleaner.CommentNode; +import org.htmlcleaner.ContentNode; +import org.htmlcleaner.EndTagToken; +import org.htmlcleaner.TagNode; + +public class XmlEventSerializer implements XMLEventReader { + + protected CleanerProperties props; + + protected XmlEventSerializer(CleanerProperties props) { + this.props = props; + } + + + public void writeXml(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { +// if ( !props.isOmitXmlDeclaration() ) { +// String declaration = ""; +// writer.write(declaration + "\n"); +// } + +// if ( !props.isOmitDoctypeDeclaration() ) { +// DoctypeToken doctypeToken = tagNode.getDocType(); +// if ( doctypeToken != null ) { +// doctypeToken.serialize(this, writer); +// } +// } +// + serialize(tagNode, writer); + + writer.flush(); + } + + protected void serializeOpenTag(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + String 
tagName = tagNode.getName(); + + writer.writeStartElement(tagName); + Map tagAtttributes = tagNode.getAttributes(); + for(Iterator it = tagAtttributes.entrySet().iterator();it.hasNext();) { + Map.Entry entry = (Map.Entry) it.next(); + String attName = (String) entry.getKey(); + String attValue = (String) entry.getValue(); + + if ( !props.isNamespacesAware() && ("xmlns".equals(attName) || attName.startsWith("xmlns:")) ) { + continue; + } + writer.writeAttribute(attName, attValue); + } + } + + protected void serializeEmptyTag(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + String tagName = tagNode.getName(); + + writer.writeEmptyElement(tagName); + Map tagAtttributes = tagNode.getAttributes(); + for(Iterator it = tagAtttributes.entrySet().iterator();it.hasNext();) { + Map.Entry entry = (Map.Entry) it.next(); + String attName = (String) entry.getKey(); + String attValue = (String) entry.getValue(); + + if ( !props.isNamespacesAware() && ("xmlns".equals(attName) || attName.startsWith("xmlns:")) ) { + continue; + } + writer.writeAttribute(attName, attValue); + } + } + + protected void serializeEndTag(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + writer.writeEndElement(); + } + + + protected void serialize(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + if(tagNode.getChildren().isEmpty()) { + serializeEmptyTag(tagNode, writer); + } else { + serializeOpenTag(tagNode, writer); + + List tagChildren = tagNode.getChildren(); + for(Iterator childrenIt = tagChildren.iterator(); childrenIt.hasNext(); ) { + Object item = childrenIt.next(); + if (item != null) { + serializeToken(item, writer); + } + } + serializeEndTag(tagNode, writer); + } + } + + + private void serializeToken(Object item, XMLStreamWriter writer) throws XMLStreamException { + if ( item instanceof ContentNode ) { + writer.writeCharacters(((ContentNode) item).getContent().toString()); + } else if(item instanceof CommentNode) { + 
writer.writeComment(((CommentNode) item).getContent().toString()); + } else if(item instanceof EndTagToken) { +// writer.writeEndElement(); + } else if(item instanceof TagNode) { + serialize((TagNode) item, writer); + } + } + + + @Override + public void close() throws XMLStreamException { + // TODO Auto-generated method stub + + } + + + @Override + public String getElementText() throws XMLStreamException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public Object getProperty(String name) throws IllegalArgumentException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public boolean hasNext() { + // TODO Auto-generated method stub + return false; + } + + + @Override + public XMLEvent nextEvent() throws XMLStreamException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public XMLEvent nextTag() throws XMLStreamException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public XMLEvent peek() throws XMLStreamException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public Object next() { + // TODO Auto-generated method stub + return null; + } + + + @Override + public void remove() { + // TODO Auto-generated method stub + + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/html/htmlcleaner/XmlSerializer.java b/epublib-tools/src/main/java/nl/siegmann/epublib/html/htmlcleaner/XmlSerializer.java new file mode 100644 index 00000000..54c76459 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/html/htmlcleaner/XmlSerializer.java @@ -0,0 +1,115 @@ +package nl.siegmann.epublib.html.htmlcleaner; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.htmlcleaner.CleanerProperties; +import org.htmlcleaner.CommentNode; +import org.htmlcleaner.ContentNode; 
+import org.htmlcleaner.EndTagToken; +import org.htmlcleaner.TagNode; + +public class XmlSerializer { + + protected CleanerProperties props; + + public XmlSerializer(CleanerProperties props) { + this.props = props; + } + + + public void writeXml(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { +// if ( !props.isOmitXmlDeclaration() ) { +// String declaration = ""; +// writer.write(declaration + "\n"); +// } + +// if ( !props.isOmitDoctypeDeclaration() ) { +// DoctypeToken doctypeToken = tagNode.getDocType(); +// if ( doctypeToken != null ) { +// doctypeToken.serialize(this, writer); +// } +// } +// + serialize(tagNode, writer); + + writer.flush(); + } + + protected void serializeOpenTag(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + String tagName = tagNode.getName(); + + writer.writeStartElement(tagName); + Map tagAtttributes = tagNode.getAttributes(); + for(Iterator it = tagAtttributes.entrySet().iterator();it.hasNext();) { + Map.Entry entry = (Map.Entry) it.next(); + String attName = (String) entry.getKey(); + String attValue = (String) entry.getValue(); + + if ( !props.isNamespacesAware() && ("xmlns".equals(attName) || attName.startsWith("xmlns:")) ) { + continue; + } + writer.writeAttribute(attName, attValue); + } + } + + protected void serializeEmptyTag(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + String tagName = tagNode.getName(); + + writer.writeEmptyElement(tagName); + Map tagAtttributes = tagNode.getAttributes(); + for(Iterator it = tagAtttributes.entrySet().iterator();it.hasNext();) { + Map.Entry entry = (Map.Entry) it.next(); + String attName = (String) entry.getKey(); + String attValue = (String) entry.getValue(); + + if ( !props.isNamespacesAware() && ("xmlns".equals(attName) || attName.startsWith("xmlns:")) ) { + continue; + } + writer.writeAttribute(attName, attValue); + } + } + + protected void serializeEndTag(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { 
+ writer.writeEndElement(); + } + + + protected void serialize(TagNode tagNode, XMLStreamWriter writer) throws XMLStreamException { + if(tagNode.getChildren().isEmpty()) { + serializeEmptyTag(tagNode, writer); + } else { + serializeOpenTag(tagNode, writer); + + List tagChildren = tagNode.getChildren(); + for(Iterator childrenIt = tagChildren.iterator(); childrenIt.hasNext(); ) { + Object item = childrenIt.next(); + if (item != null) { + serializeToken(item, writer); + } + } + serializeEndTag(tagNode, writer); + } + } + + + private void serializeToken(Object item, XMLStreamWriter writer) throws XMLStreamException { + if ( item instanceof ContentNode ) { + writer.writeCharacters(((ContentNode) item).getContent().toString()); + } else if(item instanceof CommentNode) { + writer.writeComment(((CommentNode) item).getContent().toString()); + } else if(item instanceof EndTagToken) { +// writer.writeEndElement(); + } else if(item instanceof TagNode) { + serialize((TagNode) item, writer); + } + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/search/ResourceSearchIndex.java b/epublib-tools/src/main/java/nl/siegmann/epublib/search/ResourceSearchIndex.java new file mode 100644 index 00000000..e32f71fa --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/search/ResourceSearchIndex.java @@ -0,0 +1,29 @@ +package nl.siegmann.epublib.search; + +import nl.siegmann.epublib.domain.Resource; + +/** + * The search index for a single resource. 
+ * + * @author paul.siegmann + * + */ +// package +class ResourceSearchIndex { + private String content; + private Resource resource; + + public ResourceSearchIndex(Resource resource, String searchContent) { + this.resource = resource; + this.content = searchContent; + } + + public String getContent() { + return content; + } + + public Resource getResource() { + return resource; + } + +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchIndex.java b/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchIndex.java new file mode 100644 index 00000000..1c1c5d11 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchIndex.java @@ -0,0 +1,215 @@ +package nl.siegmann.epublib.search; + +import java.io.IOException; +import java.io.Reader; +import java.text.Normalizer; +import java.util.ArrayList; +import java.util.List; +import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; + +import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A searchindex for searching through a book. 
+ * + * @author paul.siegmann + * + */ +public class SearchIndex { + + private static final Logger log = LoggerFactory.getLogger(SearchIndex.class); + + public static int NBSP = 0x00A0; + + // whitespace pattern that also matches U+00A0 (  in html) + private static final Pattern WHITESPACE_PATTERN = Pattern.compile("[\\p{Z}\\s]+"); + + private static final Pattern REMOVE_ACCENT_PATTERN = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); + + private List resourceSearchIndexes = new ArrayList(); + private Book book; + + public SearchIndex() { + } + + public SearchIndex(Book book) { + initBook(book); + } + + public Book getBook() { + return book; + } + + + private static class ResourceSearchIndex { + private String content; + private Resource resource; + + public String getContent() { + return content; + } + + public Resource getResource() { + return resource; + } + + public ResourceSearchIndex(Resource resource, String searchContent) { + this.resource = resource; + this.content = searchContent; + } + } + + private static ResourceSearchIndex createResourceSearchIndex(Resource resource) { + String searchContent = getSearchContent(resource); + if ( StringUtils.isBlank(searchContent)) { + return null; + } + ResourceSearchIndex searchIndex = new ResourceSearchIndex(resource, searchContent); + return searchIndex; + } + + public void initBook(Book book) { + this.resourceSearchIndexes = createSearchIndex(book); + } + + private static List createSearchIndex(Book book) { + List result = new ArrayList(); + if (book == null) { + return result; + } + for (Resource resource: book.getContents()) { + ResourceSearchIndex resourceSearchIndex = createResourceSearchIndex(resource); + if (resourceSearchIndex != null) { + result.add(resourceSearchIndex); + } + } + return result; + } + + public SearchResults doSearch(String searchTerm) { + SearchResults result = new SearchResults(); + if (StringUtils.isBlank(searchTerm)) { + return result; + } + searchTerm = cleanText(searchTerm); + 
for (ResourceSearchIndex resourceSearchIndex: resourceSearchIndexes) { + result.addAll(doSearch(searchTerm, resourceSearchIndex)); + } + result.setSearchTerm(searchTerm); + return result; + } + + + public static String getSearchContent(Resource resource) { + if (resource.getMediaType() != MediatypeService.XHTML) { + return ""; + } + String result = ""; + try { + result = getSearchContent(resource.getReader()); + } catch (IOException e) { + log.error(e.getMessage()); + } + return result; + } + + + public static String getSearchContent(Reader content) { + StringBuilder result = new StringBuilder(); + Scanner scanner = new Scanner(content); + scanner.useDelimiter("<"); + while(scanner.hasNext()) { + String text = scanner.next(); + int closePos = text.indexOf('>'); + String chunk = text.substring(closePos + 1).trim(); + chunk = StringEscapeUtils.unescapeHtml(chunk); + chunk = cleanText(chunk); + result.append(chunk); + } + return result.toString(); + } + + /** + * Checks whether the given character is a java whitespace or a non-breaking-space (&nbsp;). + * + * @param c + * @return whether the given character is a java whitespace or a non-breaking-space (&nbsp;). + */ + private static boolean isHtmlWhitespace(int c) { + return c == NBSP || Character.isWhitespace(c); + } + + public static String unicodeTrim(String text) { + int leadingWhitespaceCount = 0; + int trailingWhitespaceCount = 0; + for (int i = 0; i < text.length(); i++) { + if (! isHtmlWhitespace(text.charAt(i))) { + break; + } + leadingWhitespaceCount++; + } + for (int i = (text.length() - 1); i > leadingWhitespaceCount; i--) { + if (! isHtmlWhitespace(text.charAt(i))) { + break; + } + trailingWhitespaceCount++; + } + if (leadingWhitespaceCount > 0 || trailingWhitespaceCount > 0) { + text = text.substring(leadingWhitespaceCount, text.length() - trailingWhitespaceCount); + } + return text; + } + + /** + * Turns html encoded text into plain text. + * + * Replaces &ouml; type of expressions into ¨
    + * Removes accents
    + * Replaces multiple whitespaces with a single space.
    + * + * @param text + * @return html encoded text turned into plain text. + */ + public static String cleanText(String text) { + text = unicodeTrim(text); + + // replace all multiple whitespaces by a single space + Matcher matcher = WHITESPACE_PATTERN.matcher(text); + text = matcher.replaceAll(" "); + + // turn accented characters into normalized form. Turns ö into o" + text = Normalizer.normalize(text, Normalizer.Form.NFD); + + // removes the marks found in the previous line. + text = REMOVE_ACCENT_PATTERN.matcher(text).replaceAll(""); + + // lowercase everything + text = text.toLowerCase(); + return text; + } + + + private static List doSearch(String searchTerm, ResourceSearchIndex resourceSearchIndex) { + return doSearch(searchTerm, resourceSearchIndex.getContent(), resourceSearchIndex.getResource()); + } + + protected static List doSearch(String searchTerm, String content, Resource resource) { + List result = new ArrayList(); + int findPos = content.indexOf(searchTerm); + while(findPos >= 0) { + SearchResult searchResult = new SearchResult(findPos, searchTerm, resource); + result.add(searchResult); + findPos = content.indexOf(searchTerm, findPos + 1); + } + return result; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchResult.java b/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchResult.java new file mode 100644 index 00000000..670fe80b --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchResult.java @@ -0,0 +1,24 @@ +package nl.siegmann.epublib.search; + +import nl.siegmann.epublib.domain.Resource; + +public class SearchResult { + private int pagePos = -1; + private String searchTerm; + private Resource resource; + public SearchResult(int pagePos, String searchTerm, Resource resource) { + super(); + this.pagePos = pagePos; + this.searchTerm = searchTerm; + this.resource = resource; + } + public int getPagePos() { + return pagePos; + } + public String getSearchTerm() { + return 
searchTerm; + } + public Resource getResource() { + return resource; + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchResults.java b/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchResults.java new file mode 100644 index 00000000..c69dd7df --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/search/SearchResults.java @@ -0,0 +1,39 @@ +package nl.siegmann.epublib.search; + +import java.util.ArrayList; +import java.util.List; + +import nl.siegmann.epublib.domain.Book; + +public class SearchResults { + private String searchTerm; + public String getSearchTerm() { + return searchTerm; + } + public void setSearchTerm(String searchTerm) { + this.searchTerm = searchTerm; + } + public Book getBook() { + return book; + } + public void setBook(Book book) { + this.book = book; + } + public List getHits() { + return hits; + } + public void setHits(List hits) { + this.hits = hits; + } + private Book book; + private List hits = new ArrayList(); + public boolean isEmpty() { + return hits.isEmpty(); + } + public int size() { + return hits.size(); + } + public void addAll(List searchResults) { + hits.addAll(searchResults); + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/util/DesktopUtil.java b/epublib-tools/src/main/java/nl/siegmann/epublib/util/DesktopUtil.java new file mode 100644 index 00000000..aa18159f --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/util/DesktopUtil.java @@ -0,0 +1,38 @@ + +package nl.siegmann.epublib.util; + +import java.awt.Desktop; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.logging.Level; +import nl.siegmann.epublib.viewer.ContentPane; + +public class DesktopUtil { + + /** + * Open a URL in the default web browser. + * + * @param url a URL to open in a web browser. + * @return true if a browser has been launched. 
+ */ + public static boolean launchBrowser(URL url) throws BrowserLaunchException { + if (Desktop.isDesktopSupported()) { + try { + Desktop.getDesktop().browse(url.toURI()); + return true; + } catch (Exception ex) { + throw new BrowserLaunchException("Browser could not be launched for "+url, ex); + } + } + return false; + } + + public static class BrowserLaunchException extends Exception { + + private BrowserLaunchException(String message, Throwable cause) { + super(message, cause); + } + + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/util/ToolsResourceUtil.java b/epublib-tools/src/main/java/nl/siegmann/epublib/util/ToolsResourceUtil.java new file mode 100644 index 00000000..3f84e175 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/util/ToolsResourceUtil.java @@ -0,0 +1,96 @@ +package nl.siegmann.epublib.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.util.Scanner; +import java.util.regex.Pattern; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.ParserConfigurationException; + +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.MediaType; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.epub.EpubProcessorSupport; +import nl.siegmann.epublib.service.MediatypeService; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringEscapeUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * Various resource utility methods + * + * @author paul + * + */ +public class ToolsResourceUtil { + + private static Logger log = LoggerFactory.getLogger(ToolsResourceUtil.class); + + + public static String getTitle(Resource resource) { + if (resource 
== null) { + return ""; + } + if (resource.getMediaType() != MediatypeService.XHTML) { + return resource.getHref(); + } + String title = findTitleFromXhtml(resource); + if (title == null) { + title = ""; + } + return title; + } + + + + + /** + * Retrieves whatever it finds between <title>...</title> or <h1-7>...</h1-7>. + * The first match is returned, even if it is a blank string. + * If it finds nothing null is returned. + * @param resource + * @return whatever it finds in the resource between <title>...</title> or <h1-7>...</h1-7>. + */ + public static String findTitleFromXhtml(Resource resource) { + if (resource == null) { + return ""; + } + if (resource.getTitle() != null) { + return resource.getTitle(); + } + Pattern h_tag = Pattern.compile("^h\\d\\s*", Pattern.CASE_INSENSITIVE); + String title = null; + try { + Reader content = resource.getReader(); + Scanner scanner = new Scanner(content); + scanner.useDelimiter("<"); + while(scanner.hasNext()) { + String text = scanner.next(); + int closePos = text.indexOf('>'); + String tag = text.substring(0, closePos); + if (tag.equalsIgnoreCase("title") + || h_tag.matcher(tag).find()) { + + title = text.substring(closePos + 1).trim(); + title = StringEscapeUtils.unescapeHtml(title); + break; + } + } + } catch (IOException e) { + log.error(e.getMessage()); + } + resource.setTitle(title); + return title; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/util/VFSUtil.java b/epublib-tools/src/main/java/nl/siegmann/epublib/util/VFSUtil.java new file mode 100644 index 00000000..4124ca42 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/util/VFSUtil.java @@ -0,0 +1,89 @@ +package nl.siegmann.epublib.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; + +import nl.siegmann.epublib.domain.MediaType; +import nl.siegmann.epublib.domain.Resource; +import 
nl.siegmann.epublib.service.MediatypeService; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.vfs.FileObject; +import org.apache.commons.vfs.FileSystemException; +import org.apache.commons.vfs.VFS; +import org.slf4j.Logger;import org.slf4j.LoggerFactory; + +/** + * Utitilies for making working with apache commons VFS easier. + * + * @author paul + * + */ +public class VFSUtil { + + private static final Logger log = LoggerFactory.getLogger(VFSUtil.class); + + public static Resource createResource(FileObject rootDir, FileObject file, String inputEncoding) throws IOException { + MediaType mediaType = MediatypeService.determineMediaType(file.getName().getBaseName()); + if(mediaType == null) { + return null; + } + String href = calculateHref(rootDir, file); + Resource result = new Resource(null, IOUtils.toByteArray(file.getContent().getInputStream()), href, mediaType); + result.setInputEncoding(inputEncoding); + return result; + } + + public static String calculateHref(FileObject rootDir, FileObject currentFile) throws IOException { + String result = currentFile.getName().toString().substring(rootDir.getName().toString().length() + 1); + result += ".html"; + return result; + } + + /** + * First tries to load the inputLocation via VFS; if that doesn't work it tries to load it as a local File + * @param inputLocation + * @return the FileObject referred to by the inputLocation + * @throws FileSystemException + */ + public static FileObject resolveFileObject(String inputLocation) throws FileSystemException { + FileObject result = null; + try { + result = VFS.getManager().resolveFile(inputLocation); + } catch (Exception e) { + try { + result = VFS.getManager().resolveFile(new File("."), inputLocation); + } catch (Exception e1) { + log.error(e.getMessage(), e); + log.error(e1.getMessage(), e); + } + } + return result; + } + + + /** + * First tries to load the inputLocation via VFS; if that doesn't work it tries to load it as a local File + * + * @param 
inputLocation + * @return the InputStream referred to by the inputLocation + * @throws FileSystemException + */ + public static InputStream resolveInputStream(String inputLocation) throws FileSystemException { + InputStream result = null; + try { + result = VFS.getManager().resolveFile(inputLocation).getContent().getInputStream(); + } catch (Exception e) { + try { + result = new FileInputStream(inputLocation); + } catch (FileNotFoundException e1) { + log.error(e.getMessage(), e); + log.error(e1.getMessage(), e); + } + } + return result; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/utilities/HtmlSplitter.java b/epublib-tools/src/main/java/nl/siegmann/epublib/utilities/HtmlSplitter.java new file mode 100644 index 00000000..80ecde82 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/utilities/HtmlSplitter.java @@ -0,0 +1,154 @@ +package nl.siegmann.epublib.utilities; + +import java.io.Reader; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; + +import javax.xml.stream.XMLEventFactory; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLEventWriter; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.XMLEvent; + +/** + * Splits up a xhtml document into pieces that are all valid xhtml documents. 
+ * + * @author paul + * + */ +public class HtmlSplitter { + + private XMLEventFactory xmlEventFactory = XMLEventFactory.newInstance(); + private XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance(); + private List headerElements = new ArrayList(); + private List footerElements = new ArrayList(); + private int footerCloseTagLength; + private List elementStack = new ArrayList(); + private StringWriter currentDoc = new StringWriter(); + private List currentXmlEvents = new ArrayList(); + private XMLEventWriter out; + private int maxLength = 300000; // 300K, the max length of a chapter of an epub document + private List> result = new ArrayList>(); + + public List> splitHtml(Reader reader, int maxLength) throws XMLStreamException { + XMLEventReader xmlEventReader = XMLInputFactory.newInstance().createXMLEventReader(reader); + return splitHtml(xmlEventReader, maxLength); + } + + private static int calculateTotalTagStringLength(List xmlEvents) { + int result = 0; + for(XMLEvent xmlEvent: xmlEvents) { + result += xmlEvent.toString().length(); + } + return result; + } + + public List> splitHtml(XMLEventReader reader, int maxLength) throws XMLStreamException { + this.headerElements = getHeaderElements(reader); + this.footerElements = getFooterElements(); + footerCloseTagLength = calculateTotalTagStringLength(footerElements); + this.maxLength = (int) ((float) maxLength * 0.9); + currentXmlEvents = new ArrayList(); + currentXmlEvents.addAll(headerElements); + currentXmlEvents.addAll(elementStack); + out = xmlOutputFactory.createXMLEventWriter(currentDoc); + for(XMLEvent headerXmlEvent: headerElements) { + out.add(headerXmlEvent); + } + XMLEvent xmlEvent = reader.nextEvent(); + while(! 
isBodyEndElement(xmlEvent)) { + processXmlEvent(xmlEvent, result); + xmlEvent = reader.nextEvent(); + } + result.add(currentXmlEvents); + return result; + } + + + private void closeCurrentDocument() throws XMLStreamException { + closeAllTags(currentXmlEvents); + currentXmlEvents.addAll(footerElements); + result.add(currentXmlEvents); + } + + private void startNewDocument() throws XMLStreamException { + currentDoc = new StringWriter(); + out = xmlOutputFactory.createXMLEventWriter(currentDoc); + for(XMLEvent headerXmlEvent: headerElements) { + out.add(headerXmlEvent); + } + for(XMLEvent stackXmlEvent: elementStack) { + out.add(stackXmlEvent); + } + + currentXmlEvents = new ArrayList(); + currentXmlEvents.addAll(headerElements); + currentXmlEvents.addAll(elementStack); + } + + private void processXmlEvent(XMLEvent xmlEvent, List> docs) throws XMLStreamException { + out.flush(); + String currentSerializerDoc = currentDoc.toString(); + if((currentSerializerDoc.length() + xmlEvent.toString().length() + footerCloseTagLength) >= maxLength) { + closeCurrentDocument(); + startNewDocument(); + } + updateStack(xmlEvent); + out.add(xmlEvent); + currentXmlEvents.add(xmlEvent); + } + + private void closeAllTags(List xmlEvents) throws XMLStreamException { + for(int i = elementStack.size() - 1; i>= 0; i--) { + XMLEvent xmlEvent = elementStack.get(i); + XMLEvent xmlEndElementEvent = xmlEventFactory.createEndElement(xmlEvent.asStartElement().getName(), null); + xmlEvents.add(xmlEndElementEvent); + } + } + + private void updateStack(XMLEvent xmlEvent) { + if(xmlEvent.isStartElement()) { + elementStack.add(xmlEvent); + } else if(xmlEvent.isEndElement()) { + XMLEvent lastEvent = elementStack.get(elementStack.size() - 1); + if(lastEvent.isStartElement() && + xmlEvent.asEndElement().getName().equals(lastEvent.asStartElement().getName())) { + elementStack.remove(elementStack.size() - 1); + } + } + } + + private List getHeaderElements(XMLEventReader reader) throws XMLStreamException { + 
List result = new ArrayList(); + XMLEvent event = reader.nextEvent(); + while(event != null && (!isBodyStartElement(event))) { + result.add(event); + event = reader.nextEvent(); + } + + // add the body start tag to the result + if(event != null) { + result.add(event); + } + return result; + } + + private List getFooterElements() throws XMLStreamException { + List result = new ArrayList(); + result.add(xmlEventFactory.createEndElement("", null, "body")); + result.add(xmlEventFactory.createEndElement("", null, "html")); + result.add(xmlEventFactory.createEndDocument()); + return result; + } + + private static boolean isBodyStartElement(XMLEvent xmlEvent) { + return xmlEvent.isStartElement() && xmlEvent.asStartElement().getName().getLocalPart().equals("body"); + } + + private static boolean isBodyEndElement(XMLEvent xmlEvent) { + return xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().getLocalPart().equals("body"); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/utilities/NumberSayer.java b/epublib-tools/src/main/java/nl/siegmann/epublib/utilities/NumberSayer.java new file mode 100644 index 00000000..2ba8f07f --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/utilities/NumberSayer.java @@ -0,0 +1,28 @@ +package nl.siegmann.epublib.utilities; + +public class NumberSayer { + + private static final String[] NUMBER_BELOW_20 = new String[] {"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "nineteen"}; + private static final String[] DECIMALS = new String[] {"zero", "ten", "twenty", "thirty", "fourty", "fifty", "sixty", "seventy", "eighty", "ninety"}; + private static final String[] ORDER_NUMBERS = new String[] {"hundred", "thousand", "million", "billion", "trillion"}; + + + public static String getNumberName(int number) { + if(number < 0) { + throw new IllegalArgumentException("Cannot handle numbers < 0 or > " 
+ Integer.MAX_VALUE); + } + if(number < 20) { + return NUMBER_BELOW_20[number]; + } + if(number < 100) { + return DECIMALS[number / 10] + NUMBER_BELOW_20[number % 10]; + } + if(number >= 100 && number < 200) { + return ORDER_NUMBERS[0] + getNumberName(number - 100); + } + if(number < 1000) { + return NUMBER_BELOW_20[number / 100] + ORDER_NUMBERS[0] + getNumberName(number % 100); + } + throw new IllegalArgumentException("Cannot handle numbers < 0 or > " + Integer.MAX_VALUE); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/AboutDialog.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/AboutDialog.java new file mode 100644 index 00000000..6a84a1f6 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/AboutDialog.java @@ -0,0 +1,52 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.GridLayout; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; + +import javax.swing.JButton; +import javax.swing.JDialog; +import javax.swing.JFrame; +import javax.swing.JLabel; + +/** + * First stab at an about dialog. 
+ * + * @author paul.siegmann + * + */ +public class AboutDialog extends JDialog { + + private static final long serialVersionUID = -1766802200843275782L; + + public AboutDialog(JFrame parent) { + super(parent, true); + + super.setResizable(false); + super.getContentPane().setLayout(new GridLayout(3, 1)); + super.setSize(400, 150); + super.setTitle("About epublib"); + super.setLocationRelativeTo(parent); + + JButton close = new JButton("Close"); + close.addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + AboutDialog.this.dispose(); + } + }); + super.getRootPane().setDefaultButton(close); + add(new JLabel("epublib viewer")); + add(new JLabel("http://www.siegmann.nl/epublib")); + add(close); + super.addWindowListener(new WindowAdapter() { + public void windowClosing(WindowEvent e) { + AboutDialog.this.dispose(); + } + }); + pack(); + setVisible(true); + + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/BrowseBar.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/BrowseBar.java new file mode 100644 index 00000000..b3a79e63 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/BrowseBar.java @@ -0,0 +1,18 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.BorderLayout; + +import javax.swing.JPanel; + +import nl.siegmann.epublib.browsersupport.Navigator; + +public class BrowseBar extends JPanel { + + private static final long serialVersionUID = -5745389338067538254L; + + public BrowseBar(Navigator navigator, ContentPane chapterPane) { + super(new BorderLayout()); + add(new ButtonBar(navigator, chapterPane), BorderLayout.CENTER); + add(new SpineSlider(navigator), BorderLayout.NORTH); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ButtonBar.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ButtonBar.java new file mode 100644 index 00000000..553f74b3 --- /dev/null +++ 
b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ButtonBar.java @@ -0,0 +1,97 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.GridLayout; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; + +import javax.swing.JButton; +import javax.swing.JPanel; + +import nl.siegmann.epublib.browsersupport.Navigator; + +/** + * Creates a panel with the first,previous,next and last buttons. + * + */ +class ButtonBar extends JPanel { + private static final long serialVersionUID = 6431437924245035812L; + + private JButton startButton = ViewerUtil.createButton("chapter-first", "|<"); + private JButton previousChapterButton = ViewerUtil.createButton("chapter-previous", "<<"); + private JButton previousPageButton = ViewerUtil.createButton("page-previous", "<"); + private JButton nextPageButton = ViewerUtil.createButton("page-next", ">"); + private JButton nextChapterButton = ViewerUtil.createButton("chapter-next", ">>"); + private JButton endButton = ViewerUtil.createButton("chapter-last", ">|"); + private ContentPane chapterPane; + private final ValueHolder navigatorHolder = new ValueHolder(); + + public ButtonBar(Navigator navigator, ContentPane chapterPane) { + super(new GridLayout(0, 4)); + this.chapterPane = chapterPane; + + JPanel bigPrevious = new JPanel(new GridLayout(0, 2)); + bigPrevious.add(startButton); + bigPrevious.add(previousChapterButton); + add(bigPrevious); + + add(previousPageButton); + add(nextPageButton); + + JPanel bigNext = new JPanel(new GridLayout(0, 2)); + bigNext.add(nextChapterButton); + bigNext.add(endButton); + add(bigNext); + + setSectionWalker(navigator); + } + + public void setSectionWalker(Navigator navigator) { + navigatorHolder.setValue(navigator); + + startButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + + navigatorHolder.getValue().gotoFirstSpineSection(ButtonBar.this); + } + }); + previousChapterButton.addActionListener(new ActionListener() { 
+ + @Override + public void actionPerformed(ActionEvent e) { + navigatorHolder.getValue().gotoPreviousSpineSection(ButtonBar.this); + } + }); + previousPageButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + chapterPane.gotoPreviousPage(); + } + }); + + nextPageButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + chapterPane.gotoNextPage(); + } + }); + nextChapterButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + navigatorHolder.getValue().gotoNextSpineSection(ButtonBar.this); + } + }); + + endButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + navigatorHolder.getValue().gotoLastSpineSection(ButtonBar.this); + } + }); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ContentPane.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ContentPane.java new file mode 100644 index 00000000..ae76fac6 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ContentPane.java @@ -0,0 +1,385 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.Color; +import java.awt.GridLayout; +import java.awt.Point; +import java.awt.Rectangle; +import java.awt.event.KeyEvent; +import java.awt.event.KeyListener; +import java.awt.event.MouseWheelEvent; +import java.awt.event.MouseWheelListener; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLDecoder; + +import javax.swing.JEditorPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.event.HyperlinkEvent; +import javax.swing.event.HyperlinkListener; +import javax.swing.text.AttributeSet; +import javax.swing.text.BadLocationException; +import javax.swing.text.html.HTML; +import javax.swing.text.html.HTMLDocument; +import javax.swing.text.html.HTMLEditorKit; + +import 
nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.util.DesktopUtil; + +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Displays a page + * + */ +public class ContentPane extends JPanel implements NavigationEventListener, + HyperlinkListener { + + private static final long serialVersionUID = -5322988066178102320L; + + private static final Logger log = LoggerFactory + .getLogger(ContentPane.class); + private Navigator navigator; + private Resource currentResource; + private JEditorPane editorPane; + private JScrollPane scrollPane; + private HTMLDocumentFactory htmlDocumentFactory; + + public ContentPane(Navigator navigator) { + super(new GridLayout(1, 0)); + this.scrollPane = (JScrollPane) add(new JScrollPane()); + this.scrollPane.addKeyListener(new KeyListener() { + + @Override + public void keyTyped(KeyEvent e) { + // TODO Auto-generated method stub + + } + + @Override + public void keyReleased(KeyEvent e) { + // TODO Auto-generated method stub + + } + + @Override + public void keyPressed(KeyEvent e) { + if (e.getKeyCode() == KeyEvent.VK_DOWN) { + Point viewPosition = scrollPane.getViewport().getViewPosition(); + int newY = (int) (viewPosition.getY() + 10); + scrollPane.getViewport().setViewPosition(new Point((int) viewPosition.getX(), newY)); + } + } + }); + this.scrollPane.addMouseWheelListener(new MouseWheelListener() { + + private boolean gotoNextPage = false; + private boolean gotoPreviousPage = false; + + @Override + public void mouseWheelMoved(MouseWheelEvent e) { + int notches = e.getWheelRotation(); + int increment = scrollPane.getVerticalScrollBar().getUnitIncrement(1); + if (notches < 0) { + Point viewPosition = 
scrollPane.getViewport().getViewPosition(); + if (viewPosition.getY() - increment < 0) { + if (gotoPreviousPage) { + gotoPreviousPage = false; + ContentPane.this.navigator.gotoPreviousSpineSection(-1, ContentPane.this); + } else { + gotoPreviousPage = true; + scrollPane.getViewport().setViewPosition(new Point((int) viewPosition.getX(), 0)); + } + } + } else { + // only move to the next page if we are exactly at the bottom of the current page + Point viewPosition = scrollPane.getViewport().getViewPosition(); + int viewportHeight = scrollPane.getViewport().getHeight(); + int scrollMax = scrollPane.getVerticalScrollBar().getMaximum(); + if (viewPosition.getY() + viewportHeight + increment > scrollMax) { + if (gotoNextPage) { + gotoNextPage = false; + ContentPane.this.navigator.gotoNextSpineSection(ContentPane.this); + } else { + gotoNextPage = true; + int newY = scrollMax - viewportHeight; + scrollPane.getViewport().setViewPosition(new Point((int) viewPosition.getX(), newY)); + } + } + } + } + }); + this.navigator = navigator; + navigator.addNavigationEventListener(this); + this.editorPane = createJEditorPane(); + scrollPane.getViewport().add(editorPane); + this.htmlDocumentFactory = new HTMLDocumentFactory(navigator, editorPane.getEditorKit()); + initBook(navigator.getBook()); + } + + private void initBook(Book book) { + if (book == null) { + return; + } + htmlDocumentFactory.init(book); + displayPage(book.getCoverPage()); + } + + + + /** + * Whether the given searchString matches any of the possibleValues. + * + * @param searchString + * @param possibleValues + * @return Whether the given searchString matches any of the possibleValues. + */ + private static boolean matchesAny(String searchString, String... 
possibleValues) { + for (int i = 0; i < possibleValues.length; i++) { + String attributeValue = possibleValues[i]; + if (StringUtils.isNotBlank(attributeValue) && (attributeValue.equals(searchString))) { + return true; + } + } + return false; + } + + + /** + * Scrolls the editorPane to the startOffset of the current element in the elementIterator + * + * @param requestFragmentId + * @param attributeValue + * @param editorPane + * @param elementIterator + * + * @return whether it was a match and we jumped there. + */ + private static void scrollToElement(JEditorPane editorPane, HTMLDocument.Iterator elementIterator) { + try { + Rectangle rectangle = editorPane.modelToView(elementIterator.getStartOffset()); + if (rectangle == null) { + return; + } + // the view is visible, scroll it to the + // center of the current visible area. + Rectangle visibleRectangle = editorPane.getVisibleRect(); + // r.y -= (vis.height / 2); + rectangle.height = visibleRectangle.height; + editorPane.scrollRectToVisible(rectangle); + } catch (BadLocationException e) { + log.error(e.getMessage()); + } + } + + + /** + * Scrolls the editorPane to the first anchor element whose id or name matches the given fragmentId. 
+ * + * @param fragmentId + */ + private void scrollToNamedAnchor(String fragmentId) { + HTMLDocument doc = (HTMLDocument) editorPane.getDocument(); + for (HTMLDocument.Iterator iter = doc.getIterator(HTML.Tag.A); iter.isValid(); iter.next()) { + AttributeSet attributes = iter.getAttributes(); + if (matchesAny(fragmentId, (String) attributes.getAttribute(HTML.Attribute.NAME), + (String) attributes.getAttribute(HTML.Attribute.ID))) { + scrollToElement(editorPane, iter); + break; + } + } + } + + private JEditorPane createJEditorPane() { + JEditorPane editorPane = new JEditorPane(); + editorPane.setBackground(Color.white); + editorPane.setEditable(false); + HTMLEditorKit htmlKit = new HTMLEditorKit(); + // StyleSheet myStyleSheet = new StyleSheet(); + // String normalTextStyle = "font-size: 12px, font-family: georgia"; + // myStyleSheet.addRule("body {" + normalTextStyle + "}"); + // myStyleSheet.addRule("p {" + normalTextStyle + "}"); + // myStyleSheet.addRule("div {" + normalTextStyle + "}"); + // htmlKit.setStyleSheet(myStyleSheet); + editorPane.setEditorKit(htmlKit); + editorPane.addHyperlinkListener(this); + editorPane.addKeyListener(new KeyListener() { + + @Override + public void keyTyped(KeyEvent keyEvent) { + } + + @Override + public void keyReleased(KeyEvent e) { + // TODO Auto-generated method stub + + } + + @Override + public void keyPressed(KeyEvent keyEvent) { + if (keyEvent.getKeyCode() == KeyEvent.VK_RIGHT) { + navigator.gotoNextSpineSection(ContentPane.this); + } else if (keyEvent.getKeyCode() == KeyEvent.VK_LEFT) { + navigator.gotoPreviousSpineSection(ContentPane.this); +// } else if (keyEvent.getKeyCode() == KeyEvent.VK_UP) { +// ContentPane.this.gotoPreviousPage(); + } else if (keyEvent.getKeyCode() == KeyEvent.VK_SPACE) { +// || (keyEvent.getKeyCode() == KeyEvent.VK_DOWN)) { + ContentPane.this.gotoNextPage(); + } + } + }); + return editorPane; + } + + public void displayPage(Resource resource) { + displayPage(resource, 0); + } + + public void 
displayPage(Resource resource, int sectionPos) { + if (resource == null) { + return; + } + try { + HTMLDocument document = htmlDocumentFactory.getDocument(resource); + if (document == null) { + return; + } + currentResource = resource; + editorPane.setDocument(document); + scrollToCurrentPosition(sectionPos); + } catch (Exception e) { + log.error("When reading resource " + resource.getId() + "(" + + resource.getHref() + ") :" + e.getMessage(), e); + } + } + + private void scrollToCurrentPosition(int sectionPos) { + if (sectionPos < 0) { + editorPane.setCaretPosition(editorPane.getDocument().getLength()); + } else { + editorPane.setCaretPosition(sectionPos); + } + if (sectionPos == 0) { + scrollPane.getViewport().setViewPosition(new Point(0, 0)); + } else if (sectionPos < 0) { + int viewportHeight = scrollPane.getViewport().getHeight(); + int scrollMax = scrollPane.getVerticalScrollBar().getMaximum(); + scrollPane.getViewport().setViewPosition(new Point(0, scrollMax - viewportHeight)); + } + } + + public void hyperlinkUpdate(HyperlinkEvent event) { + if (event.getEventType() != HyperlinkEvent.EventType.ACTIVATED) { + return; + } + final URL url = event.getURL(); + if (url.getProtocol().toLowerCase().startsWith("http") && !"".equals(url.getHost())) { + try { + DesktopUtil.launchBrowser(event.getURL()); + return; + } catch (DesktopUtil.BrowserLaunchException ex) { + log.warn("Couldn't launch system web browser.", ex); + } + } + String resourceHref = calculateTargetHref(event.getURL()); + if (resourceHref.startsWith("#")) { + scrollToNamedAnchor(resourceHref.substring(1)); + return; + } + + Resource resource = navigator.getBook().getResources().getByHref(resourceHref); + if (resource == null) { + log.error("Resource with url " + resourceHref + " not found"); + } else { + navigator.gotoResource(resource, this); + } + } + + public void gotoPreviousPage() { + Point viewPosition = scrollPane.getViewport().getViewPosition(); + if (viewPosition.getY() <= 0) { + 
navigator.gotoPreviousSpineSection(this); + return; + } + int viewportHeight = scrollPane.getViewport().getHeight(); + int newY = (int) viewPosition.getY(); + newY -= viewportHeight; + newY = Math.max(0, newY - viewportHeight); + scrollPane.getViewport().setViewPosition( + new Point((int) viewPosition.getX(), newY)); + } + + public void gotoNextPage() { + Point viewPosition = scrollPane.getViewport().getViewPosition(); + int viewportHeight = scrollPane.getViewport().getHeight(); + int scrollMax = scrollPane.getVerticalScrollBar().getMaximum(); + if (viewPosition.getY() + viewportHeight >= scrollMax) { + navigator.gotoNextSpineSection(this); + return; + } + int newY = ((int) viewPosition.getY()) + viewportHeight; + scrollPane.getViewport().setViewPosition( + new Point((int) viewPosition.getX(), newY)); + } + + + /** + * Transforms a link generated by a click on a link in a document to a resource href. + * Property handles http encoded spaces and such. + * + * @param clickUrl + * @return a link generated by a click on a link transformed into a document to a resource href. 
+ */ + private String calculateTargetHref(URL clickUrl) { + String resourceHref = clickUrl.toString(); + try { + resourceHref = URLDecoder.decode(resourceHref, + Constants.CHARACTER_ENCODING); + } catch (UnsupportedEncodingException e) { + log.error(e.getMessage()); + } + resourceHref = resourceHref.substring(ImageLoaderCache.IMAGE_URL_PREFIX + .length()); + + if (resourceHref.startsWith("#")) { + return resourceHref; + } + if (currentResource != null + && StringUtils.isNotBlank(currentResource.getHref())) { + int lastSlashPos = currentResource.getHref().lastIndexOf('/'); + if (lastSlashPos >= 0) { + resourceHref = currentResource.getHref().substring(0, + lastSlashPos + 1) + + resourceHref; + } + } + return resourceHref; + } + + + public void navigationPerformed(NavigationEvent navigationEvent) { + if (navigationEvent.isBookChanged()) { + initBook(navigationEvent.getCurrentBook()); + } else { + if (navigationEvent.isResourceChanged()) { + displayPage(navigationEvent.getCurrentResource(), + navigationEvent.getCurrentSectionPos()); + } else if (navigationEvent.isSectionPosChanged()) { + editorPane.setCaretPosition(navigationEvent.getCurrentSectionPos()); + } + if (StringUtils.isNotBlank(navigationEvent.getCurrentFragmentId())) { + scrollToNamedAnchor(navigationEvent.getCurrentFragmentId()); + } + } + } + + +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/GuidePane.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/GuidePane.java new file mode 100644 index 00000000..23d7e99f --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/GuidePane.java @@ -0,0 +1,74 @@ +package nl.siegmann.epublib.viewer; + +import java.util.ArrayList; +import java.util.List; + +import javax.swing.JScrollPane; +import javax.swing.JTable; +import javax.swing.event.ListSelectionEvent; +import javax.swing.event.ListSelectionListener; + +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import 
nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Guide; +import nl.siegmann.epublib.domain.GuideReference; + +/** + * Creates a Panel for navigating a Book via its Guide + * + * @author paul + * + */ +public class GuidePane extends JScrollPane implements NavigationEventListener { + + private static final long serialVersionUID = -8988054938907109295L; + private Navigator navigator; + + public GuidePane(Navigator navigator) { + this.navigator = navigator; + navigator.addNavigationEventListener(this); + initBook(navigator.getBook()); + } + + private void initBook(Book book) { + if (book == null) { + return; + } + getViewport().removeAll(); + JTable table = new JTable( + createTableData(navigator.getBook().getGuide()), + new String[] {"", ""}); +// table.setEnabled(false); + table.setFillsViewportHeight(true); + table.getSelectionModel().addListSelectionListener(new ListSelectionListener() { + + @Override + public void valueChanged(ListSelectionEvent e) { + if (navigator.getBook() == null) { + return; + } + int guideIndex = e.getFirstIndex(); + GuideReference guideReference = navigator.getBook().getGuide().getReferences().get(guideIndex); + navigator.gotoResource(guideReference.getResource(), GuidePane.this); + } + }); + getViewport().add(table); + } + + private Object[][] createTableData(Guide guide) { + List result = new ArrayList(); + for (GuideReference guideReference: guide.getReferences()) { + result.add(new String[] {guideReference.getType(), guideReference.getTitle()}); + } + return result.toArray(new Object[result.size()][2]); + } + + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + if (navigationEvent.isBookChanged()) { + initBook(navigationEvent.getCurrentBook()); + } + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/HTMLDocumentFactory.java 
b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/HTMLDocumentFactory.java new file mode 100644 index 00000000..3af997b0 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/HTMLDocumentFactory.java @@ -0,0 +1,220 @@ +package nl.siegmann.epublib.viewer; + +import java.io.Reader; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import javax.swing.text.EditorKit; +import javax.swing.text.html.HTMLDocument; +import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.HTMLEditorKit.Parser; + + +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; + +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Creates swing HTML documents from resources. + * + * Between books the init(Book) function needs to be called in order for images to appear correctly. + * + * @author paul.siegmann + * + */ +public class HTMLDocumentFactory implements NavigationEventListener { + + private static final Logger log = LoggerFactory.getLogger(HTMLDocumentFactory.class); + + // After opening the book we wait a while before we starting indexing the rest of the pages. + // This way the book opens, everything settles down, and while the user looks at the cover page + // the rest of the book is indexed. 
+ public static final int DOCUMENT_CACHE_INDEXER_WAIT_TIME = 500; + + private ImageLoaderCache imageLoaderCache; + private ReentrantReadWriteLock cacheLock = new ReentrantReadWriteLock(); + private Lock cacheReadLock = cacheLock.readLock(); + private Lock cacheWriteLock = cacheLock.writeLock(); + private Map documentCache = new HashMap(); + private MyHtmlEditorKit editorKit; + + public HTMLDocumentFactory(Navigator navigator, EditorKit editorKit) { + this.editorKit = new MyHtmlEditorKit((HTMLEditorKit) editorKit); + this.imageLoaderCache = new ImageLoaderCache(navigator); + init(navigator.getBook()); + navigator.addNavigationEventListener(this); + } + + public void init(Book book) { + if (book == null) { + return; + } + imageLoaderCache.initBook(book); + initDocumentCache(book); + } + + private void putDocument(Resource resource, HTMLDocument document) { + if (document == null) { + return; + } + cacheWriteLock.lock(); + try { + documentCache.put(resource.getHref(), document); + } finally { + cacheWriteLock.unlock(); + } + } + + + /** + * Get the HTMLDocument representation of the resource. + * If the resource is not an XHTML resource then it returns null. + * It first tries to get the document from the cache. + * If the document is not in the cache it creates a document from + * the resource and adds it to the cache. + * + * @param resource + * @return the HTMLDocument representation of the resource. 
+ */ + public HTMLDocument getDocument(Resource resource) { + HTMLDocument document = null; + + // try to get the document from the cache + cacheReadLock.lock(); + try { + document = documentCache.get(resource.getHref()); + } finally { + cacheReadLock.unlock(); + } + + // document was not in the cache, try to create it and add it to the cache + if (document == null) { + document = createDocument(resource); + putDocument(resource, document); + } + + // initialize the imageLoader for the specific document + if (document != null) { + imageLoaderCache.initImageLoader(document); + } + + return document; + } + + private String stripHtml(String input) { + String result = removeControlTags(input); +// result = result.replaceAll("]*http-equiv=\"Content-Type\"[^>]*>", ""); + return result; + } + + /** + * Quick and dirty stripper of all <?...> and <!...> tags as + * these confuse the html viewer. + * + * @param input + * @return the input stripped of control characters + */ + private static String removeControlTags(String input) { + StringBuilder result = new StringBuilder(); + boolean inControlTag = false; + for (int i = 0; i < input.length(); i++) { + char c = input.charAt(i); + if (inControlTag) { + if (c == '>') { + inControlTag = false; + } + } else if (c == '<' // look for <! or <? + && i < input.length() - 1 + && (input.charAt(i + 1) == '!' || input.charAt(i + 1) == '?')) { + inControlTag = true; + } else { + result.append(c); + } + } + return result.toString(); + } + + /** + * Creates a swing HTMLDocument from the given resource. + * + * If the resources is not of type XHTML then null is returned. + * + * @param resource + * @return a swing HTMLDocument created from the given resource. 
+ */ + private HTMLDocument createDocument(Resource resource) { + HTMLDocument result = null; + if (resource.getMediaType() != MediatypeService.XHTML) { + return result; + } + try { + HTMLDocument document = (HTMLDocument) editorKit.createDefaultDocument(); + MyParserCallback parserCallback = new MyParserCallback(document.getReader(0)); + Parser parser = editorKit.getParser(); + String pageContent = IOUtils.toString(resource.getReader()); + pageContent = stripHtml(pageContent); + document.remove(0, document.getLength()); + Reader contentReader = new StringReader(pageContent); + parser.parse(contentReader, parserCallback, true); + parserCallback.flush(); + result = document; + } catch (Exception e) { + log.error(e.getMessage()); + } + return result; + } + + private void initDocumentCache(Book book) { + if (book == null) { + return; + } + documentCache.clear(); + Thread documentIndexerThread = new Thread(new DocumentIndexer(book), "DocumentIndexer"); + documentIndexerThread.setPriority(Thread.MIN_PRIORITY); + documentIndexerThread.start(); + +// addAllDocumentsToCache(book); + } + + + private class DocumentIndexer implements Runnable { + private Book book; + + public DocumentIndexer(Book book) { + this.book = book; + } + @Override + public void run() { + try { + Thread.sleep(DOCUMENT_CACHE_INDEXER_WAIT_TIME); + } catch (InterruptedException e) { + log.error(e.getMessage()); + } + addAllDocumentsToCache(book); + } + + private void addAllDocumentsToCache(Book book) { + for (Resource resource: book.getResources().getAll()) { + getDocument(resource); + } + } + } + + + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + if (navigationEvent.isBookChanged() || navigationEvent.isResourceChanged()) { + imageLoaderCache.clear(); + } + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ImageLoaderCache.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ImageLoaderCache.java new file mode 100644 index 
00000000..2ca5a250 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ImageLoaderCache.java @@ -0,0 +1,175 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.Image; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Dictionary; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; + +import javax.imageio.ImageIO; +import javax.swing.text.html.HTMLDocument; + +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.util.CollectionUtil; +import org.apache.commons.io.FilenameUtils; + +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is a trick to get the JEditorKit to load its images from the epub file instead of from the given url. + * + * This class is installed as the JEditorPane's image cache. + * Whenever it is requested an image it will try to load that image from the epub. + * + * Can be shared by multiple documents but can only be used by one document at the time because of the currentFolder issue. 
+ * + * @author paul + * + */ +class ImageLoaderCache extends Dictionary { + + public static final String IMAGE_URL_PREFIX = "http:/"; + + private static final Logger log = LoggerFactory.getLogger(ImageLoaderCache.class); + + private Map cache = new HashMap(); + private Book book; + private String currentFolder = ""; + private Navigator navigator; + + public ImageLoaderCache(Navigator navigator) { + this.navigator = navigator; + initBook(navigator.getBook()); + } + + public void initBook(Book book) { + if (book == null) { + return; + } + this.book = book; + cache.clear(); + this.currentFolder = ""; + } + + public void setContextResource(Resource resource) { + if (resource == null) { + return; + } + if (StringUtils.isNotBlank(resource.getHref())) { + int lastSlashPos = resource.getHref().lastIndexOf('/'); + if (lastSlashPos >= 0) { + this.currentFolder = resource.getHref().substring(0, lastSlashPos + 1); + } + } + } + + public void initImageLoader(HTMLDocument document) { + try { + document.setBase(new URL(ImageLoaderCache.IMAGE_URL_PREFIX)); + } catch (MalformedURLException e) { + log.error(e.getMessage()); + } + setContextResource(navigator.getCurrentResource()); + document.getDocumentProperties().put("imageCache", this); + } + + + private String getResourceHref(String requestUrl) { + String resourceHref = requestUrl.toString().substring(IMAGE_URL_PREFIX.length()); + resourceHref = currentFolder + resourceHref; + resourceHref = FilenameUtils.normalize(resourceHref); + // normalize uses the SYSTEM_SEPARATOR, which on windows is a '\' + // replace with '/' to make it href '/' + resourceHref = resourceHref.replaceAll("\\\\", "/"); + return resourceHref; + } + + /** + * Create an Image from the data of the given resource. 
+ * + * @param imageResource + * @return + */ + private Image createImage(Resource imageResource) { + Image result = null; + try { + result = ImageIO.read(imageResource.getInputStream()); + } catch (IOException e) { + log.error(e.getMessage()); + } + return result; + } + + public Image get(Object key) { + if (book == null) { + return null; + } + + String imageURL = key.toString(); + + // see if the image is already in the cache + Image result = cache.get(imageURL); + if (result != null) { + return result; + } + + // get the image resource href + String resourceHref = getResourceHref(imageURL); + + // find the image resource in the book resources + Resource imageResource = book.getResources().getByHref(resourceHref); + if (imageResource == null) { + return result; + } + + // create an image from the resource and add it to the cache + result = createImage(imageResource); + if (result != null) { + cache.put(imageURL.toString(), result); + } + + return result; + } + + public int size() { + return cache.size(); + } + + public boolean isEmpty() { + return cache.isEmpty(); + } + + public Enumeration keys() { + return CollectionUtil.createEnumerationFromIterator(cache.keySet().iterator()); + } + + public Enumeration elements() { + return CollectionUtil.createEnumerationFromIterator(cache.values().iterator()); + } + + public Image put(String key, Image value) { + return cache.put(key.toString(), (Image) value); + } + + public Image remove(Object key) { + return cache.remove(key); + } + + /** + * Clears the image cache. 
+ */ + public void clear() { + cache.clear(); + } + + public String toString() { + return cache.toString(); + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/MetadataPane.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/MetadataPane.java new file mode 100644 index 00000000..da439835 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/MetadataPane.java @@ -0,0 +1,151 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.BorderLayout; +import java.awt.GridLayout; +import java.awt.Image; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import javax.imageio.ImageIO; +import javax.swing.ImageIcon; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JTable; +import javax.swing.table.AbstractTableModel; +import javax.swing.table.TableModel; + +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Metadata; +import nl.siegmann.epublib.domain.Resource; + +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MetadataPane extends JPanel implements NavigationEventListener { + + private static final Logger log = LoggerFactory.getLogger(MetadataPane.class); + + private static final long serialVersionUID = -2810193923996466948L; + private JScrollPane scrollPane; + + public MetadataPane(Navigator navigator) { + super(new GridLayout(1, 0)); + this.scrollPane = (JScrollPane) add(new JScrollPane()); + navigator.addNavigationEventListener(this); + initBook(navigator.getBook()); + } + + private void initBook(Book book) { + if (book == null) { + return; + } + JTable table = new JTable( + createTableData(book.getMetadata()), + new String[] 
{"", ""}); + table.setEnabled(false); + table.setFillsViewportHeight(true); + JPanel contentPanel = new JPanel(new BorderLayout(0, 10)); + contentPanel.add(table, BorderLayout.CENTER); + setCoverImage(contentPanel, book); + + scrollPane.getViewport().removeAll(); + scrollPane.getViewport().add(contentPanel); + } + + private void setCoverImage(JPanel contentPanel, Book book) { + if (book == null) { + return; + } + Resource coverImageResource = book.getCoverImage(); + if (coverImageResource == null) { + return; + } + try { + Image image = ImageIO.read(coverImageResource.getInputStream()); + if (image == null) { + log.error("Unable to load cover image from book"); + return; + } + image = image.getScaledInstance(200, -1, Image.SCALE_SMOOTH); + JLabel label = new JLabel(new ImageIcon(image)); +// label.setSize(100, 100); + contentPanel.add(label, BorderLayout.NORTH); + } catch (IOException e) { + log.error("Unable to load cover image from book", e.getMessage()); + } + } + + private Object[][] createTableData(Metadata metadata) { + List result = new ArrayList(); + addStrings(metadata.getIdentifiers(), "Identifier", result); + addStrings(metadata.getTitles(), "Title", result); + addStrings(metadata.getAuthors(), "Author", result); + result.add(new String[] {"Language", metadata.getLanguage()}); + addStrings(metadata.getContributors(), "Contributor", result); + addStrings(metadata.getDescriptions(), "Description", result); + addStrings(metadata.getPublishers(), "Publisher", result); + addStrings(metadata.getDates(), "Date", result); + addStrings(metadata.getSubjects(), "Subject", result); + addStrings(metadata.getTypes(), "Type", result); + addStrings(metadata.getRights(), "Rights", result); + result.add(new String[] {"Format", metadata.getFormat()}); + return result.toArray(new Object[result.size()][2]); + } + + private void addStrings(List values, String label, List result) { + boolean labelWritten = false; + for (int i = 0; i < values.size(); i++) { + Object value = 
values.get(i); + if (value == null) { + continue; + } + String valueString = String.valueOf(value); + if (StringUtils.isBlank(valueString)) { + continue; + } + + String currentLabel = ""; + if (! labelWritten) { + currentLabel = label; + labelWritten = true; + } + result.add(new String[] {currentLabel, valueString}); + } + + } + + private TableModel createTableModel(Navigator navigator) { + return new AbstractTableModel() { + + @Override + public Object getValueAt(int rowIndex, int columnIndex) { + // TODO Auto-generated method stub + return null; + } + + @Override + public int getRowCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getColumnCount() { + return 2; + } + }; + } + + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + if (navigationEvent.isBookChanged()) { + initBook(navigationEvent.getCurrentBook()); + } + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/MyHtmlEditorKit.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/MyHtmlEditorKit.java new file mode 100644 index 00000000..853ebe6f --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/MyHtmlEditorKit.java @@ -0,0 +1,156 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.Cursor; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Reader; +import java.io.Writer; + +import javax.accessibility.AccessibleContext; +import javax.swing.Action; +import javax.swing.JEditorPane; +import javax.swing.text.BadLocationException; +import javax.swing.text.Caret; +import javax.swing.text.Document; +import javax.swing.text.Element; +import javax.swing.text.MutableAttributeSet; +import javax.swing.text.ViewFactory; +import javax.swing.text.html.HTMLDocument; +import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.StyleSheet; +import javax.swing.text.html.HTML.Tag; +import javax.swing.text.html.HTMLEditorKit.Parser; + 
/**
 * Wraps a HTMLEditorKit so we can make getParser() public.
 *
 * All other calls are forwarded to the wrapped kit. getParser() itself goes to
 * the superclass implementation.
 *
 * @author paul.siegmann
 *
 */
class MyHtmlEditorKit extends HTMLEditorKit {
    private final HTMLEditorKit htmlEditorKit;

    /**
     * @param htmlEditorKit the kit that all forwarded calls are sent to
     */
    public MyHtmlEditorKit(HTMLEditorKit htmlEditorKit) {
        this.htmlEditorKit = htmlEditorKit;
    }

    /**
     * Publicly accessible version of the inherited parser accessor.
     *
     * @return the parser supplied by the superclass
     */
    @Override
    public Parser getParser() {
        return super.getParser();
    }

    @Override
    public int hashCode() {
        return htmlEditorKit.hashCode();
    }

    @Override
    public Element getCharacterAttributeRun() {
        return htmlEditorKit.getCharacterAttributeRun();
    }

    @Override
    public Caret createCaret() {
        return htmlEditorKit.createCaret();
    }

    @Override
    public void read(InputStream in, Document doc, int pos)
            throws IOException, BadLocationException {
        htmlEditorKit.read(in, doc, pos);
    }

    @Override
    public boolean equals(Object obj) {
        return htmlEditorKit.equals(obj);
    }

    @Override
    public void write(OutputStream out, Document doc, int pos, int len)
            throws IOException, BadLocationException {
        htmlEditorKit.write(out, doc, pos, len);
    }

    @Override
    public String getContentType() {
        return htmlEditorKit.getContentType();
    }

    @Override
    public ViewFactory getViewFactory() {
        return htmlEditorKit.getViewFactory();
    }

    @Override
    public Document createDefaultDocument() {
        return htmlEditorKit.createDefaultDocument();
    }

    @Override
    public void read(Reader in, Document doc, int pos) throws IOException,
            BadLocationException {
        htmlEditorKit.read(in, doc, pos);
    }

    @Override
    public void insertHTML(HTMLDocument doc, int offset, String html,
            int popDepth, int pushDepth, Tag insertTag)
            throws BadLocationException, IOException {
        htmlEditorKit.insertHTML(doc, offset, html, popDepth, pushDepth,
                insertTag);
    }

    @Override
    public String toString() {
        return htmlEditorKit.toString();
    }

    @Override
    public void write(Writer out, Document doc, int pos, int len)
            throws IOException, BadLocationException {
        htmlEditorKit.write(out, doc, pos, len);
    }

    @Override
    public void install(JEditorPane c) {
        htmlEditorKit.install(c);
    }

    @Override
    public void deinstall(JEditorPane c) {
        htmlEditorKit.deinstall(c);
    }

    @Override
    public void setStyleSheet(StyleSheet s) {
        htmlEditorKit.setStyleSheet(s);
    }

    @Override
    public StyleSheet getStyleSheet() {
        return htmlEditorKit.getStyleSheet();
    }

    @Override
    public Action[] getActions() {
        return htmlEditorKit.getActions();
    }

    @Override
    public MutableAttributeSet getInputAttributes() {
        return htmlEditorKit.getInputAttributes();
    }

    @Override
    public void setDefaultCursor(Cursor cursor) {
        htmlEditorKit.setDefaultCursor(cursor);
    }

    @Override
    public Cursor getDefaultCursor() {
        return htmlEditorKit.getDefaultCursor();
    }

    @Override
    public void setLinkCursor(Cursor cursor) {
        htmlEditorKit.setLinkCursor(cursor);
    }

    @Override
    public Cursor getLinkCursor() {
        return htmlEditorKit.getLinkCursor();
    }

    @Override
    public boolean isAutoFormSubmission() {
        return htmlEditorKit.isAutoFormSubmission();
    }

    @Override
    public void setAutoFormSubmission(boolean isAuto) {
        htmlEditorKit.setAutoFormSubmission(isAuto);
    }

    @Override
    public Object clone() {
        return htmlEditorKit.clone();
    }

    @Override
    public AccessibleContext getAccessibleContext() {
        return htmlEditorKit.getAccessibleContext();
    }

}

/**
 * Parser callback that forwards every event to another callback and, on the way,
 * records the href of each stylesheet link (rel="stylesheet", type="text/css") it sees.
 */
class MyParserCallback extends ParserCallback {
    private final ParserCallback parserCallback;
    private List<String> stylesheetHrefs = new ArrayList<String>();

    /**
     * @param parserCallback the callback that receives all forwarded events
     */
    public MyParserCallback(ParserCallback parserCallback) {
        this.parserCallback = parserCallback;
    }

    /**
     * @return the hrefs of the stylesheet links seen so far, in the order they were encountered
     */
    public List<String> getStylesheetHrefs() {
        return stylesheetHrefs;
    }

    public void setStylesheetHrefs(List<String> stylesheetHrefs) {
        this.stylesheetHrefs = stylesheetHrefs;
    }

    private boolean isStylesheetLink(Tag tag, MutableAttributeSet attributes) {
        return ((tag == Tag.LINK)
                && (attributes.containsAttribute(HTML.Attribute.REL, "stylesheet"))
                && (attributes.containsAttribute(HTML.Attribute.TYPE, "text/css")));
    }


    /**
     * Records the href of the tag when it is a stylesheet link.
     * A stylesheet link without an href attribute is skipped.
     */
    private void handleStylesheet(Tag tag, MutableAttributeSet attributes) {
        if (! isStylesheetLink(tag, attributes)) {
            return;
        }
        Object href = attributes.getAttribute(HTML.Attribute.HREF);
        if (href != null) {
            stylesheetHrefs.add(href.toString());
        }
    }

    @Override
    public int hashCode() {
        return parserCallback.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        return parserCallback.equals(obj);
    }

    @Override
    public String toString() {
        return parserCallback.toString();
    }

    @Override
    public void flush() throws BadLocationException {
        parserCallback.flush();
    }

    @Override
    public void handleText(char[] data, int pos) {
        parserCallback.handleText(data, pos);
    }

    @Override
    public void handleComment(char[] data, int pos) {
        parserCallback.handleComment(data, pos);
    }

    @Override
    public void handleStartTag(Tag t, MutableAttributeSet a, int pos) {
        handleStylesheet(t, a);
        parserCallback.handleStartTag(t, a, pos);
    }

    @Override
    public void handleEndTag(Tag t, int pos) {
        parserCallback.handleEndTag(t, pos);
    }

    @Override
    public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos) {
        handleStylesheet(t, a);
        parserCallback.handleSimpleTag(t, a, pos);
    }

    @Override
    public void handleError(String errorMsg, int pos) {
        parserCallback.handleError(errorMsg, pos);
    }

    @Override
    public void handleEndOfLineString(String eol) {
        parserCallback.handleEndOfLineString(eol);
    }
}
b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/NavigationBar.java new file mode 100644 index 00000000..367fc625 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/NavigationBar.java @@ -0,0 +1,178 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.BorderLayout; +import java.awt.Dimension; +import java.awt.Font; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.KeyEvent; +import java.awt.event.KeyListener; + +import javax.swing.JButton; +import javax.swing.JPanel; +import javax.swing.JTextField; +import javax.swing.JToolBar; + +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.NavigationHistory; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.search.SearchIndex; +import nl.siegmann.epublib.search.SearchResult; +import nl.siegmann.epublib.search.SearchResults; +import nl.siegmann.epublib.util.ToolsResourceUtil; + +/** + * A toolbar that contains the history back and forward buttons and the page title. 
+ * + * @author paul.siegmann + * + */ +public class NavigationBar extends JToolBar implements NavigationEventListener { + + /** + * + */ + private static final long serialVersionUID = 1166410773448311544L; + private JTextField titleField; + private JTextField searchField; + private final NavigationHistory navigationHistory; + private Navigator navigator; + private SearchIndex searchIndex = new SearchIndex(); + private String previousSearchTerm = null; + private int searchResultIndex = -1; + private SearchResults searchResults; + + public NavigationBar(Navigator navigator) { + this.navigationHistory = new NavigationHistory(navigator); + this.navigator = navigator; + navigator.addNavigationEventListener(this); + addHistoryButtons(); + titleField = (JTextField) add(new JTextField()); + addSearchButtons(); + initBook(navigator.getBook()); + } + + private void initBook(Book book) { + if (book == null) { + return; + } + searchIndex.initBook(book); + } + + private void addHistoryButtons() { + Font historyButtonFont = new Font("SansSerif", Font.BOLD, 24); + JButton previousButton = ViewerUtil.createButton("history-previous", "<="); + previousButton.setFont(historyButtonFont); +// previousButton.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_R, Event.CTRL_MASK)); + + previousButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + navigationHistory.move(-1); + } + }); + + add(previousButton); + + JButton nextButton = ViewerUtil.createButton("history-next", "=>"); + nextButton.setFont(historyButtonFont); + nextButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + navigationHistory.move(1); + } + }); + add(nextButton); + } + + private void doSearch(int move) { + String searchTerm = searchField.getText(); + if (searchTerm.equals(previousSearchTerm)) { + searchResultIndex += move; + } else { + searchResults = searchIndex.doSearch(searchTerm); + previousSearchTerm = 
searchTerm; + searchResultIndex = 0; + } + if (searchResultIndex < 0) { + searchResultIndex = searchResults.size() - 1; + } else if (searchResultIndex >= searchResults.size()) { + searchResultIndex = 0; + } + if (! searchResults.isEmpty()) { + SearchResult searchResult = searchResults.getHits().get(searchResultIndex); + navigator.gotoResource(searchResult.getResource(), searchResult.getPagePos(), NavigationBar.this); + } + + } + + private void addSearchButtons() { + JPanel searchForm = new JPanel(new BorderLayout()); + searchForm.setPreferredSize(new Dimension(200, 28)); + Font historyButtonFont = new Font("SansSerif", Font.BOLD, 20); + JButton previousButton = ViewerUtil.createButton("search-previous", "<"); + previousButton.setFont(historyButtonFont); +// previousButton.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_R, Event.CTRL_MASK)); + + previousButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + doSearch(-1); + } + }); + + searchForm.add(previousButton, BorderLayout.WEST); + + searchField = new JTextField(); +// JPanel searchInput = new JPanel(); +// searchInput.add(new JLabel(ViewerUtil.createImageIcon("search-icon"))); +// searchInput.add(searchField); + searchField.setMinimumSize(new Dimension(100, 20)); + searchField.addKeyListener(new KeyListener() { + + @Override + public void keyTyped(KeyEvent keyEvent) { + } + + @Override + public void keyPressed(KeyEvent e) { + } + + @Override + public void keyReleased(KeyEvent keyEvent) { + if (keyEvent.getKeyCode() == KeyEvent.VK_ENTER) { + doSearch(1); + } + } + }); +// searchInput.setMinimumSize(new Dimension(140, 20)); + searchForm.add(searchField, BorderLayout.CENTER); + JButton nextButton = ViewerUtil.createButton("search-next", ">"); + nextButton.setFont(historyButtonFont); + nextButton.addActionListener(new ActionListener() { + + @Override + public void actionPerformed(ActionEvent e) { + doSearch(1); + } + }); + searchForm.add(nextButton, 
BorderLayout.EAST); + add(searchForm); + } + + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + if (navigationEvent.isBookChanged()) { + initBook(navigationEvent.getCurrentBook()); + } + if (navigationEvent.getCurrentResource() != null) { + String title = ToolsResourceUtil.getTitle(navigationEvent.getCurrentResource()); + titleField.setText(title); + } + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/SpineSlider.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/SpineSlider.java new file mode 100644 index 00000000..27983609 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/SpineSlider.java @@ -0,0 +1,69 @@ +package nl.siegmann.epublib.viewer; + +import javax.swing.JSlider; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; + +// package +class SpineSlider extends JSlider implements NavigationEventListener { + + /** + * + */ + private static final long serialVersionUID = 8436441824668551056L; + private final Navigator navigator; + + public SpineSlider(Navigator navigator) { + super(JSlider.HORIZONTAL); + this.navigator = navigator; + navigator.addNavigationEventListener(this); + setPaintLabels(false); + addChangeListener(new ChangeListener() { + public void stateChanged(ChangeEvent evt) { + JSlider slider = (JSlider) evt.getSource(); + int value = slider.getValue(); + SpineSlider.this.navigator.gotoSpineSection(value, SpineSlider.this); + } + }); + initBook(navigator.getBook()); + } + + private void initBook(Book book) { + if (book == null) { + return; + } + super.setMinimum(0); + super.setMaximum(book.getSpine().size() - 1); + super.setValue(0); +// setPaintTicks(true); + updateToolTip(); + } + 
+ private void updateToolTip() { + String tooltip = ""; + if (navigator.getCurrentSpinePos() >= 0 && navigator.getBook() != null) { + tooltip = String.valueOf(navigator.getCurrentSpinePos() + 1) + " / " + navigator.getBook().getSpine().size(); + } + setToolTipText(tooltip); + } + + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + updateToolTip(); + if (this == navigationEvent.getSource()) { + return; + } + + if (navigationEvent.isBookChanged()) { + initBook(navigationEvent.getCurrentBook()); + } else if (navigationEvent.isResourceChanged()) { + setValue(navigationEvent.getCurrentSpinePos()); + } + } + + } \ No newline at end of file diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/TableOfContentsPane.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/TableOfContentsPane.java new file mode 100644 index 00000000..5bd6631a --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/TableOfContentsPane.java @@ -0,0 +1,171 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.GridLayout; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JTree; +import javax.swing.tree.DefaultMutableTreeNode; +import javax.swing.tree.TreeNode; +import javax.swing.tree.TreePath; +import javax.swing.tree.TreeSelectionModel; + +import nl.siegmann.epublib.browsersupport.NavigationEvent; +import nl.siegmann.epublib.browsersupport.NavigationEventListener; +import nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.domain.TOCReference; + +import org.apache.commons.lang.StringUtils; + +/** + * Creates a JTree for navigating a Book via its Table of Contents. 
+ * + * @author paul + * + */ +public class TableOfContentsPane extends JPanel implements NavigationEventListener { + + private static final long serialVersionUID = 2277717264176049700L; + + private Map> href2treeNode = new HashMap>(); + private JScrollPane scrollPane; + private Navigator navigator; + private JTree tree; + + /** + * Creates a JTree that displays all the items in the table of contents from the book in SectionWalker. + * Also sets up a selectionListener that updates the SectionWalker when an item in the tree is selected. + * + * @param navigator + */ + public TableOfContentsPane(Navigator navigator) { + super(new GridLayout(1, 0)); + this.navigator = navigator; + navigator.addNavigationEventListener(this); + + this.scrollPane = new JScrollPane(); + add(scrollPane); + initBook(navigator.getBook()); + } + + /** + * Wrapper around a TOCReference that gives the TOCReference's title when toString() is called + * .createTableOfContentsTree + * @author paul + * + */ + private static class TOCItem { + private TOCReference tocReference; + + public TOCItem(TOCReference tocReference) { + super(); + this.tocReference = tocReference; + } + + public TOCReference getTOCReference() { + return tocReference; + } + + public String toString() { + return tocReference.getTitle(); + } + } + + private void addToHref2TreeNode(Resource resource, DefaultMutableTreeNode treeNode) { + if (resource == null || StringUtils.isBlank(resource.getHref())) { + return; + } + Collection treeNodes = href2treeNode.get(resource.getHref()); + if (treeNodes == null) { + treeNodes = new ArrayList(); + href2treeNode.put(resource.getHref(), treeNodes); + } + treeNodes.add(treeNode); + } + + private DefaultMutableTreeNode createTree(Book book) { + TOCItem rootTOCItem = new TOCItem(new TOCReference(book.getTitle(), book.getCoverPage())); + DefaultMutableTreeNode top = new DefaultMutableTreeNode(rootTOCItem); + addToHref2TreeNode(book.getCoverPage(), top); + createNodes(top, book); + return top; + } 
+ + private void createNodes(DefaultMutableTreeNode top, Book book) { + addNodesToParent(top, book.getTableOfContents().getTocReferences()); + } + + private void addNodesToParent(DefaultMutableTreeNode parent, List tocReferences) { + if (tocReferences == null) { + return; + } + for (TOCReference tocReference: tocReferences) { + TOCItem tocItem = new TOCItem(tocReference); + DefaultMutableTreeNode treeNode = new DefaultMutableTreeNode(tocItem); + addToHref2TreeNode(tocReference.getResource(), treeNode); + addNodesToParent(treeNode, tocReference.getChildren()); + parent.add(treeNode); + } + } + + + @Override + public void navigationPerformed(NavigationEvent navigationEvent) { + if (this == navigationEvent.getSource()) { + return; + } + if (navigationEvent.isBookChanged()) { + initBook(navigationEvent.getCurrentBook()); + return; + } + if (this.tree == null) { + return; + } + if (navigationEvent.getCurrentResource() == null) { + return; + } + Collection treeNodes = href2treeNode.get(navigationEvent.getCurrentResource().getHref()); + if (treeNodes == null || treeNodes.isEmpty()) { + if (navigationEvent.getCurrentSpinePos() == (navigationEvent.getOldSpinePos() + 1)) { + return; + } + tree.setSelectionPath(null); + return; + } + for (DefaultMutableTreeNode treeNode: treeNodes) { + TreeNode[] path = treeNode.getPath(); + TreePath treePath = new TreePath(path); + tree.setSelectionPath(treePath); + } + } + + private void initBook(Book book) { + if (book == null) { + return; + } + this.tree = new JTree(createTree(book)); + tree.addMouseListener(new MouseAdapter() { + + public void mouseClicked(MouseEvent me) { + DefaultMutableTreeNode node = (DefaultMutableTreeNode) tree.getLastSelectedPathComponent(); + TOCItem tocItem = (TOCItem) node.getUserObject(); + navigator.gotoResource(tocItem.getTOCReference().getResource(), tocItem.getTOCReference().getFragmentId(), TableOfContentsPane.this); + } + }); + + 
tree.getSelectionModel().setSelectionMode(TreeSelectionModel.SINGLE_TREE_SELECTION); +// tree.setRootVisible(false); + tree.setSelectionRow(0); + this.scrollPane.getViewport().removeAll(); + this.scrollPane.getViewport().add(tree); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ValueHolder.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ValueHolder.java new file mode 100644 index 00000000..a0307b7b --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ValueHolder.java @@ -0,0 +1,22 @@ +package nl.siegmann.epublib.viewer; + +public class ValueHolder { + + private T value; + + public ValueHolder() { + } + + public ValueHolder(T value) { + this.value = value; + } + + + public T getValue() { + return value; + } + + public void setValue(T value) { + this.value = value; + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/Viewer.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/Viewer.java new file mode 100644 index 00000000..eea823f8 --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/Viewer.java @@ -0,0 +1,342 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.BorderLayout; +import java.awt.Dimension; +import java.awt.Event; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.KeyEvent; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; + +import javax.swing.JFileChooser; +import javax.swing.JFrame; +import javax.swing.JMenu; +import javax.swing.JMenuBar; +import javax.swing.JMenuItem; +import javax.swing.JPanel; +import javax.swing.JSplitPane; +import javax.swing.KeyStroke; +import javax.swing.UIManager; +import javax.swing.filechooser.FileNameExtensionFilter; + +import nl.siegmann.epublib.browsersupport.NavigationHistory; +import 
nl.siegmann.epublib.browsersupport.Navigator; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.epub.BookProcessor; +import nl.siegmann.epublib.epub.BookProcessorPipeline; +import nl.siegmann.epublib.epub.EpubReader; +import nl.siegmann.epublib.epub.EpubWriter; + +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class Viewer { + + static final Logger log = LoggerFactory.getLogger(Viewer.class); + private final JFrame mainWindow; + private BrowseBar browseBar; + private JSplitPane mainSplitPane; + private JSplitPane leftSplitPane; + private JSplitPane rightSplitPane; + private Navigator navigator = new Navigator(); + private NavigationHistory browserHistory; + private BookProcessorPipeline epubCleaner = new BookProcessorPipeline(Collections.emptyList()); + + public Viewer(InputStream bookStream) { + mainWindow = createMainWindow(); + Book book; + try { + book = (new EpubReader()).readEpub(bookStream); + gotoBook(book); + } catch (IOException e) { + log.error(e.getMessage(), e); + } + } + + public Viewer(Book book) { + mainWindow = createMainWindow(); + gotoBook(book); + } + + private JFrame createMainWindow() { + JFrame result = new JFrame(); + result.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + + result.setJMenuBar(createMenuBar()); + + JPanel mainPanel = new JPanel(new BorderLayout()); + + leftSplitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT); + leftSplitPane.setTopComponent(new TableOfContentsPane(navigator)); + leftSplitPane.setBottomComponent(new GuidePane(navigator)); + leftSplitPane.setOneTouchExpandable(true); + leftSplitPane.setContinuousLayout(true); + leftSplitPane.setResizeWeight(0.8); + + rightSplitPane = new JSplitPane(JSplitPane.HORIZONTAL_SPLIT); + rightSplitPane.setOneTouchExpandable(true); + rightSplitPane.setContinuousLayout(true); + rightSplitPane.setResizeWeight(1.0); + ContentPane htmlPane = new ContentPane(navigator); + JPanel contentPanel = new 
JPanel(new BorderLayout()); + contentPanel.add(htmlPane, BorderLayout.CENTER); + this.browseBar = new BrowseBar(navigator, htmlPane); + contentPanel.add(browseBar, BorderLayout.SOUTH); + rightSplitPane.setLeftComponent(contentPanel); + rightSplitPane.setRightComponent(new MetadataPane(navigator)); + + mainSplitPane = new JSplitPane(JSplitPane.HORIZONTAL_SPLIT); + mainSplitPane.setLeftComponent(leftSplitPane); + mainSplitPane.setRightComponent(rightSplitPane); + mainSplitPane.setOneTouchExpandable(true); + mainSplitPane.setContinuousLayout(true); + mainSplitPane.setResizeWeight(0.0); + + mainPanel.add(mainSplitPane, BorderLayout.CENTER); + mainPanel.setPreferredSize(new Dimension(1000, 750)); + mainPanel.add(new NavigationBar(navigator), BorderLayout.NORTH); + + result.add(mainPanel); + result.pack(); + setLayout(Layout.TocContentMeta); + result.setVisible(true); + return result; } + + + private void gotoBook(Book book) { + mainWindow.setTitle(book.getTitle()); + navigator.gotoBook(book, this); + } + + private static String getText(String text) { + return text; + } + + private static JFileChooser createFileChooser(File startDir) { + if (startDir == null) { + startDir = new File(System.getProperty("user.home")); + if (! 
startDir.exists()) { + startDir = null; + } + } + JFileChooser fileChooser = new JFileChooser(startDir); + fileChooser.setAcceptAllFileFilterUsed(true); + fileChooser.setFileFilter(new FileNameExtensionFilter("EPub files", "epub")); + + return fileChooser; + } + + private JMenuBar createMenuBar() { + final JMenuBar menuBar = new JMenuBar(); + JMenu fileMenu = new JMenu(getText("File")); + menuBar.add(fileMenu); + JMenuItem openFileMenuItem = new JMenuItem(getText("Open")); + openFileMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_O, Event.CTRL_MASK)); + openFileMenuItem.addActionListener(new ActionListener() { + + private File previousDir; + + public void actionPerformed(ActionEvent e) { + JFileChooser fileChooser = createFileChooser(previousDir); + int returnVal = fileChooser.showOpenDialog(mainWindow); + if(returnVal != JFileChooser.APPROVE_OPTION) { + return; + } + File selectedFile = fileChooser.getSelectedFile(); + if (selectedFile == null) { + return; + } + if (! selectedFile.isDirectory()) { + previousDir = selectedFile.getParentFile(); + } + try { + Book book = (new EpubReader()).readEpub(new FileInputStream(selectedFile)); + gotoBook(book); + } catch (Exception e1) { + log.error(e1.getMessage(), e1); + } + } + }); + fileMenu.add(openFileMenuItem); + + JMenuItem saveFileMenuItem = new JMenuItem(getText("Save as ...")); + saveFileMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_S, Event.CTRL_MASK | Event.SHIFT_MASK)); + saveFileMenuItem.addActionListener(new ActionListener() { + + private File previousDir; + + public void actionPerformed(ActionEvent e) { + if (navigator.getBook() == null) { + return; + } + JFileChooser fileChooser = createFileChooser(previousDir); + int returnVal = fileChooser.showOpenDialog(mainWindow); + if(returnVal != JFileChooser.APPROVE_OPTION) { + return; + } + File selectedFile = fileChooser.getSelectedFile(); + if (selectedFile == null) { + return; + } + if (! 
selectedFile.isDirectory()) { + previousDir = selectedFile.getParentFile(); + } + try { + (new EpubWriter()).write(navigator.getBook(), new FileOutputStream(selectedFile)); + } catch (Exception e1) { + log.error(e1.getMessage(), e1); + } + } + }); + fileMenu.add(saveFileMenuItem); + + JMenuItem reloadMenuItem = new JMenuItem(getText("Reload")); + reloadMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_R, Event.CTRL_MASK)); + reloadMenuItem.addActionListener(new ActionListener() { + + public void actionPerformed(ActionEvent e) { + gotoBook(navigator.getBook()); + } + }); + fileMenu.add(reloadMenuItem); + + JMenuItem exitMenuItem = new JMenuItem(getText("Exit")); + exitMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_Q, Event.CTRL_MASK)); + exitMenuItem.addActionListener(new ActionListener() { + + public void actionPerformed(ActionEvent e) { + System.exit(0); + } + }); + fileMenu.add(exitMenuItem); + + JMenu viewMenu = new JMenu(getText("View")); + menuBar.add(viewMenu); + + JMenuItem viewTocContentMenuItem = new JMenuItem(getText("TOCContent"), ViewerUtil.createImageIcon("layout-toc-content")); + viewTocContentMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_2, Event.CTRL_MASK)); + viewTocContentMenuItem.addActionListener(new ActionListener() { + + public void actionPerformed(ActionEvent e) { + setLayout(Layout.TocContent); + } + }); + viewMenu.add(viewTocContentMenuItem); + + JMenuItem viewContentMenuItem = new JMenuItem(getText("Content"), ViewerUtil.createImageIcon("layout-content")); + viewContentMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_1, Event.CTRL_MASK)); + viewContentMenuItem.addActionListener(new ActionListener() { + + public void actionPerformed(ActionEvent e) { + setLayout(Layout.Content); + } + }); + viewMenu.add(viewContentMenuItem); + + JMenuItem viewTocContentMetaMenuItem = new JMenuItem(getText("TocContentMeta"), ViewerUtil.createImageIcon("layout-toc-content-meta")); + 
viewTocContentMetaMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_3, Event.CTRL_MASK)); + viewTocContentMetaMenuItem.addActionListener(new ActionListener() { + + public void actionPerformed(ActionEvent e) { + setLayout(Layout.TocContentMeta); + } + }); + viewMenu.add(viewTocContentMetaMenuItem); + + JMenu helpMenu = new JMenu(getText("Help")); + menuBar.add(helpMenu); + JMenuItem aboutMenuItem = new JMenuItem(getText("About")); + aboutMenuItem.addActionListener(new ActionListener() { + + public void actionPerformed(ActionEvent e) { + new AboutDialog(Viewer.this.mainWindow); + } + }); + helpMenu.add(aboutMenuItem); + + return menuBar; + } + + private enum Layout { + TocContentMeta, + TocContent, + Content + } + + private class LayoutX { + private boolean tocPaneVisible; + private boolean contentPaneVisible; + private boolean metaPaneVisible; + + } + private void setLayout(Layout layout) { + switch (layout) { + case Content: + mainSplitPane.setDividerLocation(0.0d); + rightSplitPane.setDividerLocation(1.0d); + break; + case TocContent: + mainSplitPane.setDividerLocation(0.2d); + rightSplitPane.setDividerLocation(1.0d); + break; + case TocContentMeta: + mainSplitPane.setDividerLocation(0.2d); + rightSplitPane.setDividerLocation(0.6d); + break; + } + } + + private static InputStream getBookInputStream(String[] args) { + // jquery-fundamentals-book.epub +// final Book book = (new EpubReader()).readEpub(new FileInputStream("/home/paul/test2_book1.epub")); +// final Book book = (new EpubReader()).readEpub(new FileInputStream("/home/paul/three_men_in_a_boat_jerome_k_jerome.epub")); + +// String bookFile = "/home/paul/test2_book1.epub"; +// bookFile = "/home/paul/project/private/library/epub/this_dynamic_earth-AAH813.epub"; + + String bookFile = null; + if (args.length > 0) { + bookFile = args[0]; + } + InputStream result = null; + if (! 
StringUtils.isBlank(bookFile)) { + try { + result = new FileInputStream(bookFile); + } catch (Exception e) { + log.error("Unable to open " + bookFile, e); + } + } + if (result == null) { + result = Viewer.class.getResourceAsStream("/viewer/epublibviewer-help.epub"); + } + return result; + } + + + public static void main(String[] args) throws FileNotFoundException, IOException { + try { + UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); + } catch (Exception e) { + log.error("Unable to set native look and feel", e); + } + + final InputStream bookStream = getBookInputStream(args); +// final Book book = readBook(args); + + // Schedule a job for the event dispatch thread: + // creating and showing this application's GUI. + javax.swing.SwingUtilities.invokeLater(new Runnable() { + public void run() { + new Viewer(bookStream); + } + }); + } +} diff --git a/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ViewerUtil.java b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ViewerUtil.java new file mode 100644 index 00000000..7f9e3b0e --- /dev/null +++ b/epublib-tools/src/main/java/nl/siegmann/epublib/viewer/ViewerUtil.java @@ -0,0 +1,48 @@ +package nl.siegmann.epublib.viewer; + +import java.awt.Image; + +import javax.imageio.ImageIO; +import javax.swing.ImageIcon; +import javax.swing.JButton; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ViewerUtil { + + private static Logger log = LoggerFactory.getLogger(ViewerUtil.class); + + /** + * Creates a button with the given icon. The icon will be loaded from the classpath. + * If loading the icon is unsuccessful it will use the defaultLabel. + * + * @param iconName + * @param backupLabel + * @return a button with the given icon. 
+ */ + // package + static JButton createButton(String iconName, String backupLabel) { + JButton result = null; + ImageIcon icon = createImageIcon(iconName); + if (icon == null) { + result = new JButton(backupLabel); + } else { + result = new JButton(icon); + } + return result; + } + + + static ImageIcon createImageIcon(String iconName) { + ImageIcon result = null; + String fullIconPath = "/viewer/icons/" + iconName + ".png"; + try { + Image image = ImageIO.read(ViewerUtil.class.getResourceAsStream(fullIconPath)); + result = new ImageIcon(image); + } catch(Exception e) { + log.error("Icon \'" + fullIconPath + "\' not found"); + } + return result; + } +} diff --git a/epublib-tools/src/main/java/org/htmlcleaner/EpublibXmlSerializer.java b/epublib-tools/src/main/java/org/htmlcleaner/EpublibXmlSerializer.java new file mode 100644 index 00000000..220f392e --- /dev/null +++ b/epublib-tools/src/main/java/org/htmlcleaner/EpublibXmlSerializer.java @@ -0,0 +1,128 @@ +package org.htmlcleaner; + +import java.io.IOException; +import java.io.Writer; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +public class EpublibXmlSerializer extends SimpleXmlSerializer { + private String outputEncoding; + + public EpublibXmlSerializer(CleanerProperties paramCleanerProperties, String outputEncoding) { + super(paramCleanerProperties); + this.outputEncoding = outputEncoding; + } + + protected String escapeXml(String xmlContent) { + return xmlContent; + } + + /** + * Differs from the super.serializeOpenTag in that it: + *
      + *
    • skips the xmlns:xml="xml" attribute
    • + *
    • if the tagNode is a meta tag setting the contentType then it sets the encoding to the actual encoding
    • + *
    + */ + protected void serializeOpenTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException { + String tagName = tagNode.getName(); + + if (Utils.isEmptyString(tagName)) { + return; + } + + boolean nsAware = props.isNamespacesAware(); + + Set definedNSPrefixes = null; + Set additionalNSDeclNeeded = null; + + String tagPrefix = Utils.getXmlNSPrefix(tagName); + if (tagPrefix != null) { + if (nsAware) { + definedNSPrefixes = new HashSet(); + tagNode.collectNamespacePrefixesOnPath(definedNSPrefixes); + if ( !definedNSPrefixes.contains(tagPrefix) ) { + additionalNSDeclNeeded = new TreeSet(); + additionalNSDeclNeeded.add(tagPrefix); + } + } else { + tagName = Utils.getXmlName(tagName); + } + } + + writer.write("<" + tagName); + + if (isMetaContentTypeTag(tagNode)) { + tagNode.setAttribute("content", "text/html; charset=" + outputEncoding); + } + + // write attributes + for (Map.Entry entry: tagNode.getAttributes().entrySet()) { + String attName = entry.getKey(); + String attPrefix = Utils.getXmlNSPrefix(attName); + if (attPrefix != null) { + if (nsAware) { + // collect used namespace prefixes in attributes in order to explicitly define + // ns declaration if needed; otherwise it would be ill-formed xml + if (definedNSPrefixes == null) { + definedNSPrefixes = new HashSet(); + tagNode.collectNamespacePrefixesOnPath(definedNSPrefixes); + } + if ( !definedNSPrefixes.contains(attPrefix) ) { + if (additionalNSDeclNeeded == null) { + additionalNSDeclNeeded = new TreeSet(); + } + additionalNSDeclNeeded.add(attPrefix); + } + } else { + attName = Utils.getXmlName(attName); + } + } + writer.write(" " + attName + "=\"" + escapeXml(entry.getValue()) + "\""); + } + + // write namespace declarations + if (nsAware) { + Map nsDeclarations = tagNode.getNamespaceDeclarations(); + if (nsDeclarations != null) { + for (Map.Entry entry: nsDeclarations.entrySet()) { + String prefix = entry.getKey(); + String att = "xmlns"; + if (prefix.length() > 0) { + att += ":" + prefix; + 
} + writer.write(" " + att + "=\"" + escapeXml(entry.getValue()) + "\""); + } + } + } + + // write additional namespace declarations needed for this tag in order xml to be well-formed + if (additionalNSDeclNeeded != null) { + for (String prefix: additionalNSDeclNeeded) { + // skip the xmlns:xml="xml" attribute + if (prefix.equalsIgnoreCase("xml")) { + continue; + } + writer.write(" xmlns:" + prefix + "=\"" + prefix + "\""); + } + } + + if ( isMinimizedTagSyntax(tagNode) ) { + writer.write(" />"); + if (newLine) { + writer.write("\n"); + } + } else if (dontEscape(tagNode)) { + writer.write(">"); + } + } + + private boolean isMetaContentTypeTag(TagNode tagNode) { + return tagNode.getName().equalsIgnoreCase("meta") + && "Content-Type".equalsIgnoreCase(tagNode.getAttributeByName("http-equiv")); + } +} \ No newline at end of file diff --git a/epublib-tools/src/main/resources/viewer/book/00_cover.html b/epublib-tools/src/main/resources/viewer/book/00_cover.html new file mode 100644 index 00000000..119fe011 --- /dev/null +++ b/epublib-tools/src/main/resources/viewer/book/00_cover.html @@ -0,0 +1,59 @@ + +epublib - a java epub library + +

    Epublib - a java epub library

    +

    +Epublib is a java library for reading/writing epub files. It comes with both a viewer and a command-line tool. +Its intended use is both as part of a larger java application and as a command-line tool.

    +

    Features

    +
    +
    Builtin viewer
    +
    +A viewer that supports table of contents, guide, meta info and pages. +
    + +
    Comprehensive coverage of the epub standard
    +
    + +
    +
    Simple things are simple
    +
    +

    The api is designed to be as simple as possible, while at the same time making complex things possible too.

    +
    +// read epub
    +EpubReader epubReader = new EpubReader();
    +Book book = epubReader.readEpub(new FileInputStream("mybook.epub"));
    +
    +// set title
    +book.getMetadata().setTitles(new ArrayList<String>() {{ add("an awesome book");}});
    +
    +// write epub
    +EpubWriter epubWriter = new EpubWriter();
    +epubWriter.write(book, new FileOutputStream("mynewbook.epub"));
    +
    +
    +
    Cleans up html into xhtml
    +
    Does not remove non-standard tags and attributes, but makes html files into valid xml (using htmlcleaner, http://htmlcleaner.sourceforge.net/)
    +
    Cleans up non-standards-compliant epubs
    +
    +Epublib tries to be as forgiving as possible when reading epubs and writes them as standards-compliant as possible.
    +If you’ve created an epub by zipping up several html files, then running it through epublib will make it much more standards-compliant. +
    +
    Fixes the coverpage on many readers
    +
    +For different epub readers the coverpage needs to be specified in a different way. Epublib tries several ways of extracting the coverpage from an existing epub and writes it in such a way that most readers (all the ones I’ve tested it on) display the coverpage correctly. +
    +
    Support for creation of epubs
    +
    +The api supports the creation of epubs from scratch. See Creating a simple book example for an example.
    +
    Convert (uncompressed) windows help (.chm) files to epub
    +
    +After uncompressing a windows help file with something like chmlib, epublib can make an epub file out of the resulting html and windows help index files. +
    +
    +

    + + \ No newline at end of file diff --git a/epublib-tools/src/main/resources/viewer/book/index.txt b/epublib-tools/src/main/resources/viewer/book/index.txt new file mode 100644 index 00000000..e23f4798 --- /dev/null +++ b/epublib-tools/src/main/resources/viewer/book/index.txt @@ -0,0 +1,4 @@ +title: Epublib - a java epub library +author: P. Siegmann + +00_cover.html \ No newline at end of file diff --git a/epublib-tools/src/main/resources/viewer/epublibviewer-help.epub b/epublib-tools/src/main/resources/viewer/epublibviewer-help.epub new file mode 100644 index 00000000..ab501bd6 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/epublibviewer-help.epub differ diff --git a/epublib-tools/src/main/resources/viewer/icons/chapter-first.png b/epublib-tools/src/main/resources/viewer/icons/chapter-first.png new file mode 100644 index 00000000..3b3d639a Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/chapter-first.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/chapter-last.png b/epublib-tools/src/main/resources/viewer/icons/chapter-last.png new file mode 100644 index 00000000..b451b0e9 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/chapter-last.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/chapter-next.png b/epublib-tools/src/main/resources/viewer/icons/chapter-next.png new file mode 100644 index 00000000..0b30c83e Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/chapter-next.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/chapter-previous.png b/epublib-tools/src/main/resources/viewer/icons/chapter-previous.png new file mode 100644 index 00000000..320e528f Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/chapter-previous.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/history-next.png b/epublib-tools/src/main/resources/viewer/icons/history-next.png new file 
mode 100644 index 00000000..174d7458 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/history-next.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/history-previous.png b/epublib-tools/src/main/resources/viewer/icons/history-previous.png new file mode 100644 index 00000000..9fbddda8 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/history-previous.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/layout-content.png b/epublib-tools/src/main/resources/viewer/icons/layout-content.png new file mode 100644 index 00000000..3b3f88ba Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/layout-content.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/layout-toc-content-meta.png b/epublib-tools/src/main/resources/viewer/icons/layout-toc-content-meta.png new file mode 100644 index 00000000..28354cf0 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/layout-toc-content-meta.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/layout-toc-content.png b/epublib-tools/src/main/resources/viewer/icons/layout-toc-content.png new file mode 100644 index 00000000..eae31e24 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/layout-toc-content.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/page-next.png b/epublib-tools/src/main/resources/viewer/icons/page-next.png new file mode 100644 index 00000000..17996209 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/page-next.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/page-previous.png b/epublib-tools/src/main/resources/viewer/icons/page-previous.png new file mode 100644 index 00000000..cdf1fb07 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/page-previous.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/search-icon.png 
b/epublib-tools/src/main/resources/viewer/icons/search-icon.png new file mode 100644 index 00000000..1b289ed0 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/search-icon.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/search-next.png b/epublib-tools/src/main/resources/viewer/icons/search-next.png new file mode 100644 index 00000000..195a9d70 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/search-next.png differ diff --git a/epublib-tools/src/main/resources/viewer/icons/search-previous.png b/epublib-tools/src/main/resources/viewer/icons/search-previous.png new file mode 100644 index 00000000..79558ab1 Binary files /dev/null and b/epublib-tools/src/main/resources/viewer/icons/search-previous.png differ diff --git a/epublib-tools/src/main/resources/xsl/chm_remove_prev_next.xsl b/epublib-tools/src/main/resources/xsl/chm_remove_prev_next.xsl new file mode 100644 index 00000000..32d9fcf1 --- /dev/null +++ b/epublib-tools/src/main/resources/xsl/chm_remove_prev_next.xsl @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/epublib-tools/src/main/resources/xsl/chm_remove_prev_next_2.xsl b/epublib-tools/src/main/resources/xsl/chm_remove_prev_next_2.xsl new file mode 100644 index 00000000..1664414b --- /dev/null +++ b/epublib-tools/src/main/resources/xsl/chm_remove_prev_next_2.xsl @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + diff --git a/epublib-tools/src/main/resources/xsl/chm_remove_prev_next_3.xsl b/epublib-tools/src/main/resources/xsl/chm_remove_prev_next_3.xsl new file mode 100644 index 00000000..1d8eff23 --- /dev/null +++ b/epublib-tools/src/main/resources/xsl/chm_remove_prev_next_3.xsl @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + diff --git a/epublib-tools/src/main/resources/xsl/remove_comment_container.xsl b/epublib-tools/src/main/resources/xsl/remove_comment_container.xsl new file mode 
100644 index 00000000..c71d62eb --- /dev/null +++ b/epublib-tools/src/main/resources/xsl/remove_comment_container.xsl @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/FilesetBookCreatorTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/FilesetBookCreatorTest.java new file mode 100644 index 00000000..10548917 --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/FilesetBookCreatorTest.java @@ -0,0 +1,32 @@ +package nl.siegmann.epublib; + +import junit.framework.TestCase; +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.fileset.FilesetBookCreator; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.vfs.FileObject; +import org.apache.commons.vfs.FileSystemManager; +import org.apache.commons.vfs.NameScope; +import org.apache.commons.vfs.VFS; + +public class FilesetBookCreatorTest extends TestCase { + + public void test1() { + try { + FileSystemManager fsManager = VFS.getManager(); + FileObject dir = fsManager.resolveFile("ram://test-dir"); + dir.createFolder(); + FileObject chapter1 = dir.resolveFile("chapter1.html", NameScope.CHILD); + chapter1.createFile(); + IOUtils.copy(this.getClass().getResourceAsStream("/book1/chapter1.html"), chapter1.getContent().getOutputStream()); + Book bookFromDirectory = FilesetBookCreator.createBookFromDirectory(dir, Constants.CHARACTER_ENCODING); + assertEquals(1, bookFromDirectory.getResources().size()); + assertEquals(1, bookFromDirectory.getSpine().size()); + assertEquals(1, bookFromDirectory.getTableOfContents().size()); + } catch(Exception e) { + assertTrue(false); + } + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/bookprocessor/CoverpageBookProcessorTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/bookprocessor/CoverpageBookProcessorTest.java new file mode 100644 index 00000000..a8897aad --- /dev/null +++ 
b/epublib-tools/src/test/java/nl/siegmann/epublib/bookprocessor/CoverpageBookProcessorTest.java @@ -0,0 +1,19 @@ +package nl.siegmann.epublib.bookprocessor; + +import junit.framework.TestCase; + +public class CoverpageBookProcessorTest extends TestCase { + + public void testCalculateAbsoluteImageHref1() { + String[] testData = new String[] { + "/foo/index.html", "bar.html", "/foo/bar.html", + "/foo/index.html", "../bar.html", "/bar.html", + "/foo/index.html", "../sub/bar.html", "/sub/bar.html" + }; + for (int i = 0; i < testData.length; i+= 3) { + String actualResult = CoverpageBookProcessor.calculateAbsoluteImageHref(testData[i + 1], testData[i]); + assertEquals(testData[i + 2], actualResult); + } + } + +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/fileset/FilesetBookCreatorTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/fileset/FilesetBookCreatorTest.java new file mode 100644 index 00000000..e51dcabd --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/fileset/FilesetBookCreatorTest.java @@ -0,0 +1,79 @@ +package nl.siegmann.epublib.fileset; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import junit.framework.TestCase; +import nl.siegmann.epublib.domain.Book; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.vfs.FileObject; +import org.apache.commons.vfs.FileSystemManager; +import org.apache.commons.vfs.NameScope; +import org.apache.commons.vfs.VFS; + +public class FilesetBookCreatorTest extends TestCase { + + public void test1() { + try { + FileObject dir = createDirWithSourceFiles(); + Book book = FilesetBookCreator.createBookFromDirectory(dir); + assertEquals(5, book.getSpine().size()); + assertEquals(5, book.getTableOfContents().size()); + } catch(Exception e) { + e.printStackTrace(); + assertTrue(false); + } + } + + public void test2() { + try { + FileObject dir = createDirWithSourceFiles(); + + // this file should be ignored + 
copyInputStreamToFileObject(new ByteArrayInputStream("hi".getBytes()), dir, "foo.nonsense"); + + Book book = FilesetBookCreator.createBookFromDirectory(dir); + assertEquals(5, book.getSpine().size()); + assertEquals(5, book.getTableOfContents().size()); + } catch(Exception e) { + e.printStackTrace(); + assertTrue(false); + } + } + + private FileObject createDirWithSourceFiles() throws IOException { + FileSystemManager fsManager = VFS.getManager(); + FileObject dir = fsManager.resolveFile("ram://fileset_test_dir"); + dir.createFolder(); + String[] sourceFiles = new String[] { + "book1.css", + "chapter1.html", + "chapter2_1.html", + "chapter2.html", + "chapter3.html", + "cover.html", + "flowers_320x240.jpg", + "cover.png" + }; + String testSourcesDir = "/book1"; + for (String filename: sourceFiles) { + String sourceFileName = testSourcesDir + "/" + filename; + copyResourceToFileObject(sourceFileName, dir, filename); + } + return dir; + } + + private void copyResourceToFileObject(String resourceUrl, FileObject targetDir, String targetFilename) throws IOException { + InputStream inputStream = this.getClass().getResourceAsStream(resourceUrl); + copyInputStreamToFileObject(inputStream, targetDir, targetFilename); + } + + private void copyInputStreamToFileObject(InputStream inputStream, FileObject targetDir, String targetFilename) throws IOException { + FileObject targetFile = targetDir.resolveFile(targetFilename, NameScope.DESCENDENT); + targetFile.createFile(); + IOUtils.copy(inputStream, targetFile.getContent().getOutputStream()); + targetFile.getContent().close(); + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/hhc/ChmParserTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/hhc/ChmParserTest.java new file mode 100644 index 00000000..8c1718fa --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/hhc/ChmParserTest.java @@ -0,0 +1,45 @@ +package nl.siegmann.epublib.hhc; + +import java.util.Iterator; + +import 
junit.framework.TestCase; +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.chm.ChmParser; +import nl.siegmann.epublib.domain.Book; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.vfs.FileObject; +import org.apache.commons.vfs.FileSystemManager; +import org.apache.commons.vfs.NameScope; +import org.apache.commons.vfs.VFS; + +public class ChmParserTest extends TestCase { + + public void test1() { + try { + FileSystemManager fsManager = VFS.getManager(); + FileObject dir = fsManager.resolveFile("ram://chm_test_dir"); + dir.createFolder(); + String chm1Dir = "/chm1"; + Iterator lineIter = IOUtils.lineIterator(ChmParserTest.class.getResourceAsStream(chm1Dir + "/filelist.txt"), Constants.CHARACTER_ENCODING); + while(lineIter.hasNext()) { + String line = lineIter.next(); + FileObject file = dir.resolveFile(line, NameScope.DESCENDENT); + file.createFile(); + IOUtils.copy(this.getClass().getResourceAsStream(chm1Dir + "/" + line), file.getContent().getOutputStream()); + file.getContent().close(); + } + + Book chmBook = ChmParser.parseChm(dir, Constants.CHARACTER_ENCODING); + assertEquals(45, chmBook.getResources().size()); + assertEquals(18, chmBook.getSpine().size()); + assertEquals(19, chmBook.getTableOfContents().size()); + assertEquals("chm-example", chmBook.getMetadata().getTitles().get(0)); + } catch(Exception e) { + e.printStackTrace(); + assertTrue(false); + } + } + + +} \ No newline at end of file diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/html/htmlcleaner/FixIdentifierBookProcessorTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/html/htmlcleaner/FixIdentifierBookProcessorTest.java new file mode 100644 index 00000000..9b71e5b2 --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/html/htmlcleaner/FixIdentifierBookProcessorTest.java @@ -0,0 +1,31 @@ +package nl.siegmann.epublib.html.htmlcleaner; + +import junit.framework.TestCase; +import 
nl.siegmann.epublib.bookprocessor.FixIdentifierBookProcessor; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Identifier; +import nl.siegmann.epublib.epub.EpubWriter; +import nl.siegmann.epublib.util.CollectionUtil; + +public class FixIdentifierBookProcessorTest extends TestCase { + + public void test_empty_book() { + Book book = new Book(); + FixIdentifierBookProcessor fixIdentifierBookProcessor = new FixIdentifierBookProcessor(); + Book resultBook = fixIdentifierBookProcessor.processBook(book); + assertEquals(1, resultBook.getMetadata().getIdentifiers().size()); + Identifier identifier = CollectionUtil.first(resultBook.getMetadata().getIdentifiers()); + assertEquals(Identifier.Scheme.UUID, identifier.getScheme()); + } + + public void test_single_identifier() { + Book book = new Book(); + Identifier identifier = new Identifier(Identifier.Scheme.ISBN, "1234"); + book.getMetadata().addIdentifier(identifier); + FixIdentifierBookProcessor fixIdentifierBookProcessor = new FixIdentifierBookProcessor(); + Book resultBook = fixIdentifierBookProcessor.processBook(book); + assertEquals(1, resultBook.getMetadata().getIdentifiers().size()); + Identifier actualIdentifier = CollectionUtil.first(resultBook.getMetadata().getIdentifiers()); + assertEquals(identifier, actualIdentifier); + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/html/htmlcleaner/HtmlCleanerBookProcessorTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/html/htmlcleaner/HtmlCleanerBookProcessorTest.java new file mode 100644 index 00000000..a39071a2 --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/html/htmlcleaner/HtmlCleanerBookProcessorTest.java @@ -0,0 +1,154 @@ +package nl.siegmann.epublib.html.htmlcleaner; + +import java.io.IOException; + +import junit.framework.TestCase; +import nl.siegmann.epublib.Constants; +import nl.siegmann.epublib.bookprocessor.HtmlCleanerBookProcessor; +import nl.siegmann.epublib.domain.Book; +import 
nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; + +public class HtmlCleanerBookProcessorTest extends TestCase { + + public void testSimpleDocument1() { + Book book = new Book(); + String testInput = "titleHello, world!"; + String expectedResult = Constants.DOCTYPE_XHTML + "\ntitleHello, world!"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String actualResult = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(expectedResult, actualResult); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testSimpleDocument2() { + Book book = new Book(); + String testInput = "test pageHello, world!"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String result = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(Constants.DOCTYPE_XHTML + "\n" + testInput, result); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testSimpleDocument3() { + Book book = new Book(); + String testInput = "test pageHello, world! 
ß"; + try { + Resource resource = new Resource(null, testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html", MediatypeService.XHTML, Constants.CHARACTER_ENCODING); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String result = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(Constants.DOCTYPE_XHTML + "\n" + testInput, result); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testSimpleDocument4() { + Book book = new Book(); + String testInput = "titleHello, world!\nHow are you ?"; + String expectedResult = Constants.DOCTYPE_XHTML + "\ntitleHello, world!\nHow are you ?"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String actualResult = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(expectedResult, actualResult); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + + public void testMetaContentType() { + Book book = new Book(); + String testInput = "titleHello, world!"; + String expectedResult = Constants.DOCTYPE_XHTML + "\ntitleHello, world!"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String actualResult = new String(processedHtml, Constants.CHARACTER_ENCODING); + 
assertEquals(expectedResult, actualResult); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testDocType1() { + Book book = new Book(); + String testInput = "titleHello, world!"; + String expectedResult = Constants.DOCTYPE_XHTML + "\ntitleHello, world!"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String actualResult = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(expectedResult, actualResult); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testDocType2() { + Book book = new Book(); + String testInput = Constants.DOCTYPE_XHTML + "\ntitleHello, world!"; + String expectedResult = Constants.DOCTYPE_XHTML + "\ntitleHello, world!"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String actualResult = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(expectedResult, actualResult); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testXmlNS() { + Book book = new Book(); + String testInput = "titleHello, world!"; + String expectedResult = Constants.DOCTYPE_XHTML + "\ntitleHello, world!"; + try { + Resource resource = new Resource(testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html"); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] 
processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String actualResult = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(expectedResult, actualResult); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + public void testApos() { + Book book = new Book(); + String testInput = "test page'hi'"; + try { + Resource resource = new Resource(null, testInput.getBytes(Constants.CHARACTER_ENCODING), "test.html", MediatypeService.XHTML, Constants.CHARACTER_ENCODING); + book.getResources().add(resource); + HtmlCleanerBookProcessor htmlCleanerBookProcessor = new HtmlCleanerBookProcessor(); + byte[] processedHtml = htmlCleanerBookProcessor.processHtml(resource, book, Constants.CHARACTER_ENCODING); + String result = new String(processedHtml, Constants.CHARACTER_ENCODING); + assertEquals(Constants.DOCTYPE_XHTML + "\n" + testInput, result); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/search/SearchIndexTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/search/SearchIndexTest.java new file mode 100644 index 00000000..df272c17 --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/search/SearchIndexTest.java @@ -0,0 +1,100 @@ +package nl.siegmann.epublib.search; + +import java.io.IOException; +import java.io.StringReader; +import java.util.List; + +import junit.framework.TestCase; +import nl.siegmann.epublib.domain.Book; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; + +public class SearchIndexTest extends TestCase { + + public void testDoSearch1() { + try { + Book testBook = new Book(); + testBook.addSection("chapter1", new Resource(new StringReader("a"), "chapter1.html")); + testBook.addSection("chapter2", new Resource(new StringReader("ab"), "chapter2.html")); + testBook.addSection("chapter3", new Resource(new 
StringReader("ba"), "chapter3.html")); + testBook.addSection("chapter4", new Resource(new StringReader("aa"), "chapter4.html")); + SearchIndex searchIndex = new SearchIndex(testBook); + SearchResults searchResults = searchIndex.doSearch("a"); + assertFalse(searchResults.isEmpty()); + assertEquals(5, searchResults.size()); + assertEquals(0, searchResults.getHits().get(0).getPagePos()); + assertEquals(0, searchResults.getHits().get(1).getPagePos()); + assertEquals(1, searchResults.getHits().get(2).getPagePos()); + assertEquals(0, searchResults.getHits().get(3).getPagePos()); + assertEquals(1, searchResults.getHits().get(4).getPagePos()); + } catch (IOException e) { + assertTrue(e.getMessage(), false); + } + } + + public void testUnicodeTrim() { + String[] testData = new String[] { + "", "", + " ", "", + "a", "a", + "a ", "a", + " a", "a", + " a ", "a", + "\ta", "a", + "\u00a0a", "a" + }; + for (int i = 0; i < testData.length; i+= 2) { + String actualText = SearchIndex.unicodeTrim(testData[i]); + assertEquals((i / 2) + ": ", testData[i + 1], actualText); + } + } + + public void testInContent() { + Object[] testData = new Object[] { + "a", "a", new Integer[] {0}, + "a", "aa", new Integer[] {0,1}, + "a", "a \n\t\t\ta", new Integer[] {0,2}, + "a", "\u00c3\u00a4", new Integer[] {0}, // ä + "a", "A", new Integer[] {0}, + // ä  + "a", "\u00a0\u00c4", new Integer[] {0}, + "u", "ü", new Integer[] {0}, + "a", "b", new Integer[] {}, + "XXX", "my title1

    wrong title

    ", new Integer[] {}, + "title", "my title1

    wrong title

    ", new Integer[] {3, 15} + }; + for (int i = 0; i < testData.length; i+= 3) { + Resource resource = new Resource(((String) testData[i + 1]).getBytes(), MediatypeService.XHTML); + String content = SearchIndex.getSearchContent(new StringReader((String) testData[i + 1])); + String searchTerm = (String) testData[i]; + Integer[] expectedResult = (Integer[]) testData[i + 2]; + List actualResult = SearchIndex.doSearch(searchTerm, content, resource); + assertEquals("test " + ((i / 3) + 1), expectedResult.length, actualResult.size()); + for (int j = 0; j < expectedResult.length; j++) { + SearchResult searchResult = actualResult.get(j); + assertEquals("test " + (i / 3) + ", match " + j, expectedResult[j].intValue(), searchResult.getPagePos()); + } + } + } + + public void testCleanText() { + String[] testData = new String[] { + "", "", + " ", "", + "a", "a", + "A", "a", + "a b", "a b", + "a b", "a b", + "a\tb", "a b", + "a\nb", "a b", + "a\n\t\r \n\tb", "a b", + // "ä", "a", + "\u00c4\u00a0", "a", + "", "" + }; + for (int i = 0; i < testData.length; i+= 2) { + String actualText = SearchIndex.cleanText(testData[i]); + assertEquals((i / 2) + ": '" + testData[i] + "' => '" + actualText + "' does not match '" + testData[i + 1] + "\'", testData[i + 1], actualText); + } + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/HtmlSplitterTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/HtmlSplitterTest.java new file mode 100644 index 00000000..651713dc --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/HtmlSplitterTest.java @@ -0,0 +1,41 @@ +package nl.siegmann.epublib.utilities; + +import java.io.ByteArrayOutputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.List; + +import javax.xml.stream.XMLEventWriter; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.events.XMLEvent; + +import junit.framework.TestCase; +import nl.siegmann.epublib.Constants; + +public 
class HtmlSplitterTest extends TestCase { + + public void test1() { + HtmlSplitter htmlSplitter = new HtmlSplitter(); + try { + String bookResourceName = "/holmes_scandal_bohemia.html"; + Reader input = new InputStreamReader(HtmlSplitterTest.class.getResourceAsStream(bookResourceName), Constants.CHARACTER_ENCODING); + int maxSize = 3000; + List> result = htmlSplitter.splitHtml(input, maxSize); + XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance(); + for(int i = 0; i < result.size(); i++) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + XMLEventWriter writer = xmlOutputFactory.createXMLEventWriter(out); + for(XMLEvent xmlEvent: result.get(i)) { + writer.add(xmlEvent); + } + writer.close(); + byte[] data = out.toByteArray(); + assertTrue(data.length > 0); + assertTrue(data.length <= maxSize); + } + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/NumberSayerTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/NumberSayerTest.java new file mode 100644 index 00000000..2ff3b072 --- /dev/null +++ b/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/NumberSayerTest.java @@ -0,0 +1,17 @@ +package nl.siegmann.epublib.utilities; + +import junit.framework.TestCase; + +public class NumberSayerTest extends TestCase { + public void test1() { + Object[] testinput = new Object[] { + 1, "one", + 42, "fourtytwo", + 127, "hundredtwentyseven", + 433, "fourhundredthirtythree" + }; + for(int i = 0; i < testinput.length; i += 2) { + assertEquals((String) testinput[i + 1], NumberSayer.getNumberName((Integer) testinput[i])); + } + } +} diff --git a/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/ResourceUtilTest.java b/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/ResourceUtilTest.java new file mode 100644 index 00000000..a6544bc1 --- /dev/null +++ 
b/epublib-tools/src/test/java/nl/siegmann/epublib/utilities/ResourceUtilTest.java @@ -0,0 +1,25 @@ +package nl.siegmann.epublib.utilities; + +import junit.framework.TestCase; +import nl.siegmann.epublib.domain.Resource; +import nl.siegmann.epublib.service.MediatypeService; +import nl.siegmann.epublib.util.ToolsResourceUtil; + +public class ResourceUtilTest extends TestCase { + + public void testFindTitle() { + String[] testData = new String[] { + "my title1

    wrong title

    ", "my title1", + "my title2

    wrong title

    ", "my title2", + "

    my h1 title3

    ", "my h1 title3", + "

    my h1 title4

    ", "my h1 title4", + "

    my h1 title5

    ", "my h1 title5", + "wrong title

    test title 6

    ", "test title 6", + }; + for (int i = 0; i < testData.length; i+= 2) { + Resource resource = new Resource(testData[i].getBytes(), MediatypeService.XHTML); + String actualTitle = ToolsResourceUtil.findTitleFromXhtml(resource); + assertEquals(testData[i + 1], actualTitle); + } + } +} diff --git a/epublib-tools/src/test/resources/book1/book1.css b/epublib-tools/src/test/resources/book1/book1.css new file mode 100644 index 00000000..d59e76d1 --- /dev/null +++ b/epublib-tools/src/test/resources/book1/book1.css @@ -0,0 +1,5 @@ +@CHARSET "UTF-8"; + +body { + font: New Century Schoolbook, serif; +} \ No newline at end of file diff --git a/epublib-tools/src/test/resources/book1/chapter1.html b/epublib-tools/src/test/resources/book1/chapter1.html new file mode 100644 index 00000000..2970e934 --- /dev/null +++ b/epublib-tools/src/test/resources/book1/chapter1.html @@ -0,0 +1,14 @@ + + + Chapter 1 + + + + +

    Introduction

    +

    +Welcome to Chapter 1 of the epublib book1 test book.
    +We hope you enjoy the test. +

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/book1/chapter2.html b/epublib-tools/src/test/resources/book1/chapter2.html new file mode 100644 index 00000000..73ab75ed --- /dev/null +++ b/epublib-tools/src/test/resources/book1/chapter2.html @@ -0,0 +1,15 @@ + + + Chapter 2 + + + +

    Second chapter

    +

    +Welcome to Chapter 2 of the epublib book1 test book.
    +Pretty flowers:
    +flowers
    +We hope you are still enjoying the test. +

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/book1/chapter2_1.html b/epublib-tools/src/test/resources/book1/chapter2_1.html new file mode 100644 index 00000000..91f2974a --- /dev/null +++ b/epublib-tools/src/test/resources/book1/chapter2_1.html @@ -0,0 +1,27 @@ + + + Chapter 2.1 + + + +

    Second chapter, first subsection

    +

    +A subsection of the second chapter. +

    +

    +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec eleifend ligula et odio malesuada luctus. Proin tristique blandit interdum. In a lorem augue, non iaculis ante. In hac habitasse platea dictumst. Suspendisse sed dolor in lacus dictum imperdiet quis id enim. Duis mattis, ante at posuere pretium, tortor nisl placerat ligula, quis vulputate lorem turpis id augue. Quisque tempus elementum leo, mattis vestibulum quam pulvinar tincidunt. Sed eu nulla mi, sed venenatis purus. Suspendisse potenti. Mauris feugiat mollis commodo. Donec ipsum ante, aliquam et imperdiet quis, posuere in nibh. Mauris non felis eget nunc auctor pharetra. Mauris sagittis malesuada pellentesque. Phasellus accumsan semper turpis eu pretium. Duis iaculis convallis viverra. Aliquam eu turpis ac elit euismod mollis. Duis velit velit, venenatis quis porta ut, adipiscing sit amet elit. Ut vehicula lacinia facilisis. Cras at turpis ac quam cursus accumsan sed quis nunc. Phasellus neque tortor, dapibus in aliquet non, sollicitudin quis libero. +

    +

    +Ut vulputate ultrices nunc, in suscipit lorem porta quis. Nulla sit amet odio libero. Donec et felis diam. Phasellus ut libero non metus pulvinar tristique ut sit amet dui. Praesent a sapien libero, eget imperdiet enim. Aenean accumsan, elit facilisis tincidunt cursus, massa erat volutpat ante, non rhoncus ante neque eget neque. Cras id faucibus eros. In eleifend imperdiet magna lobortis viverra. Nunc at quam sed leo lobortis malesuada. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Aliquam erat volutpat. Nam risus ante, rhoncus ac condimentum non, accumsan nec quam. Quisque vitae nulla eget sem viverra condimentum. Ut iaculis neque eget orci tincidunt venenatis. Nunc ac tellus sit amet nibh tristique dignissim eget ac libero. Mauris tincidunt orci vitae turpis rhoncus pellentesque. Proin scelerisque ultricies placerat. Suspendisse vel consectetur libero. +

    +

    N +am ornare convallis tortor, semper convallis velit semper non. Nulla velit tortor, cursus bibendum cursus sit amet, placerat vel arcu. Nullam vel ipsum quis mauris gravida bibendum at id risus. Suspendisse massa nisl, luctus at tempor sed, tristique vel risus. Vestibulum erat nisl, porttitor sit amet tincidunt sit amet, sodales vel odio. Vivamus vitae pharetra nisi. Praesent a turpis quis lectus malesuada vehicula a in quam. Quisque consectetur imperdiet urna et convallis. Phasellus malesuada, neque non aliquet dictum, purus arcu volutpat odio, nec sodales justo urna vel justo. Phasellus venenatis leo id sapien tempor hendrerit. Nullam ac elit sodales velit dapibus tempor eu at risus. Sed quis nibh velit. Fusce sapien lacus, dapibus eu convallis luctus, molestie vel est. Proin pellentesque blandit felis nec dapibus. Sed vel felis eu libero viverra porttitor et nec diam. Aenean ac cursus quam. Sed ut tortor nisi. Nullam viverra velit ac velit interdum eu porta justo iaculis. Aliquam egestas fermentum auctor. Fusce viverra lorem augue. +

    +

    +Integer quis dolor et quam hendrerit consectetur sit amet sed neque. Praesent vel vulputate arcu. Integer vestibulum congue mauris, sit amet tincidunt mauris fermentum sit amet. Etiam quam felis, tempus at laoreet at, hendrerit et urna. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque ut mollis nibh. Integer quis est mi, eget aliquam nunc. Quisque hendrerit pulvinar lacus, nec ullamcorper sapien gravida nec. Morbi eleifend interdum magna, ultrices euismod sapien ultricies et. In adipiscing est vitae ligula tristique porta. Sed enim lectus, sodales ac cursus vel, suscipit id erat. Praesent tristique congue massa, ac sagittis neque ullamcorper vestibulum. Fusce vel elit quis quam convallis blandit. Duis nibh massa, porttitor sit amet sodales sit amet, varius at sem. Maecenas consequat ultrices dolor nec tincidunt. Cras id tellus urna. Etiam ut odio tellus, in ornare quam. Curabitur vel est nulla. +

    +

    +In aliquet dolor ut elit tempor nec tincidunt tortor porttitor. Etiam consequat tincidunt consectetur. Morbi erat elit, rutrum at molestie a, posuere pretium nisl. Nam at vestibulum nunc. In sed nisl ante, ac molestie nibh. Donec eu neque eget lectus dignissim faucibus sit amet nec quam. Pellentesque tincidunt porttitor vestibulum. Aliquam ut ligula diam, eget egestas augue. Proin ac venenatis purus. Morbi malesuada luctus libero sed laoreet. Curabitur molestie dui ac nunc molestie hendrerit. In congue luctus faucibus. Morbi elit turpis, feugiat nec venenatis vel, tempor cursus nibh. Pellentesque sagittis consectetur ante, eu luctus quam hendrerit in. +

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/book1/chapter3.html b/epublib-tools/src/test/resources/book1/chapter3.html new file mode 100644 index 00000000..c6d258bf --- /dev/null +++ b/epublib-tools/src/test/resources/book1/chapter3.html @@ -0,0 +1,13 @@ + + + Chapter 3 + + + +

    Final chapter

    +

    +Welcome to Chapter 3 of the epublib book1 test book.
    +We hope you enjoyed the test. +

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/book1/cover.html b/epublib-tools/src/test/resources/book1/cover.html new file mode 100644 index 00000000..fba37680 --- /dev/null +++ b/epublib-tools/src/test/resources/book1/cover.html @@ -0,0 +1,8 @@ + + + Cover + + + + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/book1/cover.png b/epublib-tools/src/test/resources/book1/cover.png new file mode 100644 index 00000000..a2c37d16 Binary files /dev/null and b/epublib-tools/src/test/resources/book1/cover.png differ diff --git a/epublib-tools/src/test/resources/book1/flowers_320x240.jpg b/epublib-tools/src/test/resources/book1/flowers_320x240.jpg new file mode 100644 index 00000000..88c152ab Binary files /dev/null and b/epublib-tools/src/test/resources/book1/flowers_320x240.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/#IDXHDR b/epublib-tools/src/test/resources/chm1/#IDXHDR new file mode 100644 index 00000000..9dc95b8b Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#IDXHDR differ diff --git a/epublib-tools/src/test/resources/chm1/#IVB b/epublib-tools/src/test/resources/chm1/#IVB new file mode 100644 index 00000000..4691d0d0 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#IVB differ diff --git a/epublib-tools/src/test/resources/chm1/#STRINGS b/epublib-tools/src/test/resources/chm1/#STRINGS new file mode 100644 index 00000000..07ecca06 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#STRINGS differ diff --git a/epublib-tools/src/test/resources/chm1/#SYSTEM b/epublib-tools/src/test/resources/chm1/#SYSTEM new file mode 100644 index 00000000..f9b54b07 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#SYSTEM differ diff --git a/epublib-tools/src/test/resources/chm1/#TOPICS b/epublib-tools/src/test/resources/chm1/#TOPICS new file mode 100644 index 00000000..71c22a07 Binary files /dev/null and 
b/epublib-tools/src/test/resources/chm1/#TOPICS differ diff --git a/epublib-tools/src/test/resources/chm1/#URLSTR b/epublib-tools/src/test/resources/chm1/#URLSTR new file mode 100644 index 00000000..8ca19868 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#URLSTR differ diff --git a/epublib-tools/src/test/resources/chm1/#URLTBL b/epublib-tools/src/test/resources/chm1/#URLTBL new file mode 100644 index 00000000..3792848b Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#URLTBL differ diff --git a/epublib-tools/src/test/resources/chm1/#WINDOWS b/epublib-tools/src/test/resources/chm1/#WINDOWS new file mode 100644 index 00000000..56f0f01b Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/#WINDOWS differ diff --git a/epublib-tools/src/test/resources/chm1/$FIftiMain b/epublib-tools/src/test/resources/chm1/$FIftiMain new file mode 100644 index 00000000..39973cd6 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$FIftiMain differ diff --git a/epublib-tools/src/test/resources/chm1/$OBJINST b/epublib-tools/src/test/resources/chm1/$OBJINST new file mode 100644 index 00000000..22d7b26c Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$OBJINST differ diff --git a/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/BTree b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/BTree new file mode 100644 index 00000000..ae5bc4df Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/BTree differ diff --git a/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Data b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Data new file mode 100644 index 00000000..433135b4 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Data differ diff --git a/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Map b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Map new file mode 100644 index 
00000000..d45cdf3e Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Map differ diff --git a/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Property b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Property new file mode 100644 index 00000000..69d161d3 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWAssociativeLinks/Property differ diff --git a/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/BTree b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/BTree new file mode 100644 index 00000000..ad58448c Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/BTree differ diff --git a/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Data b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Data new file mode 100644 index 00000000..6cf94bdd Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Data differ diff --git a/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Map b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Map new file mode 100644 index 00000000..8f07274c Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Map differ diff --git a/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Property b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Property new file mode 100644 index 00000000..69d161d3 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/$WWKeywordLinks/Property differ diff --git a/epublib-tools/src/test/resources/chm1/CHM-example.hhc b/epublib-tools/src/test/resources/chm1/CHM-example.hhc new file mode 100644 index 00000000..2a2fc7b8 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/CHM-example.hhc @@ -0,0 +1,108 @@ + + + + + + + + + +
      +
    • + + + +
    • + + +
        +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      +
    • + + + +
        +
      • + + + +
      • + + + +
      +
    • + + +
        +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      • + + + +
      • + + + + +
      • + + + + +
      • + + + + + +
      +
    + diff --git a/epublib-tools/src/test/resources/chm1/CHM-example.hhk b/epublib-tools/src/test/resources/chm1/CHM-example.hhk new file mode 100644 index 00000000..f2ee57c6 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/CHM-example.hhk @@ -0,0 +1,458 @@ + + + + + + + + + + +
      +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + + + + + + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + +
    • +
    • + + + + +
    • +
    • + + + + + + +
    • +
    • + + + + +
    • +
    + + diff --git a/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-10000.htm b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-10000.htm new file mode 100644 index 00000000..825aef71 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-10000.htm @@ -0,0 +1,64 @@ + + + + +Context sensitive help topic 10000 + + + + + + + + + + + +
    + +

    Context sensitive help topic 10000

    +

    This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 10000.

    +

    +

    Open your project (.hhp) file in notepad and add following sections:

    +

    [MAP]

    +

    Add a [MAP] section and define the IDs you require.

    +

    #define IDH_frmMainControl1 10000
    + #define IDH_frmMainControl2 10010
    + #define IDH_frmChildControl1 20000
    + #define IDH_frmChildControl2 20010
    +

    +

    [ALIAS]

    +

    Add an [ALIAS] section and define the mapping between each ID and a help topic.

    +

    [ALIAS]
    + IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
    + IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
    + IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
    + IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

    +

    Alternatively you can do this:

    +

    In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in same folder as your help project file.

    +
    ;---------------------------------------------------
    ; alias.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; last edited: 2006-07-09
    ;---------------------------------------------------
    IDH_90000=index.htm
    IDH_10000=Context-sensitive_example\contextID-10000.htm
    IDH_10010=Context-sensitive_example\contextID-10010.htm
    IDH_20000=Context-sensitive_example\contextID-20000.htm
    IDH_20010=Context-sensitive_example\contextID-20010.htm
    +

    In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in same folder as your help project file.

    +
    ;--------------------------------------------------
    ; map.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; ;comment at end of line
    ;--------------------------------------------------
    #define IDH_90000 90000;frmMain
    #define IDH_10000 10000;frmAddressDataContextID-1
    #define IDH_10010 10010;frmAddressDataContextID-2
    #define IDH_20000 20000;frmAddressDataContextID-3
    #define IDH_20010 20010;frmAddressDataContextID-4
    +

    Open your .hhp file in a text editor and add these sections

    +

    [ALIAS]
    + #include alias.h

    +

    [MAP]
    + #include map.h

    +

    Recompile your .HHP file. Now your application can call help using context + help ID's instead of topic file names.

    +

     

    +

     

    + + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-10010.htm b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-10010.htm new file mode 100644 index 00000000..8c9b1389 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-10010.htm @@ -0,0 +1,63 @@ + + + + +Context sensitive help topic 10010 + + + + + + + + + + + +
    + +

    Context sensitive help topic 10010

    +

    This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 10010.

    +

    +

    Open your project (.hhp) file in notepad and add following sections:

    +

    [MAP]

    +

    Add a [MAP] section and define the IDs you require.

    +

    #define IDH_frmMainControl1 10000
    + #define IDH_frmMainControl2 10010
    + #define IDH_frmChildControl1 20000
    + #define IDH_frmChildControl2 20010
    +

    +

    [ALIAS]

    +

    Add an [ALIAS] section and define the mapping between each ID and a help topic.

    +

    [ALIAS]
    + IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
    + IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
    + IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
    + IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

    +

    Alternatively you can do this:

    +

    In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in same folder as your help project file.

    +
    ;---------------------------------------------------
    ; alias.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; last edited: 2006-07-09
    ;---------------------------------------------------
    IDH_90000=index.htm
    IDH_10000=Context-sensitive_example\contextID-10000.htm
    IDH_10010=Context-sensitive_example\contextID-10010.htm
    IDH_20000=Context-sensitive_example\contextID-20000.htm
    IDH_20010=Context-sensitive_example\contextID-20010.htm
    +

    In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in same folder as your help project file.

    +
    ;--------------------------------------------------
    ; map.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; ;comment at end of line
    ;--------------------------------------------------
    #define IDH_90000 90000;frmMain
    #define IDH_10000 10000;frmAddressDataContextID-1
    #define IDH_10010 10010;frmAddressDataContextID-2
    #define IDH_20000 20000;frmAddressDataContextID-3
    #define IDH_20010 20010;frmAddressDataContextID-4
    +

    Open your .hhp file in a text editor and add these sections

    +

    [ALIAS]
    + #include alias.h

    +

    [MAP]
    + #include map.h

    +

    Recompile your .HHP file. Now your application can call help using context + help ID's instead of topic file names.

    +

     

    +

    +

     

    + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-20000.htm b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-20000.htm new file mode 100644 index 00000000..d2121050 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-20000.htm @@ -0,0 +1,66 @@ + + + + +Context sensitive help topic 20000 + + + + + + + + + + + +
    + +

    Context sensitive help topic 20000

    +

    This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 20000.

    +

    +

    Open your project (.hhp) file in notepad and add following sections:

    +

    [MAP]

    +

    Add a [MAP] section and define the IDs you require.

    +

    #define IDH_frmMainControl1 10000
    + #define IDH_frmMainControl2 10010
    + #define IDH_frmChildControl1 20000
    + #define IDH_frmChildControl2 20010
    +

    +

    [ALIAS]

    +

    Add an [ALIAS] section and define the mapping between each ID and a help topic.

    +

    [ALIAS]
    + IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
    + IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
    + IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
    + IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

    +

    Alternatively you can do this:

    +

    In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in same folder as your help project file.

    +
    ;---------------------------------------------------
    ; alias.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; last edited: 2006-07-09
    ;---------------------------------------------------
    IDH_90000=index.htm
    IDH_10000=Context-sensitive_example\contextID-10000.htm
    IDH_10010=Context-sensitive_example\contextID-10010.htm
    IDH_20000=Context-sensitive_example\contextID-20000.htm
    IDH_20010=Context-sensitive_example\contextID-20010.htm
    +

    In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in same folder as your help project file.

    +
    ;--------------------------------------------------
    ; map.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; ;comment at end of line
    ;--------------------------------------------------
    #define IDH_90000 90000;frmMain
    #define IDH_10000 10000;frmAddressDataContextID-1
    #define IDH_10010 10010;frmAddressDataContextID-2
    #define IDH_20000 20000;frmAddressDataContextID-3
    #define IDH_20010 20010;frmAddressDataContextID-4
    +

    Open your .hhp file in a text editor and add these sections

    +

    [ALIAS]
    + #include alias.h

    +

    [MAP]
    + #include map.h

    +

    Recompile your .HHP file. Now your application can call help using context + help ID's instead of topic file names.

    +

     

    +

    +

     

    +

     

    + + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-20010.htm b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-20010.htm new file mode 100644 index 00000000..f44a6016 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Context-sensitive_example/contextID-20010.htm @@ -0,0 +1,66 @@ + + + + +Context sensitive help topic 20010 + + + + + + + + + + + +
    + +

    Context sensitive help topic 20010

    +

    This topic is only used to show context sensitive help with Visual Basic applications. + This is contextID 20010.

    +

    +

    Open your project (.hhp) file in notepad and add following sections:

    +

    [MAP]

    +

    Add a [MAP] section and define the IDs you require.

    +

    #define IDH_frmMainControl1 10000
    + #define IDH_frmMainControl2 10010
    + #define IDH_frmChildControl1 20000
    + #define IDH_frmChildControl2 20010
    +

    +

    [ALIAS]

    +

    Add an [ALIAS] section and define the mapping between each ID and a help topic.

    +

    [ALIAS]
    + IDH_frmMainControl1=Context-sensitive example\contextID-10000.htm
    + IDH_frmMainControl2=Context-sensitive example\contextID-10010.htm
    + IDH_frmChildControl1=Context-sensitive example\contextID-20000.htm
    + IDH_frmChildControl2=Context-sensitive example\contextID-20010.htm

    +

    Alternatively you can do this:

    +

    In a text editor enter the ALIAS details like IDH_90000=index.htm. + Save the file as 'alias.h' in same folder as your help project file.

    +
    ;---------------------------------------------------
    ; alias.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; last edited: 2006-07-09
    ;---------------------------------------------------
    IDH_90000=index.htm
    IDH_10000=Context-sensitive_example\contextID-10000.htm
    IDH_10010=Context-sensitive_example\contextID-10010.htm
    IDH_20000=Context-sensitive_example\contextID-20000.htm
    IDH_20010=Context-sensitive_example\contextID-20010.htm
    +

    In a text editor enter the MAP details like #define IDH_90000 90000;frmMain. + Save the file as 'map.h' in same folder as your help project file.

    +
    ;--------------------------------------------------
    ; map.h file example for HTMLHelp (CHM)
    ; www.help-info.de
    ;
    ; All IDH's > 10000 for better format
    ; ;comment at end of line
    ;--------------------------------------------------
    #define IDH_90000 90000;frmMain
    #define IDH_10000 10000;frmAddressDataContextID-1
    #define IDH_10010 10010;frmAddressDataContextID-2
    #define IDH_20000 20000;frmAddressDataContextID-3
    #define IDH_20010 20010;frmAddressDataContextID-4
    +

    Open your .hhp file in a text editor and add these sections

    +

    [ALIAS]
    + #include alias.h

    +

    [MAP]
    + #include map.h

    +

    Recompile your .HHP file. Now your application can call help using context + help ID's instead of topic file names.

    +

     

    +

    +

     

    +

     

    + + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/Garden/flowers.htm b/epublib-tools/src/test/resources/chm1/Garden/flowers.htm new file mode 100644 index 00000000..8a7900c6 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Garden/flowers.htm @@ -0,0 +1,51 @@ + + + + +Flowers + + + + + + + + + + + + + + +
    + +

    Flowers

    +

    You can cultivate flowers in your garden. It is beautiful if one can give his + wife a bunch of self-cultivated flowers.

    + + + + + + + + + + + + + +
    +

     

    + +

     

    + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/Garden/garden.htm b/epublib-tools/src/test/resources/chm1/Garden/garden.htm new file mode 100644 index 00000000..86792d5a --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Garden/garden.htm @@ -0,0 +1,59 @@ + + + + +Garden + + + + + + + + + + + + + + + + + + +
    + +

    Own Garden

    +

    It is nice to have a garden near your home.

    +

    You can plant trees of one's own, lay out a pond with fish and cultivate flowers. + For the children a game lawn can be laid out. You can learn much about botany.

    +

     

    + + + + + + + + + + + + + +
    A garden is good for your health and you can relax + at the gardening.
    +

     

    +

     

    +

     

    +

     

    + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/Garden/tree.htm b/epublib-tools/src/test/resources/chm1/Garden/tree.htm new file mode 100644 index 00000000..10e34f7b --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/Garden/tree.htm @@ -0,0 +1,43 @@ + + + + +How one grows trees + + + + + + + + + + + + + + + + +
    + +

    How one grows trees

    +

    You must dig a big hole first.

    +

    Think carefully about which kind of tree you want to plant.

    +

    (oak, beech, alder)

    +

    A newly planted tree must always be given sufficient water.

    +

    +

     

    + +

     

    + + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/CloseWindowAutomatically.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/CloseWindowAutomatically.htm new file mode 100644 index 00000000..2655be2d --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/CloseWindowAutomatically.htm @@ -0,0 +1,58 @@ + + + + +Attention (!) - Close Window automatically + + + + + + + + + + + + + + + + + +
    go to home ...
    + +

    Close Window automatically

    +

    One can close HTML Help window without getting a click from user by the following + code. Use "Close" ActiveX Control and Javascript as shown below.

    +

    Code

    +

     

    +

    <OBJECT id=hhctrl type="application/x-oleobject"
    + classid="clsid:adb880a6-d8ff-11cf-9377-00aa003b7a11"
    + codebase="hhctrl.ocx#Version=5,2,3790,233">
    + <PARAM name="Command" value="Close">
    + </OBJECT>
    + <script type="text/javascript" language="JavaScript">
    + <!--
    + window.setTimeout('hhctrl.Click();',1000);
    + // -->
    + </script>

    +

     

    +

     

    +

     

    +

     

    + + + + + +
    back to top ...
    +
    +

     

    + + diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/Jump_to_anchor.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/Jump_to_anchor.htm new file mode 100644 index 00000000..f74f191c --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/Jump_to_anchor.htm @@ -0,0 +1,73 @@ + + + + +How to jump to a anchor + + + + + + + + + + + + + +
    + +

    How to jump to an anchor

    +

    This topic shows how to jump to bookmarks in your HTML code like:

    +

    <a name="AnchorSample" id="AnchorSample"></a>

    + +

     

    +

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    + + +

    AnchorSample InnerText Headline

    +

    1. Example for use with Visual Basic 2003

    +

    This topic is used to show providing help for controls with a single HTML file + downloaded from a server (if internet connection is available) and jump to 'AnchorSample'.

    +

    2. Example for use with Compiled Help Module (CHM)

    +

    This topic is used to show how to jump to bookmarks AnchorSample.

    +

     

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    +

     

    + + +

    Sample headline after anchor 'SecondAnchor'

    +

    Here is coded:

    +

    <a name="SecondAnchor" id="SecondAnchor"></a>

    +

    Example for use with Compiled Help Module (CHM)

    +

    This topic is used to show how to jump to bookmarks SecondAnchor.

    +

     

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/LinkPDFfromCHM.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/LinkPDFfromCHM.htm new file mode 100644 index 00000000..03098bb3 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/LinkPDFfromCHM.htm @@ -0,0 +1,39 @@ + + + + +Linking to PDF from CHM + + + + + + + + + + +
    + +

    Linking to PDF from CHM

    +

    This topic is only used to show linking from a compiled CHM to other files + and places. Open/Save dialog is used.

    +

    PDF

    +

    Link relative to PDF

    +
    +<p><a href="../embedded_files/example-embedded.pdf">Link relative to PDF</a></p>
    +
    +

     

    +

     

    +

     

    +

     

    + + + + + +
    back to top ...
    +
    +

     

    + + diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/Simple_link_example.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/Simple_link_example.htm new file mode 100644 index 00000000..7b3e1288 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/Simple_link_example.htm @@ -0,0 +1,112 @@ + + + + +Linking from CHM with standard HTML + + + + + + + + + + + + + + + + +
    + +

    Linking from CHM with standard HTML

    +

    This is a simple sample how to link from a compiled CHM to HTML files. Some + files are on a web server some are local and relative to the CHM file.

    +

     

    +

    Link relative to a HTML file that isn't compiled into the CHM

    + + +

    The following technique of linking is useful if one permanently must update + some files on the PC of the customer without compiling the CHM again. The external + file must reside in the CHM folder or a subfolder.

    +

    Link relative to a external HTML file (external_files/external_topic.htm) +

    + +

    Link code:

    +
    +<p>
    +<SCRIPT Language="JScript">
    +function parser(fn) {
    + var X, Y, sl, a, ra, link;
    + ra = /:/;
    + a = location.href.search(ra);
    + if (a == 2)
    +  X = 14;
    + else
    +  X = 7;
    +  sl = "\\";
    +  Y = location.href.lastIndexOf(sl) + 1;
    +  link = 'file:///' + location.href.substring(X, Y) + fn;
    +  location.href = link;
    + }
    +</SCRIPT>
    +</p>
    +
    +<p>
    +  <a onclick="parser('./external_files/external_topic.htm')"
    +  style="text-decoration: underline;
    +  color: green; cursor: hand">Link relative to a external HTML file (external_files/external_topic.htm)</a>
    +</p>
    +
    +

    Links to HTML pages on the web

    + + + + + + + + + + + + + +
    Windmill, Germany - Ditzum
    +

    In the past, energy was won with windmills in Germany.

    +

    See more information about + mills (click the link).

    +
    +

    These are modern wind energy converters today.

    +

    Open technical information on a web server with iframe inside your content window.

    +
    Enercon, Germany
    +

     

    + +

     

    + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/example-external-pdf.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/example-external-pdf.htm new file mode 100644 index 00000000..9d9d6361 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/example-external-pdf.htm @@ -0,0 +1,23 @@ + + +Example load PDF from TOC + + + + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/pop-up_example.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/pop-up_example.htm new file mode 100644 index 00000000..1f28dcf6 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/pop-up_example.htm @@ -0,0 +1,99 @@ + + + + +How to create PopUp + + + + + + + + + + + + + + + +
    + +

    PopUp Example

    +

    Code see below!

    +

    (not working for all browsers/browser versions - see your systems security + updates).

    +

    + Click here to see example information (PopUp).

    + +

    +

    +

    To change the flower picture, hover with your mouse pointer!

    +
    +

    Click + here to change the background color (PopUp).

    + + +

    +

    +

    To change the flower picture, hover with your mouse pointer!

    +
    +

    Another example to enlarge a screenshot (hover with mouse pointer):

    +

    See what happens .. +

    +

    To enlarge the screenshot hover with your mouse pointer!

    +
    +

    Another example to enlarge a screenshot (click to screenshot):

    +

    + +

    +
    +

    This is the code for the second text link:

    +
    <p>
    +<a class=popupspot
    href="JavaScript:hhctrl.TextPopup
    ('This is a standard HTMLHelp text-only popup. + See the nice flowers below.','Verdana,8',10,10,00000000,0x66ffff)">
    Click here to change the background color.</a> +</p> +
    +

    This is the code to change the flower picture:

    +
    +<p>
    +<img
    + onmouseover="(src='../images/wintertree.jpg')"
    + onmouseout="(src='../images/insekt.jpg')"
    + src="../images/insekt.jpg" alt="" border="0"> 
    </p> +
    +

    This is the code to enlarge the screenshot (hover):

    +
    <p>
    <img + src="../images/screenshot_small.png" alt="" border="0" + onmouseover="(src='../images/screenshot_big.png')" + onmouseout="(src='../images/screenshot_small.png')"> +</p>
    +

    This is the code to enlarge the screenshot (click):

    +
    <p>
    <img src="../images/screenshot_small.png" alt="" + onclick="this.src='../images/screenshot_big.png'" />
    </p>
    +

     

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/shortcut_link.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/shortcut_link.htm new file mode 100644 index 00000000..01d1992e --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/shortcut_link.htm @@ -0,0 +1,61 @@ + + + + +Using CHM shortcut links + + + + + + + + + + + + + + + + + + + + + +
    + +

    Using CHM shortcut links

    +

    This is a simple example of how to use shortcut links from a CHM file and jump + to a URL with the user's default browser.

    +

    Example:

    +

    Click me to go to www-help-info.de

    +

    Note:

    +
      +
    • Won't work on the web
    • +
    • Only works in compressed CHM file.
    • +
    • Doesn't work with "Open dialog". You have to save to local disk.
    • +
    • MyUniqueID must be a unique name for each shortcut you create in a HTML + file.
    • +
    +

    Put this code in your <head> section:

    +

    <OBJECT id=MyUniqueID type="application/x-oleobject"
    + classid="clsid:adb880a6-d8ff-11cf-9377-00aa003b7a11">
    + <PARAM name="Command" value="ShortCut">
    + <PARAM name="Item1" value=",http://www.help-info.de/index_e.htm,">
    + </OBJECT>

    +

    Put this code in your <body> section:

    +

    <p><a href="javascript:MyUniqueID.Click()">Click me to + go to www-help-info.de</a></p>

    + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-02.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-02.htm new file mode 100644 index 00000000..e6fb4530 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-02.htm @@ -0,0 +1,41 @@ + + + + +Topic 2 + + + + + + + + + + +
    +

    To do so insert following code to the HTML file at this place:

    +
      <object type="application/x-oleobject"
    +     classid="clsid:1e2a7bd0-dab9-11d0-b93a-00c04fc99f9e">
    +     <param name="New HTML file" value="topic-02.htm">
    +     <param name="New HTML title" value="Topic 2">
    +  </object>
    +

    Split example - Topic 2

    +

    This example is used to show how the SPLIT function is working for generating + sub-topics from one HTML file to the table of contents. This is the topic + 2 of one HTML file.

    +

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    + + + + +
    back to top ...
    +
    + + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-03.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-03.htm new file mode 100644 index 00000000..bdd34b32 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-03.htm @@ -0,0 +1,41 @@ + + + + +Topic 3 + + + + + + + + + + +
    +

    To do so insert following code to the HTML file at this place:

    +
      <object type="application/x-oleobject"
    +     classid="clsid:1e2a7bd0-dab9-11d0-b93a-00c04fc99f9e">
    +     <param name="New HTML file" value="topic-03.htm">
    +     <param name="New HTML title" value="Topic 3">
    +  </object>
    +

    Split example - Topic 3

    +

    This example is used to show how the SPLIT function is working for generating + sub-topics from one HTML file to the table of contents. This is the topic + 3 of one HTML file.

    +

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    +

     

    + + + + +
    back to top ...
    +
    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-04.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-04.htm new file mode 100644 index 00000000..59297630 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic-04.htm @@ -0,0 +1,23 @@ + + + + +Topic 4 + + + + + + + + + +
    +

    Split example - Topic 4

    +

    This is a short example text for Topic 4 for a small pop-up window.

    +

    See link at Topic 1.

    +

     

    +

     

    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic_split_example.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic_split_example.htm new file mode 100644 index 00000000..d623572e --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/topic_split_example.htm @@ -0,0 +1,67 @@ + + + + +Topic split example + + + + + + + + + + + + + + +
    + +

    Split example - Main Topic 1

    +

    It's possible to have one mega HTML file split into several files by using + a HHCTRL.OCX split file object tag in your HTML. This instructs the HTML Help + compiler to split the HTML file at the specific points where it finds this tag. + The object tag has the following format:

    +
      <object type="application/x-oleobject"
    +     classid="clsid:1e2a7bd0-dab9-11d0-b93a-00c04fc99f9e">
    +     <param name="New HTML file" value="a_new_file.htm">       e.g "topic-04.htm"
    +     <param name="New HTML title" value="My new topic title">  e.g. "Topic 4"
    +  </object>
    +

    The first value - "file" - specifies the name you want to give to + the file that would be created for this topic. The second value - "title" + - specifies what you would want in the <TITLE> tag for the document. You + shouldn't change any details apart from the value parameter. +

    +

    The file then gets created within the .chm file at compile time, though you'll + never see it on disk. A pretty neat feature.

    +

    The trick of course is that if you have links in your .chm file, whether from + the contents/index or from topic to topic, you'll need to reference the file + name that you specify in the tag above.

    +

    If you are using HTML Help Workshop, you can use the Split File command on + the Edit menu to insert the <object> tags.

    +

    The following hyperlink displays a topic file in popup-type window:

    +

    Link from this main to topic 4 (only working in the compiled help CHM + and for a locally saved CHM)

    +
    <a href="#"
    + onClick="window.open('topic-04.htm','Sample',
    + 'toolbar=no,width=200,height=200,left=500,top=400,
    + status=no,scrollbars=no,resize=no');return false">
    + Link from this main to topic 4</a>
    +

    +

    Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam
    +
    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. + At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, + no sea takimata sanctus est Lorem ipsum dolor sit amet.

    + + + + +
    back to top ...
    +
    + + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/using_window_open.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/using_window_open.htm new file mode 100644 index 00000000..dbca0d8f --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/using_window_open.htm @@ -0,0 +1,62 @@ + + + + +Using window.open + + + + + + + + + + + + + + + + +
    + +

    Using window.open

    +

    This is a simple example how to use the "window.open" command

    +

    Click here to open a HTML file

    +

     

    +

    Neues Fenster +

    +

    <script type="text/javascript">
    + function NeuFenster () {
    + MeinFenster = window.open("datei2.htm", "Zweitfenster", + "width=300,height=200,scrollbars");
    + MeinFenster.focus();
    + }
    + </script>
    +

    +

     

    +

    Put this code in your <body> section:

    +

    <A HREF= "#" onClick="window.open('/external_files/external.htm',
    + 'Window Open Sample','toolbar=no,width=850,height=630,left=300,top=200,
    + status=no,scrollbars=no,resize=no');return false"> Click here to open + a HTML file</A>

    + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm new file mode 100644 index 00000000..44dbbbc2 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm @@ -0,0 +1,75 @@ + + + + +XP Style for RadioButton and Check Boxes + + + + + + + + + + + + + +
    + +

    XP Style for RadioButton and Check Boxes

    +

    This is a simple example how to use XP Style for RadioButton and Check Boxes

    +

     

    + +

    Click to select a special pizza

    + +
    +

    + + Salami
    + + Pilze
    + + Sardellen

    +

     

    +
    + +

    Your manner of payment:

    + +
    +

    + + Mastercard
    + + Visa
    + + American Express

    +
    +

     

    +

    Select also another favorite

    + +
    +

    + +

    +
    + + + + + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/design.css b/epublib-tools/src/test/resources/chm1/design.css new file mode 100644 index 00000000..572fd425 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/design.css @@ -0,0 +1,177 @@ +/* Formatvorlage*/ +/* (c) Ulrich Kulle Hannover*/ +/*---------------------------------------------*/ +/* Die Formatierungen gelten fr alle Dateien,*/ +/* die im Hauptframe angezeigt werden*/ + +/*mgliche Einstellung Rollbalken MS IE 5.5*/ +/*scrollbar-3d-light-color : red*/ +/*scrollbar-arrow-color : yellow*/ +/*scrollbar-base-color : green*/ +/*scrollbar-dark-shadow-color : orange*/ +/*scrollbar-face-color : purple */ +/*scrollbar-highloight-color : black*/ +/*scrollbar-shadow-color : blue */ + +/*BODY-tag Steuermglichkeit */ +/*margin-top:0px; margin-left=0px; */ + +body +{ + background: #ffffff; + scrollbar-base-color: #A88000; + scrollbar-arrow-color: yellow; + margin-left : 0px; + margin-top: 0px; + margin-right: 0px; +} + +hr { +color: #FFCC00; +margin-left : 10px; +margin-right: 10px; +} + +hr.simple { +margin-left : 10px; +margin-right: 10px; +} +h1 { +background-image: url(images/verlauf-gelb.jpg); +font-family: Verdana; +font-size: 10pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h2 { +background-image: url(images/verlauf-blau.jpg); +font-family: Verdana; +font-size: 10pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h3 { +background-image: url(images/verlauf-rot.jpg); +color:white; +font-family: Verdana; +font-size: 10pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h4 { +background-image: url(images/verlauf-gelb.jpg); +font-family: Verdana; +font-size: 8pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h5{ +background-image: url(images/verlauf-blau.jpg); +font-family: Verdana; +font-size: 8pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +h6 { +background-image: 
url(images/verlauf-rot.jpg); +color:white; +font-family: Verdana; +font-size: 8pt; +font-weight: bold; +margin-left : 10px; +margin-right: 10px; +} +li { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +margin-left : 10px; +} +p { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +margin-left : 10px; +margin-right: 10px; +} +/* note box */ +p.note { + background-color : #FFFFCC; + border : 1px solid #FFCC00; + clear : both; + color : black; + margin : 12px 30px 12px 20px; + padding : 6px 6px; +} +/* used in tutorial */ +p.tip { + background-color : #FFFFCC; + border : 1px solid black; + clear : both; + color : black; + margin-left : 10%; + padding : 6px 6px; + width : 90%; +} +/* pre note box */ +pre { + background-color : #FFFFCC; + border : 1px solid #FFCC00; + clear : both; + color : black; + margin : 12px 30px 12px 20px; + padding : 6px 6px; +} +table.sitemap { +margin-left: 10px; +} + +table.code { +margin-left:10px; +} + +table.top { +background-image: url(images/site/help-info_logo_3px.jpg); +margin-left:0px; +margin-top:0px; +} + +td.siteheader { + background-color:#E10033; + COLOR:white; + padding-left:3px; +} + +td { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +} +tr { +font-family: Verdana, Arial, Helvetica; +font-size:10pt; +line-height:13pt; +} + +ul{ + list-style-image : url(images/list_arrow.gif); + list-style-position : outside; +} +ul.extlinklist { + list-style-image : url(images/extlink.gif); +} + +A:visited { + color: Blue; + text-decoration: none; + font-weight: bold; + font-size: 10pt +} +A:link {color: #800080;text-decoration: none;font-weight: bold;font-size: 10pt} +A:hover {color: #FF0000;text-decoration: underline;font-weight: bold;font-size: 10pt} +A:active {color: #FF0000;text-decoration: none;font-weight: bold;font-size: 10pt} diff --git a/epublib-tools/src/test/resources/chm1/embedded_files/example-embedded.pdf 
b/epublib-tools/src/test/resources/chm1/embedded_files/example-embedded.pdf new file mode 100644 index 00000000..53e936b8 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/embedded_files/example-embedded.pdf differ diff --git a/epublib-tools/src/test/resources/chm1/external_files/external_topic.htm b/epublib-tools/src/test/resources/chm1/external_files/external_topic.htm new file mode 100644 index 00000000..a1388261 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/external_files/external_topic.htm @@ -0,0 +1,47 @@ + + + + +External Topic + + + + + + + + + + + + +
    + +

    External Topic

    +

     

    +

    This is an external topic that resides relative to the CHM files and isn't compiled + into the CHM file. Here it's used to show how to link to external files in a + CHM topic window.

    +

    Delete links in all HTML files of your project - otherwise the external file + is compiled to the CHM file.

    +

    Make a copy of the external file and delete the file in your project structure + before the last compile runs. So the file isn't compiled into the CHM file. + But you have to install the external file on the customers PC.

    +

    To try this example you must download the complete + project example to a local folder, delete all files except "CHM-example.chm" + and the folder "external_files".

    +

    Edit the following date in the external HTML file "external_topic.htm" + to check that you can update the HTML file without recompiling the CHM file:

    +

     

    +

    2005-05-17

    +

     

    +
    + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/filelist.txt b/epublib-tools/src/test/resources/chm1/filelist.txt new file mode 100644 index 00000000..9582bc44 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/filelist.txt @@ -0,0 +1,64 @@ +#IDXHDR +#IVB +#STRINGS +#SYSTEM +#TOPICS +#URLSTR +#URLTBL +#WINDOWS +$FIftiMain +$OBJINST +$WWAssociativeLinks/BTree +$WWAssociativeLinks/Data +$WWAssociativeLinks/Map +$WWAssociativeLinks/Property +$WWKeywordLinks/BTree +$WWKeywordLinks/Data +$WWKeywordLinks/Map +$WWKeywordLinks/Property +CHM-example.hhc +CHM-example.hhk +Context-sensitive_example/contextID-10000.htm +Context-sensitive_example/contextID-10010.htm +Context-sensitive_example/contextID-20000.htm +Context-sensitive_example/contextID-20010.htm +design.css +embedded_files/example-embedded.pdf +external_files/external_topic.htm +Garden/flowers.htm +Garden/garden.htm +Garden/tree.htm +HTMLHelp_Examples/CloseWindowAutomatically.htm +HTMLHelp_Examples/example-external-pdf.htm +HTMLHelp_Examples/Jump_to_anchor.htm +HTMLHelp_Examples/LinkPDFfromCHM.htm +HTMLHelp_Examples/pop-up_example.htm +HTMLHelp_Examples/shortcut_link.htm +HTMLHelp_Examples/Simple_link_example.htm +HTMLHelp_Examples/topic-02.htm +HTMLHelp_Examples/topic-03.htm +HTMLHelp_Examples/topic-04.htm +HTMLHelp_Examples/topic_split_example.htm +HTMLHelp_Examples/using_window_open.htm +HTMLHelp_Examples/xp-style_radio-button_check-boxes.htm +images/blume.jpg +images/ditzum.jpg +images/eiche.jpg +images/extlink.gif +images/insekt.jpg +images/list_arrow.gif +images/lupine.jpg +images/riffel_40px.jpg +images/riffel_helpinformation.jpg +images/riffel_home.jpg +images/rotor_enercon.jpg +images/screenshot_big.png +images/screenshot_small.png +images/up_rectangle.png +images/verlauf-blau.jpg +images/verlauf-gelb.jpg +images/verlauf-rot.jpg +images/welcome_small_big-en.gif +images/wintertree.jpg +index.htm +topic.txt \ No newline at end of file diff --git 
a/epublib-tools/src/test/resources/chm1/images/blume.jpg b/epublib-tools/src/test/resources/chm1/images/blume.jpg new file mode 100644 index 00000000..b3735fb9 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/blume.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/ditzum.jpg b/epublib-tools/src/test/resources/chm1/images/ditzum.jpg new file mode 100644 index 00000000..d461951e Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/ditzum.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/eiche.jpg b/epublib-tools/src/test/resources/chm1/images/eiche.jpg new file mode 100644 index 00000000..507bfa1d Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/eiche.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/extlink.gif b/epublib-tools/src/test/resources/chm1/images/extlink.gif new file mode 100644 index 00000000..5f37645d Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/extlink.gif differ diff --git a/epublib-tools/src/test/resources/chm1/images/insekt.jpg b/epublib-tools/src/test/resources/chm1/images/insekt.jpg new file mode 100644 index 00000000..09f8d5f9 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/insekt.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/list_arrow.gif b/epublib-tools/src/test/resources/chm1/images/list_arrow.gif new file mode 100644 index 00000000..9d0d3607 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/list_arrow.gif differ diff --git a/epublib-tools/src/test/resources/chm1/images/lupine.jpg b/epublib-tools/src/test/resources/chm1/images/lupine.jpg new file mode 100644 index 00000000..0e0ea94f Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/lupine.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/riffel_40px.jpg b/epublib-tools/src/test/resources/chm1/images/riffel_40px.jpg new file 
mode 100644 index 00000000..70ee391a Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/riffel_40px.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/riffel_helpinformation.jpg b/epublib-tools/src/test/resources/chm1/images/riffel_helpinformation.jpg new file mode 100644 index 00000000..2e9843f8 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/riffel_helpinformation.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/riffel_home.jpg b/epublib-tools/src/test/resources/chm1/images/riffel_home.jpg new file mode 100644 index 00000000..e0d0ba30 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/riffel_home.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/rotor_enercon.jpg b/epublib-tools/src/test/resources/chm1/images/rotor_enercon.jpg new file mode 100644 index 00000000..844539ea Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/rotor_enercon.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/screenshot_big.png b/epublib-tools/src/test/resources/chm1/images/screenshot_big.png new file mode 100644 index 00000000..e5aa0f0e Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/screenshot_big.png differ diff --git a/epublib-tools/src/test/resources/chm1/images/screenshot_small.png b/epublib-tools/src/test/resources/chm1/images/screenshot_small.png new file mode 100644 index 00000000..a4398f4f Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/screenshot_small.png differ diff --git a/epublib-tools/src/test/resources/chm1/images/up_rectangle.png b/epublib-tools/src/test/resources/chm1/images/up_rectangle.png new file mode 100644 index 00000000..68c1999c Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/up_rectangle.png differ diff --git a/epublib-tools/src/test/resources/chm1/images/verlauf-blau.jpg 
b/epublib-tools/src/test/resources/chm1/images/verlauf-blau.jpg new file mode 100644 index 00000000..7622d8f4 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/verlauf-blau.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/verlauf-gelb.jpg b/epublib-tools/src/test/resources/chm1/images/verlauf-gelb.jpg new file mode 100644 index 00000000..3c2d6921 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/verlauf-gelb.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/verlauf-rot.jpg b/epublib-tools/src/test/resources/chm1/images/verlauf-rot.jpg new file mode 100644 index 00000000..c30b3fc7 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/verlauf-rot.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/images/welcome_small_big-en.gif b/epublib-tools/src/test/resources/chm1/images/welcome_small_big-en.gif new file mode 100644 index 00000000..70427cba Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/welcome_small_big-en.gif differ diff --git a/epublib-tools/src/test/resources/chm1/images/wintertree.jpg b/epublib-tools/src/test/resources/chm1/images/wintertree.jpg new file mode 100644 index 00000000..006e1836 Binary files /dev/null and b/epublib-tools/src/test/resources/chm1/images/wintertree.jpg differ diff --git a/epublib-tools/src/test/resources/chm1/index.htm b/epublib-tools/src/test/resources/chm1/index.htm new file mode 100644 index 00000000..9d9514f4 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/index.htm @@ -0,0 +1,43 @@ + + + + +Welcome + + + + + + + + + + + + +
    + +

    Welcome

    +

    +

    .. to CHM examples!

    +

    HTMLHelp is the current help system for Microsoft Windows. This file includes + some examples how to use Microsoft HTMLHelp and is used to show how to work + with HTMLHelp 1.x CHM files in Visual Basic Applications.

    +

    This "Welcome" page is the default page of the compiled help module + (CHM).

    +

     

    +

     

    +

    Version Information:

    +

    Release: 2005-07-17

    +

    (c) help-info.de

    +

     

    +
    + + + + +
    back to top ...
    +
    +

     

    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/chm1/topic.txt b/epublib-tools/src/test/resources/chm1/topic.txt new file mode 100644 index 00000000..1193ad86 --- /dev/null +++ b/epublib-tools/src/test/resources/chm1/topic.txt @@ -0,0 +1,18 @@ +;------------------------------------------------- +; topic.h file example for HTMLHelp (CHM) +; www.help-info.de +; +; +; This is a file including the ID and PopUp text +;------------------------------------------------- +.topic 900;nohelp +Sorry, no help available! + +.topic 100;PopUp_AddressData_btnOK +This is context sensitive help text for a button (ID: IDH_100). + +.topic 110;PopUp_AddressData_txtFirstName +This is context sensitive help text for a text box (ID: IDH_110). + +.topic 120;PopUp_AddressData_txtLastName +This is context sensitive help text for a text box (ID: IDH_120). diff --git a/epublib-tools/src/test/resources/holmes_scandal_bohemia.html b/epublib-tools/src/test/resources/holmes_scandal_bohemia.html new file mode 100644 index 00000000..99f7ef7c --- /dev/null +++ b/epublib-tools/src/test/resources/holmes_scandal_bohemia.html @@ -0,0 +1,942 @@ + + + + + +The Project Gutenberg eBook of The Adventures of Sherlock +Holmes, by Sir Arthur Conan Doyle + + + + + + +
    +
    +
    +

    THE ADVENTURES OF
    + +SHERLOCK HOLMES

    +
    +

    BY

    +
    +

    SIR ARTHUR CONAN DOYLE

    +
    +
    +
    + +


    +To Sherlock Holmes she is always the + +woman. I have seldom heard him mention her under any other name. In his +eyes she eclipses and predominates the whole of her sex. It was not that +he felt any emotion akin to love for Irene Adler. All emotions, and that +one particularly, were abhorrent to his cold, precise but admirably +balanced mind. He was, I take it, the most perfect reasoning and +observing machine that the world has seen, but as a lover he would have +placed himself in a false position. He never spoke of the softer +passions, save with a gibe and a sneer. They were admirable things for +the observer—excellent for drawing the veil from men’s motives and +actions. But for the trained reasoner to admit such intrusions into his +own delicate and finely adjusted temperament was to introduce a +distracting factor which might throw a doubt upon all his mental +results. Grit in a sensitive instrument, or a crack in one of his own +high-power lenses, would not be more disturbing than a strong emotion in +a nature such as his. And yet there was but one woman to him, and that +woman was the late Irene Adler, of dubious and questionable memory.

    +

    I had seen little of Holmes lately. My marriage had drifted us +away from each other. My own complete happiness, and the home-centred +interests which rise up around the man who first finds himself master of +his own establishment, were sufficient to absorb all my attention, while +Holmes, who loathed every form of society with his whole Bohemian soul, +remained in our lodgings in Baker Street, buried among his old books, +and alternating from week to week between cocaine and ambition, the +drowsiness of the drug, and the fierce energy of his own keen nature. He +was still, as ever, deeply attracted by the study of crime, and occupied +his immense faculties and extraordinary powers of observation in +following out those clues, and clearing up those mysteries which had +been abandoned as hopeless by the official police. From time to time I +heard some vague account of his doings: of his summons to Odessa in the +case of the Trepoff murder, of his clearing up of the singular tragedy +of the Atkinson brothers at Trincomalee, and finally of the mission +which he had accomplished so delicately and successfully for the +reigning family of Holland. Beyond these signs of his activity, however, +which I merely shared with all the readers of the daily press, I knew +little of my former friend and companion.

    +

    One night—it was on the twentieth of March, 1888—I was returning +from a journey to a patient (for I had now returned to civil practice), +when my way led me through Baker Street. As I passed the well-remembered +door, which must always be associated in my mind with my wooing, and +with the dark incidents of the Study in Scarlet, I was seized with a +keen desire to see Holmes again, and to know how he was employing his +extraordinary powers. His rooms were brilliantly lit, and, even as I +looked up, I saw his tall, spare figure pass twice in a dark silhouette +against the blind. He was pacing the room swiftly, eagerly, with his +head sunk upon his chest and his hands clasped behind him. To me, who +knew his every mood and habit, his attitude and manner told their own +story. He was at work again. He had risen out of his drug-created dreams +and was hot upon the scent of some new problem. I rang the bell and was +shown up to the chamber which had formerly been in part my own.

    +

    His manner was not effusive. It seldom was; but he was glad, I +think, to see me. With hardly a word spoken, but with a kindly eye, he +waved me to an armchair, threw across his case of cigars, and indicated +a spirit case and a gasogene in the corner. Then he stood before the +fire and looked me over in his singular introspective fashion.

    +

    “Wedlock suits you,” he remarked. “I think, Watson, that you have +put on seven and a half pounds since I saw you.”

    +

    “Seven!” I answered.

    +

    “Indeed, I should have thought a little more. Just a trifle more, +I fancy, Watson. And in practice again, I observe. You did not tell me +that you intended to go into harness.”

    +

    “Then, how do you know?”

    +

    “I see it, I deduce it. How do I know that you have been getting +yourself very wet lately, and that you have a most clumsy and careless +servant girl?”

    + +

    “My dear Holmes,” said I, “this is too much. You would certainly +have been burned, had you lived a few centuries ago. It is true that I +had a country walk on Thursday and came home in a dreadful mess, but as +I have changed my clothes I can’t imagine how you deduce it. As to Mary +Jane, she is incorrigible, and my wife has given her notice, but there, +again, I fail to see how you work it out.”

    +

    He chuckled to himself and rubbed his long, nervous hands +together.

    +

    “It is simplicity itself,” said he; “my eyes tell me that on the +inside of your left shoe, just where the firelight strikes it, the +leather is scored by six almost parallel cuts. Obviously they have been +caused by someone who has very carelessly scraped round the edges of the +sole in order to remove crusted mud from it. Hence, you see, my double +deduction that you had been out in vile weather, and that you had a +particularly malignant boot-slitting specimen of the London slavey. As +to your practice, if a gentleman walks into my rooms smelling of +iodoform, with a black mark of nitrate of silver upon his right +forefinger, and a bulge on the right side of his top-hat to show where +he has secreted his stethoscope, I must be dull, indeed, if I do not +pronounce him to be an active member of the medical profession.”

    +

    I could not help laughing at the ease with which he explained his +process of deduction. “When I hear you give your reasons,” I remarked, +“the thing always appears to me to be so ridiculously simple that I +could easily do it myself, though at each successive instance of your +reasoning I am baffled until you explain your process. And yet I believe +that my eyes are as good as yours.”

    +

    “Quite so,” he answered, lighting a cigarette, and throwing +himself down into an armchair. “You see, but you do not observe. The +distinction is clear. For example, you have frequently seen the steps +which lead up from the hall to this room.”

    +

    “Frequently.”

    +

    “How often?”

    +

    “Well, some hundreds of times.”

    +

    “Then how many are there?”

    + +

    “How many? I don’t know.”

    +

    “Quite so! You have not observed. And yet you have seen. That is +just my point. Now, I know that there are seventeen steps, because I +have both seen and observed. By the way, since you are interested in +these little problems, and since you are good enough to chronicle one or +two of my trifling experiences, you may be interested in this.” He threw +over a sheet of thick, pink-tinted notepaper which had been lying open +upon the table. “It came by the last post,” said he. “Read it aloud.”

    +

    The note was undated, and without either signature or address.

    +

    “There will call upon you to-night, at a quarter to eight +o’clock,” it said, “a gentleman who desires to consult you upon a matter +of the very deepest moment. Your recent services to one of the royal +houses of Europe have shown that you are one who may safely be trusted +with matters which are of an importance which can hardly be exaggerated. +This account of you we have from all quarters received. Be in your +chamber then at that hour, and do not take it amiss if your visitor wear +a mask.”

    +

    “This is indeed a mystery,” I remarked. “What do you imagine that +it means?”

    +

    “I have no data yet. It is a capital mistake to theorise before +one has data. Insensibly one begins to twist facts to suit theories, +instead of theories to suit facts. But the note itself. What do you +deduce from it?”

    +

    I carefully examined the writing, and the paper upon which it was +written.

    +

    “The man who wrote it was presumably well to do,” I remarked, +endeavouring to imitate my companion’s processes. “Such paper could not +be bought under half a crown a packet. It is peculiarly strong and +stiff.”

    +

    “Peculiar—that is the very word,” said Holmes. “It is not an +English paper at all. Hold it up to the light.”

    + +

    I did so, and saw a large “E” with a small “g,” a “P,” and a +large “G” with a small “t” woven into the texture of the paper.

    +

    “What do you make of that?” asked Holmes.

    +

    “The name of the maker, no doubt; or his monogram, rather.”

    +

    “Not at all. The ‘G’ with the small ‘t’ stands for +‘Gesellschaft,’ which is the German for ‘Company.’ It is a customary +contraction like our ‘Co.’ ‘P,’ of course, stands for ‘Papier.’ Now for +the ‘Eg.’ Let us glance at our Continental Gazetteer.” He took down a +heavy brown volume from his shelves. “Eglow, Eglonitz—here we are, +Egria. It is in a German-speaking country—in Bohemia, not far from +Carlsbad. ‘Remarkable as being the scene of the death of Wallenstein, +and for its numerous glass-factories and paper-mills.’ Ha, ha, my boy, +what do you make of that?” His eyes sparkled, and he sent up a great +blue triumphant cloud from his cigarette.

    +

    “The paper was made in Bohemia,” I said.

    +

    “Precisely. And the man who wrote the note is a German. Do you +note the peculiar construction of the sentence—‘This account of you we +have from all quarters received.’ A Frenchman or Russian could not have +written that. It is the German who is so uncourteous to his verbs. It +only remains, therefore, to discover what is wanted by this German who +writes upon Bohemian paper and prefers wearing a mask to showing his +face. And here he comes, if I am not mistaken, to resolve all our +doubts.”

    +

    As he spoke there was the sharp sound of horses’ hoofs and +grating wheels against the curb, followed by a sharp pull at the bell. +Holmes whistled.

    +

    “A pair, by the sound,” said he. “Yes,” he continued, glancing +out of the window. “A nice little brougham and a pair of beauties. A +hundred and fifty guineas apiece. There’s money in this case, Watson, if +there is nothing else.”

    +

    “I think that I had better go, Holmes.”

    + +

    “Not a bit, Doctor. Stay where you are. I am lost without my +Boswell. And this promises to be interesting. It would be a pity to miss +it.”

    +

    “But your client—”

    +

    “Never mind him. I may want your help, and so may he. Here he +comes. Sit down in that armchair, Doctor, and give us your best +attention.”

    +

    A slow and heavy step, which had been heard upon the stairs and +in the passage, paused immediately outside the door. Then there was a +loud and authoritative tap.

    +

    “Come in!” said Holmes.

    +

    A man entered who could hardly have been less than six feet six +inches in height, with the chest and limbs of a Hercules. His dress was +rich with a richness which would, in England, be looked upon as akin to +bad taste. Heavy bands of astrakhan were slashed across the sleeves and +fronts of his double-breasted coat, while the deep blue cloak which was +thrown over his shoulders was lined with flame-coloured silk and secured +at the neck with a brooch which consisted of a single flaming beryl. +Boots which extended halfway up his calves, and which were trimmed at +the tops with rich brown fur, completed the impression of barbaric +opulence which was suggested by his whole appearance. He carried a +broad-brimmed hat in his hand, while he wore across the upper part of +his face, extending down past the cheekbones, a black vizard mask, which +he had apparently adjusted that very moment, for his hand was still +raised to it as he entered. From the lower part of the face he appeared +to be a man of strong character, with a thick, hanging lip, and a long, +straight chin suggestive of resolution pushed to the length of +obstinacy.

    +

    “You had my note?” he asked with a deep harsh voice and a +strongly marked German accent. “I told you that I would call.” He looked +from one to the other of us, as if uncertain which to address.

    +

    “Pray take a seat,” said Holmes. “This is my friend and +colleague, Dr. Watson, who is occasionally good enough to help me in my +cases. Whom have I the honour to address?”

    +

    “You may address me as the Count Von Kramm, a Bohemian nobleman. +I understand that this gentleman, your friend, is a man of honour and +discretion, whom I may trust with a matter of the most extreme +importance. If not, I should much prefer to communicate with you alone.”

    + +

    I rose to go, but Holmes caught me by the wrist and pushed me +back into my chair. “It is both, or none,” said he. “You may say before +this gentleman anything which you may say to me.”

    +

    The Count shrugged his broad shoulders. “Then I must begin,” said +he, “by binding you both to absolute secrecy for two years; at the end +of that time the matter will be of no importance. At present it is not +too much to say that it is of such weight it may have an influence upon +European history.”

    +

    “I promise,” said Holmes.

    +

    “And I.”

    +

    “You will excuse this mask,” continued our strange visitor. “The +august person who employs me wishes his agent to be unknown to you, and +I may confess at once that the title by which I have just called myself +is not exactly my own.”

    +

    “I was aware of it,” said Holmes dryly.

    +

    “The circumstances are of great delicacy, and every precaution +has to be taken to quench what might grow to be an immense scandal and +seriously compromise one of the reigning families of Europe. To speak +plainly, the matter implicates the great House of Ormstein, hereditary +kings of Bohemia.”

    +

    “I was also aware of that,” murmured Holmes, settling himself +down in his armchair and closing his eyes.

    +

    Our visitor glanced with some apparent surprise at the languid, +lounging figure of the man who had been no doubt depicted to him as the +most incisive reasoner and most energetic agent in Europe. Holmes slowly +reopened his eyes and looked impatiently at his gigantic client.

    + +

    “If your Majesty would condescend to state your case,” he +remarked, “I should be better able to advise you.”

    +

    The man sprang from his chair and paced up and down the room in +uncontrollable agitation. Then, with a gesture of desperation, he tore +the mask from his face and hurled it upon the ground. “You are right,” +he cried; “I am the King. Why should I attempt to conceal it?”

    +

    “Why, indeed?” murmured Holmes. “Your Majesty had not spoken +before I was aware that I was addressing Wilhelm Gottsreich Sigismond +von Ormstein, Grand Duke of Cassel-Felstein, and hereditary King of +Bohemia.”

    +

    “But you can understand,” said our strange visitor, sitting down +once more and passing his hand over his high white forehead, “you can +understand that I am not accustomed to doing such business in my own +person. Yet the matter was so delicate that I could not confide it to an +agent without putting myself in his power. I have come incognito +from Prague for the purpose of consulting you.”

    +

    “Then, pray consult,” said Holmes, shutting his eyes once more.

    +

    “The facts are briefly these: Some five years ago, during a +lengthy visit to Warsaw, I made the acquaintance of the well-known +adventuress, Irene Adler. The name is no doubt familiar to you.”

    +

    “Kindly look her up in my index, Doctor,” murmured Holmes without +opening his eyes. For many years he had adopted a system of docketing +all paragraphs concerning men and things, so that it was difficult to +name a subject or a person on which he could not at once furnish +information. In this case I found her biography sandwiched in between +that of a Hebrew rabbi and that of a staff-commander who had written a +monograph upon the deep-sea fishes.

    + +

    “Let me see!” said Holmes. “Hum! Born in New Jersey in the year +1858. Contralto—hum! La Scala, hum! Prima donna Imperial Opera of +Warsaw—yes! Retired from operatic stage—ha! Living in London—quite so! +Your Majesty, as I understand, became entangled with this young person, +wrote her some compromising letters, and is now desirous of getting +those letters back.”

    +

    “Precisely so. But how—”

    +

    “Was there a secret marriage?”

    +

    “None.”

    +

    “No legal papers or certificates?”

    +

    “None.”

    +

    “Then I fail to follow your Majesty. If this young person should +produce her letters for blackmailing or other purposes, how is she to +prove their authenticity?”

    +

    “There is the writing.”

    +

    “Pooh, pooh! Forgery.”

    + +

    “My private note-paper.”

    +

    “Stolen.”

    +

    “My own seal.”

    +

    “Imitated.”

    +

    “My photograph.”

    +

    “Bought.”

    +

    “We were both in the photograph.”

    +

    “Oh, dear! That is very bad! Your Majesty has indeed committed an +indiscretion.”

    +

    “I was mad—insane.”

    + +

    “You have compromised yourself seriously.”

    +

    “I was only Crown Prince then. I was young. I am but thirty now.”

    +

    “It must be recovered.”

    +

    “We have tried and failed.”

    +

    “Your Majesty must pay. It must be bought.”

    +

    “She will not sell.”

    +

    “Stolen, then.”

    +

    “Five attempts have been made. Twice burglars in my pay ransacked +her house. Once we diverted her luggage when she travelled. Twice she +has been waylaid. There has been no result.”

    +

    “No sign of it?”

    + +

    “Absolutely none.”

    +

    Holmes laughed. “It is quite a pretty little problem,” said he.

    +

    “But a very serious one to me,” returned the King reproachfully.

    +

    “Very, indeed. And what does she propose to do with the +photograph?”

    +

    “To ruin me.”

    +

    “But how?”

    +

    “I am about to be married.”

    +

    “So I have heard.”

    +

    “To Clotilde Lothman von Saxe-Meningen, second daughter of the +King of Scandinavia. You may know the strict principles of her family. +She is herself the very soul of delicacy. A shadow of a doubt as to my +conduct would bring the matter to an end.”

    + +

    “And Irene Adler?”

    +

    “Threatens to send them the photograph. And she will do it. I +know that she will do it. You do not know her, but she has a soul of +steel. She has the face of the most beautiful of women, and the mind of +the most resolute of men. Rather than I should marry another woman, +there are no lengths to which she would not go—none.”

    +

    “You are sure that she has not sent it yet?”

    +

    “I am sure.”

    +

    “And why?”

    +

    “Because she has said that she would send it on the day when the +betrothal was publicly proclaimed. That will be next Monday.”

    +

    “Oh, then we have three days yet,” said Holmes with a yawn. “That +is very fortunate, as I have one or two matters of importance to look +into just at present. Your Majesty will, of course, stay in London for +the present?”

    +

    “Certainly. You will find me at the Langham under the name of the +Count Von Kramm.”

    +

    “Then I shall drop you a line to let you know how we progress.”

    + +

    “Pray do so. I shall be all anxiety.”

    +

    “Then, as to money?”

    +

    “You have carte blanche.”

    +

    “Absolutely?”

    +

    “I tell you that I would give one of the provinces of my kingdom +to have that photograph.”

    +

    “And for present expenses?”

    +

    The King took a heavy chamois leather bag from under his cloak +and laid it on the table.

    +

    “There are three hundred pounds in gold and seven hundred in +notes,” he said.

    + +

    Holmes scribbled a receipt upon a sheet of his note-book and +handed it to him.

    +

    “And Mademoiselle’s address?” he asked.

    +

    “Is Briony Lodge, Serpentine Avenue, St. John’s Wood.”

    +

    Holmes took a note of it. “One other question,” said he. “Was the +photograph a cabinet?”

    +

    “It was.”

    +

    “Then, good-night, your Majesty, and I trust that we shall soon +have some good news for you. And good-night, Watson,” he added, as the +wheels of the royal brougham rolled down the street. “If you will be +good enough to call to-morrow afternoon at three o’clock I should like +to chat this little matter over with you.”
    +
    +

    +
    II.
    +


    +At three o’clock precisely I was at Baker Street, but Holmes had not yet +returned. The landlady informed me that he had left the house shortly +after eight o’clock in the morning. I sat down beside the fire, however, +with the intention of awaiting him, however long he might be. I was +already deeply interested in his inquiry, for, though it was surrounded +by none of the grim and strange features which were associated with the +two crimes which I have already recorded, still, the nature of the case +and the exalted station of his client gave it a character of its own. +Indeed, apart from the nature of the investigation which my friend had +on hand, there was something in his masterly grasp of a situation, and +his keen, incisive reasoning, which made it a pleasure to me to study +his system of work, and to follow the quick, subtle methods by which he +disentangled the most inextricable mysteries. So accustomed was I to his +invariable success that the very possibility of his failing had ceased +to enter into my head.

    + +

    It was close upon four before the door opened, and a +drunken-looking groom, ill-kempt and side-whiskered, with an inflamed +face and disreputable clothes, walked into the room. Accustomed as I was +to my friend’s amazing powers in the use of disguises, I had to look +three times before I was certain that it was indeed he. With a nod he +vanished into the bedroom, whence he emerged in five minutes +tweed-suited and respectable, as of old. Putting his hands into his +pockets, he stretched out his legs in front of the fire and laughed +heartily for some minutes.

    +

    “Well, really!” he cried, and then he choked and laughed again +until he was obliged to lie back, limp and helpless, in the chair.

    +

    “What is it?”

    +

    “It’s quite too funny. I am sure you could never guess how I +employed my morning, or what I ended by doing.”

    +

    “I can’t imagine. I suppose that you have been watching the +habits, and perhaps the house, of Miss Irene Adler.”

    +

    “Quite so; but the sequel was rather unusual. I will tell you, +however. I left the house a little after eight o’clock this morning in +the character of a groom out of work. There is a wonderful sympathy and +freemasonry among horsey men. Be one of them, and you will know all that +there is to know. I soon found Briony Lodge. It is a bijou villa, +with a garden at the back, but built out in front right up to the road, +two stories. Chubb lock to the door. Large sitting-room on the right +side, well furnished, with long windows almost to the floor, and those +preposterous English window fasteners which a child could open. Behind +there was nothing remarkable, save that the passage window could be +reached from the top of the coach-house. I walked round it and examined +it closely from every point of view, but without noting anything else of +interest.

    +

    “I then lounged down the street and found, as I expected, that +there was a mews in a lane which runs down by one wall of the garden. I +lent the ostlers a hand in rubbing down their horses, and received in +exchange twopence, a glass of half-and-half, two fills of shag tobacco, +and as much information as I could desire about Miss Adler, to say +nothing of half a dozen other people in the neighbourhood in whom I was +not in the least interested, but whose biographies I was compelled to +listen to.”

    + +

    “And what of Irene Adler?” I asked.

    +

    “Oh, she has turned all the men’s heads down in that part. She is +the daintiest thing under a bonnet on this planet. So say the +Serpentine-mews, to a man. She lives quietly, sings at concerts, drives +out at five every day, and returns at seven sharp for dinner. Seldom +goes out at other times, except when she sings. Has only one male +visitor, but a good deal of him. He is dark, handsome, and dashing, +never calls less than once a day, and often twice. He is a Mr. Godfrey +Norton, of the Inner Temple. See the advantages of a cabman as a +confidant. They had driven him home a dozen times from Serpentine-mews, +and knew all about him. When I had listened to all they had to tell, I +began to walk up and down near Briony Lodge once more, and to think over +my plan of campaign.

    +

    “This Godfrey Norton was evidently an important factor in the +matter. He was a lawyer. That sounded ominous. What was the relation +between them, and what the object of his repeated visits? Was she his +client, his friend, or his mistress? If the former, she had probably +transferred the photograph to his keeping. If the latter, it was less +likely. On the issue of this question depended whether I should continue +my work at Briony Lodge, or turn my attention to the gentleman’s +chambers in the Temple. It was a delicate point, and it widened the +field of my inquiry. I fear that I bore you with these details, but I +have to let you see my little difficulties, if you are to understand the +situation.”

    +

    “I am following you closely,” I answered.

    +

    “I was still balancing the matter in my mind when a hansom cab +drove up to Briony Lodge, and a gentleman sprang out. He was a +remarkably handsome man, dark, aquiline, and moustached—evidently the +man of whom I had heard. He appeared to be in a great hurry, shouted to +the cabman to wait, and brushed past the maid who opened the door with +the air of a man who was thoroughly at home.

    +

    “He was in the house about half an hour, and I could catch +glimpses of him in the windows of the sitting-room, pacing up and down, +talking excitedly, and waving his arms. Of her I could see nothing. +Presently he emerged, looking even more flurried than before. As he +stepped up to the cab, he pulled a gold watch from his pocket and looked +at it earnestly, ‘Drive like the devil,’ he shouted, ‘first to Gross +& Hankey’s in Regent Street, and then to the Church of St. Monica in +the Edgeware Road. Half a guinea if you do it in twenty minutes!’

    +

    “Away they went, and I was just wondering whether I should not do +well to follow them when up the lane came a neat little landau, the +coachman with his coat only half-buttoned, and his tie under his ear, +while all the tags of his harness were sticking out of the buckles. It +hadn’t pulled up before she shot out of the hall door and into it. I +only caught a glimpse of her at the moment, but she was a lovely woman, +with a face that a man might die for.

    +

    “ ‘The Church of St. Monica, John,’ she cried, ‘and half a +sovereign if you reach it in twenty minutes.’

    + +

    “This was quite too good to lose, Watson. I was just balancing +whether I should run for it, or whether I should perch behind her landau +when a cab came through the street. The driver looked twice at such a +shabby fare, but I jumped in before he could object. ‘The Church of St. +Monica,’ said I, ‘and half a sovereign if you reach it in twenty +minutes.’ It was twenty-five minutes to twelve, and of course it was +clear enough what was in the wind.

    +

    “My cabby drove fast. I don’t think I ever drove faster, but the +others were there before us. The cab and the landau with their steaming +horses were in front of the door when I arrived. I paid the man and +hurried into the church. There was not a soul there save the two whom I +had followed and a surpliced clergyman, who seemed to be expostulating +with them. They were all three standing in a knot in front of the altar. +I lounged up the side aisle like any other idler who has dropped into a +church. Suddenly, to my surprise, the three at the altar faced round to +me, and Godfrey Norton came running as hard as he could towards me.

    +

    “ ‘Thank God,’ he cried. ‘You’ll do. Come! Come!’

    +

    “ ‘What then?’ I asked.

    +

    “ ‘Come, man, come, only three minutes, or it won’t be legal.’

    +

    “I was half-dragged up to the altar, and before I knew where I +was I found myself mumbling responses which were whispered in my ear, +and vouching for things of which I knew nothing, and generally assisting +in the secure tying up of Irene Adler, spinster, to Godfrey Norton, +bachelor. It was all done in an instant, and there was the gentleman +thanking me on the one side and the lady on the other, while the +clergyman beamed on me in front. It was the most preposterous position +in which I ever found myself in my life, and it was the thought of it +that started me laughing just now. It seems that there had been some +informality about their license, that the clergyman absolutely refused +to marry them without a witness of some sort, and that my lucky +appearance saved the bridegroom from having to sally out into the +streets in search of a best man. The bride gave me a sovereign, and I +mean to wear it on my watch chain in memory of the occasion.”

    +

    “This is a very unexpected turn of affairs,” said I; “and what +then?”

    +

    “Well, I found my plans very seriously menaced. It looked as if +the pair might take an immediate departure, and so necessitate very +prompt and energetic measures on my part. At the church door, however, +they separated, he driving back to the Temple, and she to her own house. +‘I shall drive out in the park at five as usual,’ she said as she left +him. I heard no more. They drove away in different directions, and I +went off to make my own arrangements.”

    +

    “Which are?”

    + +

    “Some cold beef and a glass of beer,” he answered, ringing the +bell. “I have been too busy to think of food, and I am likely to be +busier still this evening. By the way, Doctor, I shall want your +co-operation.”

    +

    “I shall be delighted.”

    +

    “You don’t mind breaking the law?”

    +

    “Not in the least.”

    +

    “Nor running a chance of arrest?”

    +

    “Not in a good cause.”

    +

    “Oh, the cause is excellent!”

    +

    “Then I am your man.”

    +

    “I was sure that I might rely on you.”

    + +

    “But what is it you wish?”

    +

    “When Mrs. Turner has brought in the tray I will make it clear to +you. Now,” he said as he turned hungrily on the simple fare that our +landlady had provided, “I must discuss it while I eat, for I have not +much time. It is nearly five now. In two hours we must be on the scene +of action. Miss Irene, or Madame, rather, returns from her drive at +seven. We must be at Briony Lodge to meet her.”

    +

    “And what then?”

    +

    “You must leave that to me. I have already arranged what is to +occur. There is only one point on which I must insist. You must not +interfere, come what may. You understand?”

    +

    “I am to be neutral?”

    +

    “To do nothing whatever. There will probably be some small +unpleasantness. Do not join in it. It will end in my being conveyed into +the house. Four or five minutes afterwards the sitting-room window will +open. You are to station yourself close to that open window.”

    +

    “Yes.”

    +

    “You are to watch me, for I will be visible to you.”

    +

    “Yes.”

    + +

    “And when I raise my hand—so—you will throw into the room what I +give you to throw, and will, at the same time, raise the cry of fire. +You quite follow me?”

    +

    “Entirely.”

    +

    “It is nothing very formidable,” he said, taking a long +cigar-shaped roll from his pocket. “It is an ordinary plumber’s +smoke-rocket, fitted with a cap at either end to make it self-lighting. +Your task is confined to that. When you raise your cry of fire, it will +be taken up by quite a number of people. You may then walk to the end of +the street, and I will rejoin you in ten minutes. I hope that I have +made myself clear?”

    +

    “I am to remain neutral, to get near the window, to watch you, +and at the signal to throw in this object, then to raise the cry of +fire, and to wait you at the corner of the street.”

    +

    “Precisely.”

    +

    “Then you may entirely rely on me.”

    +

    “That is excellent. I think, perhaps, it is almost time that I +prepare for the new role I have to play.”

    +

    He disappeared into his bedroom and returned in a few minutes in +the character of an amiable and simple-minded Nonconformist clergyman. +His broad black hat, his baggy trousers, his white tie, his sympathetic +smile, and general look of peering and benevolent curiosity were such as +Mr. John Hare alone could have equalled. It was not merely that Holmes +changed his costume. His expression, his manner, his very soul seemed to +vary with every fresh part that he assumed. The stage lost a fine actor, +even as science lost an acute reasoner, when he became a specialist in +crime.

    +

    It was a quarter past six when we left Baker Street, and it still +wanted ten minutes to the hour when we found ourselves in Serpentine +Avenue. It was already dusk, and the lamps were just being lighted as we +paced up and down in front of Briony Lodge, waiting for the coming of +its occupant. The house was just such as I had pictured it from Sherlock +Holmes’ succinct description, but the locality appeared to be less +private than I expected. On the contrary, for a small street in a quiet +neighbourhood, it was remarkably animated. There was a group of shabbily +dressed men smoking and laughing in a corner, a scissors-grinder with +his wheel, two guardsmen who were flirting with a nurse-girl, and +several well-dressed young men who were lounging up and down with cigars +in their mouths.

    + +

    “You see,” remarked Holmes, as we paced to and fro in front of +the house, “this marriage rather simplifies matters. The photograph +becomes a double-edged weapon now. The chances are that she would be as +averse to its being seen by Mr. Godfrey Norton, as our client is to its +coming to the eyes of his princess. Now the question is, Where are we to +find the photograph?”

    +

    “Where, indeed?”

    +

    “It is most unlikely that she carries it about with her. It is +cabinet size. Too large for easy concealment about a woman’s dress. She +knows that the King is capable of having her waylaid and searched. Two +attempts of the sort have already been made. We may take it, then, that +she does not carry it about with her.”

    +

    “Where, then?”

    +

    “Her banker or her lawyer. There is that double possibility. But +I am inclined to think neither. Women are naturally secretive, and they +like to do their own secreting. Why should she hand it over to anyone +else? She could trust her own guardianship, but she could not tell what +indirect or political influence might be brought to bear upon a business +man. Besides, remember that she had resolved to use it within a few +days. It must be where she can lay her hands upon it. It must be in her +own house.”

    +

    “But it has twice been burgled.”

    +

    “Pshaw! They did not know how to look.”

    +

    “But how will you look?”

    +

    “I will not look.”

    + +

    “What then?”

    +

    “I will get her to show me.”

    +

    “But she will refuse.”

    +

    “She will not be able to. But I hear the rumble of wheels. It is +her carriage. Now carry out my orders to the letter.”

    +

    As he spoke the gleam of the sidelights of a carriage came round +the curve of the avenue. It was a smart little landau which rattled up +to the door of Briony Lodge. As it pulled up, one of the loafing men at +the corner dashed forward to open the door in the hope of earning a +copper, but was elbowed away by another loafer, who had rushed up with +the same intention. A fierce quarrel broke out, which was increased by +the two guardsmen, who took sides with one of the loungers, and by the +scissors-grinder, who was equally hot upon the other side. A blow was +struck, and in an instant the lady, who had stepped from her carriage, +was the centre of a little knot of flushed and struggling men, who +struck savagely at each other with their fists and sticks. Holmes dashed +into the crowd to protect the lady; but, just as he reached her, he gave +a cry and dropped to the ground, with the blood running freely down his +face. At his fall the guardsmen took to their heels in one direction and +the loungers in the other, while a number of better dressed people, who +had watched the scuffle without taking part in it, crowded in to help +the lady and to attend to the injured man. Irene Adler, as I will still +call her, had hurried up the steps; but she stood at the top with her +superb figure outlined against the lights of the hall, looking back into +the street.

    +

    “Is the poor gentleman much hurt?” she asked.

    +

    “He is dead,” cried several voices.

    +

    “No, no, there’s life in him!” shouted another. “But he’ll be +gone before you can get him to hospital.”

    +

    “He’s a brave fellow,” said a woman. “They would have had the +lady’s purse and watch if it hadn’t been for him. They were a gang, and +a rough one, too. Ah, he’s breathing now.”

    + +

    “He can’t lie in the street. May we bring him in, marm?”

    +

    “Surely. Bring him into the sitting-room. There is a comfortable +sofa. This way, please!”

    +

    Slowly and solemnly he was borne into Briony Lodge and laid out +in the principal room, while I still observed the proceedings from my +post by the window. The lamps had been lit, but the blinds had not been +drawn, so that I could see Holmes as he lay upon the couch. I do not +know whether he was seized with compunction at that moment for the part +he was playing, but I know that I never felt more heartily ashamed of +myself in my life than when I saw the beautiful creature against whom I +was conspiring, or the grace and kindliness with which she waited upon +the injured man. And yet it would be the blackest treachery to Holmes to +draw back now from the part which he had intrusted to me. I hardened my +heart, and took the smoke-rocket from under my ulster. After all, I +thought, we are not injuring her. We are but preventing her from +injuring another.

    +

    Holmes had sat up upon the couch, and I saw him motion like a man +who is in need of air. A maid rushed across and threw open the window. +At the same instant I saw him raise his hand and at the signal I tossed +my rocket into the room with a cry of “Fire!” The word was no sooner out +of my mouth than the whole crowd of spectators, well dressed and +ill—gentlemen, ostlers, and servant maids—joined in a general shriek of +“Fire!” Thick clouds of smoke curled through the room and out at the +open window. I caught a glimpse of rushing figures, and a moment later +the voice of Holmes from within assuring them that it was a false alarm. +Slipping through the shouting crowd I made my way to the corner of the +street, and in ten minutes was rejoiced to find my friend’s arm in mine, +and to get away from the scene of uproar. He walked swiftly and in +silence for some few minutes until we had turned down one of the quiet +streets which lead towards the Edgeware Road.

    +

    “You did it very nicely, Doctor,” he remarked. “Nothing could +have been better. It is all right.”

    +

    “You have the photograph?”

    +

    “I know where it is.”

    +

    “And how did you find out?”

    +

    “She showed me, as I told you she would.”

    + +

    “I am still in the dark.”

    +

    “I do not wish to make a mystery,” said he, laughing. “The matter +was perfectly simple. You, of course, saw that everyone in the street +was an accomplice. They were all engaged for the evening.”

    +

    “I guessed as much.”

    +

    “Then, when the row broke out, I had a little moist red paint in +the palm of my hand. I rushed forward, fell down, clapped my hand to my +face, and became a piteous spectacle. It is an old trick.”

    +

    “That also I could fathom.”

    +

    “Then they carried me in. She was bound to have me in. What else +could she do? And into her sitting-room, which was the very room which I +suspected. It lay between that and her bedroom, and I was determined to +see which. They laid me on a couch, I motioned for air, they were +compelled to open the window, and you had your chance.”

    +

    “How did that help you?”

    +

    “It was all-important. When a woman thinks that her house is on +fire, her instinct is at once to rush to the thing which she values +most. It is a perfectly overpowering impulse, and I have more than once +taken advantage of it. In the case of the Darlington Substitution +Scandal it was of use to me, and also in the Arnsworth Castle business. +A married woman grabs at her baby; an unmarried one reaches for her +jewel-box. Now it was clear to me that our lady of to-day had nothing in +the house more precious to her than what we are in quest of. She would +rush to secure it. The alarm of fire was admirably done. The smoke and +shouting were enough to shake nerves of steel. She responded +beautifully. The photograph is in a recess behind a sliding panel just +above the right bell-pull. She was there in an instant, and I caught a +glimpse of it as she half drew it out. When I cried out that it was a +false alarm, she replaced it, glanced at the rocket, rushed from the +room, and I have not seen her since. I rose, and, making my excuses, +escaped from the house. I hesitated whether to attempt to secure the +photograph at once; but the coachman had come in, and as he was watching +me narrowly, it seemed safer to wait. A little over-precipitance may +ruin all.”

    +

    “And now?” I asked.

    + +

    “Our quest is practically finished. I shall call with the King +to-morrow, and with you, if you care to come with us. We will be shown +into the sitting-room to wait for the lady, but it is probable that when +she comes she may find neither us nor the photograph. It might be a +satisfaction to his Majesty to regain it with his own hands.”

    +

    “And when will you call?”

    +

    “At eight in the morning. She will not be up, so that we shall +have a clear field. Besides, we must be prompt, for this marriage may +mean a complete change in her life and habits. I must wire to the King +without delay.”

    +

    We had reached Baker Street and had stopped at the door. He was +searching his pockets for the key when someone passing said:

    +

    “Good-night, Mister Sherlock Holmes.”

    +

    There were several people on the pavement at the time, but the +greeting appeared to come from a slim youth in an ulster who had hurried +by.

    +

    “I’ve heard that voice before,” said Holmes, staring down the +dimly lit street. “Now, I wonder who the deuce that could have been.”
    +
    +

    +
    III.
    + +


    +I slept at Baker Street that night, and we were engaged upon our toast +and coffee in the morning when the King of Bohemia rushed into the room.

    +

    “You have really got it!” he cried, grasping Sherlock Holmes by +either shoulder and looking eagerly into his face.

    +

    “Not yet.”

    +

    “But you have hopes?”

    +

    “I have hopes.”

    +

    “Then, come. I am all impatience to be gone.”

    +

    “We must have a cab.”

    +

    “No, my brougham is waiting.”

    + +

    “Then that will simplify matters.” We descended and started off +once more for Briony Lodge.

    +

    “Irene Adler is married,” remarked Holmes.

    +

    “Married! When?”

    +

    “Yesterday.”

    +

    “But to whom?”

    +

    “To an English lawyer named Norton.”

    +

    “But she could not love him.”

    +

    “I am in hopes that she does.”

    +

    “And why in hopes?”

    + +

    “Because it would spare your Majesty all fear of future +annoyance. If the lady loves her husband, she does not love your +Majesty. If she does not love your Majesty, there is no reason why she +should interfere with your Majesty’s plan.”

    +

    “It is true. And yet—! Well! I wish she had been of my own +station! What a queen she would have made!” He relapsed into a moody +silence, which was not broken until we drew up in Serpentine Avenue.

    +

    The door of Briony Lodge was open, and an elderly woman stood +upon the steps. She watched us with a sardonic eye as we stepped from +the brougham.

    +

    “Mr. Sherlock Holmes, I believe?” said she.

    +

    “I am Mr. Holmes,” answered my companion, looking at her with a +questioning and rather startled gaze.

    +

    “Indeed! My mistress told me that you were likely to call. She +left this morning with her husband by the 5:15 train from Charing Cross +for the Continent.”

    +

    “What!” Sherlock Holmes staggered back, white with chagrin and +surprise. “Do you mean that she has left England?”

    +

    “Never to return.”

    +

    “And the papers?” asked the King hoarsely. “All is lost.”

    + +

    “We shall see.” He pushed past the servant and rushed into the +drawing-room, followed by the King and myself. The furniture was +scattered about in every direction, with dismantled shelves and open +drawers, as if the lady had hurriedly ransacked them before her flight. +Holmes rushed at the bell-pull, tore back a small sliding shutter, and, +plunging in his hand, pulled out a photograph and a letter. The +photograph was of Irene Adler herself in evening dress, the letter was +superscribed to “Sherlock Holmes, Esq. To be left till called for.” My +friend tore it open, and we all three read it together. It was dated at +midnight of the preceding night and ran in this way:
    +
    +

    +

    “MY DEAR MR. SHERLOCK HOLMES,—You really did it very well. You +took me in completely. Until after the alarm of fire, I had not a +suspicion. But then, when I found how I had betrayed myself, I began to +think. I had been warned against you months ago. I had been told that, +if the King employed an agent, it would certainly be you. And your +address had been given me. Yet, with all this, you made me reveal what +you wanted to know. Even after I became suspicious, I found it hard to +think evil of such a dear, kind old clergyman. But, you know, I have +been trained as an actress myself. Male costume is nothing new to me. I +often take advantage of the freedom which it gives. I sent John, the +coachman, to watch you, ran upstairs, got into my walking clothes, as I +call them, and came down just as you departed.

    +

    “Well, I followed you to your door, and so made sure that I was +really an object of interest to the celebrated Mr. Sherlock Holmes. Then +I, rather imprudently, wished you good-night, and started for the Temple +to see my husband.

    +

    “We both thought the best resource was flight, when pursued by so +formidable an antagonist; so you will find the nest empty when you call +to-morrow. As to the photograph, your client may rest in peace. I love +and am loved by a better man than he. The King may do what he will +without hindrance from one whom he has cruelly wronged. I keep it only +to safeguard myself, and to preserve a weapon which will always secure +me from any steps which he might take in the future. I leave a +photograph which he might care to possess; and I remain, dear Mr. +Sherlock Holmes,

    +


    +“Very truly yours,
    +“IRENE NORTON, née ADLER.”
    + +
    +

    +

    “What a woman—oh, what a woman!” cried the King of Bohemia, when +we had all three read this epistle. “Did I not tell you how quick and +resolute she was? Would she not have made an admirable queen? Is it not +a pity that she was not on my level?”

    +

    “From what I have seen of the lady, she seems, indeed, to be on a +very different level to your Majesty,” said Holmes coldly. “I am sorry +that I have not been able to bring your Majesty’s business to a more +successful conclusion.”

    +

    “On the contrary, my dear sir,” cried the King; “nothing could be +more successful. I know that her word is inviolate. The photograph is +now as safe as if it were in the fire.”

    +

    “I am glad to hear your Majesty say so.”

    +

    “I am immensely indebted to you. Pray tell me in what way I can +reward you. This ring—” He slipped an emerald snake ring from his finger +and held it out upon the palm of his hand.

    +

    “Your Majesty has something which I should value even more +highly,” said Holmes.

    +

    “You have but to name it.”

    +

    “This photograph!”

    + +

    The King stared at him in amazement.

    +

    “Irene’s photograph!” he cried. “Certainly, if you wish it.”

    +

    “I thank your Majesty. Then there is no more to be done in the +matter. I have the honour to wish you a very good morning.” He bowed, +and, turning away without observing the hand which the King had +stretched out to him, he set off in my company for his chambers.
    +
    +

    +

    And that was how a great scandal threatened to affect the kingdom +of Bohemia, and how the best plans of Mr. Sherlock Holmes were beaten by +a woman’s wit. He used to make merry over the cleverness of women, but I +have not heard him do it of late. And when he speaks of Irene Adler, or +when he refers to her photograph, it is always under the honourable +title of the woman.
    +
    +

    +
    + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/opf/test1.opf b/epublib-tools/src/test/resources/opf/test1.opf new file mode 100644 index 00000000..6d3bacf0 --- /dev/null +++ b/epublib-tools/src/test/resources/opf/test1.opf @@ -0,0 +1,32 @@ + + + + Epublib test book 1 + Joe Tester + 2010-05-27 + en + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/epublib-tools/src/test/resources/opf/test2.opf b/epublib-tools/src/test/resources/opf/test2.opf new file mode 100644 index 00000000..fdfb1688 --- /dev/null +++ b/epublib-tools/src/test/resources/opf/test2.opf @@ -0,0 +1,23 @@ + + + + This Dynamic Earth + this_dynamic_earth-AAH813 + en + W. Jacquelyne Kious, Robert I. Tilling + + Infogrid Pacific + + + + 22-01-2009 + + + + + + + + + + diff --git a/epublib-tools/src/test/resources/toc.xml b/epublib-tools/src/test/resources/toc.xml new file mode 100644 index 00000000..5875b1fd --- /dev/null +++ b/epublib-tools/src/test/resources/toc.xml @@ -0,0 +1,41 @@ + + + + + + + + + + Epublib test book 1 + + + Tester, Joe + + + + + Introduction + + + + + + Second Chapter + + + + + Chapter 2, section 1 + + + + + + + Conclusion + + + + + diff --git a/pom.xml b/pom.xml deleted file mode 100644 index 88932322..00000000 --- a/pom.xml +++ /dev/null @@ -1,90 +0,0 @@ - - - - - - 4.0.0 - - nl.siegmann.epublib - epublib - Example Project - 1.0-SNAPSHOT - - - - org.codehaus.groovy.maven.runtime - gmaven-runtime-default - 1.0-rc-3 - - - net.sourceforge.htmlcleaner - htmlcleaner - 2.1 - - - args4j - args4j - 2.0.16 - - - commons-lang - commons-lang - 2.4 - - - commons-io - commons-io - 1.4 - - - org.ccil.cowan.tagsoup - tagsoup - 1.2 - - - - junit - junit - 3.8.1 - test - - - - - - net.java.repository - Java.net repository - http://download.java.net/maven/2/ - - - org.hippocms - Hosts htmlcleaner - http://repository.hippocms.org/maven2/ - - - - - - - org.codehaus.groovy.maven - gmaven-plugin - 1.0-rc-3 - - - - - - - - - - - - 
diff --git a/src/main/java/nl/siegmann/epublib/Author.java b/src/main/java/nl/siegmann/epublib/Author.java deleted file mode 100644 index 93aadfdf..00000000 --- a/src/main/java/nl/siegmann/epublib/Author.java +++ /dev/null @@ -1,24 +0,0 @@ -package nl.siegmann.epublib; - -public class Author { - private String firstname; - private String lastname; - - public Author(String firstname, String lastname) { - this.firstname = firstname; - this.lastname = lastname; - } - - public String getFirstname() { - return firstname; - } - public void setFirstname(String firstname) { - this.firstname = firstname; - } - public String getLastname() { - return lastname; - } - public void setLastname(String lastname) { - this.lastname = lastname; - } -} diff --git a/src/main/java/nl/siegmann/epublib/Book.java b/src/main/java/nl/siegmann/epublib/Book.java deleted file mode 100644 index 2f127254..00000000 --- a/src/main/java/nl/siegmann/epublib/Book.java +++ /dev/null @@ -1,74 +0,0 @@ -package nl.siegmann.epublib; - -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.UUID; - -public class Book { - private String title = ""; - private String rights = ""; - private String uid = UUID.randomUUID().toString(); - private List authors = new ArrayList(); - private List subjects = new ArrayList(); - private Date date = new Date(); - private String language = ""; - - private List
    sections = new ArrayList
    (); - private List resources = new ArrayList(); - - public String getTitle() { - return title; - } - public void setTitle(String title) { - this.title = title; - } - public List
    getSections() { - return sections; - } - public void setSections(List
    sections) { - this.sections = sections; - } - public String getRights() { - return rights; - } - public void setRights(String rights) { - this.rights = rights; - } - public String getUid() { - return uid; - } - public void setUid(String uid) { - this.uid = uid; - } - public List getAuthors() { - return authors; - } - public void setAuthors(List authors) { - this.authors = authors; - } - public List getSubjects() { - return subjects; - } - public void setSubjects(List subjects) { - this.subjects = subjects; - } - public Date getDate() { - return date; - } - public void setDate(Date date) { - this.date = date; - } - public String getLanguage() { - return language; - } - public void setLanguage(String language) { - this.language = language; - } - public List getResources() { - return resources; - } - public void setResources(List resources) { - this.resources = resources; - } -} diff --git a/src/main/java/nl/siegmann/epublib/Constants.java b/src/main/java/nl/siegmann/epublib/Constants.java deleted file mode 100644 index f845964c..00000000 --- a/src/main/java/nl/siegmann/epublib/Constants.java +++ /dev/null @@ -1,10 +0,0 @@ -package nl.siegmann.epublib; - -public interface Constants { - String encoding = "UTF-8"; - - public interface MediaTypes { - String xhtml = "application/xhtml+xml"; - String epub = "application/epub+zip"; - } -} diff --git a/src/main/java/nl/siegmann/epublib/EpubWriter.java b/src/main/java/nl/siegmann/epublib/EpubWriter.java deleted file mode 100644 index 6e6a3943..00000000 --- a/src/main/java/nl/siegmann/epublib/EpubWriter.java +++ /dev/null @@ -1,86 +0,0 @@ -package nl.siegmann.epublib; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; - -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLEventFactory; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; 
- -import org.apache.commons.io.FileUtils; - -/** - * Generates an epub file. Not thread-safe, single use object. - * - * @author paul - * - */ -public class EpubWriter { - - public void write(Book book, OutputStream out) throws IOException, XMLStreamException, FactoryConfigurationError { - File resultDir = new File("/home/paul/tmp/epublib"); - writeMimeType(resultDir); - File oebpsDir = new File(resultDir.getAbsolutePath() + File.separator + "OEBPS"); - FileUtils.forceMkdir(oebpsDir); - writeContainer(resultDir); - writeNcxDocument(book, oebpsDir); - writePackageDocument(book, oebpsDir); - } - - private void writePackageDocument(Book book, File oebpsDir) throws XMLStreamException, IOException { - XMLOutputFactory xmlOutputFactory = createXMLOutputFactory(); - Writer out = new FileWriter(oebpsDir.getAbsolutePath() + File.separator + "content.opf"); - XMLStreamWriter xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(out); - PackageDocument.write(this, xmlStreamWriter, book); - xmlStreamWriter.close(); - } - - private void writeNcxDocument(Book book, File oebpsDir) throws IOException, XMLStreamException, FactoryConfigurationError { - NCXDocument.write(book, new File(oebpsDir.getAbsolutePath() + File.separator + "toc.ncx")); - } - - private void writeContainer(File resultDir) throws IOException { - File containerDir = new File(resultDir.getAbsolutePath() + File.separator + "META-INF"); - FileUtils.forceMkdir(containerDir); - File containerFile = new File(containerDir + File.separator + "container.xml"); - Writer out = new FileWriter(containerFile); - out.write("\n"); - out.write("\n"); - out.write("\t\n"); - out.write("\t\t\n"); - out.write("\t\n"); - out.write(""); - out.close(); - } - - private void writeMimeType(File resultDir) throws IOException { - Writer out = new FileWriter(resultDir.getAbsolutePath() + File.separator + "mimetype"); - out.write(Constants.MediaTypes.epub); - out.close(); - } - - XMLEventFactory createXMLEventFactory() { - return 
XMLEventFactory.newInstance(); - } - - XMLOutputFactory createXMLOutputFactory() { - return XMLOutputFactory.newInstance(); - } - - String getNcxId() { - return "ncx"; - } - - String getNcxHref() { - return "toc.ncx"; - } - - String getNcxMediaType() { - return "application/x-dtbncx+xml"; - } -} diff --git a/src/main/java/nl/siegmann/epublib/FileResource.java b/src/main/java/nl/siegmann/epublib/FileResource.java deleted file mode 100644 index c746eb2b..00000000 --- a/src/main/java/nl/siegmann/epublib/FileResource.java +++ /dev/null @@ -1,42 +0,0 @@ -package nl.siegmann.epublib; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public class FileResource implements Resource { - private File file; - private String href; - private String mediaType = Constants.MediaTypes.xhtml; - - public FileResource(File file, String href, String mediaType) { - super(); - this.file = file; - this.href = href; - this.mediaType = mediaType; - } - - public OutputStream getOutputStream() throws IOException { - return new FileOutputStream(file); - } - - public File getFile() { - return file; - } - public void setFile(File file) { - this.file = file; - } - public String getHref() { - return href; - } - public void setHref(String href) { - this.href = href; - } - public String getMediaType() { - return mediaType; - } - public void setMediaType(String mediaType) { - this.mediaType = mediaType; - } -} diff --git a/src/main/java/nl/siegmann/epublib/Main.java b/src/main/java/nl/siegmann/epublib/Main.java deleted file mode 100644 index ec6e823e..00000000 --- a/src/main/java/nl/siegmann/epublib/Main.java +++ /dev/null @@ -1,5 +0,0 @@ -package nl.siegmann.epublib; - -public class Main { - -} diff --git a/src/main/java/nl/siegmann/epublib/NCXDocument.java b/src/main/java/nl/siegmann/epublib/NCXDocument.java deleted file mode 100644 index 54f2e65d..00000000 --- a/src/main/java/nl/siegmann/epublib/NCXDocument.java +++ /dev/null @@ 
-1,99 +0,0 @@ -package nl.siegmann.epublib; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.List; - -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLEventFactory; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; - -public class NCXDocument { - - public static final String NAMESPACE_NCX = "http://www.daisy.org/z3986/2005/ncx/"; - public static final String PREFIX_NCX = "ncx"; - - public static void write(Book book, File file) throws IOException, XMLStreamException, FactoryConfigurationError { - FileWriter out = new FileWriter(file); - write(XMLEventFactory.newInstance(), XMLOutputFactory.newInstance().createXMLStreamWriter(out), book); - out.close(); - } - - - public static void write(XMLEventFactory eventFactory, XMLStreamWriter writer, Book book) throws XMLStreamException { - writer.writeStartDocument(Constants.encoding, "1.0"); - writer.setDefaultNamespace(NAMESPACE_NCX); - writer.writeStartElement(NAMESPACE_NCX, "ncx"); -// writer.writeNamespace("ncx", NAMESPACE_NCX); - writer.writeAttribute("xmlns", NAMESPACE_NCX); - writer.writeAttribute("version", "2005-1"); - writer.writeStartElement(NAMESPACE_NCX, "head"); - - writer.writeStartElement(NAMESPACE_NCX, "meta"); - writer.writeAttribute("name", "dtb:uid"); - writer.writeAttribute("content", book.getUid()); - writer.writeEndElement(); - - writer.writeStartElement(NAMESPACE_NCX, "meta"); - writer.writeAttribute("name", "dtb:depth"); - writer.writeAttribute("content", "1"); - writer.writeEndElement(); - - writer.writeStartElement(NAMESPACE_NCX, "meta"); - writer.writeAttribute("name", "dtb:totalPageCount"); - writer.writeAttribute("content", "0"); - writer.writeEndElement(); - - writer.writeStartElement(NAMESPACE_NCX, "meta"); - writer.writeAttribute("name", "dtb:maxPageNumber"); - writer.writeAttribute("content", "0"); - writer.writeEndElement(); - - 
writer.writeEndElement(); - - writer.writeStartElement(NAMESPACE_NCX, "docTitle"); - writer.writeStartElement(NAMESPACE_NCX, "text"); - writer.writeCharacters(book.getTitle()); - writer.writeEndElement(); - writer.writeEndElement(); - for(Author author: book.getAuthors()) { - writer.writeStartElement(NAMESPACE_NCX, "docAuthor"); - writer.writeStartElement(NAMESPACE_NCX, "text"); - writer.writeCharacters(author.getLastname() + ", " + author.getFirstname()); - writer.writeEndElement(); - writer.writeEndElement(); - } - writer.writeStartElement(NAMESPACE_NCX, "navMap"); - writeNavPoints(book.getSections(), 1, writer); - writer.writeEndElement(); - writer.writeEndElement(); - writer.writeEndDocument(); - } - - - private static int writeNavPoints(List
    sections, int playOrder, - XMLStreamWriter writer) throws XMLStreamException { - for(Section section: sections) { - writer.writeStartElement(NAMESPACE_NCX, "navPoint"); - writer.writeAttribute("id", "navPoint-" + playOrder); - writer.writeAttribute("playOrder", String.valueOf(playOrder)); - writer.writeAttribute("class", "chapter"); - writer.writeStartElement(NAMESPACE_NCX, "navLabel"); - writer.writeStartElement(NAMESPACE_NCX, "text"); - writer.writeCharacters(section.getName()); - writer.writeEndElement(); // text - writer.writeEndElement(); // navLabel - writer.writeEmptyElement(NAMESPACE_NCX, "content"); - writer.writeAttribute("src", section.getHref()); - playOrder++; - if(! section.getChildren().isEmpty()) { - playOrder = writeNavPoints(section.getChildren(), playOrder, writer); - } - writer.writeEndElement(); // navPoint - } - return playOrder; - } -} diff --git a/src/main/java/nl/siegmann/epublib/PackageDocument.java b/src/main/java/nl/siegmann/epublib/PackageDocument.java deleted file mode 100644 index f09290a7..00000000 --- a/src/main/java/nl/siegmann/epublib/PackageDocument.java +++ /dev/null @@ -1,134 +0,0 @@ -package nl.siegmann.epublib; - -import java.text.SimpleDateFormat; - -import javax.xml.stream.XMLEventFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; - -import org.apache.commons.lang.StringUtils; - -public class PackageDocument { - public static final String NAMESPACE_OPF = "http://www.idpf.org/2007/opf"; - public static final String NAMESPACE_DUBLIN_CORE = "http://purl.org/dc/elements/1.1/"; - public static final String PREFIX_DUBLIN_CORE = "dc"; - public static final String dateFormat = "yyyy-MM-dd"; - - public static void write(EpubWriter writeAction, XMLStreamWriter writer, Book book) throws XMLStreamException { - writer.writeStartDocument(Constants.encoding, "1.0"); - writer.setDefaultNamespace(NAMESPACE_OPF); - writer.writeStartElement(NAMESPACE_OPF, "package"); - 
writer.writeNamespace(PREFIX_DUBLIN_CORE, NAMESPACE_DUBLIN_CORE); -// writer.writeNamespace("ncx", NAMESPACE_NCX); - writer.writeAttribute("xmlns", NAMESPACE_OPF); - writer.writeAttribute("version", "2.0"); - writer.writeAttribute("unique-identifier", "BookID"); - - writer.writeStartElement(NAMESPACE_OPF, "metadata"); - - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "identifier"); - writer.writeAttribute(NAMESPACE_DUBLIN_CORE, "id", "BookdID"); - writer.writeAttribute(NAMESPACE_OPF, "scheme", "UUID"); - writer.writeCharacters(book.getUid()); - writer.writeEndElement(); // dc:identifier - - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "title"); - writer.writeCharacters(book.getTitle()); - writer.writeEndElement(); // dc:title - - for(Author author: book.getAuthors()) { - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "creator"); - writer.writeAttribute(NAMESPACE_OPF, "role", "aut"); - writer.writeAttribute(NAMESPACE_OPF, "file-as", author.getLastname() + ", " + author.getFirstname()); - writer.writeCharacters(author.getFirstname() + " " + author.getLastname()); - writer.writeEndElement(); // dc:creator - } - - for(String subject: book.getSubjects()) { - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "subject"); - writer.writeCharacters(subject); - writer.writeEndElement(); // dc:subject - } - - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "date"); - writer.writeCharacters((new SimpleDateFormat(dateFormat)).format(book.getDate())); - writer.writeEndElement(); // dc:date - - if(StringUtils.isNotEmpty(book.getLanguage())) { - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "language"); - writer.writeCharacters(book.getLanguage()); - writer.writeEndElement(); // dc:date - } - - if(StringUtils.isNotEmpty(book.getRights())) { - writer.writeStartElement(NAMESPACE_DUBLIN_CORE, "rights"); - writer.writeCharacters(book.getRights()); - writer.writeEndElement(); // dc:rights - } - - writer.writeEndElement(); // dc:metadata - - writer.writeStartElement(NAMESPACE_OPF, 
"manifest"); - - writer.writeEmptyElement(NAMESPACE_OPF, "item"); - writer.writeAttribute("id", writeAction.getNcxId()); - writer.writeAttribute("href", writeAction.getNcxHref()); - writer.writeAttribute("media-type", writeAction.getNcxMediaType()); - - for(Resource resource: book.getResources()) { - writer.writeEmptyElement(NAMESPACE_OPF, "item"); - writer.writeAttribute("id", resource.getHref()); - writer.writeAttribute("href", resource.getHref()); - writer.writeAttribute("media-type", resource.getMediaType()); - } - - writer.writeEndElement(); // manifest - - writer.writeStartElement(NAMESPACE_OPF, "spine"); - writer.writeAttribute("toc", writeAction.getNcxId());; - for(Section section: book.getSections()) { - writer.writeEmptyElement(NAMESPACE_OPF, "itemref"); - writer.writeAttribute("idref", section.getId());; - } - writer.writeEndElement(); // spine - - writer.writeEndElement(); // package - writer.writeEndDocument(); - } - /* - - def writePackage(book) { - new File(targetDir + File.separator + contentDir).mkdir() - def packageWriter = new FileWriter(new File(targetDir + File.separator + contentDir + File.separator + 'content.opf')) - def markupBuilder = new MarkupBuilder(packageWriter) - markupBuilder.setDoubleQuotes(true) - markupBuilder.'package'(xmlns: "http://www.idpf.org/2007/opf", 'unique-identifier': "BookID", version: "2.0") { - metadata('xmlns:dc': "http://purl.org/dc/elements/1.1/", 'xmlns:opf': "http://www.idpf.org/2007/opf") { - 'dc:identifier'(id: "BookID", 'opf:scheme': "UUID", book.uid) - 'dc:title' (book.title) - book.authors.each() { author -> - 'dc:creator' ('opf:role' : "aut", 'opf:file-as': author.lastname + ', ' + author.firstname, author.firstname + ' ' + author.lastname) - } - book.subjects.each() { subject -> - 'dc:subject'(subject) - } - 'dc:date' (book.date.format('yyyy-MM-dd')) - 'dc:language'(book.language) - if (book.rights) { - 'dc:rights' (book.rights) - } - } - manifest { - item( id: "ncx", href: "toc.ncx", 'media-type': 
"application/x-dtbncx+xml") - copyAndIndexContentFiles(markupBuilder, new File(inputHtmlDir)) - } - spine (toc: 'ncx') { - book.sections.each() { - itemref(idref: it.id) - } - } - } -} - - */ -} diff --git a/src/main/java/nl/siegmann/epublib/Resource.java b/src/main/java/nl/siegmann/epublib/Resource.java deleted file mode 100644 index 2baaf61d..00000000 --- a/src/main/java/nl/siegmann/epublib/Resource.java +++ /dev/null @@ -1,11 +0,0 @@ -package nl.siegmann.epublib; - -import java.io.IOException; -import java.io.OutputStream; - -public interface Resource { - - public String getHref(); - public String getMediaType(); - public OutputStream getOutputStream() throws IOException; -} diff --git a/src/main/java/nl/siegmann/epublib/ResourceAdder.java b/src/main/java/nl/siegmann/epublib/ResourceAdder.java deleted file mode 100644 index 56926e01..00000000 --- a/src/main/java/nl/siegmann/epublib/ResourceAdder.java +++ /dev/null @@ -1,83 +0,0 @@ -package nl.siegmann.epublib; - -import java.io.File; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.Collection; - -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMResult; -import javax.xml.transform.sax.SAXTransformerFactory; -import javax.xml.transform.sax.TransformerHandler; - -import org.ccil.cowan.tagsoup.Parser; -import org.w3c.dom.Document; -import org.xml.sax.InputSource; - -public class ResourceAdder { - - private File sourceDir; - private Collection result = new ArrayList(); - private SAXTransformerFactory stf = (SAXTransformerFactory) TransformerFactory - .newInstance(); - - private DomCleaner domCleaner; - - public ResourceAdder(File sourceDir) { - this.sourceDir = sourceDir; - } - - public interface DomCleaner { - public Document cleanupDocument(Document document); - } - - public static class Resource { - - } - - public void addResources() { - listFiles(sourceDir); - } - - public void listFiles(File currentDir) { - File[] directoryEntries = 
currentDir.listFiles(); - for (int i = 0; i < directoryEntries.length; i++) { - File entry = directoryEntries[i]; - if (entry.isDirectory()) { - listFiles(entry); - } else if (entry.isFile()) { - Resource resource = createResource(entry); - result.add(resource); - } - } - } - - Resource createResource(File file) { - - return new Resource(); - } - - /** - * @param urlString - * The URL of the page to retrieve - * @return A Node with a well formed XML doc coerced from the page. - * @throws Exception - * if something goes wrong. No error handling at all for - * brevity. - */ - public Document getHtmlUrlNode(File htmlFile) throws Exception { - - TransformerHandler transformerHandler = stf.newTransformerHandler(); - - // This dom result will contain the results of the transformation - DOMResult domResult = new DOMResult(); - transformerHandler.setResult(domResult); - - Parser tagsoupParser = new Parser(); - tagsoupParser.setContentHandler(transformerHandler); - - // This is where the magic happens to convert HTML to XML - tagsoupParser.parse(new InputSource(new FileInputStream(htmlFile))); - return domResult.getNode().getOwnerDocument(); - } -} diff --git a/src/main/java/nl/siegmann/epublib/Section.java b/src/main/java/nl/siegmann/epublib/Section.java deleted file mode 100644 index 2004591a..00000000 --- a/src/main/java/nl/siegmann/epublib/Section.java +++ /dev/null @@ -1,50 +0,0 @@ -package nl.siegmann.epublib; - -import java.util.ArrayList; -import java.util.List; - -public class Section { - private String id; - private String name; - private String href; - private List
    children; - - public Section(String id, String name, String href) { - this(id, name, href, new ArrayList
    ()); - } - - public Section(String id, String name, String href, List
    children) { - super(); - this.id = id; - this.name = name; - this.href = href; - this.children = children; - } - - public String getId() { - return id; - } - public void setId(String id) { - this.id = id; - } - public String getName() { - return name; - } - public void setName(String name) { - this.name = name; - } - public String getHref() { - return href; - } - public void setHref(String href) { - this.href = href; - } - - public List
    getChildren() { - return children; - } - - public void setChildren(List
    children) { - this.children = children; - } -} diff --git a/src/main/java/nl/siegmann/epublib/hhc/HHCParser.java b/src/main/java/nl/siegmann/epublib/hhc/HHCParser.java deleted file mode 100644 index 6b01407e..00000000 --- a/src/main/java/nl/siegmann/epublib/hhc/HHCParser.java +++ /dev/null @@ -1,185 +0,0 @@ -package nl.siegmann.epublib.hhc; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathConstants; -import javax.xml.xpath.XPathExpressionException; -import javax.xml.xpath.XPathFactory; - -import nl.siegmann.epublib.Book; -import nl.siegmann.epublib.Constants; -import nl.siegmann.epublib.FileResource; -import nl.siegmann.epublib.Resource; -import nl.siegmann.epublib.Section; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.filefilter.TrueFileFilter; -import org.apache.commons.lang.StringUtils; -import org.htmlcleaner.CleanerProperties; -import org.htmlcleaner.DomSerializer; -import org.htmlcleaner.HtmlCleaner; -import org.htmlcleaner.PrettyXmlSerializer; -import org.htmlcleaner.TagNode; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -public class HHCParser { - - public static Book parseHhc(File hhcFile, File chmRootDir) - throws IOException, ParserConfigurationException, - XPathExpressionException { - Book result = new Book(); - result.setTitle("test book"); - HtmlCleaner htmlCleaner = new HtmlCleaner(); - CleanerProperties props = htmlCleaner.getProperties(); - TagNode node = htmlCleaner.clean(hhcFile); - Document hhcDocument = new DomSerializer(props).createDOM(node); - PrettyXmlSerializer prettyPrinter = new PrettyXmlSerializer(props); - System.out.println(prettyPrinter.getXmlAsString(node)); - XPath xpath = XPathFactory.newInstance().newXPath(); - Node ulNode = 
(Node) xpath.evaluate("body/ul", hhcDocument - .getDocumentElement(), XPathConstants.NODE); - result.setSections(processUlNode(ulNode)); // processUlNode(xpath, chmRootDir, ulNode)); - result.setResources(findResources(chmRootDir)); - return result; - } - - /* - * Sometimes the structure is: - *
  • - * ... - *
      ...
    - *
  • - * - * And sometimes: - *
  • - * ... - *
  • - *
      ...
    - */ - private static List
    processUlNode(Node ulNode) { - List
    result = new ArrayList
    (); - NodeList children = ulNode.getChildNodes(); - for(int i = 0; i < children.getLength(); i++) { - Node node = children.item(i); - if(node.getNodeName().equals("li")) { - List
    section = processLiNode(node); - result.addAll(section); - } else if(node.getNodeName().equals("ul")) { - List
    childSections = processUlNode(node); - if(result.isEmpty()) { - result = childSections; - } else { - result.get(result.size() - 1).getChildren().addAll(childSections); - } - } - } - return result; - } - - - private static List
    processLiNode(Node liNode) { - List
    result = new ArrayList
    (); - NodeList children = liNode.getChildNodes(); - for(int i = 0; i < children.getLength(); i++) { - Node node = children.item(i); - if(node.getNodeName().equals("object")) { - Section section = processObjectNode(node); - if(section != null) { - result.add(section); - } - } else if(node.getNodeName().equals("ul")) { - List
    childSections = processUlNode(node); - if(result.isEmpty()) { - result = childSections; - } else { - result.get(result.size() - 1).getChildren().addAll(childSections); - } - } - } - return result; - } - - - /** - * Processes a CHM object node into a Section - * - * - * - * - * - * - * - * @param objectNode - * - * @return A Section of the object has a non-blank param child with name 'Name' and a non-blank param name 'Local' - */ - private static Section processObjectNode(Node objectNode) { - Section result = null; - NodeList children = objectNode.getChildNodes(); - String name = null; - String href = null; - for(int i = 0; i < children.getLength(); i++) { - Node node = children.item(i); - if(node.getNodeName().equals("param")) { - String paramName = ((Element) node).getAttribute("name"); - if("Name".equals(paramName)) { - name = ((Element) node).getAttribute("value"); - } else if("Local".equals(paramName)) { - href = ((Element) node).getAttribute("value"); - } - } - } - if((! (StringUtils.isBlank(name)) && (! 
StringUtils.isBlank(href)))) { - result = new Section(href, name, href); - } - return result; - } - - - @SuppressWarnings("unchecked") - private static List findResources(File rootDir) throws IOException { - List result = new ArrayList(); - Iterator fileIter = FileUtils.iterateFiles(rootDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE); - while(fileIter.hasNext()) { - File file = fileIter.next(); -// System.out.println("file:" + file); - if(file.isDirectory()) { - continue; - } - String mediaType = determineMediaType(file.getName()); - if(StringUtils.isBlank(mediaType)) { - continue; - } - String href = file.getCanonicalPath().substring(rootDir.getCanonicalPath().length() + 1); - result.add(new FileResource(file, href, mediaType)); - } - return result; - } - - private static String determineMediaType(String filename) { - String result = ""; - filename = filename.toLowerCase(); - if (filename.endsWith(".html") || filename.endsWith(".htm")) { - result = Constants.MediaTypes.xhtml; - } else if (filename.endsWith(".jpg") || filename.endsWith(".jpeg")) { - result = "image/jpeg"; - } else if (filename.endsWith(".png")) { - result = "image/png"; - } else if (filename.endsWith(".gif")) { - result = "image/gif"; - } else if (filename.endsWith(".css")) { - result = "text/css"; - } - return result; - - } -} diff --git a/src/test/java/nl/siegmann/epublib/hhc/HHCParserTest.java b/src/test/java/nl/siegmann/epublib/hhc/HHCParserTest.java deleted file mode 100644 index c634b78d..00000000 --- a/src/test/java/nl/siegmann/epublib/hhc/HHCParserTest.java +++ /dev/null @@ -1,43 +0,0 @@ -package nl.siegmann.epublib.hhc; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLStreamException; -import javax.xml.xpath.XPathExpressionException; - -import junit.framework.TestCase; -import nl.siegmann.epublib.Book; -import 
nl.siegmann.epublib.EpubWriter; - -public class HHCParserTest extends TestCase { - - public void test1() { - try { - String root = "/home/paul/project/veh/backbase/Backbase_Rich_Portal_4.1/documentation/client/Reference/ref/"; - String testHhc = root + "Reference.hhc"; -// String root = "/home/paul/project/private/library/chm/peaa/"; -// String testHhc = root + "0321127420.hhc"; - Book book = HHCParser.parseHhc(new File(testHhc), new File(root)); - (new EpubWriter()).write(book, new FileOutputStream("/home/paul/foo")); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (ParserConfigurationException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (XPathExpressionException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (XMLStreamException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (FactoryConfigurationError e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } -}