diff --git a/.travis.yml b/.travis.yml index 1a532e5..ca4225d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,5 @@ cache: language: java jdk: - - oraclejdk7 - oraclejdk8 diff --git a/NOTICE b/NOTICE index a026c60..b601485 100644 --- a/NOTICE +++ b/NOTICE @@ -8,17 +8,10 @@ Licensed under Apache 2 - http://www.apache.org/licenses/LICENSE-2.0.html This software includes third party software subject to the following licenses: - Apache Commons Lang under The Apache Software License, Version 2.0 Apache FontBox under Apache License, Version 2.0 - Apache JempBox under Apache License, Version 2.0 Apache PDFBox under Apache License, Version 2.0 Digipost Printability Validator under The Apache Software License, Version 2.0 - Hamcrest Core under New BSD License - Hamcrest library under New BSD License - JCL 1.1.1 implemented over SLF4J under MIT License - JUnit under Eclipse Public License 1.0 - Logback Classic Module under Eclipse Public License - v 1.0 or GNU Lesser General Public License - Logback Core Module under Eclipse Public License - v 1.0 or GNU Lesser General Public License + JCL 1.2 implemented over SLF4J under MIT License SLF4J API Module under MIT License diff --git a/pom.xml b/pom.xml index 641f02d..ca884ca 100644 --- a/pom.xml +++ b/pom.xml @@ -2,50 +2,45 @@ 4.0.0 - - 2.2.1 - - no.digipost digipost-open-super-pom - 0.3 + 1 printability-validator - 2.3-SNAPSHOT + 3.1-SNAPSHOT Digipost Printability Validator Library for validating 'printability' of documents. 
- UTF-8 - 1.7 - 1.7 + 1.8 + 1.8 org.slf4j slf4j-api - 1.7.10 + 1.7.25 org.slf4j jcl-over-slf4j - 1.7.10 + 1.7.25 runtime - ch.qos.logback - logback-classic - 1.1.2 + org.slf4j + slf4j-simple + 1.7.25 test org.apache.pdfbox pdfbox - 1.8.12 + 2.0.11 commons-logging @@ -53,11 +48,6 @@ - - org.apache.commons - commons-lang3 - 3.3.2 - junit junit @@ -77,7 +67,7 @@ maven-release-plugin - 2.5.1 + 2.5.3 @{project.version} @@ -85,21 +75,19 @@ org.jasig.maven maven-notice-plugin - 1.0.6.1 + 1.1.0 + test ${project.basedir}/src/main/notice/NOTICE.template - - ${project.basedir}/src/main/notice/license-mappings.xml - maven-compiler-plugin - 3.2 + 3.8.0 maven-surefire-plugin - 2.18.1 + 2.22.0 maven-deploy-plugin @@ -107,11 +95,11 @@ maven-clean-plugin - 2.6.1 + 3.1.0 maven-dependency-plugin - 2.10 + 3.1.1 maven-install-plugin @@ -119,15 +107,33 @@ maven-resources-plugin - 2.7 + 3.1.0 - maven-site-plugin - 3.4 + maven-jar-plugin + 3.1.0 - maven-jar-plugin - 2.5 + org.codehaus.mojo + versions-maven-plugin + 2.7 + + + com.github.siom79.japicmp + japicmp-maven-plugin + 0.13.0 + + + ${project.build.directory}/${project.build.finalName}.${project.packaging} + + + true + true + + no.digipost + + + @@ -135,7 +141,7 @@ com.mycila license-maven-plugin - 2.8 + 3.0
src/main/license-header.txt
true @@ -159,7 +165,7 @@
maven-javadoc-plugin - 2.10.1 + 3.0.1 attach-javadoc @@ -168,10 +174,14 @@ + + all,-missing + true + maven-source-plugin - 2.4 + 3.0.1 true @@ -211,30 +221,9 @@ - - org.codehaus.mojo - animal-sniffer-maven-plugin - 1.13 - - - ensure-java7-compatible - verify - - check - - - - org.codehaus.mojo.signature - java17 - 1.0 - - - - - maven-enforcer-plugin - 1.3.1 + 3.0.0-M2 test @@ -244,23 +233,19 @@ test + + 3.0.5 + *:* - org.apache.pdfbox:jempbox - org.apache.pdfbox:pdfbox - org.apache.pdfbox:fontbox - org.slf4j:slf4j-api - org.slf4j:jul-to-slf4j - org.slf4j:jcl-over-slf4j + org.apache.pdfbox + org.slf4j org.apache.commons:commons-lang3 - ch.qos.logback:logback-core - ch.qos.logback:logback-classic - org.hamcrest:hamcrest-core - org.hamcrest:hamcrest-library - junit:junit + org.hamcrest + junit true @@ -275,26 +260,6 @@ - - - doclint-java8-disable - - [1.8, - - - - - - maven-javadoc-plugin - - -Xdoclint:none - - - - - - - scm:git:git@github.com:digipost/printability-validator.git scm:git:git@github.com:digipost/printability-validator.git diff --git a/src/main/java/no/digipost/print/validate/EnhancedNonSequentialPDFParser.java b/src/main/java/no/digipost/print/validate/EnhancedNonSequentialPDFParser.java deleted file mode 100644 index 7815f89..0000000 --- a/src/main/java/no/digipost/print/validate/EnhancedNonSequentialPDFParser.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Copyright (C) Posten Norge AS - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package no.digipost.print.validate; - -import org.apache.pdfbox.pdfparser.NonSequentialPDFParser; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.persistence.util.COSObjectKey; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Set; - -/** - * Should not be used until pdfbox 1.8.9 is released - * Requires rework when 2.0.0 is released. - */ -class EnhancedNonSequentialPDFParser extends NonSequentialPDFParser implements AutoCloseable { - - static { - // Ensures that the parser does not read the entire PDF to memory. - System.setProperty(SYSPROP_PARSEMINIMAL, "true"); - } - - EnhancedNonSequentialPDFParser(InputStream in) throws IOException { - super(in); - super.initialParse(); - } - - public int getNumberOfPages() throws IOException { - return super.getPageNumber(); - } - - public boolean isEncrypted() { - return super.getSecurityHandler() != null; - } - - @Override - public PDPage getPage(int pageNr) throws IOException { - // Releases memory regularly - if (pageNr % 5 == 0) { - Set cosObjectKeys = super.xrefTrailerResolver.getXrefTable().keySet(); - for (COSObjectKey cosObjectKey : cosObjectKeys) { - super.getDocument().removeObject(cosObjectKey); - } - } - return super.getPage(pageNr); - } - - @Override - public void close() { - this.clearResources(); - } - -} diff --git a/src/main/java/no/digipost/print/validate/PDFBoxConfigurer.java b/src/main/java/no/digipost/print/validate/PDFBoxConfigurer.java new file mode 100644 index 0000000..414da56 --- /dev/null +++ b/src/main/java/no/digipost/print/validate/PDFBoxConfigurer.java @@ -0,0 +1,100 @@ +/** + * Copyright (C) Posten Norge AS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package no.digipost.print.validate; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Configure certain aspects of PDFBox, if the defaults provided by the + * printability-validator are not suitable. These settings are configured using + * system properties, and to make them effective, they need to be set early, + * i.e. before the static initializer in + * {@link PdfValidator} is executed. + *

+ * See pdfbox.apache.org/2.0/getting-started.html#pdfbox-and-java-8 + * + */ +public final class PDFBoxConfigurer { + + private static final Logger LOG = LoggerFactory.getLogger(PDFBoxConfigurer.class); + + static final class PDFBoxConfiguration { + volatile boolean enabled = true; + + volatile boolean useKcmsServiceProvider = true; + + volatile boolean usePureJavaCMYKConversion = true; + + private PDFBoxConfiguration() { + } + } + + private static final String alreadyConfiguredWarn = + "PDFBox system properties has already been configured, and calling {} may not be effective! " + + "Make sure you call this method early, before using the PdfValidator."; + + private static final AtomicBoolean configured = new AtomicBoolean(false); + private static final PDFBoxConfiguration pdfBoxConfiguration = new PDFBoxConfiguration(); + + public static void doNotConfigurePDFBox() { + if (configured.get()) { + LOG.warn(alreadyConfiguredWarn, "doNotConfigurePDFBox()"); + } + pdfBoxConfiguration.enabled = false; + } + + public static void useKcmsServiceProvider(boolean use) { + if (configured.get()) { + LOG.warn(alreadyConfiguredWarn, "useKcmsServiceProvider(" + use + ")"); + } + pdfBoxConfiguration.useKcmsServiceProvider = use; + } + + public static void usePureJavaCMYKConversion(boolean use) { + if (configured.get()) { + LOG.warn(alreadyConfiguredWarn, "usePureJavaCMYKConversion(" + use + ")"); + } + pdfBoxConfiguration.usePureJavaCMYKConversion = use; + } + + static synchronized void configure() { + configured.set(true); + if (pdfBoxConfiguration.enabled) { + if (pdfBoxConfiguration.useKcmsServiceProvider) { + LOG.info( + "Configuring sun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider as described at " + + "https://pdfbox.apache.org/2.0/getting-started.html#pdfbox-and-java-8 " + + "to increase PDF color operation performance."); + System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider"); + } + if (pdfBoxConfiguration.usePureJavaCMYKConversion) { + LOG.info( + 
"Configuring org.apache.pdfbox.rendering.UsePureJavaCMYKConversion=true as described at " + + "https://pdfbox.apache.org/2.0/getting-started.html#rendering-performance " + + "to increase PDF rendering performance."); + System.setProperty("org.apache.pdfbox.rendering.UsePureJavaCMYKConversion", "true"); + } + } else { + LOG.info("Using default settings for PDFBox for printability-validator library"); + } + } + + private PDFBoxConfigurer() { + } +} diff --git a/src/main/java/no/digipost/print/validate/PdfFontValidator.java b/src/main/java/no/digipost/print/validate/PdfFontValidator.java index 838690d..c7910b8 100644 --- a/src/main/java/no/digipost/print/validate/PdfFontValidator.java +++ b/src/main/java/no/digipost/print/validate/PdfFontValidator.java @@ -15,17 +15,29 @@ */ package no.digipost.print.validate; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.font.*; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; +import org.apache.pdfbox.pdmodel.font.PDType0Font; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; import static java.util.Arrays.asList; import static java.util.Collections.emptySet; import static java.util.Collections.unmodifiableList; -import static org.apache.commons.lang3.StringUtils.*; +import static java.util.Collections.unmodifiableSet; +import static java.util.stream.Collectors.collectingAndThen; +import static java.util.stream.Collectors.toSet; +import static java.util.stream.Stream.concat; class PdfFontValidator { @@ -36,22 +48,20 @@ class PdfFontValidator { // Helvetica (v3) (in regular, oblique, bold and bold oblique) // Symbol // Zapf Dingbats - private static final Set 
STANDARD_14_FONTS = new HashSet<>(asList("TIMES", "COURIER", "HELVETICA", "SYMBOL", "ZAPFDINGBATS")); + private static final Set STANDARD_14_FONTS = unmodifiableSet(new HashSet<>(asList("TIMES", "COURIER", "HELVETICA", "SYMBOL", "ZAPFDINGBATS"))); - private static final Set WHITE_LISTED_FONTS = new HashSet<>(asList("ARIAL")); + private static final Set WHITE_LISTED_FONTS = unmodifiableSet(new HashSet<>(asList("ARIAL"))); - private static final Set SUPPORTED_FONTS = new HashSet<>(); - - static { - SUPPORTED_FONTS.addAll(STANDARD_14_FONTS); - SUPPORTED_FONTS.addAll(WHITE_LISTED_FONTS); - } + private static final Set SUPPORTED_FONTS = concat(STANDARD_14_FONTS.stream(), WHITE_LISTED_FONTS.stream()).collect(collectingAndThen(toSet(), Collections::unmodifiableSet)); public Collection getPageFonts(PDPage page) throws IOException { PDResources resources = page.getResources(); if (resources != null) { - Map fontMap = resources.getFonts(); - return fontMap.values(); + Set fonts = new LinkedHashSet<>(); + for (COSName fontName : resources.getFontNames()) { + fonts.add(resources.getFont(fontName)); + } + return fonts; } return emptySet(); } @@ -59,16 +69,20 @@ public Collection getPageFonts(PDPage page) throws IOException { public List findNonSupportedFonts(Iterable fonter) { List nonSupported = new ArrayList<>(); for (PDFont font : fonter) { - PDFontDescriptor fontDescriptor = font.getFontDescriptor(); - if (fontDescriptor != null) { - if (!erFontDescriptorAkseptabelForPrint(fontDescriptor)) { - nonSupported.add(font); - } + if (font.isDamaged()) { + nonSupported.add(font); } else { - if (!(font instanceof PDType0Font)) { - if (!erAkseptabelForPrint(font.getBaseFont())) { + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) { + if (!erFontDescriptorAkseptabelForPrint(fontDescriptor)) { nonSupported.add(font); } + } else { + if (!(font instanceof PDType0Font)) { + if (!erAkseptabelForPrint(font.getName())) { + nonSupported.add(font); + } 
+ } } } } @@ -76,26 +90,21 @@ public List findNonSupportedFonts(Iterable fonter) { } private boolean erFontDescriptorAkseptabelForPrint(PDFontDescriptor fontDescriptor) { - if (fontDescriptor instanceof PDFontDescriptorDictionary) { - PDFontDescriptorDictionary pdFontDescriptorDictionary = (PDFontDescriptorDictionary) fontDescriptor; - if (harIkkeEmbeddedFont(pdFontDescriptorDictionary)) { - return erAkseptabelForPrint(pdFontDescriptorDictionary.getFontName()); - } else { - return true; - } - } else if (fontDescriptor instanceof PDFontDescriptorAFM) { - PDFontDescriptorAFM fontDescriptorAFM = (PDFontDescriptorAFM) fontDescriptor; - return erAkseptabelForPrint(fontDescriptorAFM.getFontName()); + if (harIkkeEmbeddedFont(fontDescriptor)) { + return erAkseptabelForPrint(fontDescriptor.getFontName()); } else { - throw new IllegalArgumentException("Ukjent font descriptor brukt : " + fontDescriptor.getClass()); + return true; } } + + private static final String DASH_AND_WHITESPACE = "[\\-\\s]+"; + private boolean erAkseptabelForPrint(String fontnavn) { if (fontnavn == null) { return false; } - String normalisertFontnavn = upperCase(deleteWhitespace(remove(fontnavn, "-"))); + String normalisertFontnavn = fontnavn.replaceAll(DASH_AND_WHITESPACE, "").toUpperCase(); for (String supportertFontnavn : SUPPORTED_FONTS) { if (normalisertFontnavn.contains(supportertFontnavn)) { return true; @@ -104,7 +113,7 @@ private boolean erAkseptabelForPrint(String fontnavn) { return false; } - private boolean harIkkeEmbeddedFont(PDFontDescriptorDictionary fontDescriptor) { + private boolean harIkkeEmbeddedFont(PDFontDescriptor fontDescriptor) { return fontDescriptor.getFontFile() == null && fontDescriptor.getFontFile2() == null && fontDescriptor.getFontFile3() == null; diff --git a/src/main/java/no/digipost/print/validate/PdfValidateStrategy.java b/src/main/java/no/digipost/print/validate/PdfValidateStrategy.java deleted file mode 100644 index bffe766..0000000 --- 
a/src/main/java/no/digipost/print/validate/PdfValidateStrategy.java +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Copyright (C) Posten Norge AS - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package no.digipost.print.validate; - - -public enum PdfValidateStrategy { - - FULLY_IN_MEMORY, - NON_SEQUENTIALLY; - -} diff --git a/src/main/java/no/digipost/print/validate/PdfValidationError.java b/src/main/java/no/digipost/print/validate/PdfValidationError.java index 49e6072..c155718 100644 --- a/src/main/java/no/digipost/print/validate/PdfValidationError.java +++ b/src/main/java/no/digipost/print/validate/PdfValidationError.java @@ -15,18 +15,18 @@ */ package no.digipost.print.validate; -import org.apache.commons.lang3.StringUtils; - import java.util.Collections; import java.util.EnumSet; import java.util.Set; +import static java.util.stream.Collectors.joining; + public enum PdfValidationError { PDF_IS_ENCRYPTED("The PDF document is encrypted."), TOO_MANY_PAGES_FOR_AUTOMATED_PRINT("The PDF document contains too many pages."), UNSUPPORTED_PDF_VERSION_FOR_PRINT("The version of the PDF document is not supported. Supported versions are " - + StringUtils.join(PdfValidator.PDF_VERSIONS_SUPPORTED_FOR_PRINT, ", ") + "."), + + PdfValidator.PDF_VERSIONS_SUPPORTED_FOR_PRINT.stream().map(String::valueOf).collect(joining(", ")) + "."), INSUFFICIENT_MARGIN_FOR_PRINT("The left margin of the PDF document is too narrow. 
Minimum left margin is " + PdfValidator.BARCODE_AREA_WIDTH_MM + " mm."), UNABLE_TO_VERIFY_SUITABLE_MARGIN_FOR_PRINT("Could not verify the left margin of the PDF document. Minimum left margin is " diff --git a/src/main/java/no/digipost/print/validate/PdfValidator.java b/src/main/java/no/digipost/print/validate/PdfValidator.java index 10f81ee..a87a34f 100644 --- a/src/main/java/no/digipost/print/validate/PdfValidator.java +++ b/src/main/java/no/digipost/print/validate/PdfValidator.java @@ -16,36 +16,44 @@ package no.digipost.print.validate; import no.digipost.print.validate.PdfValidationSettings.Bleed; -import org.apache.commons.lang3.StringUtils; -import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.util.PDFTextStripperByArea; +import org.apache.pdfbox.text.PDFTextStripperByArea; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.awt.geom.Rectangle2D; -import java.io.*; +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.math.BigDecimal; import java.math.RoundingMode; import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static java.util.Arrays.asList; -import static no.digipost.print.validate.PdfValidateStrategy.FULLY_IN_MEMORY; -import static no.digipost.print.validate.PdfValidateStrategy.NON_SEQUENTIALLY; -import static no.digipost.print.validate.PdfValidationError.*; -import static org.apache.commons.lang3.StringUtils.join; +import static java.util.stream.Collectors.joining; +import static no.digipost.print.validate.PdfValidationError.INSUFFICIENT_MARGIN_FOR_PRINT; +import static 
no.digipost.print.validate.PdfValidationError.UNABLE_TO_VERIFY_SUITABLE_MARGIN_FOR_PRINT; +import static no.digipost.print.validate.PdfValidationError.UNSUPPORTED_DIMENSIONS; public class PdfValidator { private static final Logger LOG = LoggerFactory.getLogger(PdfValidator.class); + static { + PDFBoxConfigurer.configure(); + } + + private final PdfFontValidator fontValidator = new PdfFontValidator(); // MM_TO_UNITS copied from org.apache.pdfbox.pdmodel.PDPage @@ -61,126 +69,39 @@ public class PdfValidator { public PdfValidationResult validate(byte[] pdfContent, PdfValidationSettings printValidationSettings) { - return validateForPrint(new ByteArrayInputStream(pdfContent), printValidationSettings, PdfValidateStrategy.FULLY_IN_MEMORY); + return validateForPrint(new ByteArrayInputStream(pdfContent), printValidationSettings); } - public PdfValidationResult validate(File pdfFile, PdfValidationSettings printValidationSettings) throws IOException { - InputStream pdfStream = openFileAsInputStream(pdfFile); - return validateForPrint(pdfStream, printValidationSettings, PdfValidateStrategy.FULLY_IN_MEMORY); - } - - /** - * @param pdfStream the input stream for reading the PDF. 
It will be closed before returning from - * this method - * @param readStrategy decides if PDF is completely read into memory or not - */ - private PdfValidationResult validateForPrint(InputStream pdfStream, PdfValidationSettings printValidationSettings, PdfValidateStrategy readStrategy) { - int numberOfPages = -1; - try { - List errors; - try { - if (readStrategy == NON_SEQUENTIALLY) { - try (EnhancedNonSequentialPDFParser dpostNonSequentialPDFParser = new EnhancedNonSequentialPDFParser(pdfStream)){ - numberOfPages = dpostNonSequentialPDFParser.getNumberOfPages(); - errors = validateStreamForPrint(dpostNonSequentialPDFParser, printValidationSettings); - } - } else if (readStrategy == FULLY_IN_MEMORY) { - try (PDDocument pdDoc = PDDocument.load(pdfStream)) { - numberOfPages = pdDoc.getNumberOfPages(); - errors = validateDocumentForPrint(pdDoc, printValidationSettings); - } - } else { - throw new IllegalArgumentException("Unknown " + PdfValidateStrategy.class.getSimpleName() + ": " + readStrategy); - } - } catch (Exception e) { - errors = asList(PdfValidationError.PDF_PARSE_ERROR); - LOG.info("PDF could not be parsed. (" + e.getMessage() + ")"); - LOG.debug(e.getMessage(), e); - } - - return new PdfValidationResult(errors, numberOfPages, printValidationSettings.bleed); - } finally { - IOUtils.closeQuietly(pdfStream); + public PdfValidationResult validate(Path pdfFile, PdfValidationSettings printValidationSettings) throws IOException { + try (InputStream pdfStream = openFileAsInputStream(pdfFile)) { + return validateForPrint(pdfStream, printValidationSettings); } } /** - * Leser ikke hele dokumentet inn i minnet + * @param pdfStream the input stream for reading the PDF. This method will not close the stream. 
+ * @param printValidationSettings settings for how to perform the validation */ - private List validateStreamForPrint(EnhancedNonSequentialPDFParser dpostNonSequentialPDFParser, - PdfValidationSettings settings) throws IOException { - - List errors = new ArrayList<>(); - - if (dpostNonSequentialPDFParser.isEncrypted()) { - return failValidationIfEncrypted(errors); - } - - if (settings.validateNumberOfPages) { - validerSideantall(dpostNonSequentialPDFParser.getNumberOfPages(), settings.maxNumberOfPages, errors); - } - - if (settings.validatePDFversion) { - validatePdfVersion(dpostNonSequentialPDFParser.getDocument().getVersion(), errors); - } - - boolean documentHasInvalidDimensions = false; - boolean documentContainsPagesWithInvalidPrintMargins = false; - boolean documentHasInvalidLeftMargin = false; - boolean documentHasPagesWhichCannotBeParsed = false; - for (int i = 1; i <= dpostNonSequentialPDFParser.getNumberOfPages(); i++) { - PDPage page = null; - try { - page = dpostNonSequentialPDFParser.getPage(i); - } catch (Exception e) { - documentHasPagesWhichCannotBeParsed = true; - } - if (page != null) { - - if (!documentHasInvalidDimensions) { - if (hasInvalidDimensions(page, settings.bleed)) { - documentHasInvalidDimensions = true; - } - } - - if (settings.validateLeftMargin) { - if (!documentHasInvalidLeftMargin) { - try { - if (hasTextInBarcodeArea(page, settings.bleed)) { - documentHasInvalidLeftMargin = true; - } - } catch (NullPointerException npe) { - LOG.info("Could not verify margin on the following side " + i); - documentContainsPagesWithInvalidPrintMargins = true; - } - } - - } - - if (settings.validateFonts) { - validateFonts(fontValidator.getPageFonts(page), errors); - } - - } else { - // TODO en eller annen algoritme som kaster feil om et visst antall - // sider ikke kan parses - LOG.warn("Could not fetch page {} in the pdf", i); - } + private PdfValidationResult validateForPrint(InputStream pdfStream, PdfValidationSettings printValidationSettings) 
{ + int numberOfPages = -1; + List errors; + try (PDDocument pdDoc = PDDocument.load(pdfStream)) { + numberOfPages = pdDoc.getNumberOfPages(); + errors = validateDocumentForPrint(pdDoc, printValidationSettings); + } catch (InvalidPasswordException invalidPassword) { + errors = failValidationIfEncrypted(new ArrayList<>()); + } catch (Exception e) { + errors = asList(PdfValidationError.PDF_PARSE_ERROR); + LOG.debug("PDF could not be parsed. ({}: '{}')", e.getClass().getSimpleName(), e.getMessage(), e); } - addValidationError(documentHasInvalidDimensions, UNSUPPORTED_DIMENSIONS, errors); - addValidationError(documentHasInvalidLeftMargin, INSUFFICIENT_MARGIN_FOR_PRINT, errors); - addValidationError(documentHasPagesWhichCannotBeParsed, PDF_PARSE_PAGE_ERROR, errors); - addValidationError(documentContainsPagesWithInvalidPrintMargins, UNABLE_TO_VERIFY_SUITABLE_MARGIN_FOR_PRINT, errors); - - return errors; - + return new PdfValidationResult(errors, numberOfPages, printValidationSettings.bleed); } /** * Leser hele dokumentet inn i minnet */ - private List validateDocumentForPrint(final PDDocument pdDoc, final PdfValidationSettings settings) throws IOException { + List validateDocumentForPrint(PDDocument pdDoc, PdfValidationSettings settings) throws IOException { List errors = new ArrayList<>(); if (pdDoc.isEncrypted()) { @@ -196,7 +117,7 @@ private List validateDocumentForPrint(final PDDocument pdDoc } boolean documentHasInvalidDimensions = false; - for (PDPage page : getAllPagesFrom(pdDoc)) { + for (PDPage page : pdDoc.getPages()) { if (hasInvalidDimensions(page, settings.bleed)) { documentHasInvalidDimensions = true; break; @@ -208,15 +129,15 @@ private List validateDocumentForPrint(final PDDocument pdDoc boolean hasTextInBarcodeArea = false; boolean documentContainsPagesWithInvalidPrintMargins = false; if (settings.validateLeftMargin) { - for (PDPage page : getAllPagesFrom(pdDoc)) { + for (PDPage page : pdDoc.getPages()) { try { if (hasTextInBarcodeArea(page, 
settings.bleed)) { hasTextInBarcodeArea = true; break; } - } catch (NullPointerException npe) { + } catch (Exception e) { documentContainsPagesWithInvalidPrintMargins = true; - LOG.info("Could not validate the margin on one of the sides"); + LOG.debug("Unable to validate the margin on one of the pages.", e); } } } @@ -225,7 +146,7 @@ private List validateDocumentForPrint(final PDDocument pdDoc addValidationError(hasTextInBarcodeArea, INSUFFICIENT_MARGIN_FOR_PRINT, errors); if (settings.validateFonts) { - for (PDPage page : getAllPagesFrom(pdDoc)) { + for (PDPage page : pdDoc.getPages()) { validateFonts(fontValidator.getPageFonts(page), errors); } } @@ -240,56 +161,46 @@ private void addValidationError(boolean documentContainsPagesThatCannotBeParsed, } } - @SuppressWarnings("unchecked") - private List getAllPagesFrom(final PDDocument pdDoc) { - return pdDoc.getDocumentCatalog().getAllPages(); - } - private List failValidationIfEncrypted(List errors) { errors.add(PdfValidationError.PDF_IS_ENCRYPTED); - LOG.info("The pdf is encrypted."); + LOG.debug("The pdf is encrypted."); return errors; } - private void validateFonts(final Iterable fonter, final List errors) { + private void validateFonts(Iterable fonter, List errors) { List nonSupportedFonts = fontValidator.findNonSupportedFonts(fonter); if (!nonSupportedFonts.isEmpty()) { errors.add(PdfValidationError.REFERENCES_INVALID_FONT); - if (LOG.isInfoEnabled()) { - LOG.info("The PDF has references to invalid fonts: [{}]", join(describe(nonSupportedFonts), ", ")); + if (LOG.isDebugEnabled()) { + LOG.debug("The PDF has references to invalid fonts: [{}]", nonSupportedFonts.stream().map(this::describe).collect(joining(", "))); } } } - private List describe(Iterable fonts) { - List fontDescriptions = new ArrayList<>(); - for (PDFont font : fonts) { - fontDescriptions.add(font.getSubType() + " '" + font.getBaseFont() + "'"); - } - return fontDescriptions; + private String describe(PDFont font) { + return font.getSubType() + " '" + font.getName() + "'"; } 
- private void validatePdfVersion(final float pdfVersion, final List errors) { + private void validatePdfVersion(float pdfVersion, List errors) { if (!PDF_VERSIONS_SUPPORTED_FOR_PRINT.contains(pdfVersion)) { errors.add(PdfValidationError.UNSUPPORTED_PDF_VERSION_FOR_PRINT); - LOG.info("The PDF is not in valid version. Valid versions are {}. Actual version is {}", - StringUtils.join(PDF_VERSIONS_SUPPORTED_FOR_PRINT, ", "), pdfVersion); + LOG.info("PDF version was {}. {}", pdfVersion, PdfValidationError.UNSUPPORTED_PDF_VERSION_FOR_PRINT); } } - private void validerSideantall(final int numberOfPages, int maxPages, final List errors) { + private void validerSideantall(int numberOfPages, int maxPages, final List errors) { if (numberOfPages > maxPages) { errors.add(PdfValidationError.TOO_MANY_PAGES_FOR_AUTOMATED_PRINT); - LOG.info("The PDF has too many pages. Max number of pages is {}. Actual number of pages is {}", maxPages, numberOfPages); + LOG.debug("The PDF has too many pages. Max number of pages is {}. Actual number of pages is {}", maxPages, numberOfPages); } if (numberOfPages == 0) { errors.add(PdfValidationError.DOCUMENT_HAS_NO_PAGES); - LOG.info("The PDF document does not contain any pages. The file may be corrupt.", numberOfPages); + LOG.debug("The PDF document does not contain any pages. 
The file may be corrupt.", numberOfPages); } } - private boolean hasTextInBarcodeArea(final PDPage pdPage, Bleed bleed) throws IOException { - SilentZone silentZone = new SilentZone(pdPage.findCropBox(), bleed); + private boolean hasTextInBarcodeArea(PDPage pdPage, Bleed bleed) throws IOException { + SilentZone silentZone = new SilentZone(pdPage.getCropBox(), bleed); Rectangle2D leftMarginBarcodeArea = new Rectangle2D.Double(silentZone.upperLeftCornerX, silentZone.upperLeftCornerY, silentZone.silentZoneXSize, silentZone.silentZoneYSize); @@ -297,12 +208,12 @@ private boolean hasTextInBarcodeArea(final PDPage pdPage, Bleed bleed) throws IO return hasTextInArea(pdPage, leftMarginBarcodeArea); } - private boolean hasInvalidDimensions(final PDPage page, Bleed bleed) { - PDRectangle findCropBox = page.findCropBox(); + private boolean hasInvalidDimensions(PDPage page, Bleed bleed) { + PDRectangle findCropBox = page.getCropBox(); long pageHeightInMillimeters = pointsTomm(findCropBox.getHeight()); long pageWidthInMillimeters = pointsTomm(findCropBox.getWidth()); if (!isPortraitA4(pageWidthInMillimeters, pageHeightInMillimeters, bleed) && !isLandscapeA4(pageWidthInMillimeters, pageHeightInMillimeters, bleed)) { - LOG.info("One or more pages in the PDF has invalid dimensions. Valid dimensions are width {} mm and height {} mm, alt " + + LOG.debug("One or more pages in the PDF has invalid dimensions. Valid dimensions are width {} mm and height {} mm, alt " + "width {} mm og height {} mm with {} mm lower flexibility and {} upper flexibility. 
" + "Actual dimensions are width: {} mm and height: {} mm.", new Object[] { A4_WIDTH_MM, A4_HEIGHT_MM, A4_HEIGHT_MM, A4_WIDTH_MM, bleed.negativeBleedInMM, @@ -326,7 +237,7 @@ private static boolean isLandscapeA4(long pageWidthInMillimeters, long pageHeigh return isPortraitA4(pageHeightInMillimeters, pageWidthInMillimeters, bleed); } - private boolean hasTextInArea(final PDPage pdPage, final Rectangle2D area) throws IOException { + private boolean hasTextInArea(PDPage pdPage, Rectangle2D area) throws IOException { boolean hasTextInArea = false; final PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.addRegion("marginArea", area); @@ -338,17 +249,17 @@ private boolean hasTextInArea(final PDPage pdPage, final Rectangle2D area) throw return hasTextInArea; } - private InputStream openFileAsInputStream(final File pdfFile) throws IOException { - return new BufferedInputStream(Files.newInputStream(pdfFile.toPath())); + private InputStream openFileAsInputStream(Path pdfFile) throws IOException { + return new BufferedInputStream(Files.newInputStream(pdfFile)); } - private static double mmToPoints(final int sizeInMillimeters) { + private static double mmToPoints(int sizeInMillimeters) { BigDecimal points = new BigDecimal(sizeInMillimeters * MM_TO_POINTS); points = points.setScale(1, RoundingMode.DOWN); return points.doubleValue(); } - private static long pointsTomm(final double sizeInPoints) { + private static long pointsTomm(double sizeInPoints) { return Math.round(sizeInPoints / MM_TO_POINTS); } diff --git a/src/main/notice/license-mappings.xml b/src/main/notice/license-mappings.xml deleted file mode 100644 index 3f646c5..0000000 --- a/src/main/notice/license-mappings.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - diff --git a/src/test/java/no/digipost/print/validate/PrintPdfValidatorTest.java b/src/test/java/no/digipost/print/validate/PrintPdfValidatorTest.java index 4f55dba..12489d9 100644 --- 
a/src/test/java/no/digipost/print/validate/PrintPdfValidatorTest.java +++ b/src/test/java/no/digipost/print/validate/PrintPdfValidatorTest.java @@ -16,16 +16,28 @@ package no.digipost.print.validate; import no.digipost.print.validate.PdfValidationSettings.Bleed; +import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Test; -import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; -import static no.digipost.print.validate.PdfValidationError.*; +import static java.util.Objects.requireNonNull; +import static no.digipost.print.validate.PdfValidationError.DOCUMENT_HAS_NO_PAGES; +import static no.digipost.print.validate.PdfValidationError.INSUFFICIENT_MARGIN_FOR_PRINT; +import static no.digipost.print.validate.PdfValidationError.PDF_IS_ENCRYPTED; +import static no.digipost.print.validate.PdfValidationError.PDF_PARSE_ERROR; +import static no.digipost.print.validate.PdfValidationError.REFERENCES_INVALID_FONT; +import static no.digipost.print.validate.PdfValidationError.TOO_MANY_PAGES_FOR_AUTOMATED_PRINT; +import static no.digipost.print.validate.PdfValidationError.UNSUPPORTED_DIMENSIONS; import static no.digipost.print.validate.PdfValidationSettings.CHECK_ALL; -import static org.apache.commons.lang3.Validate.notNull; -import static org.hamcrest.Matchers.*; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.everyItem; +import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertThat; public class PrintPdfValidatorTest { @@ -96,8 +108,14 @@ public void doesNotFailPdfWithTooManyPagesForPrintIfCheckDisabled() { } @Test - public void failsCorruptPdfResultingInNoPages() { - assertThat(validationErrors("/pdf/corrupt_no_pages.pdf", CHECK_ALL), contains(DOCUMENT_HAS_NO_PAGES)); + public void failsCorruptPdfResultingInNoPages() throws IOException { + 
PDDocument zeroPagesDocument = new PDDocument() { + @Override + public int getNumberOfPages() { + return 0; + } + }; + assertThat(pdfValidator.validateDocumentForPrint(zeroPagesDocument, CHECK_ALL), contains(DOCUMENT_HAS_NO_PAGES)); } @Test @@ -146,7 +164,7 @@ public void failsForPDFLargerThatA4WhenBleedSettingIsInactive() { } public static List validationErrors(String pdfResourceName, PdfValidationSettings printValidationSettings) { - File pdf = new File(notNull(PrintPdfValidatorTest.class.getResource(pdfResourceName), pdfResourceName).getFile().replace("%20", " ")); + Path pdf = Paths.get(requireNonNull(PrintPdfValidatorTest.class.getResource(pdfResourceName), pdfResourceName).getFile().replace("%20", " ")); try { return pdfValidator.validate(pdf, printValidationSettings).errors; } catch (IOException e) { diff --git a/src/test/resources/logback.xml b/src/test/resources/logback.xml deleted file mode 100644 index aee2ce6..0000000 --- a/src/test/resources/logback.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - true - - - - - %d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n - - - - - - - diff --git a/src/test/resources/simplelogger.properties b/src/test/resources/simplelogger.properties new file mode 100644 index 0000000..70ed769 --- /dev/null +++ b/src/test/resources/simplelogger.properties @@ -0,0 +1,19 @@ +# +# Copyright (C) Posten Norge AS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +org.slf4j.simpleLogger.defaultLogLevel=info + +org.slf4j.simpleLogger.log.org.apache.pdfbox.pdmodel.font=error