From 08ac529a2bddc9d97d51c9431d951bb7741d6eaf Mon Sep 17 00:00:00 2001 From: hwding Date: Mon, 4 Sep 2017 14:14:59 +0800 Subject: [PATCH] - provide a more elegant and efficient way to load fonts in each page - bump version & head comment date --- pom.xml | 4 +- script/install | 4 +- .../amastigote/unstamper/core/Processor.java | 62 +++++++------------ .../unstamper/log/GeneralLogger.java | 4 +- 4 files changed, 30 insertions(+), 44 deletions(-) diff --git a/pom.xml b/pom.xml index 20fabe2..41196d1 100644 --- a/pom.xml +++ b/pom.xml @@ -6,8 +6,8 @@ 4.0.0 com.amastigote unstamper - 0.1.1 - text stamp remover for PDF files + 0.1.2 + Text stamp remover for PDF files. pdf-unstamper https://github.com/hwding/pdf-unstamper diff --git a/script/install b/script/install index 881e9fc..ff4e6d8 100755 --- a/script/install +++ b/script/install @@ -1,13 +1,13 @@ #!/bin/bash #AUTH hwding -#DATE AUG/25/2017 +#DATE SEP/04/2017 #DESC install unstamp as a command user_bin=`echo ~`"/bin/" jar_name="pdf-unstamper.jar" exe_name="unstamp" -_version="0.1.1" +_version="0.1.2" jar_durl="https://github.com/hwding/pdf-unstamper/releases/download/$_version/$jar_name" function chk_f() { diff --git a/src/com/amastigote/unstamper/core/Processor.java b/src/com/amastigote/unstamper/core/Processor.java index c54244a..a1e3702 100644 --- a/src/com/amastigote/unstamper/core/Processor.java +++ b/src/com/amastigote/unstamper/core/Processor.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Aug 27 2017 + DATE | Sep 04 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -8,7 +8,6 @@ package com.amastigote.unstamper.core; import com.amastigote.unstamper.log.GeneralLogger; -import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; @@ -19,11 +18,9 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; public class Processor { public static void process(File file, String[] strings) { @@ -35,45 +32,34 @@ public static void process(File file, String[] strings) { PDDocument pdDocument = PDDocument.load(file); pdDocument.getPages().forEach(pdPage -> { try { - /* START: loading font resources for further parsing */ + /* START: loading font resources from current page */ PDFStreamParser pdfStreamParser = new PDFStreamParser(pdPage); pdfStreamParser.parse(); - List objects = - Collections.synchronizedList(pdfStreamParser.getTokens()); + List objects = pdfStreamParser.getTokens(); + Set pdFonts = new HashSet<>(); - List cosNames = - objects.parallelStream() - .filter(e -> e instanceof COSName) - .collect(Collectors.toList()); - - Set pdFonts = - Collections.synchronizedSet(new HashSet<>()); - - cosNames.parallelStream() - .forEach(e -> { - /* Ignore Any Exception During Parallel Processing */ - try { - PDFont pdFont = pdPage.getResources().getFont(((COSName) e)); - if (pdFont != null) - pdFonts.add(pdFont); - } catch (Exception ignored) { - } - }); + pdPage.getResources().getFontNames().forEach(e -> { + /* Ignore Any Exception During Parallel Processing */ + try { + PDFont pdFont = pdPage.getResources().getFont(e); + if (pdFont != null) + pdFonts.add(pdFont); + } catch (Exception ignored) { + } + }); /* END */ - objects - .parallelStream() - .forEach(e -> { - if (e instanceof COSString) { - /* Ignore Any Exception During Parallel Processing */ - try { - if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts)) - ((COSString) e).setValue(new byte[0]); - } catch (Exception ignored) { - } - } - } - ); + + objects.parallelStream().forEach(e -> { + if (e instanceof COSString) { + /* Ignore Any Exception During Parallel Processing */ + try { + if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts)) + ((COSString) e).setValue(new byte[0]); + } catch (Exception ignored) { + } + } + }); PDStream newContents = new PDStream(pdDocument); OutputStream out = newContents.createOutputStream(); diff --git a/src/com/amastigote/unstamper/log/GeneralLogger.java b/src/com/amastigote/unstamper/log/GeneralLogger.java index a7cc3cc..fcdd5bf 100644 --- a/src/com/amastigote/unstamper/log/GeneralLogger.java +++ b/src/com/amastigote/unstamper/log/GeneralLogger.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Aug 27 2017 + DATE | Sep 04 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -10,7 +10,7 @@ public class GeneralLogger { public static class Help { private static final String usage = - "\nPDF-UnStamper ver. 0.1.1 by hwding@GitHub\n" + + "\nPDF-UnStamper ver. 0.1.2 by hwding@GitHub\n" + "\nUsage: " + "\n [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])" + "\n [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])\n" +