diff --git a/pom.xml b/pom.xml
old mode 100644
new mode 100755
index c89b0e9..0f62f6d
--- a/pom.xml
+++ b/pom.xml
@@ -43,8 +43,8 @@
maven-compiler-plugin
2.3.2
-
- 1.8
+
+ 1.7
diff --git a/src/main/java/com/testautomationguru/utility/ImageUtil.java b/src/main/java/com/testautomationguru/utility/ImageUtil.java
old mode 100644
new mode 100755
index 3c6010c..db7a986
--- a/src/main/java/com/testautomationguru/utility/ImageUtil.java
+++ b/src/main/java/com/testautomationguru/utility/ImageUtil.java
@@ -1,19 +1,18 @@
package com.testautomationguru.utility;
+import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.logging.Logger;
-import javax.imageio.ImageIO;
-
class ImageUtil {
-
+
static Logger logger = Logger.getLogger(ImageUtil.class.getName());
-
- static boolean compareAndHighlight(final BufferedImage img1, final BufferedImage img2, String fileName, boolean highlight, int colorCode) throws IOException {
- final int w = img1.getWidth();
+ static boolean compareAndHighlight(final BufferedImage img1, final BufferedImage img2, String fileName, PdfUtilImageListener imageListener, boolean highlight, int colorCode) throws IOException {
+
+ final int w = img1.getWidth();
final int h = img1.getHeight();
final int[] p1 = img1.getRGB(0, 0, w, h, null, 0, w);
final int[] p2 = img2.getRGB(0, 0, w, h, null, 0, w);
@@ -24,11 +23,11 @@ static boolean compareAndHighlight(final BufferedImage img1, final BufferedImage
for (int i = 0; i < p1.length; i++) {
if (p1[i] != p2[i]){
p1[i] = colorCode;
- }
+ }
}
final BufferedImage out = new BufferedImage(w, h, BufferedImage.TYPE_INT_ARGB);
out.setRGB(0, 0, w, h, p1, 0, w);
- saveImage(out, fileName);
+ imageListener.imageGenerated(out, fileName);
}
return false;
}
@@ -38,9 +37,9 @@ static boolean compareAndHighlight(final BufferedImage img1, final BufferedImage
static void saveImage(BufferedImage image, String file){
try{
File outputfile = new File(file);
- ImageIO.write(image, "png", outputfile);
- }catch(Exception e){
+ ImageIO.write(image, "png", outputfile);
+ } catch(Exception e){
e.printStackTrace();
}
- }
+ }
}
diff --git a/src/main/java/com/testautomationguru/utility/PDFUtil.java b/src/main/java/com/testautomationguru/utility/PDFUtil.java
old mode 100644
new mode 100755
index 7cbb928..cbe270a
--- a/src/main/java/com/testautomationguru/utility/PDFUtil.java
+++ b/src/main/java/com/testautomationguru/utility/PDFUtil.java
@@ -15,16 +15,8 @@
See the License for the specific language governing permissions and
limitations under the License.
*/
-import java.awt.Color;
-import java.awt.image.BufferedImage;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import javax.imageio.ImageIO;
+import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageTree;
@@ -32,9 +24,21 @@
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
-import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import org.apache.pdfbox.text.PDFTextStripper;
-import org.apache.commons.io.FileUtils;
+
+import javax.imageio.ImageIO;
+import java.awt.*;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
/**
* PDF Utility
@@ -50,6 +54,7 @@ public class PDFUtil {
private final static Logger logger = Logger.getLogger(PDFUtil.class.getName());
private String imageDestinationPath;
+ private PdfUtilImageListener imageListener;
private boolean bTrimWhiteSpace;
private boolean bHighlightPdfDifference;
private Color imgColor;
@@ -57,8 +62,6 @@ public class PDFUtil {
private boolean bCompareAllPages;
private CompareMode compareMode;
private String[] excludePattern;
- private int startPage = 1;
- private int endPage = -1;
/*
* Constructor
@@ -135,8 +138,28 @@ public String getImageDestinationPath(){
public void setImageDestinationPath(String path){
this.imageDestinationPath = path;
}
-
- /**
+
+ /**
+ * Listener invoked each time an image is created using savePdfAsImage
+ * or extractImages.
+ *
+ * @return the listener currently configured on this instance
+ */
+ public PdfUtilImageListener getImageListener() {
+ return imageListener;
+ }
+
+ /**
+ * Set the listener invoked each time an image is created
+ * using savePdfAsImage or extractImages.
+ *
+ * @param imageListener the listener to notify about created images
+ */
+ public void setImageListener(PdfUtilImageListener imageListener) {
+ this.imageListener = imageListener;
+ }
+
+ /**
* Highlight the difference when 2 pdf files are compared in Binary mode.
* The result is saved as an image.
*
@@ -184,10 +207,31 @@ public void useStripper(PDFTextStripper stripper){
*/
public int getPageCount(String file) throws IOException{
logger.info("file :" + file);
- PDDocument doc = PDDocument.load(new File(file));
- int pageCount = doc.getNumberOfPages();
+ try(FileInputStream fis = new FileInputStream(file)) {
+ return getPageCount(fis);
+ }
+ }
+
+ /**
+ * Get the page count of the document.
+ *
+ * @param file InputStream of the pdf-file
+ * @return int No of pages in the document.
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public int getPageCount(InputStream file) throws IOException{
+ PDDocument doc = null;
+ try {
+ doc = PDDocument.load(file);
+ return getPageCount(doc);
+ } finally {
+ closeDocument(doc);
+ }
+ }
+
+ private int getPageCount(PDDocument document) {
+ int pageCount = document.getNumberOfPages();
logger.info("pageCount :" + pageCount);
- doc.close();
return pageCount;
}
@@ -227,34 +271,79 @@ public String getText(String file, int startPage, int endPage) throws IOExceptio
return this.getPDFText(file,startPage, endPage);
}
- /**
- * This method returns the content of the document
- */
- private String getPDFText(String file, int startPage, int endPage) throws IOException{
-
+ /**
+ * Get the content of the document as plain text.
+ *
+ * @param file InputStream of the pdf-file
+ * @return String document content in plain text.
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public String getText(InputStream file) throws IOException{
+ return this.getPDFText(file,-1, -1);
+ }
+
+ /**
+ * Get the content of the document as plain text.
+ *
+ * @param file InputStream of the pdf-file
+ * @param startPage Starting page number of the document
+ * @return String document content in plain text.
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public String getText(InputStream file, int startPage) throws IOException{
+ return this.getPDFText(file,startPage, -1);
+ }
+
+ /**
+ * Get the content of the document as plain text.
+ *
+ * @param file InputStream of the pdf-file
+ * @param startPage Starting page number of the document
+ * @param endPage Ending page number of the document
+ * @return String document content in plain text.
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public String getText(InputStream file, int startPage, int endPage) throws IOException{
+ return this.getPDFText(file,startPage, endPage);
+ }
+
+ private String getPDFText(String file, int startPage, int endPage) throws IOException {
logger.info("file : " + file);
+ try(FileInputStream fis1 = new FileInputStream(file)) {
+ return getPDFText(fis1, startPage, endPage);
+ }
+ }
+
+ private String getPDFText(InputStream file, int startPage, int endPage) throws IOException {
+ PDDocument doc = null;
+ try {
+ doc = PDDocument.load(file);
+ return getPDFText(doc, startPage, endPage);
+ } finally {
+ closeDocument(doc);
+ }
+ }
+
+ private String getPDFText(PDDocument document, int startPage, int endPage) throws IOException{
logger.info("startPage : " + startPage);
logger.info("endPage : " + endPage);
-
- PDDocument doc = PDDocument.load(new File(file));
-
+
PDFTextStripper localStripper = new PDFTextStripper();
if(null!=this.stripper){
localStripper = this.stripper;
}
-
- this.updateStartAndEndPages(file, startPage, endPage);
- localStripper.setStartPage(this.startPage);
- localStripper.setEndPage(this.endPage);
-
- String txt = localStripper.getText(doc);
+
+ PageBounds pageBounds = this.getStartAndEndPages(document, startPage, endPage);
+ localStripper.setStartPage(pageBounds.startPage);
+ localStripper.setEndPage(pageBounds.endPage);
+
+ String txt = localStripper.getText(document);
logger.info("PDF Text before trimming : " + txt);
if(this.bTrimWhiteSpace){
txt = txt.trim().replaceAll("\\s+", " ").trim();
logger.info("PDF Text after trimming : " + txt);
}
- doc.close();
return txt;
}
@@ -266,8 +355,8 @@ public void excludeText(String... regexs){
/**
* Compares two given pdf documents.
- *
- * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
+ *
+ * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
* VISUAL_MODE : Compare 2 pdf documents pixel by pixel for the content and format.
* @param file1 Absolute file path of the expected file
* @param file2 Absolute file path of the actual file
@@ -280,10 +369,10 @@ public boolean compare(String file1, String file2) throws IOException{
/**
* Compares two given pdf documents.
- *
- * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
+ *
+ * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
* VISUAL_MODE : Compare 2 pdf documents pixel by pixel for the content and format.
- *
+ *
* @param file1 Absolute file path of the expected file
* @param file2 Absolute file path of the actual file
* @param startPage Starting page number of the document
@@ -310,36 +399,98 @@ public boolean compare(String file1, String file2, int startPage, int endPage) t
public boolean compare(String file1, String file2, int startPage) throws IOException{
return this.comparePdfFiles(file1, file2, startPage, -1);
}
-
+
+ /**
+ * Compares two given pdf documents.
+ *
+ * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
+ * VISUAL_MODE : Compare 2 pdf documents pixel by pixel for the content and format.
+ * @param file1 InputStream of the expected pdf-file
+ * @param file2 InputStream of the actual pdf-file
+ * @param identifier Identifier for resulting images
+ * @return boolean true if matches, false otherwise
+ * @throws java.io.IOException when either pdf stream cannot be read or processed.
+ */
+ public boolean compare(InputStream file1, InputStream file2, String identifier) throws IOException{
+ return this.comparePdfFiles(file1, file2, -1, -1, identifier);
+ }
+
+ /**
+ * Compares two given pdf documents.
+ *
+ * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
+ * VISUAL_MODE : Compare 2 pdf documents pixel by pixel for the content and format.
+ *
+ * @param file1 InputStream of the expected pdf-file
+ * @param file2 InputStream of the actual pdf-file
+ * @param startPage Starting page number of the document
+ * @param endPage Ending page number of the document
+ * @param identifier Identifier for resulting images
+ * @return boolean true if matches, false otherwise
+ * @throws java.io.IOException when either pdf stream cannot be read or processed.
+ */
+ public boolean compare(InputStream file1, InputStream file2, int startPage, int endPage, String identifier) throws IOException{
+ return this.comparePdfFiles(file1, file2, startPage, endPage, identifier);
+ }
+
+ /**
+ * Compares two given pdf documents.
+ *
+ * Note : TEXT_MODE : Compare 2 pdf documents contents with no formatting.
+ * VISUAL_MODE : Compare 2 pdf documents pixel by pixel for the content and format.
+ *
+ * @param file1 InputStream of the expected pdf-file
+ * @param file2 InputStream of the actual pdf-file
+ * @param startPage Starting page number of the document
+ * @param identifier Identifier for resulting images
+ * @return boolean true if matches, false otherwise
+ * @throws java.io.IOException when either pdf stream cannot be read or processed.
+ */
+ public boolean compare(InputStream file1, InputStream file2, int startPage, String identifier) throws IOException{
+ return this.comparePdfFiles(file1, file2, startPage, -1, identifier);
+ }
+
private boolean comparePdfFiles(String file1, String file2, int startPage, int endPage)throws IOException{
+ File file1AsFile = new File(file1);
+ String identifier = fileNameToIdentifier(file1AsFile);
+ if (this.bHighlightPdfDifference) {
+ this.createImageDestinationDirectory(file2);
+ }
+ try (FileInputStream fis1 = new FileInputStream(file1AsFile);
+ FileInputStream fis2 = new FileInputStream(file2)) {
+ return comparePdfFiles(fis1, fis2, startPage, endPage, identifier);
+ }
+ }
+
+ private boolean comparePdfFiles(InputStream file1, InputStream file2, int startPage, int endPage, String identifier) throws IOException{
if(CompareMode.TEXT_MODE==this.compareMode)
return comparepdfFilesWithTextMode(file1, file2, startPage, endPage);
else
- return comparePdfByImage(file1, file2, startPage, endPage);
+ return comparePdfByImage(file1, file2, startPage, endPage, identifier);
}
-
- private boolean comparepdfFilesWithTextMode(String file1, String file2, int startPage, int endPage) throws IOException{
-
+
+ private boolean comparepdfFilesWithTextMode(InputStream file1, InputStream file2, int startPage, int endPage) throws IOException{
+
String file1Txt = this.getPDFText(file1, startPage, endPage).trim();
String file2Txt = this.getPDFText(file2, startPage, endPage).trim();
-
+
if(null!=this.excludePattern && this.excludePattern.length>0){
for(int i=0; i savePdfAsImage(String file, int startPage) throws IOException{
- return this.saveAsImage(file, startPage, -1);
+ return this.saveAsImage(file, startPage, -1);
}
/**
@@ -364,7 +515,7 @@ public List savePdfAsImage(String file, int startPage) throws IOExceptio
* @throws java.io.IOException when file is not found.
*/
public List savePdfAsImage(String file, int startPage, int endPage) throws IOException{
- return this.saveAsImage(file, startPage, endPage);
+ return this.saveAsImage(file, startPage, endPage);
}
/**
@@ -377,209 +528,397 @@ public List savePdfAsImage(String file, int startPage, int endPage) thro
public List savePdfAsImage(String file) throws IOException{
return this.saveAsImage(file, -1, -1);
}
-
- /**
- * This method saves the each page of the pdf as image
- */
- private List saveAsImage(String file, int startPage, int endPage) throws IOException{
-
- logger.info("file : " + file);
+
+ /**
+ * Save each page of the pdf as image
+ *
+ * @param file InputStream of the pdf-file
+ * @param startPage Starting page number of the document
+ * @param identifier Identifier for resulting images
+ * @return List list of image file names with absolute path
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public List savePdfAsImage(InputStream file, int startPage, String identifier) throws IOException{
+ return this.saveAsImage(file, startPage, -1, identifier);
+ }
+
+ /**
+ * Save each page of the pdf as image
+ *
+ * @param file InputStream of the pdf-file
+ * @param startPage Starting page number of the document
+ * @param endPage Ending page number of the document
+ * @param identifier Identifier for resulting images
+ * @return List list of image file names with absolute path
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public List savePdfAsImage(InputStream file, int startPage, int endPage, String identifier) throws IOException{
+ return this.saveAsImage(file, startPage, endPage, identifier);
+ }
+
+ /**
+ * Save each page of the pdf as image
+ *
+ * @param file InputStream of the pdf-file
+ * @param identifier Identifier for resulting images
+ * @return List list of image file names with absolute path
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public List savePdfAsImage(InputStream file, String identifier) throws IOException{
+ return this.saveAsImage(file, -1, -1, identifier);
+ }
+
+ /**
+ * This method saves each page of the pdf as an image
+ */
+ private List saveAsImage(String file, int startPage, int endPage) throws IOException {
+ logger.info("file : " + file);
+ File sourceFile = new File(file);
+ String identifier = fileNameToIdentifier(sourceFile);
+ try(FileInputStream fis1 = new FileInputStream(sourceFile)) {
+ return saveAsImage(fis1, startPage, endPage, identifier);
+ }
+ }
+
+ private List saveAsImage(InputStream file, int startPage, int endPage, String identifier) throws IOException {
+ PDDocument doc = null;
+ try {
+ doc = PDDocument.load(file);
+ return saveAsImage(doc, startPage, endPage, identifier);
+ } finally {
+ closeDocument(doc);
+ }
+ }
+
+ private List saveAsImage(PDDocument document, int startPage, int endPage, String identifier) throws IOException{
+
logger.info("startPage : " + startPage);
logger.info("endPage : " + endPage);
-
+
ArrayList imgNames = new ArrayList();
-
+
try {
- File sourceFile = new File(file);
- this.createImageDestinationDirectory(file);
- this.updateStartAndEndPages(file, startPage, endPage);
-
- String fileName = sourceFile.getName().replace(".pdf", "");
-
- PDDocument document = PDDocument.load(sourceFile);
+ PageBounds pageBounds = this.getStartAndEndPages(document, startPage, endPage);
+
PDFRenderer pdfRenderer = new PDFRenderer(document);
- for(int iPage=this.startPage-1;iPage extractImages(String file, int startPage) throws IOException{
- return this.extractimages(file, startPage, -1);
+ return this.extractimages(file, startPage, -1);
}
-
+
/**
* Extract all the embedded images from the pdf document
- *
+ *
* @param file Absolute file path of the file
* @param startPage Starting page number of the document
* @param endPage Ending page number of the document
* @return List list of image file names with absolute path
* @throws java.io.IOException when file is not found.
- */
+ */
public List extractImages(String file, int startPage, int endPage) throws IOException{
- return this.extractimages(file, startPage, endPage);
+ return this.extractimages(file, startPage, endPage);
}
-
+
/**
* Extract all the embedded images from the pdf document
- *
+ *
* @param file Absolute file path of the file
* @return List list of image file names with absolute path
* @throws java.io.IOException when file is not found.
- */
+ */
public List extractImages(String file) throws IOException{
return this.extractimages(file, -1, -1);
}
-
- /**
+
+ /**
+ * Extract all the embedded images from the pdf document
+ *
+ * @param file InputStream of the pdf-file
+ * @param startPage Starting page number of the document
+ * @param identifier Identifier for resulting images
+ * @return List list of image file names with absolute path
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public List extractImages(InputStream file, int startPage, String identifier) throws IOException{
+ return this.extractimages(file, startPage, -1, identifier);
+ }
+
+ /**
+ * Extract all the embedded images from the pdf document
+ *
+ * @param file InputStream of the pdf-file
+ * @param startPage Starting page number of the document
+ * @param endPage Ending page number of the document
+ * @param identifier Identifier for resulting images
+ * @return List list of image file names with absolute path
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public List extractImages(InputStream file, int startPage, int endPage, String identifier) throws IOException{
+ return this.extractimages(file, startPage, endPage, identifier);
+ }
+
+ /**
+ * Extract all the embedded images from the pdf document
+ *
+ * @param file InputStream of the pdf-file
+ * @param identifier Identifier for resulting images
+ * @return List list of image file names with absolute path
+ * @throws java.io.IOException when the pdf stream cannot be read or processed.
+ */
+ public List extractImages(InputStream file, String identifier) throws IOException{
+ return this.extractimages(file, -1, -1, identifier);
+ }
+
+
+ /**
* This method extracts all the embedded images of the pdf document
- */
- private List extractimages(String file, int startPage, int endPage){
-
+ */
+ private List extractimages(String file, int startPage, int endPage) throws IOException {
+ File fileAsFile = new File(file);
logger.info("file : " + file);
+ this.createImageDestinationDirectory(file);
+ try(FileInputStream fis1 = new FileInputStream(fileAsFile)) {
+ return extractimages(fis1, startPage, endPage, fileNameToIdentifier(fileAsFile) + "_resource");
+ }
+ }
+
+ private List extractimages(InputStream file, int startPage, int endPage, String identifier) throws IOException {
+ PDDocument doc = null;
+ try {
+ doc = PDDocument.load(file);
+ return extractimages(doc, startPage, endPage, identifier);
+ } finally {
+ closeDocument(doc);
+ }
+ }
+
+ private List extractimages(PDDocument document, int startPage, int endPage, String identifier) throws IOException {
logger.info("startPage : " + startPage);
logger.info("endPage : " + endPage);
-
+
ArrayList imgNames = new ArrayList();
boolean bImageFound = false;
- try {
- this.createImageDestinationDirectory(file);
- String fileName = this.getFileName(file).replace(".pdf", "_resource");
-
- PDDocument document = PDDocument.load(new File(file));
- PDPageTree list = document.getPages();
-
- this.updateStartAndEndPages(file, startPage, endPage);
-
- int totalImages = 1;
- for(int iPage=this.startPage-1;iPage 0 && start <= pagecount)){
- this.startPage = start;
+ pageBounds.startPage = start;
}else{
- this.startPage = 1;
+ pageBounds.startPage = 1;
}
if((end > 0 && end >= start && end <= pagecount)){
- this.endPage = end;
+ pageBounds.endPage = end;
}else{
- this.endPage = pagecount;
+ pageBounds.endPage = pagecount;
+ }
+ logger.info("Updated start page:" + pageBounds.startPage);
+ logger.info("Updated end page:" + pageBounds.endPage);
+ return pageBounds;
+ }
+
+ private class PageBounds {
+ int startPage;
+ int endPage;
+
+ public PageBounds(int startPage, int endPage) {
+ this.startPage = startPage;
+ this.endPage = endPage;
}
- document.close();
- logger.info("Updated start page:" + this.startPage);
- logger.info("Updated end page:" + this.endPage);
}
}
\ No newline at end of file
diff --git a/src/main/java/com/testautomationguru/utility/PdfUtilImageListener.java b/src/main/java/com/testautomationguru/utility/PdfUtilImageListener.java
new file mode 100755
index 0000000..89d3d45
--- /dev/null
+++ b/src/main/java/com/testautomationguru/utility/PdfUtilImageListener.java
@@ -0,0 +1,7 @@
+package com.testautomationguru.utility;
+
+import java.awt.image.BufferedImage;
+
+public interface PdfUtilImageListener {
+ void imageGenerated(BufferedImage bufferedImage, String fileName);
+}
diff --git a/src/test/java/com/testautomationguru/utility/PDFUtilStreamTest.java b/src/test/java/com/testautomationguru/utility/PDFUtilStreamTest.java
new file mode 100755
index 0000000..45d9251
--- /dev/null
+++ b/src/test/java/com/testautomationguru/utility/PDFUtilStreamTest.java
@@ -0,0 +1,156 @@
+package com.testautomationguru.utility;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
+
+public class PDFUtilStreamTest {
+
+ private static final Charset TEXT_RESOURCE_CHARSET = Charset.forName("UTF-8");
+ PDFUtil pdfutil = new PDFUtil();
+
+ @Test(priority = 1)
+ public void checkForPDFPageCount() throws IOException {
+ String identifier = "image-extract/sample";
+ try(FileInputStream fis = getFileInputStream(identifier + ".pdf")) {
+ int actual = pdfutil.getPageCount(fis);
+ Assert.assertEquals(actual, 6);
+ }
+ }
+
+ @Test(priority = 2)
+ public void checkForFileContent() throws IOException {
+ String identifier = "text-extract/sample";
+ try(FileInputStream fis = getFileInputStream(identifier + ".pdf")) {
+ String actual = pdfutil.getText(fis);
+ String expected = getTextResource("text-extract/expected.txt");
+ Assert.assertEquals(actual.trim(), expected.trim());
+ }
+ }
+
+ @Test(priority = 3)
+ public void checkForFileContentUsingStripper() throws IOException {
+ String identifier = "text-extract-position/sample";
+ try(FileInputStream fis = getFileInputStream(identifier + ".pdf")) {
+ String actual = pdfutil.getText(fis);
+ String expected = getTextResource("text-extract-position/expected.txt");
+ Assert.assertNotEquals(actual.trim(), expected.trim());
+ }
+ //should match with stripper
+ PDFTextStripper stripper = new PDFTextStripper();
+ stripper.setSortByPosition(true);
+ pdfutil.useStripper(stripper);
+ try(FileInputStream fis = getFileInputStream(identifier + ".pdf")) {
+ String actual = pdfutil.getText(fis);
+ String expected = getTextResource("text-extract-position/expected.txt");
+ Assert.assertEquals(actual.trim(), expected.trim());
+ }
+ pdfutil.useStripper(null);
+ }
+
+ @Test(priority = 4)
+ public void extractImages() throws IOException {
+ String identifier = "image-extract/sample";
+ try(FileInputStream fis = getFileInputStream(identifier + ".pdf")) {
+ List actualExtractedImages = pdfutil.extractImages(fis, identifier);
+ Assert.assertEquals(actualExtractedImages.size(), 7);
+ }
+ }
+
+ @Test(priority = 5)
+ public void saveAsImages() throws IOException {
+ String identifier = "image-extract/sample";
+ try(FileInputStream fis = getFileInputStream(identifier + ".pdf")) {
+ List actualExtractedImages = pdfutil.savePdfAsImage(fis, identifier);
+ Assert.assertEquals(actualExtractedImages.size(), 6);
+ }
+ }
+
+ @Test(priority = 6)
+ public void comparePDFTextModeDiff() throws IOException {
+ pdfutil.setCompareMode(CompareMode.TEXT_MODE);
+ String identifier1 = "text-compare/sample1";
+ String identifier2 = "text-compare/sample2";
+ try(FileInputStream fis1 = getFileInputStream(identifier1 + ".pdf");
+ FileInputStream fis2 = getFileInputStream(identifier2 + ".pdf")) {
+ boolean result = pdfutil.compare(fis1, fis2, identifier1);
+ Assert.assertFalse(result);
+ }
+ }
+
+ @Test(priority = 7)
+ public void comparePDFTextModeSameAfterExcludePattern() throws IOException {
+ pdfutil.setCompareMode(CompareMode.TEXT_MODE);
+ pdfutil.excludeText("\\d+");
+ String identifier1 = "text-compare/sample1";
+ String identifier2 = "text-compare/sample2";
+ try(FileInputStream fis1 = getFileInputStream(identifier1 + ".pdf");
+ FileInputStream fis2 = getFileInputStream(identifier2 + ".pdf")) {
+ boolean result = pdfutil.compare(fis1, fis2, identifier1);
+ Assert.assertTrue(result);
+ }
+ }
+
+ @Test(priority = 8)
+ public void comparePDFImageModeSame() throws IOException {
+ pdfutil.setCompareMode(CompareMode.VISUAL_MODE);
+ String identifier1 = "image-compare-same/sample1";
+ String identifier2 = "image-compare-same/sample2";
+ try(FileInputStream fis1 = getFileInputStream(identifier1 + ".pdf");
+ FileInputStream fis2 = getFileInputStream(identifier2 + ".pdf")) {
+ boolean result = pdfutil.compare(fis1, fis2, identifier1);
+ Assert.assertTrue(result);
+ }
+ }
+
+ @Test(priority = 9)
+ public void comparePDFImageModeDiff() throws IOException {
+ pdfutil.setCompareMode(CompareMode.VISUAL_MODE);
+ pdfutil.highlightPdfDifference(true);
+ String identifier1 = "image-compare-diff/sample1";
+ String identifier2 = "image-compare-diff/sample2";
+ try(FileInputStream fis1 = getFileInputStream(identifier1 + ".pdf");
+ FileInputStream fis2 = getFileInputStream(identifier2 + ".pdf")) {
+ boolean result = pdfutil.compare(fis1, fis2, identifier1);
+ Assert.assertFalse(result);
+ }
+ }
+
+ @Test(priority = 10)
+ public void comparePDFImageModeDiffSpecificPage() throws IOException {
+ pdfutil.highlightPdfDifference(true);
+ String identifier1 = "image-compare-diff/sample1";
+ String identifier2 = "image-compare-diff/sample2";
+ try(FileInputStream fis1 = getFileInputStream(identifier1 + ".pdf");
+ FileInputStream fis2 = getFileInputStream(identifier2 + ".pdf")) {
+ boolean result = pdfutil.compare(fis1, fis2, 3, identifier1);
+ Assert.assertTrue(result);
+ }
+ }
+
+ private String getTextResource(String resourceName) throws IOException {
+ try(FileInputStream fis = getFileInputStream(resourceName);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+ IOUtils.copy(fis, baos);
+ return new String(baos.toByteArray(), TEXT_RESOURCE_CHARSET);
+ }
+ }
+
+ private FileInputStream getFileInputStream(String s) throws FileNotFoundException {
+ return new FileInputStream(getFilePath(s));
+ }
+
+ private String getFilePath(String filename) {
+ return new File(getClass().getClassLoader().getResource(filename).getFile()).getAbsolutePath();
+ }
+}
diff --git a/src/test/java/com/testautomationguru/utility/PDFUtilTest.java b/src/test/java/com/testautomationguru/utility/PDFUtilTest.java
old mode 100644
new mode 100755
index 524f4ee..0ee090b
--- a/src/test/java/com/testautomationguru/utility/PDFUtilTest.java
+++ b/src/test/java/com/testautomationguru/utility/PDFUtilTest.java
@@ -1,18 +1,20 @@
package com.testautomationguru.utility;
+import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.nio.charset.Charset;
import java.util.List;
+import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.text.PDFTextStripper;
import org.testng.Assert;
import org.testng.annotations.Test;
-import org.testng.reporters.Files;
-
-import com.testautomationguru.utility.PDFUtil;
public class PDFUtilTest {
+ private static final Charset TEXT_RESOURCE_CHARSET = Charset.forName("UTF-8");
PDFUtil pdfutil = new PDFUtil();
@Test(priority = 1)
@@ -24,14 +26,14 @@ public void checkForPDFPageCount() throws IOException {
@Test(priority = 2)
public void checkForFileContent() throws IOException {
String actual = pdfutil.getText(getFilePath("text-extract/sample.pdf"));
- String expected = Files.readFile(new File(getFilePath("text-extract/expected.txt")));
+ String expected = getTextResource("text-extract/expected.txt");
Assert.assertEquals(actual.trim(), expected.trim());
}
@Test(priority = 3)
public void checkForFileContentUsingStripper() throws IOException {
String actual = pdfutil.getText(getFilePath("text-extract-position/sample.pdf"));
- String expected = Files.readFile(new File(getFilePath("text-extract-position/expected.txt")));
+ String expected = getTextResource("text-extract-position/expected.txt");
Assert.assertNotEquals(actual.trim(), expected.trim());
//should match with stripper
@@ -39,7 +41,7 @@ public void checkForFileContentUsingStripper() throws IOException {
stripper.setSortByPosition(true);
pdfutil.useStripper(stripper);
actual = pdfutil.getText(getFilePath("text-extract-position/sample.pdf"));
- expected = Files.readFile(new File(getFilePath("text-extract-position/expected.txt")));
+ expected = getTextResource("text-extract-position/expected.txt");
Assert.assertEquals(actual.trim(), expected.trim());
pdfutil.useStripper(null);
}
@@ -105,6 +107,14 @@ public void comparePDFImageModeDiffSpecificPage() throws IOException {
Assert.assertTrue(result);
}
+ private String getTextResource(String resourceName) throws IOException {
+ try(FileInputStream fis = new FileInputStream(getFilePath(resourceName));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+ IOUtils.copy(fis, baos);
+ return new String(baos.toByteArray(), TEXT_RESOURCE_CHARSET);
+ }
+ }
+
private String getFilePath(String filename) {
return new File(getClass().getClassLoader().getResource(filename).getFile()).getAbsolutePath();
}
diff --git a/src/test/resources/text-extract-position/expected.txt b/src/test/resources/text-extract-position/expected.txt
index 3d24318..750a812 100644
--- a/src/test/resources/text-extract-position/expected.txt
+++ b/src/test/resources/text-extract-position/expected.txt
@@ -1 +1 @@
-Item Authoring Tool PDF Tables The PDF engine supports tables in all areas of a printed test (questions, responses, passages, answers, other tables, etc.). The supported functionality is as close to HTML tables as possible, but there are restrictions due to limitations in the PDF library that will be discussed below. Tables may be created within the WYSIWYG (what you see is what you get) editors in the application in “normal” mode, or as direct HTML (table, tr, and td tags) in the editors’ “html” mode. (HTML tutorial: http://www.w3schools.com/html/html_tables.asp) When creating a table for PDF presentation, it should be kept in mind that the container area in the PDF document will likely be much smaller than that available in the HTML editor or in online testing. For example, the width of a question in two column format is limited to about 220 pixels. If possible, tables should be constructed with an iterative process of setting properties/attributes and PDF previewing to see the results. Adding/Editing a Table To add a table, click the icon on your WYSIWG editor. To view the Table Properties, right click inside one of the cells of your table. This will produce the Table Menu. Click the Edit Table option from the menu. © 2005. Vantage. All Rights Reserved. Table Properties To view/edit the Table Properties, click the Table tab. Table Tab Supported Table Properties/Attributes (PDF specific attributes take precedence) Property Attribute Values Description Name Name(s) Table Width width percentage Specifies the percentage of (ex. 50%) available space that the width of the table will occupy. For Default: 100% example, if we include a table in question content and specify a PDF Width of “50%”, that table will take up half of the width of the question area. Border Size border, pixels (ex. 120) Specifies the width of the borderwidth table/cell border in pixels. 
Default: 1 Horizontal align “left”, “center”, Specifies the horizontal alignment Alignment “right” of the table. Has no effect unless the table has a width less than Default: “center” 100% © 2005. Vantage. All Rights Reserved. PDF pdfalign “left”, “center”, Specifies the horizontal alignment Horizontal “right” of the table when printed to PDF. Alignment Has no effect unless the table has Default: “center” a width less than 100% Table cellpadding pixels (ex. 4) Specifies the padding in pixels Padding (left, right, top, and bottom) to be Default: 2 used in the cells of the table. Padding is the space between the border of a cell and its contents. Note: PDF tables have a minimum top padding of 3 and a minimum bottom padding of 4 for proper presentation. n/a pdfspacingbefore pixels (ex. 5) Specifies the space between the table and preceding elements Default: 2 (another table, text, etc). n/a pdfspacingafter pixels (ex. 5) Specifies the space between the table and trailing elements Default: 0 (another table, text, etc). Table Row Properties Clicking the Row tab of the Table Properties will allow you to view/edit the properties of the row. Row Tab © 2005. Vantage. All Rights Reserved. Supported Row Properties/Attributes (PDF specific attributes take precedence) Property Attribute Values Description Name Name(s) Vertical valign “top”, “middle”, Specifies the vertical alignment of Alignment “bottom” content for cells within the row. Default: “middle” Horizontal align, “top”, “middle”, Specifies the horizontal alignment Alignment alignment, “bottom” of content for cells within the halign row. Default: “left” Table Cell Properties Clicking the Cell tab of the Table Properties interface will allow you to view/edit the properties of the cell. Cell Tab © 2005. Vantage. All Rights Reserved. Supported Cell Properties/Attributes (PDF specific attributes take precedence) Property Attribute Values Description Name Name(s) colspan integer (ex. 
2) Specifies the column span of the cell. This is the number of columns that the cell will represent. For example, in a table with three columns, you might specify that the first cell of the first row spans all three columns (colspan=3). This will result in a “header” cell/row that will cover the entire width of the table. Cell Width width percentage Specifies the desired width of the (ex. 50%) cell as a percentage of overall table width. For example, a cell with width = 50% will encompass half the available horizontal space in a table. If more than one cell in a column is given a width setting, the maximum value will be used as all cells in a column must be the same width. Cells without specified widths will share equally the remaining available horizontal space of the table. If combined cell widths total more than 100 percent, all cells will be downsized proportionally so that their total widths equal 100 percent. Note that unlike HTML tables, a PDF table cell’s width will not be sized by default to contain its contents. It’s up to the user to size cells/columns appropriately. PDF Cell pdfcellwidth percentage See Cell Width. Width (ex. 50%) © 2005. Vantage. All Rights Reserved. Cell Height height pixels (ex. 120) Specifies the desired minimum height of the cell in pixels. All cells in the row will share this height. If more than one height of a cell in a row is set, the maximum value will be used. If no heights are specified, the cell will be sized to fit its contents. PDF Cell pdfcellheight pixels (ex. 120) See Cell Height Height n/a cellpadding pixels (ex. 2) Specifies the padding in pixels (left, right, top, and bottom) to be Default: 2 used in this cell. Padding is the space between the border of a cell and its contents. Note: PDF tables have a minimum top padding of 3 and a minimum bottom padding of 4 for proper presentation. n/a pdfcellpadding pixels (ex. 2) See cellpadding. Default: 2 n/a border, pixels (ex. 
1) Specifies the width of the cell borderwidth border in pixels. Default: 1 Vertical valign “top”, “middle”, Specifies the vertical alignment of Alignment “bottom” content within this cell. Default: “middle” Horizontal align, alignment, “top”, “middle”, Specifies the horizontal alignment Alignment halign “bottom” of content within this cell. Default: “left” © 2005. Vantage. All Rights Reserved. Recent changes pertaining to tables: 1. In the previous engine, there was no support for table formatting at all. Cell widths were distributed evenly, tables were all 100% width, and there were no settings. In this version, all of the options discussed above have been added. Known issues: 1. There is currently very limited support for absolute (pixel) widths for the overall table and the cells. This is generally because the pdf engine does not know the final width of the container of a table (especially in the case of nested tables) until the document is rendered. 2. Cells widths are not automatically sized according to the minimum width of their contents. 3. Images are not sized automatically to the constraints of a cell. 4. There is no support for cell spacing. © 2005. Vantage. All Rights Reserved.
\ No newline at end of file
+Item Authoring Tool PDF Tables The PDF engine supports tables in all areas of a printed test (questions, responses, passages, answers, other tables, etc.). The supported functionality is as close to HTML tables as possible, but there are restrictions due to limitations in the PDF library that will be discussed below. Tables may be created within the WYSIWYG (what you see is what you get) editors in the application in “normal” mode, or as direct HTML (table, tr, and td tags) in the editors’ “html” mode. (HTML tutorial: http://www.w3schools.com/html/html_tables.asp) When creating a table for PDF presentation, it should be kept in mind that the container area in the PDF document will likely be much smaller than that available in the HTML editor or in online testing. For example, the width of a question in two column format is limited to about 220 pixels. If possible, tables should be constructed with an iterative process of setting properties/attributes and PDF previewing to see the results. Adding/Editing a Table To add a table, click the icon on your WYSIWG editor. To view the Table Properties, right click inside one of the cells of your table. This will produce the Table Menu. Click the Edit Table option from the menu. © 2005. Vantage. All Rights Reserved. Table Properties To view/edit the Table Properties, click the Table tab. Table Tab Supported Table Properties/Attributes (PDF specific attributes take precedence) Property Attribute Values Description Name Name(s) Table Width width percentage Specifies the percentage of (ex. 50%) available space that the width of the table will occupy. For Default: 100% example, if we include a table in question content and specify a PDF Width of “50%”, that table will take up half of the width of the question area. Border Size border, pixels (ex. 120) Specifies the width of the borderwidth table/cell border in pixels. 
Default: 1 Horizontal align “left”, “center”, Specifies the horizontal alignment Alignment “right” of the table. Has no effect unless the table has a width less than Default: “center” 100% © 2005. Vantage. All Rights Reserved. PDF pdfalign “left”, “center”, Specifies the horizontal alignment Horizontal “right” of the table when printed to PDF. Alignment Has no effect unless the table has Default: “center” a width less than 100% Table cellpadding pixels (ex. 4) Specifies the padding in pixels Padding (left, right, top, and bottom) to be Default: 2 used in the cells of the table. Padding is the space between the border of a cell and its contents. Note: PDF tables have a minimum top padding of 3 and a minimum bottom padding of 4 for proper presentation. n/a pdfspacingbefore pixels (ex. 5) Specifies the space between the table and preceding elements Default: 2 (another table, text, etc). n/a pdfspacingafter pixels (ex. 5) Specifies the space between the table and trailing elements Default: 0 (another table, text, etc). Table Row Properties Clicking the Row tab of the Table Properties will allow you to view/edit the properties of the row. Row Tab © 2005. Vantage. All Rights Reserved. Supported Row Properties/Attributes (PDF specific attributes take precedence) Property Attribute Values Description Name Name(s) Vertical valign “top”, “middle”, Specifies the vertical alignment of Alignment “bottom” content for cells within the row. Default: “middle” Horizontal align, “top”, “middle”, Specifies the horizontal alignment Alignment alignment, “bottom” of content for cells within the halign row. Default: “left” Table Cell Properties Clicking the Cell tab of the Table Properties interface will allow you to view/edit the properties of the cell. Cell Tab © 2005. Vantage. All Rights Reserved. Supported Cell Properties/Attributes (PDF specific attributes take precedence) Property Attribute Values Description Name Name(s) colspan integer (ex. 
2) Specifies the column span of the cell. This is the number of columns that the cell will represent. For example, in a table with three columns, you might specify that the first cell of the first row spans all three columns (colspan=3). This will result in a “header” cell/row that will cover the entire width of the table. Cell Width width percentage Specifies the desired width of the (ex. 50%) cell as a percentage of overall table width. For example, a cell with width = 50% will encompass half the available horizontal space in a table. If more than one cell in a column is given a width setting, the maximum value will be used as all cells in a column must be the same width. Cells without specified widths will share equally the remaining available horizontal space of the table. If combined cell widths total more than 100 percent, all cells will be downsized proportionally so that their total widths equal 100 percent. Note that unlike HTML tables, a PDF table cell’s width will not be sized by default to contain its contents. It’s up to the user to size cells/columns appropriately. PDF Cell pdfcellwidth percentage See Cell Width. Width (ex. 50%) © 2005. Vantage. All Rights Reserved. Cell Height height pixels (ex. 120) Specifies the desired minimum height of the cell in pixels. All cells in the row will share this height. If more than one height of a cell in a row is set, the maximum value will be used. If no heights are specified, the cell will be sized to fit its contents. PDF Cell pdfcellheight pixels (ex. 120) See Cell Height Height n/a cellpadding pixels (ex. 2) Specifies the padding in pixels (left, right, top, and bottom) to be Default: 2 used in this cell. Padding is the space between the border of a cell and its contents. Note: PDF tables have a minimum top padding of 3 and a minimum bottom padding of 4 for proper presentation. n/a pdfcellpadding pixels (ex. 2) See cellpadding. Default: 2 n/a border, pixels (ex. 
1) Specifies the width of the cell borderwidth border in pixels. Default: 1 Vertical valign “top”, “middle”, Specifies the vertical alignment of Alignment “bottom” content within this cell. Default: “middle” Horizontal align, alignment, “top”, “middle”, Specifies the horizontal alignment Alignment halign “bottom” of content within this cell. Default: “left” © 2005. Vantage. All Rights Reserved. Recent changes pertaining to tables: 1. In the previous engine, there was no support for table formatting at all. Cell widths were distributed evenly, tables were all 100% width, and there were no settings. In this version, all of the options discussed above have been added. Known issues: 1. There is currently very limited support for absolute (pixel) widths for the overall table and the cells. This is generally because the pdf engine does not know the final width of the container of a table (especially in the case of nested tables) until the document is rendered. 2. Cells widths are not automatically sized according to the minimum width of their contents. 3. Images are not sized automatically to the constraints of a cell. 4. There is no support for cell spacing. © 2005. Vantage. All Rights Reserved.
\ No newline at end of file
diff --git a/src/test/resources/text-extract/expected.txt b/src/test/resources/text-extract/expected.txt
index 361c1f8..d27891f 100644
--- a/src/test/resources/text-extract/expected.txt
+++ b/src/test/resources/text-extract/expected.txt
@@ -1 +1 @@
-SAMPLE STUDENT COMMENTS AHMET EMIN TATAR He is very nice and you can ask him any questions in class. (Spring’06-Precalculus) I liked the way the subject was explained. He was always willing to stay and help when needed. He was very good at explaining the subject. (Fall’07-Precalculus) He really cared about the students.(Fall’07-Precalculus) Very Helpful teacher. Very difficult class but he does his best to try and help you out. He does very good reviews for the test and if you do your homework problems you should be fine. I would highly recommend him for Calc 2. (Summer’07-Calculus II) The best part about this instructor is his ability to make the extremely complicated much easier to understand. (Summer’07-Calculus II) Great teacher, fair grader, expresses ideas clearly and helps students a lot. Great class would take Mr Tatar for all math classes. (Summer’07-Calculus II) I liked the material we covered was challenging. It required me to keep up with the work load simultaneously. Mr Tatar was very good at explaining each problem. (Summer’07-Calculus II) I was very happy to have a mathematician teach this class instead of a computer scientist. Discrete Mathematics I, since it is required for Computer Science and not for Mathematics, was oversimplified and had much of the mathematical theory removed in favor of practical applications for Computer Science. Such was not the case with most of this class (except trees). (Fall’08-Discrete Mathematics II ) Occasionally I would ask questions or extend answers to homework problems into small con- jectures. The instructor kindly took the time to consider them and provide a response as to why the extensions I made were incorrect. Very friendly and willing to help students even outside of class time. (Fall’08-Discrete Mathematics II ) I was just emailing you to tell you how much of a help you were this semester. There is no way I would have passed the class without your help. Thank you sooo much! 
(Spring’09-Calculus II Recitation) I like everything about him. He explained and taught with patience and would like him to teach Discrete Math II during the next school year. (Summer’09-Discrete Mathematics I) Instructor Mr. Tatar was excellent. Excellent presentation of chapters. (Summer’09-Discrete Mathematics I) 1
\ No newline at end of file
+SAMPLE STUDENT COMMENTS AHMET EMIN TATAR He is very nice and you can ask him any questions in class. (Spring’06-Precalculus) I liked the way the subject was explained. He was always willing to stay and help when needed. He was very good at explaining the subject. (Fall’07-Precalculus) He really cared about the students.(Fall’07-Precalculus) Very Helpful teacher. Very difficult class but he does his best to try and help you out. He does very good reviews for the test and if you do your homework problems you should be fine. I would highly recommend him for Calc 2. (Summer’07-Calculus II) The best part about this instructor is his ability to make the extremely complicated much easier to understand. (Summer’07-Calculus II) Great teacher, fair grader, expresses ideas clearly and helps students a lot. Great class would take Mr Tatar for all math classes. (Summer’07-Calculus II) I liked the material we covered was challenging. It required me to keep up with the work load simultaneously. Mr Tatar was very good at explaining each problem. (Summer’07-Calculus II) I was very happy to have a mathematician teach this class instead of a computer scientist. Discrete Mathematics I, since it is required for Computer Science and not for Mathematics, was oversimplified and had much of the mathematical theory removed in favor of practical applications for Computer Science. Such was not the case with most of this class (except trees). (Fall’08-Discrete Mathematics II ) Occasionally I would ask questions or extend answers to homework problems into small con- jectures. The instructor kindly took the time to consider them and provide a response as to why the extensions I made were incorrect. Very friendly and willing to help students even outside of class time. (Fall’08-Discrete Mathematics II ) I was just emailing you to tell you how much of a help you were this semester. There is no way I would have passed the class without your help. Thank you sooo much! 
(Spring’09-Calculus II Recitation) I like everything about him. He explained and taught with patience and would like him to teach Discrete Math II during the next school year. (Summer’09-Discrete Mathematics I) Instructor Mr. Tatar was excellent. Excellent presentation of chapters. (Summer’09-Discrete Mathematics I) 1
\ No newline at end of file