Iterating PDF file #8

MariSelvanDev · 2018-03-20T13:25:06Z

How can I convert the nth page of a PDF file to a PNG image?
What I mean is:
I converted a PDF file that contains 8 pages, but only the first page was converted to a PNG image. I need all of the pages converted.

Thanks in advance.

thnew · 2018-03-21T05:47:06Z

Hi, this is not possible with my project yet, but you can fork and modify it to make it possible. When I have time I will do it as well. I described how to modify it here:
#6

dipendra210 · 2019-05-21T19:29:42Z

I also need to convert multiple PDF pages into PNGs.
Can somebody help me?
Thanks in advance.

dipendra210 · 2019-05-21T22:16:57Z

I implemented this in pdf2png.js by following thnew's suggestion.
It works well (but only when you pass the option { returnFilePath: true }).
Here is my code.
pdf2png.js

var exec = require('child_process').exec;
var tmp = require('tmp');
var fs = require('fs');
var filesource = require('filesource');

// Set to true by the first convert() call once the Ghostscript path has been
// appended to the environment, so that this happens only once per process.
var initialized = false;

// Add Ghostscript executables path
// The project root is this module's directory with its last
// backslash-separated segment removed.
// NOTE(review): when __dirname contains no backslashes (e.g. on Linux) the
// split yields a single element, so projectPath ends up as an empty string —
// confirm whether callers on such systems rely on options.useLocalGhostscript.
var projectPath = __dirname.split("\\");
projectPath.pop();
projectPath = projectPath.join("\\");

exports.ghostscriptPath = projectPath + "\\executables\\ghostScript";

// for linux compatibility
exports.ghostscriptPath = exports.ghostscriptPath.split("\\").join("/");

exports.convert = function() {
var filepathOrData = arguments[0];
var callback = arguments[1];
var options = {};
var pageCount = 1;

var tmpFileCreated = false;

if(arguments[3] != null)
{
	options = arguments[1];
	pageCount = arguments[2]
	callback = arguments[3];
}

if(!initialized)
{
	if(!options.useLocalGhostscript)
	{
		process.env.Path += ";" + exports.ghostscriptPath;
	}
	
	initialized = true;
}

options.quality = options.quality || 100;

filesource.getDataPath(filepathOrData, function(resp){
	if(!resp.success)
	{
		callback(resp);
		return;
	}
	
	// get temporary filepath
	tmp.file({ postfix: ".png" }, function(err, imageFilepath, fd) {
		if(err)
		{
			callback({ success: false, error: "Error getting second temporary filepath: " + err });
			return;
		}

		const fileName = imageFilepath.substring(0, imageFilepath.length - 4);
	
		//exec("gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" + options.quality + " -dFirstPage=1 -dLastPage=2 -sOutputFile=" + imageFilepath + " " + resp.data, function (error, stdout, stderr) {
		
		exec("gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" + options.quality + " -dFirstPage=1 -dLastPage=" + pageCount + " -sOutputFile=" + fileName + "-%d.png " + resp.data, function (error, stdout, stderr) {
			// Remove temp files
			resp.clean();
			
			if(error !== null)
			{
				callback({ success: false, error: "Error converting pdf to png: " + error });
				return;
			}

			let imgPaths = [];
			for (let i = 0; i < pageCount; i++) {
				imgPaths.push(fileName + "-" + (i + 1) + ".png");
			}

			if(options.returnFilePath)
			{
				//callback({ success: true, data: imageFilepath });
				callback({ success: true, data: imgPaths });
				return;
			}
			
			var img = fs.readFileSync(imageFilepath);
			
			// Remove temp file
			fs.unlinkSync(imageFilepath);
			
			callback({ success: true, data: img });
		});
	});
});
};

api.js ( calling pdf2png.convert() function )
// Count the pages of targetFile, render each page to a PNG, OCR every page
// and answer the request with the recognized text of all pages in page order.
pdfPageCount.count(targetFile, function(countResp) {
	if(!countResp.success)
	{
		console.log("Something went wrong: " + countResp.error);

		// Answer the request so the client is not left waiting.
		res.send( { result: "Can't handle the PDF file." } );
		return;
	}
	// pdf page count
	const pageCount = countResp.data;

	pdf2png.convert(targetFile, { returnFilePath: true }, pageCount, function(convertResp) {
		if(!convertResp.success) {
			console.log("Something went wrong: " + convertResp.error);

			res.send( { result: "Can't handle the PDF file." } );
			// Stop here: there are no images to OCR and a response was already sent.
			return;
		}

		// One pending OCR result per page, kept in page order.
		const pageTexts = [];
		for (let i = 0; i < pageCount; i++) {

			console.log("Yayy the pdf got converted, now I'm gonna ocr it!");

			pageTexts.push(
				Tesseract.recognize(convertResp.data[i])
					.progress((p) => {
						//console.log('progress', p);
					})
					.then((result) => result.text)
			);
		}

		// Respond only after every page has finished, so no page's text is
		// dropped and the text is concatenated in page order rather than in
		// completion order.
		Promise.all(pageTexts).then(
			(texts) => {
				// ocr result of all pages of the pdf
				const ocrResult = texts.join('');
				res.send({result: ocrResult});
				console.log(ocrResult);
			},
			(ocrError) => {
				console.log("Something went wrong: " + ocrError);

				res.send( { result: "Can't handle the PDF file." } );
			}
		);
	});

});

MariSelvanDev changed the title ~~Iterating in PDF file~~ Iterating PDF file Mar 20, 2018

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Iterating PDF file #8

Iterating PDF file #8

MariSelvanDev commented Mar 20, 2018 •

edited

Loading

thnew commented Mar 21, 2018

dipendra210 commented May 21, 2019

dipendra210 commented May 21, 2019

Iterating PDF file #8

Iterating PDF file #8

Comments

MariSelvanDev commented Mar 20, 2018 • edited Loading

thnew commented Mar 21, 2018

dipendra210 commented May 21, 2019

dipendra210 commented May 21, 2019

MariSelvanDev commented Mar 20, 2018 •

edited

Loading