From 27dd1a2d55ec85696bd054313b8af56b93c0f6ee Mon Sep 17 00:00:00 2001 From: Alexander Densley Date: Wed, 4 Dec 2024 20:00:05 -0700 Subject: [PATCH] allow optional density for node (#116) * add support for selecting density in node * revert styling * revert stylings part 2 * add height and update args * remove space * clarify props * sort --- node-zerox/src/index.ts | 4 ++++ node-zerox/src/types.ts | 2 ++ node-zerox/src/utils.ts | 8 ++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/node-zerox/src/index.ts b/node-zerox/src/index.ts index d7550d1..a7d59b0 100644 --- a/node-zerox/src/index.ts +++ b/node-zerox/src/index.ts @@ -24,6 +24,8 @@ export const zerox = async ({ concurrency = 10, correctOrientation = true, filePath, + imageDensity = 300, + imageHeight = 2048, llmParams = {}, maintainFormat = false, maxTesseractWorkers = -1, @@ -105,6 +107,8 @@ export const zerox = async ({ // Convert the file to a series of images await convertPdfToImages({ correctOrientation, + imageDensity, + imageHeight, localPath: pdfPath, maxTesseractWorkers, pagesToConvertAsImages, diff --git a/node-zerox/src/types.ts b/node-zerox/src/types.ts index ccd491d..2faf3af 100644 --- a/node-zerox/src/types.ts +++ b/node-zerox/src/types.ts @@ -3,6 +3,8 @@ export interface ZeroxArgs { concurrency?: number; correctOrientation?: boolean; filePath: string; + imageDensity?: number; + imageHeight?: number; llmParams?: LLMParams; maintainFormat?: boolean; maxTesseractWorkers?: number; diff --git a/node-zerox/src/utils.ts b/node-zerox/src/utils.ts index 2f92490..7ac774d 100644 --- a/node-zerox/src/utils.ts +++ b/node-zerox/src/utils.ts @@ -205,6 +205,8 @@ const determineOptimalRotation = async ({ // Convert each page to a png, correct orientation, and save that image to tmp export const convertPdfToImages = async ({ correctOrientation, + imageDensity, + imageHeight, localPath, maxTesseractWorkers, pagesToConvertAsImages, @@ -213,6 +215,8 @@ export const convertPdfToImages = async ({ trimEdges, }: { correctOrientation: boolean; + imageDensity: number; + imageHeight: number; localPath: string; maxTesseractWorkers: number; pagesToConvertAsImages: number | number[]; @@ -221,9 +225,9 @@ export const convertPdfToImages = async ({ trimEdges: boolean; }) => { const options = { - density: 300, + density: imageDensity, format: "png", - height: 2048, + height: imageHeight, preserveAspectRatio: true, saveFilename: path.basename(localPath, path.extname(localPath)), savePath: tempDir,