Skip to content

Commit

Permalink
Allow optional image density and height for node-zerox (#116)
Browse files: browse the repository at this point in the history
* add support for selecting density in node

* revert styling

* revert styling, part 2

* add height and update args

* remove space

* clarify props

* sort
  • Loading branch information
alexander-densley authored Dec 5, 2024
1 parent ee5ed09 commit 27dd1a2
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 2 deletions.
4 changes: 4 additions & 0 deletions node-zerox/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export const zerox = async ({
concurrency = 10,
correctOrientation = true,
filePath,
imageDensity = 300,
imageHeight = 2048,
llmParams = {},
maintainFormat = false,
maxTesseractWorkers = -1,
Expand Down Expand Up @@ -105,6 +107,8 @@ export const zerox = async ({
// Convert the file to a series of images
await convertPdfToImages({
correctOrientation,
imageDensity,
imageHeight,
localPath: pdfPath,
maxTesseractWorkers,
pagesToConvertAsImages,
Expand Down
2 changes: 2 additions & 0 deletions node-zerox/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ export interface ZeroxArgs {
concurrency?: number;
correctOrientation?: boolean;
filePath: string;
imageDensity?: number;
imageHeight?: number;
llmParams?: LLMParams;
maintainFormat?: boolean;
maxTesseractWorkers?: number;
Expand Down
8 changes: 6 additions & 2 deletions node-zerox/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ const determineOptimalRotation = async ({
// Convert each page to a png, correct orientation, and save that image to tmp
export const convertPdfToImages = async ({
correctOrientation,
imageDensity,
imageHeight,
localPath,
maxTesseractWorkers,
pagesToConvertAsImages,
Expand All @@ -213,6 +215,8 @@ export const convertPdfToImages = async ({
trimEdges,
}: {
correctOrientation: boolean;
imageDensity: number;
imageHeight: number;
localPath: string;
maxTesseractWorkers: number;
pagesToConvertAsImages: number | number[];
Expand All @@ -221,9 +225,9 @@ export const convertPdfToImages = async ({
trimEdges: boolean;
}) => {
const options = {
density: 300,
density: imageDensity,
format: "png",
height: 2048,
height: imageHeight,
preserveAspectRatio: true,
saveFilename: path.basename(localPath, path.extname(localPath)),
savePath: tempDir,
Expand Down

0 comments on commit 27dd1a2

Please sign in to comment.