Skip to content

Commit

Permalink
Merge pull request #118 from alexander-densley/main
Browse files: browse the repository at this point in the history
add density and height to python as well
  • Loading branch information
annapo23 authored Dec 18, 2024
2 parents 581756b + a58b7f7 commit fab0c9f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
5 changes: 4 additions & 1 deletion py_zerox/pyzerox/core/zerox.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import aiofiles
import aiofiles.os as async_os
import asyncio
from ..constants import PDFConversionDefaultOptions

# Package Imports
from ..processor import (
Expand All @@ -26,6 +27,8 @@ async def zerox(
cleanup: bool = True,
concurrency: int = 10,
file_path: Optional[str] = "",
image_density: int = PDFConversionDefaultOptions.DPI,
image_height: tuple[Optional[int], int] = PDFConversionDefaultOptions.SIZE,
maintain_format: bool = False,
model: str = "gpt-4o-mini",
output_dir: Optional[str] = None,
Expand Down Expand Up @@ -130,7 +133,7 @@ async def zerox(
**subset_pdf_create_kwargs)

# Convert the file to a series of images; the function below returns a list of image paths in page order.
images = await convert_pdf_to_images(local_path=local_path, temp_dir=temp_directory)
images = await convert_pdf_to_images(image_density=image_density, image_height=image_height, local_path=local_path, temp_dir=temp_directory)

if maintain_format:
for image in images:
Expand Down
6 changes: 3 additions & 3 deletions py_zerox/pyzerox/processor/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
from ..models import litellmmodel


async def convert_pdf_to_images(local_path: str, temp_dir: str) -> List[str]:
async def convert_pdf_to_images(image_density: int, image_height: tuple[Optional[int], int], local_path: str, temp_dir: str) -> List[str]:
"""Converts a PDF file to a series of images in the temp_dir. Returns a list of image paths in page order."""
options = {
"pdf_path": local_path,
"output_folder": temp_dir,
"dpi": PDFConversionDefaultOptions.DPI,
"dpi": image_density,
"fmt": PDFConversionDefaultOptions.FORMAT,
"size": PDFConversionDefaultOptions.SIZE,
"size": image_height,
"thread_count": PDFConversionDefaultOptions.THREAD_COUNT,
"use_pdftocairo": PDFConversionDefaultOptions.USE_PDFTOCAIRO,
"paths_only": True,
Expand Down

0 comments on commit fab0c9f

Please sign in to comment.