From a580d563637cc6acfbfc52d7290b3d4ff5cbf394 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 28 Nov 2024 06:33:12 -0500 Subject: [PATCH 1/2] Fix CLI bug --- extract.py | 1 - pyproject.toml | 2 +- table_app.py | 4 ++-- tabled/inference/detection.py | 7 +++---- tabled/inference/models.py | 4 +--- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/extract.py b/extract.py index 0b800a7..720ce03 100644 --- a/extract.py +++ b/extract.py @@ -1,7 +1,6 @@ import json from collections import defaultdict -import pypdfium2 import copy import os diff --git a/pyproject.toml b/pyproject.toml index 18a74ad..aa71671 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tabled-pdf" -version = "0.1.5" +version = "0.1.6" description = "Detect and recognize tables in PDFs and images." authors = ["Vik Paruchuri "] readme = "README.md" diff --git a/table_app.py b/table_app.py index 01313fe..7dc69f7 100644 --- a/table_app.py +++ b/table_app.py @@ -104,7 +104,7 @@ def page_count(pdf_file): page_number = 1 with col: - st.image(pil_image, caption="PDF file (preview)", use_column_width="auto") + st.image(pil_image, caption="PDF file (preview)", use_container_width=True) run_marker = st.sidebar.button("Run Tabled") @@ -122,7 +122,7 @@ def page_count(pdf_file): for idx, (md, table_img) in enumerate(out_data): container.markdown(f"## Table {idx}") - container.image(table_img, caption=f"Table {idx}", use_column_width="auto") + container.image(table_img, caption=f"Table {idx}", use_container_width=True) container.markdown(md) container.code(md) container.divider() diff --git a/tabled/inference/detection.py b/tabled/inference/detection.py index c1da422..67d770f 100644 --- a/tabled/inference/detection.py +++ b/tabled/inference/detection.py @@ -32,10 +32,9 @@ def merge_tables(page_table_boxes): return [b for i, b in enumerate(page_table_boxes) if i not in ignore_boxes] -def detect_tables(images, highres_images, models, detector_batch_size=settings.DETECTOR_BATCH_SIZE, layout_batch_size=settings.LAYOUT_BATCH_SIZE): - det_model, det_processor, layout_model, layout_processor = models - line_predictions = batch_text_detection(images, det_model, det_processor, batch_size=detector_batch_size) - layout_predictions = batch_layout_detection(images, layout_model, layout_processor, line_predictions, batch_size=layout_batch_size) +def detect_tables(images, highres_images, models, layout_batch_size=settings.LAYOUT_BATCH_SIZE): + layout_model, layout_processor = models + layout_predictions = batch_layout_detection(images, layout_model, layout_processor, batch_size=layout_batch_size) table_imgs = [] table_counts = [] diff --git a/tabled/inference/models.py b/tabled/inference/models.py index 3ee072b..459b4d7 100644 --- a/tabled/inference/models.py +++ b/tabled/inference/models.py @@ -8,11 +8,9 @@ def load_detection_models(): - det_model = load_det_model() - det_processor = load_det_processor() layout_model = load_layout_model() layout_processor = load_layout_processor() - return det_model, det_processor, layout_model, layout_processor + return layout_model, layout_processor def load_recognition_models(): From 46345b60826255e32831662d1126caf26289c041 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 28 Nov 2024 06:36:16 -0500 Subject: [PATCH 2/2] Fix escape warning --- tabled/formats/markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tabled/formats/markdown.py b/tabled/formats/markdown.py index 9049628..67244d2 100644 --- a/tabled/formats/markdown.py +++ b/tabled/formats/markdown.py @@ -7,7 +7,7 @@ def replace_special_chars(text): - return text.replace("|", "\|").replace("-", "\-") + return text.replace("|", "\\|").replace("-", "\\-") def replace_all(text):