From a2100216c10dcee6a97b419b2b4c92cdbb578be5 Mon Sep 17 00:00:00 2001
From: bosd <c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me>
Date: Sat, 2 Nov 2024 11:10:09 +0100
Subject: [PATCH] [IMP] add typing to handlers, update docstrings and pdfminer
 url

---
 camelot/cli.py      |  4 +++-
 camelot/handlers.py | 27 ++++++++++++++++-----------
 camelot/io.py       |  2 +-
 tests/test_utils.py |  2 +-
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/camelot/cli.py b/camelot/cli.py
index 8ff57135..6c0e2a1a 100644
--- a/camelot/cli.py
+++ b/camelot/cli.py
@@ -38,7 +38,9 @@ def set_config(self, key, value):
 
 @click.group(name="camelot")
 @click.version_option(version=__version__)
-@click.option("-q", "--quiet", is_flag=False, help="Suppress logs and warnings.")
+@click.option(
+    "-q", "--quiet", is_flag=False, default=False, help="Suppress logs and warnings."
+)
 @click.option(
     "-p",
     "--pages",
diff --git a/camelot/handlers.py b/camelot/handlers.py
index 7d76f960..a59ec38b 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -6,6 +6,7 @@
 import os
 import sys
 from pathlib import Path
+from typing import Any
 
 from pypdf import PdfReader
 from pypdf import PdfWriter
@@ -119,15 +120,17 @@ def _get_pages(self, pages):
             result.extend(range(p["start"], p["end"] + 1))
         return sorted(set(result))
 
-    def _save_page(self, filepath: StrByteType | Path, page, temp):
+    def _save_page(self, filepath: StrByteType | Path, page: int, temp: str):
         """Saves specified page from PDF into a temporary directory.
 
         Parameters
         ----------
+        filepath : StrByteType | Path
+            Filepath or URL of the PDF file.
         page : int
             Page number.
-        layout_kwargs : dict, optional (default: {})
-            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.  # noqa
+        temp : str
+            Temporary directory into which the page is saved.
 
 
         Returns
@@ -178,10 +181,10 @@ def _save_page(self, filepath: StrByteType | Path, page, temp):
 
     def parse(
         self,
-        flavor="lattice",
-        suppress_stdout=False,
-        parallel=False,
-        layout_kwargs=None,
+        flavor: str = "lattice",
+        suppress_stdout: bool = False,
+        parallel: bool = False,
+        layout_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ):
         """Extract tables by calling parser.get_tables on all single page PDFs.
@@ -197,7 +200,7 @@ def parse(
             Process pages in parallel using all available cpu cores.
         layout_kwargs : dict, optional (default: {})
             A dict of `pdfminer.layout.LAParams
-            <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
+            <https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
         kwargs : dict
             See camelot.read_pdf kwargs.
 
@@ -241,19 +244,21 @@ def parse(
 
         return TableList(sorted(tables))
 
-    def _parse_page(self, page, tempdir, parser, suppress_stdout, layout_kwargs):
+    def _parse_page(
+        self, page: int, tempdir: str, parser, suppress_stdout: bool, layout_kwargs
+    ):
         """Extract tables by calling parser.get_tables on a single page PDF.
 
         Parameters
         ----------
-        page : str
+        page : int
             Page number to parse
         parser : Lattice, Stream, Network or Hybrid
             The parser to use.
         suppress_stdout : bool
             Suppress logs and warnings.
         layout_kwargs : dict, optional (default: {})
-            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
+            A dict of `pdfminer.layout.LAParams <https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
 
         Returns
         -------
diff --git a/camelot/io.py b/camelot/io.py
index 931688cd..6bef46a1 100644
--- a/camelot/io.py
+++ b/camelot/io.py
@@ -46,7 +46,7 @@ def read_pdf(
         Process pages in parallel using all available cpu cores.
     layout_kwargs : dict, optional (default: {})
         A dict of `pdfminer.layout.LAParams
-        <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
+        <https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
     table_areas : list, optional (default: None)
         List of table area strings of the form x1,y1,x2,y2
         where (x1, y1) -> left-top and (x2, y2) -> right-bottom
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 95ff6166..85ae6005 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -15,7 +15,7 @@
 def get_text_from_pdf(filename):
     """Method to extract text object from pdf."""
     # https://stackoverflow.com/questions/22898145/how-to-extract-text-and-text-coordinates-from-a-pdf-file
-    # https://pdfminer-docs.readthedocs.io/programming.html#performing-layout-analysis
+    # https://pdfminersix.readthedocs.io/en/latest/topic/converting_pdf_to_text.html
     document = open(filename, "rb")
     # Create resource manager
     rsrcmgr = PDFResourceManager()