From c9e2c9b4dbbb16408b5e2910de59e01559f37978 Mon Sep 17 00:00:00 2001
From: Oliver Kinch <oliver.kinch@gmail.com>
Date: Tue, 5 Mar 2024 12:54:09 +0100
Subject: [PATCH] Problems with scrape tests

---
 config/scrape/scrape.yaml     |  2 +-
 src/doms_databasen/scraper.py | 29 +++++++++++++++++------------
 tests/scraper/test_scraper.py | 11 ++++++++++-
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/config/scrape/scrape.yaml b/config/scrape/scrape.yaml
index d5a89d06..e34be38c 100644
--- a/config/scrape/scrape.yaml
+++ b/config/scrape/scrape.yaml
@@ -20,4 +20,4 @@ test_case_id: "1"
 # Constants
 sleep: 5
 max_consecutive_nonexistent_page_count: 100
-timeout_pdf_download: 20
+timeout_pdf_download: 10
diff --git a/src/doms_databasen/scraper.py b/src/doms_databasen/scraper.py
index eeee01d3..7343f7a1 100644
--- a/src/doms_databasen/scraper.py
+++ b/src/doms_databasen/scraper.py
@@ -51,11 +51,7 @@ def __init__(self, config) -> None:
         """Initializes the Scraper."""
         self.config = config
         self.test_dir = Path(self.config.scrape.paths.test_dir)
-        self.download_dir = (
-            Path(self.config.scrape.paths.download_dir)
-            if not self.config.testing
-            else self.test_dir
-        )
+        self.download_dir = Path(self.config.scrape.paths.download_dir)
         self.data_raw_dir = Path(self.config.paths.data_raw_dir)
 
         self.force = self.config.scrape.force
@@ -78,7 +74,7 @@ def scrape(self, case_id: str) -> None:
         case_dir = (
             self.data_raw_dir / case_id
             if not self.config.testing
-            else self.test_dir / self.config.scrape.test_case_name
+            else self.test_dir / case_id
         )
 
         if self._already_scraped(case_dir) and not self.force:
@@ -236,13 +232,14 @@ def _download_pdf(self, case_dir: Path) -> None:
         )
 
         download_element.click()
-        file_name = self._wait_download(files_before_download)
+        file_name = self._wait_download(files_before=files_before_download)
         if file_name:
-            from_ = (
-                self.download_dir / file_name
-                if not self.config.testing
-                else self.test_dir / file_name
-            )
+            # Debug output: print the current working directory.
+            print("cwd", os.getcwd())
+            # Debug output: list the contents of the current working directory.
+            print("listdir", os.listdir())
+
+            from_ = self.download_dir / file_name
             to_ = case_dir / self.config.file_names.pdf_document
             shutil.move(from_, to_)
         else:
@@ -315,3 +312,11 @@ def _element_exists(self, xpath) -> bool:
         except Exception as e:
             logger.error(e)
             raise e
+
+    # # When closing the scraper, the webdriver should be closed
+    # # and the download folder should be deleted.
+    # def __del__(self):
+    #     """Closes the scraper."""
+    #     self.driver.quit()
+    #     shutil.rmtree(self.download_dir)
+    #     logger.info("Scraper closed")
diff --git a/tests/scraper/test_scraper.py b/tests/scraper/test_scraper.py
index ec197469..690c25d7 100644
--- a/tests/scraper/test_scraper.py
+++ b/tests/scraper/test_scraper.py
@@ -1,3 +1,5 @@
+"""Test the scraper module."""
+
 from pathlib import Path
 
 import pytest
@@ -5,12 +7,19 @@
 
 @pytest.fixture(scope="module")
 def test_case_path(config):
-    return Path(config.scrape.paths.test_dir) / config.scrape.test_case_name
+    """Return the path to the test case."""
+    return Path(config.scrape.paths.test_dir) / config.scrape.test_case_id
 
 
 def test_case_contains_pdf(config, test_case_path):
+    """Test that the test case contains a PDF document."""
     assert (test_case_path / config.file_names.pdf_document).exists()
 
 
 def test_case_contains_tabular_data(config, test_case_path):
+    """Test that the test case contains tabular data."""
     assert (test_case_path / config.file_names.tabular_data).exists()
+
+
+if __name__ == "__main__":
+    pytest.main([f"{__file__}::test_case_contains_pdf", "-s"])