🐛 Use the correct raise statement

kaixinol · Feb 15, 2024 · 1c83103
1 parent 202d2e4
commit 1c83103
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 6 deletions.
diff --git a/twitter_user_tweet_crawler/__main__.py b/twitter_user_tweet_crawler/__main__.py
@@ -94,6 +94,6 @@ def get_items_need_handle():
     logger.add(work_directory / "log/{time:YYYY-MM-DD}.log", rotation="00:00",
                level="ERROR",
                encoding="utf-8", format="{time} | {level} | {message}", enqueue=True)
-    Path(Path(__file__).absolute().parent / 'output/res').mkdir(parents=True, exist_ok=True)
+    (Path(__file__).absolute().parent / 'output/res').mkdir(parents=True, exist_ok=True)
     config.load("config.yaml")
     main()
diff --git a/twitter_user_tweet_crawler/tweet.py b/twitter_user_tweet_crawler/tweet.py
@@ -28,6 +28,10 @@
     inject = fp.read()
 
 
+class CrawlError(Exception):
+    pass
+
+
 def catch(func):
     def wrapper(self, available_driver: WebDriver):
         try:
@@ -87,14 +91,14 @@ def replace_emoji(string: str) -> str:
 
         def get_video(base_dom: WebElement):
             if not base_dom.find_element(By.XPATH, "//video").is_displayed():
-                raise
+                raise CrawlError("Can't crawl videos")
             elemet: WebElement = base_dom.find_element(By.XPATH, "//div[contains(@class, \"tmd-down\")]")
             sleep(1)
             ActionChains(available_driver).move_to_element(elemet).click().perform()
             count: int = 0
             while available_driver.execute_script("return document.isParsed;") is False:
                 if (count := count + 1) > 10:
-                    raise
+                    raise CrawlError("Timeout Error")
                 sleep(1)
                 ActionChains(available_driver).move_to_element(elemet).click().perform()
             with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -106,14 +110,14 @@ def get_img(base_dom):
             result = base_dom.find_elements(By.XPATH, '//img')
             for i in result:
                 if 'card_img' in i.get_attribute('src'):
-                    raise
+                    raise CrawlError("Can't crawl pictures")
             elemet: WebElement = base_dom.find_element(By.XPATH, "//div[contains(@class, \"tmd-down\")]")
             sleep(1)
             ActionChains(available_driver).move_to_element(elemet).click().perform()
             count: int = 0
             while available_driver.execute_script("return document.isParsed;") is False:
                 if (count := count + 1) > 10:
-                    raise
+                    raise CrawlError("Timeout Error")
                 ActionChains(available_driver).move_to_element(elemet).click().perform()
                 sleep(1)
             with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -149,7 +153,7 @@ def wait_element(count: int = 0):
                 wait.until(EC.presence_of_element_located((By.XPATH, "//article[@data-testid=\"tweet\"]//time")))
             except TimeoutException:
                 if count > 3:
-                    raise
+                    raise CrawlError("Waiting time is too long, timeout")
                 sleep(20)
                 available_driver.refresh()
                 wait_element(count + 1)