Skip to content

Commit

Permalink
🐛 Use the correct raise statement
Browse files (browse the repository at this point in the history)
  • Loading branch information
kaixinol committed Feb 15, 2024
1 parent 202d2e4 commit 1c83103
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
2 changes: 1 addition & 1 deletion twitter_user_tweet_crawler/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,6 @@ def get_items_need_handle():
logger.add(work_directory / "log/{time:YYYY-MM-DD}.log", rotation="00:00",
level="ERROR",
encoding="utf-8", format="{time} | {level} | {message}", enqueue=True)
Path(Path(__file__).absolute().parent / 'output/res').mkdir(parents=True, exist_ok=True)
(Path(__file__).absolute().parent / 'output/res').mkdir(parents=True, exist_ok=True)
config.load("config.yaml")
main()
14 changes: 9 additions & 5 deletions twitter_user_tweet_crawler/tweet.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
inject = fp.read()


# Raised by the crawl helpers in this module when a tweet's video or pictures
# cannot be crawled, or when waiting for the page/download script times out.
class CrawlError(Exception):
pass


def catch(func):
def wrapper(self, available_driver: WebDriver):
try:
Expand Down Expand Up @@ -87,14 +91,14 @@ def replace_emoji(string: str) -> str:

def get_video(base_dom: WebElement):
if not base_dom.find_element(By.XPATH, "//video").is_displayed():
raise
raise CrawlError("Can't crawl videos")
elemet: WebElement = base_dom.find_element(By.XPATH, "//div[contains(@class, \"tmd-down\")]")
sleep(1)
ActionChains(available_driver).move_to_element(elemet).click().perform()
count: int = 0
while available_driver.execute_script("return document.isParsed;") is False:
if (count := count + 1) > 10:
raise
raise CrawlError("Timeout Error")
sleep(1)
ActionChains(available_driver).move_to_element(elemet).click().perform()
with concurrent.futures.ThreadPoolExecutor() as executor:
Expand All @@ -106,14 +110,14 @@ def get_img(base_dom):
result = base_dom.find_elements(By.XPATH, '//img')
for i in result:
if 'card_img' in i.get_attribute('src'):
raise
raise CrawlError("Can't crawl pictures")
elemet: WebElement = base_dom.find_element(By.XPATH, "//div[contains(@class, \"tmd-down\")]")
sleep(1)
ActionChains(available_driver).move_to_element(elemet).click().perform()
count: int = 0
while available_driver.execute_script("return document.isParsed;") is False:
if (count := count + 1) > 10:
raise
raise CrawlError("Timeout Error")
ActionChains(available_driver).move_to_element(elemet).click().perform()
sleep(1)
with concurrent.futures.ThreadPoolExecutor() as executor:
Expand Down Expand Up @@ -149,7 +153,7 @@ def wait_element(count: int = 0):
wait.until(EC.presence_of_element_located((By.XPATH, "//article[@data-testid=\"tweet\"]//time")))
except TimeoutException:
if count > 3:
raise
raise CrawlError("Waiting time is too long, timeout")
sleep(20)
available_driver.refresh()
wait_element(count + 1)
Expand Down

0 comments on commit 1c83103

Please sign in to comment.