Skip to content

Commit

Permalink
🐛 bugfix
Browse files Browse the repository at this point in the history
fix sqlalchemy.exc.IntegrityError
fix browser instance status not being set correctly after an exception occurs
  • Loading branch information
kaixinol committed Nov 15, 2023
1 parent 3ef7450 commit 06b855a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 10 deletions.
5 changes: 2 additions & 3 deletions twitter_user_tweet_crawler/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from .browser import get_browser, get_multiple_browsers
from .pool import ThreadPool
from .util.config import config, work_directory, set_work_directory
from .util.sql import is_id_exists


def main():
Expand Down Expand Up @@ -79,7 +78,7 @@ def get_items_need_handle():
for i in links:
full_url = i.get_attribute("href")
tweet_id = urlparse(full_url).path.split('/')[-1]
if tweet_id not in data_dict and not is_id_exists(int(tweet_id)):
if tweet_id not in data_dict:
data_dict[tweet_id] = Tweet(full_url)
pool.jobs.append(data_dict[tweet_id].load_data)
logger.info(full_url)
Expand All @@ -91,7 +90,7 @@ def get_items_need_handle():
if __name__ == "__main__":
set_work_directory(Path(__file__).absolute().parent)
logger.add(work_directory / "log/{time:YYYY-MM-DD}.log", rotation="00:00",
level="INFO",
level="ERROR",
encoding="utf-8", format="{time} | {level} | {message}", enqueue=True)
Path(Path(__file__).absolute().parent / 'output/res').mkdir(parents=True, exist_ok=True)
config.load("config.yaml")
Expand Down
9 changes: 5 additions & 4 deletions twitter_user_tweet_crawler/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ def _on_job_complete(self, index, future):
elements = self.browser.index(index)
try:
future.result()
except NoSuchElementException:
pass
# By default, `concurrent.futures` will silently log errors but will not raise them
# Throw the error directly
self.browser[elements].__dict__['is_using'] = False
self.check_and_work()
except NoSuchElementException:
pass
finally:
self.browser[elements].__dict__['is_using'] = False
self.check_and_work()
8 changes: 5 additions & 3 deletions twitter_user_tweet_crawler/tweet.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from urllib.parse import quote
from .__main__ import *
from .util.config import work_directory
from .util.sql import insert_new_record
from .util.sql import insert_new_record, is_id_exists


class Tweet:
Expand All @@ -26,7 +26,7 @@ class Tweet:
link: str

def __init__(self, link: str):
self.post_time = int(datetime.now().timestamp())
self.post_time = 0
self.post_id = int(urlparse(link).path.split('/')[-1])
self.link = link
self.text = ''
Expand All @@ -43,8 +43,10 @@ def write_markdown(self):

@logger.catch
def commit_sqlite(self):
insert_new_record(self.post_id, self.post_time, self.location)
if not is_id_exists(int(self.post_id)):
insert_new_record(self.post_id, self.post_time, self.location)

@logger.catch
def load_data(self, available_driver: WebDriver):

def replace_emoji(string: str) -> str:
Expand Down

0 comments on commit 06b855a

Please sign in to comment.