From 881679079a3989149f638dcfbe95987e1b5c883b Mon Sep 17 00:00:00 2001 From: kaesinol Date: Wed, 31 Jul 2024 13:28:47 +0800 Subject: [PATCH] :alien::bug: bugfix https://stackoverflow.com/questions/46920243/how-to-configure-chromedriver-to-initiate-chrome-browser-in-headless-mode-throug --- config.yaml | 7 +++---- twitter_user_tweet_crawler/__main__.py | 2 +- script.js => twitter_user_tweet_crawler/script.js | 0 twitter_user_tweet_crawler/tweet.py | 5 ++--- 4 files changed, 6 insertions(+), 8 deletions(-) rename script.js => twitter_user_tweet_crawler/script.js (100%) diff --git a/config.yaml b/config.yaml index c7ffe4a..86b5c7d 100644 --- a/config.yaml +++ b/config.yaml @@ -1,11 +1,10 @@ proxy: - http: socks5://127.0.0.1:7890 # null - https: socks5://127.0.0.1:7890 + http: socks5://127.0.0.1:7897 # null + https: socks5://127.0.0.1:7897 max_threads: 2 headful: 1 header: User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 -inject_js: "/media/Data/Project/twitter_user_tweet_crawler/script.js" -save: "/media/Data/Project/twitter_user_tweet_crawler/save" +save: "/mnt/data/Project/twitter_user_tweet_crawler/save" user: plantgazer \ No newline at end of file diff --git a/twitter_user_tweet_crawler/__main__.py b/twitter_user_tweet_crawler/__main__.py index 8620bb5..680624f 100644 --- a/twitter_user_tweet_crawler/__main__.py +++ b/twitter_user_tweet_crawler/__main__.py @@ -49,7 +49,7 @@ def get_items_need_handle(): work_list.extend(get_multiple_browsers(config['headful'], headless=False)) wait_list = [] for i in work_list: - wait_list.append(tweet_executor.submit(i.get, 'https://twitter.com/404')) + wait_list.append(tweet_executor.submit(i.get, 'https://x.com/404')) for ii in wait_list: ii.result() driver.get('https://twitter.com/404') diff --git a/script.js b/twitter_user_tweet_crawler/script.js similarity index 100% rename from script.js rename to twitter_user_tweet_crawler/script.js diff --git a/twitter_user_tweet_crawler/tweet.py b/twitter_user_tweet_crawler/tweet.py index 9db425e..ba99f5d 100644 --- a/twitter_user_tweet_crawler/tweet.py +++ b/twitter_user_tweet_crawler/tweet.py @@ -21,9 +21,8 @@ from .util.config import config from .util.sql import insert_new_record, is_id_exists -inject: str -inject_js = config.inject_js -with open(config.inject_js, 'r') as fp: +inject_js = Path(__file__).parent / 'script.js' +with open(inject_js, 'r') as fp: inject = fp.read()