Skip to content

Commit

Permalink
🐛🔥 Bug fix & remove useless code
Browse files (browse the repository at this point in the history)
  • Loading branch information
kaixinol committed Nov 20, 2023
1 parent 8ed55d4 commit 15b68e2
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 11 deletions.
3 changes: 1 addition & 2 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{ "proxy":
{"http": "socks5://127.0.0.1:7890", "https": "socks5://127.0.0.1:7890" }, "max_threads": 1,
"header": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"}
,"user_data_dir": "/media/Data/Project/twitter_user_tweet_crawler/twitter_user_tweet_crawler/userdata",
"header": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"},
"inject_js": "/media/Data/Project/twitter_user_tweet_crawler/script.js",
"save": "/media/Data/Project/twitter_user_tweet_crawler/output",
"user": "s_nample"
Expand Down
5 changes: 2 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ def get_tweet():
set_work_directory(Path(__file__).absolute().parent)
config.load({"proxy": {"http": "socks5://127.0.0.1:7890", "https": "socks5://127.0.0.1:7890"}, "max_threads": 2,
"header": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
"like Gecko) "},
"user_data_dir": "/media/Data/Project/twitter_user_tweet_crawler/twitter_user_tweet_crawler/userdata"
"like Gecko) "}
, "inject_js": "/media/Data/Project/twitter_user_tweet_crawler/script.js",
"save": "/media/Data/Project/twitter_user_tweet_crawler/output/", }
)
from twitter_user_tweet_crawler.tweet import Tweet
-Path(config.save / 'res').mkdir(parents=True, exist_ok=True)
+(Path(config.save) / 'res').mkdir(parents=True, exist_ok=True)
browser = get_browser()
browser.get('https://twitter.com/404')
cookie: list[dict]
Expand Down
2 changes: 1 addition & 1 deletion twitter_user_tweet_crawler/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_items_need_handle():
return driver.find_elements(*selector)

selector = (By.XPATH, '//*/div[2]/div/div[3]/a[@role="link"]')
-Path(config.save / 'res').mkdir(exist_ok=True, parents=True)
+(Path(config.save) / 'res').mkdir(exist_ok=True, parents=True)

driver = get_browser()

Expand Down
5 changes: 0 additions & 5 deletions twitter_user_tweet_crawler/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ def get_browser(headless: bool = False, id=None) -> WebDriver:
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--window-size=1200x600"')

-if not id:
-chrome_options.add_argument(f'user-data-dir={config["user_data_dir"] + "/" + str(browsers := browsers + 1)}')
-else:
-chrome_options.add_argument(f'user-data-dir={config["user_data_dir"] + "/" + str(id)}')
if headless:
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(options=chrome_options)
Expand Down

0 comments on commit 15b68e2

Please sign in to comment.