From 5d4be389ef2df81a75be8649405cf7feec010494 Mon Sep 17 00:00:00 2001 From: kaesinol Date: Mon, 20 Nov 2023 22:15:41 +0800 Subject: [PATCH] :art::fire: remove dead code & formatting --- test.py | 3 +-- tests/CI.py | 5 +---- twitter_user_tweet_crawler/__main__.py | 2 +- twitter_user_tweet_crawler/browser.py | 6 +----- twitter_user_tweet_crawler/tweet.py | 8 +++++--- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/test.py b/test.py index cb8d49f..ca527f0 100644 --- a/test.py +++ b/test.py @@ -10,8 +10,7 @@ def get_tweet(): set_work_directory(Path(__file__).absolute().parent) config.load({"proxy": {"http": "socks5://127.0.0.1:7890", "https": "socks5://127.0.0.1:7890"}, "max_threads": 2, "header": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, " - "like Gecko) "} - , "inject_js": "/media/Data/Project/twitter_user_tweet_crawler/script.js", + "like Gecko) "}, "inject_js": "/media/Data/Project/twitter_user_tweet_crawler/script.js", "save": "/media/Data/Project/twitter_user_tweet_crawler/output/", } ) from twitter_user_tweet_crawler.tweet import Tweet diff --git a/tests/CI.py b/tests/CI.py index 3cbd3f5..ce4fd6c 100644 --- a/tests/CI.py +++ b/tests/CI.py @@ -1,8 +1,6 @@ import unittest from time import sleep -from selenium import webdriver -from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.common.by import By from twitter_user_tweet_crawler.browser import get_browser @@ -10,8 +8,7 @@ config.load({"proxy": None, "max_threads": 2, "header": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, " - "like Gecko) "} - , "inject_js": "script.js", + "like Gecko) "}, "inject_js": "script.js", } ) diff --git a/twitter_user_tweet_crawler/__main__.py b/twitter_user_tweet_crawler/__main__.py index f21ffe5..452b4b4 100644 --- a/twitter_user_tweet_crawler/__main__.py +++ b/twitter_user_tweet_crawler/__main__.py @@ -65,7 +65,7 @@ def get_items_need_handle(): cookie = driver.get_cookies() for drivers in work_list: set_cookie(drivers) - driver.get("https://twitter.com/"+ config.user) + driver.get("https://twitter.com/" + config.user) data_dict = {} pool = ThreadPool(work_list, tweet_executor) diff --git a/twitter_user_tweet_crawler/browser.py b/twitter_user_tweet_crawler/browser.py index f2523f9..c240993 100644 --- a/twitter_user_tweet_crawler/browser.py +++ b/twitter_user_tweet_crawler/browser.py @@ -1,13 +1,9 @@ from selenium import webdriver from selenium.webdriver.chrome.webdriver import WebDriver -from twitter_user_tweet_crawler.util.config import config -browsers = 0 - -def get_browser(headless: bool = False, id=None) -> WebDriver: - global browsers +def get_browser(headless: bool = False) -> WebDriver: chrome_options = webdriver.ChromeOptions() chrome_options.add_argument('--blink-settings=imagesEnabled=false') chrome_options.add_argument('--disable-remote-fonts') diff --git a/twitter_user_tweet_crawler/tweet.py b/twitter_user_tweet_crawler/tweet.py index 8908e4e..e1e3f38 100644 --- a/twitter_user_tweet_crawler/tweet.py +++ b/twitter_user_tweet_crawler/tweet.py @@ -3,10 +3,12 @@ from datetime import datetime from pathlib import Path from time import sleep +from urllib.parse import quote, urlparse from emoji import is_emoji from html2text import html2text from loguru import logger +from requests import get from rich.console import Console from rich.markdown import Markdown from rich.table import Table @@ -15,10 +17,9 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait -from urllib.parse import quote, urlparse + from .util.config import config from .util.sql import insert_new_record, is_id_exists -from requests import get inject: str inject_js = config.inject_js @@ -47,7 +48,7 @@ def __init__(self, link: str): self.via_app = None self.location = None - @logger.catch() + @logger.catch def download_res(self, url: str, path: str): with open(Path(config.save) / 'res' / path, 'wb') as fp: fp.write(get(url, proxies=config.proxy, headers=config.header).content) @@ -101,6 +102,7 @@ def get_img(): self.img = available_driver.execute_script("return document.fileName;") def click_sensitive_element(): + # TODO try: items = available_driver.find_element(By.XPATH, "//span[text()='查看']") ActionChains(available_driver).move_to_element(i).click().perform()