From a13850cfa1a8c573aa69312718b9455091b7cb98 Mon Sep 17 00:00:00 2001 From: kaesinol Date: Sun, 12 Nov 2023 22:50:10 +0800 Subject: [PATCH] fuck u twitter --- .github/workflows/python-app.yaml | 12 +----------- twitter_user_tweet_crawler/tweet.py | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml index bc515dd..9092725 100644 --- a/.github/workflows/python-app.yaml +++ b/.github/workflows/python-app.yaml @@ -44,18 +44,8 @@ jobs: - name: Test with unittest run: | python -m poetry run python -m unittest tests.CI - - name: Check if debug.png exists - run: | - if [ -f debug.png ]; then - echo "Debug.png file exists" - echo "::set-output name=debug_exists::true" - else - echo "Debug.png file does not exist" - echo "::set-output name=debug_exists::false" - fi - name: Upload artifact - if: steps.check_debug.outputs.debug_exists == 'true' - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: debug-artifact path: debug.png diff --git a/twitter_user_tweet_crawler/tweet.py b/twitter_user_tweet_crawler/tweet.py index 131f867..5a4ef1f 100644 --- a/twitter_user_tweet_crawler/tweet.py +++ b/twitter_user_tweet_crawler/tweet.py @@ -25,7 +25,8 @@ class Tweet: location: str | None link: str - def __init__(self, link: str, is_ci_test: bool = False): + def __init__(self, link: str,is_ci_test: bool = False): + self.post_time = int(datetime.now().timestamp()) self.post_id = int(urlparse(link).path.split('/')[-1]) self.link = link self.text = '' @@ -77,8 +78,7 @@ def get_img(): def click_sensitive_element(): try: - items = available_driver.find_elements(By.XPATH, "//a[@href=\"/settings/content_you_see\"]/parent" - "::*/parent::*/parent::*//div/span") + items = available_driver.find_elements(By.XPATH, "//span[text()='查看']") for i in items: ActionChains(available_driver).move_to_element(i).click().perform() except: @@ -88,12 +88,11 @@ def click_sensitive_element(): result = None available_driver.get(self.link) wait = WebDriverWait(available_driver, 20) - element = None - try: - element = wait.until(EC.presence_of_element_located((By.XPATH, '//*/time/ancestor::*[5]'))) - except: - available_driver.save_screenshot('debug.png') - time_stamp = available_driver.find_element(By.XPATH, '//time').get_attribute('datetime') + element = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="react-root"]/div/div/div[' + '2]/main//section/div/div/div[' + '1]/div/div/article/div/div/div[' + '3]//*/time/ancestor::*[3]'))) + time_stamp = element.find_element(By.XPATH, '//time').get_attribute('datetime') location = True try: result = element.find_element(By.XPATH, '//a[contains(@href, \'place\')]') @@ -103,8 +102,10 @@ def click_sensitive_element(): self.location = result.text + '(' + result.get_attribute('href') + ')' # 移除多余元素,不这样写的话用其他方式写会卡住,我不想深究了TAT available_driver.execute_script("arguments[0].parentNode.removeChild(arguments[0]);", element) - element = wait.until(EC.presence_of_element_located((By.XPATH, '//div[@aria-label and ' - '@data-testid=\'reply\']/parent::*/parent::*'))) + element = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="react-root"]/div/div/div[' + '2]/main//section/div/div/div[' + '1]/div/div/article/div/div/div[' + '3]//div[@role=\'group\' and @aria-label]'))) video = True try: result = available_driver.find_element(By.XPATH, '//*[@id="react-root"]/div/div/div[' @@ -160,10 +161,12 @@ def click_sensitive_element(): self.text += 'Location:' + self.location + '\n' if self.via_app: self.text += self.via_app - self.print() if not self.is_ci_test: + self.print() self.write_markdown() self.commit_sqlite() + return + available_driver.save_screenshot('debug.png') def print(self): console = Console()