-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
58 lines (54 loc) · 2.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import codecs
import csv
import datetime
import os
import re
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Script overview: log in to my.bupt.edu.cn with Selenium, walk the first 20
# entries of a notice list, and for every entry whose link text contains "讲座"
# and whose second <span> equals today's or yesterday's date, open the notice
# and check whether its body contains "盖". Matches are printed and written to
# speech<today>.csv.

# Take the date once so `today` and `yesterday` cannot disagree if the script
# happens to start right at midnight.
current_date = datetime.date.today()
yesterday = str(current_date - datetime.timedelta(days=1))
today = str(current_date)
print(f"早安,今天是{today}\n爬取的日期为", yesterday, "and", today)

# NOTE(review): the credentials used to be hard-coded only. They can now be
# supplied through the environment; the literals remain purely as a
# backward-compatible fallback and should be removed from the repository
# (and the password rotated).
username = os.environ.get("BUPT_USERNAME", "2020212265")
password = os.environ.get("BUPT_PASSWORD", "03053913")

# XPath prefix of one entry in the notice list; `{i}` is the 1-based position.
# Sample entry links seen by the original author:
#   /html/body/div[3]/div/div[2]/ul/li[7]/a
#   /html/body/div[3]/div/div[2]/ul/li[20]/a
entry_xpath = "/html/body/div[3]/div/div[2]/ul/li[{i}]"

url = "http://my.bupt.edu.cn"
res = []
page = webdriver.Chrome()
try:
    page.get(url)
    page.maximize_window()

    # Log in
    page.find_element(By.XPATH, "//*[@id='username']").send_keys(username)
    page.find_element(By.XPATH, "//*[@id='password']").send_keys(password)
    page.find_element(By.XPATH, "//*[@id='casLoginForm']/input[4]").click()

    # Click through two navigation links to reach the notice list.
    page.find_element(By.XPATH, "/html/body/div[1]/div/ul/li[5]/a").click()
    page.find_element(By.XPATH, "/html/body/div[3]/div/div[1]/div[2]/ul/li[18]/a").click()

    # Address used to come back to the list after opening a notice.
    url = "http://my.bupt.edu.cn/list.jsp?urltype=tree.TreeTempUrl&wbtreeid=1162"

    # The list is assumed to hold 20 entries; a missing entry makes
    # find_element raise, exactly as before.
    for i in range(1, 21):
        prefix = entry_xpath.format(i=i)
        target = page.find_element(By.XPATH, f"{prefix}/a")
        text = page.find_element(By.XPATH, f"{prefix}/span[2]").text

        if "讲座" not in target.text or text not in (today, yesterday):
            continue

        n_url = target.get_attribute("href")
        page.get(n_url)

        # The original concatenated every distinct node text and then searched
        # the result for "盖"; testing each node directly gives the same answer
        # and stops at the first hit, saving WebDriver round trips.
        content = page.find_elements(By.XPATH, "//*[@id='vsb_content']/div//*")
        if any("盖" in node.text for node in content):
            title = page.find_element(
                By.XPATH, "/html/body/div[3]/div[2]/div[1]/form/div[1]/h1[1]"
            )
            res.append((text, title.text + "\n" + n_url))
            print(f"可以盖章哦!快去看看吧{n_url}")

        # Return to the list so the next entry can be located again.
        # NOTE(review): placement inferred from the flattened source — confirm
        # it was not meant to run on every iteration.
        page.get(url)
finally:
    # quit() ends the whole driver session (close() only closes the window),
    # and the finally clause guarantees it also runs when a lookup fails.
    page.quit()

if not res:
    print("今天还没有可以盖章的呢!过会再来吧!")
else:
    # newline="" lets the csv module control line endings itself;
    # errors="replace" keeps one character outside GBK from discarding the
    # results of the whole crawl.
    with open(f"speech{today}.csv", "w", encoding="gbk", errors="replace", newline="") as f:
        csv.writer(f).writerows(res)
# //*[@id="vsb_content"]/div/p[9]/span[1]/span
# //*[@id="vsb_content"]/div/p[13]/span