-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlenovo.py
66 lines (62 loc) · 1.96 KB
/
lenovo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import json
import logging
import threading
import time

import pymysql
import requests
from bs4 import BeautifulSoup
# Seconds to wait on the detail-page request before giving up; without a
# timeout a stalled server would block the calling thread indefinitely.
_DETAIL_REQUEST_TIMEOUT = 10

# Spec-table row label (as it appears in a <th> on the detail page) -> column
# of the `lenovo` table that receives the value.
_SPEC_LABEL_TO_COLUMN = {
    "上市时间": "ttm",
    "内存容量": "flash",
    "内存类型": "flashType",
    "处理器": "cpu",
    "分辨率": "ScreenResolution",
    "操作系统": "os",
    "显卡芯片": "gpu",
    "硬盘容量": "disk",
    "显存容量": "gpuMemory",
}

# Order of the %s placeholders in _INSERT_SQL (`id` is passed as NULL).
_LENOVO_COLUMNS = (
    "title",
    "ttm",
    "flash",
    "flashType",
    "cpu",
    "ScreenResolution",
    "os",
    "gpu",
    "disk",
    "gpuMemory",
)

_INSERT_SQL = 'INSERT INTO `lenovo` (`id`,`title`,`ttm`,`flash`,`flashType`,`cpu`,`ScreenResolution`,`os`,`gpu`,`disk`,`gpuMemory`) values (NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'


def _scrape_specs(url):
    """Download the detail page belonging to *url* and extract the stored fields.

    Returns a dict keyed by the names in ``_LENOVO_COLUMNS`` (fields that are
    not found stay ``""``), or ``None`` when the page has no title element.
    Raises ``requests.RequestException`` if the page cannot be downloaded.
    """
    detail_url = url.replace(".html", "_detail.html")
    response = requests.get(detail_url, timeout=_DETAIL_REQUEST_TIMEOUT)
    response.raise_for_status()
    # NOTE(review): response.text relies on the encoding requests derives from
    # the HTTP headers; if the site omits the charset the Chinese labels below
    # may not match -- confirm against a live page.
    soup = BeautifulSoup(response.text, "html.parser")

    title_tags = soup.select(".main .box .mark")
    if not title_tags:
        return None

    row = dict.fromkeys(_LENOVO_COLUMNS, "")
    row["title"] = title_tags[0].get_text()

    for header in soup.select("tbody th"):
        column = _SPEC_LABEL_TO_COLUMN.get(header.get_text())
        if column is None:
            continue
        # The value sits in an <a> inside the element that follows the <th>.
        # find_next_sibling() skips whitespace text nodes and returns None at
        # the end of a row, so an unexpected layout cannot raise here.
        cell = header.find_next_sibling()
        link = cell.a if cell is not None else None
        if link is not None:
            row[column] = link.get_text()
    return row


def _store_specs(row):
    """Insert *row* (a dict as built by ``_scrape_specs``) into the `lenovo` table."""
    log = logging.getLogger(__name__)
    conn = pymysql.connect(host='localhost',user='root',passwd='',db='notebook',port=3306,charset='utf8')
    try:
        with conn.cursor() as cursor:
            cursor.execute(_INSERT_SQL, [row[column] for column in _LENOVO_COLUMNS])
        conn.commit()
    except pymysql.MySQLError:
        # Best effort, as before: a failed insert must not take the crawl
        # down, but it is now reported instead of vanishing silently.
        log.exception("could not store notebook %r", row["title"])
        conn.rollback()
    finally:
        # Always release the connection, including on unexpected errors.
        conn.close()


def getInfo(url):
    """Scrape one notebook's detail page and store its specs in MySQL.

    *url* is the product page URL; the detail page is derived from it by
    replacing ``.html`` with ``_detail.html``. The title and the recognised
    spec rows are written as one row of the `lenovo` table in the local
    `notebook` database.

    Download failures and pages without a title element are logged and
    skipped; nothing is inserted for them. The database connection is opened
    only after the page has been parsed, so it is never held during the HTTP
    request. Returns ``None``.
    """
    log = logging.getLogger(__name__)
    try:
        row = _scrape_specs(url)
    except requests.RequestException:
        log.warning("could not download detail page for %s", url, exc_info=True)
        return
    if row is None:
        log.warning("no title element on detail page for %s; skipped", url)
        return
    _store_specs(row)
# Walk the paginated Lenovo notebook listing and start one getInfo thread per
# product link found. The first page is the bare listing URL; every later page
# is addressed as "<page * 25>s1.shtml" under it.
listing_root = "http://product.pconline.com.cn/notebook/lenovo/"
for page in range(34):
    if page == 0:
        page_url = listing_root
    else:
        page_url = listing_root + str(page * 25) + "s1.shtml"

    try:
        # A timeout keeps one stalled response from hanging the whole crawl.
        listing = requests.get(page_url, timeout=10)
    except requests.RequestException:
        # Skip just this page; the remaining pages are still crawled.
        logging.getLogger(__name__).warning(
            "could not download listing page %s; skipped", page_url, exc_info=True
        )
        continue

    listing_soup = BeautifulSoup(listing.text, "html.parser")
    # href=True leaves out anchors without a link target, which would
    # otherwise raise KeyError on the subscript below.
    for anchor in listing_soup.find_all("a", class_="item-title-name", href=True):
        threading.Thread(target=getInfo, args=(anchor["href"],)).start()