forked from mugglecoding/Plan-for-combating
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pengtao
26 lines (23 loc) · 1.08 KB
/
pengtao
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import time
# Listing index page that the crawl starts from.
url = 'http://bj.58.com/pbdn/?PGTID=0d100000-0000-1121-f41b-137aeef068b7&ClickID=6?'
# Bound the wait: without a timeout, requests blocks indefinitely when the
# server accepts the connection but never answers.
wb_date = requests.get(url, timeout=10)
soup = BeautifulSoup(wb_date.text, 'lxml')
# Anchors sitting directly inside td.img cells of the index page.
info_tags = soup.select('td.img > a')
def parse_info(crawl_url, timeout=10):
    """Fetch one listing page and print its key fields on a single line.

    Downloads ``crawl_url``, pulls the title, posting time, price, area,
    seller type and category out of the HTML with CSS selectors, prints
    them, then sleeps for one second before returning.

    Args:
        crawl_url: URL of the listing detail page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        None. The extracted fields are only printed.

    Raises:
        IndexError: If a selector matches fewer elements than the index
            applied to its result.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    content = requests.get(crawl_url, timeout=timeout).text
    page = BeautifulSoup(content, 'lxml')

    title = page.select(
        '#content > div.person_add_top.no_ident_top > div.per_ad_left '
        '> div.col_sub.mainTitle > h1'
    )[0].text
    posted = page.select('#index_show > ul.mtit_con_left.fl > li.time')[0].text.rstrip()
    price = page.select('.su_con > span')[0].text.lstrip()
    area = page.select('#totalcount')[0].text.rstrip()
    # An empty second ``span.red`` is reported as a merchant listing; any
    # other content is reported as an individual seller.
    seller_marker = page.select('span.red')[1].text.strip()
    seller_type = u'商家' if seller_marker == '' else u'个人'
    category = page.select('.crb_i')[1].text

    print(u'商品标题: %s, 发帖时间: %s,价格: %s, 卖家类型: %s, 区域: %s, 类目: %s' %
          (title, posted, price, seller_type, area, category))
    # Pause after every page; presumably to throttle requests to the site.
    time.sleep(1)
# Visit every listing linked from the index page.
for info_tag in info_tags:
    info_url = info_tag.get('href')
    # Tag.get returns None when the attribute is absent; skip anchors with
    # no usable href rather than handing an invalid URL to parse_info.
    if not info_url:
        continue
    parse_info(info_url)