forked from Nateliason/pin-scrape
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
104 lines (74 loc) · 4.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
'''
Pin-Scrape
https://github.com/Nateliason/pin-scrape
Overview:
This set of Python functions lets you scrape anyone's public Pinterest pinboards.
Its primary use is marketing/growth hacking... but get creative!
If you use this code for anything else, please keep the authors/contributors intact.
If you think of any improvements, go ahead and make them and submit a pull request. When you do, add your name here to the list of contributors.
Dependencies:
BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
SoupSelect: https://code.google.com/p/soupselect/
FeedParser: https://pypi.python.org/pypi/feedparser
Thanks To:
Eristoddle at http://snipplr.com/view/64496/ for providing the code that most of this is based on.
The [GrowthHackerTV](http://www.growthhacker.tv) community for being an inspiration to come up with things like this.
Contributors:
- Nathaniel Eliason @nateliason
'''
import urllib2
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup as Soup
from soupselect import select
import feedparser
# Module-level accumulator of pinboards: get_pinboards() appends every
# pinboard it has not seen before to this list.
# NOTE(review): the original note says these values can be fed back into
# timeline() to get more pins; timeline() parses an RSS feed URL, so confirm
# the stored value's format before passing it in directly.
pinboards = []
def timeline(url):
    """Return the pins listed in a Pinterest RSS feed.

    ``url`` is a feed address: ``pinterest.com/<user>/feed/rss`` for a user's
    most recent pins, or ``pinterest.com/<user>/<pinboard>/rss`` for a single
    pinboard. Per the original author's note, Pinterest only stores 25 items
    in a feed, so no more than that many pins come back.

    Returns a list with the ``id`` of every feed entry, in feed order. The
    rest of this module treats those ids as pin URLs and passes them wherever
    a "specific pin" is expected. Entries that carry no ``id`` are skipped
    instead of raising ``KeyError``.
    """
    # Named ``feed`` (not ``timeline``) so the local no longer shadows this
    # function's own name.
    feed = feedparser.parse(url)
    return [entry['id'] for entry in feed['entries'] if 'id' in entry]
def get_pinterest_timeline(user):
    """Build the RSS feed URL for a Pinterest username.

    Use this when you only have the username and want the feed address that
    ``timeline()`` expects. Surrounding whitespace and slashes are removed
    from ``user`` first so that input such as ``"name/"`` cannot produce a
    malformed URL with doubled slashes.
    """
    username = user.strip().strip('/')
    return 'http://www.pinterest.com/' + username + '/feed/rss'
def get_pinboards(user):
    """Collect the pinboards behind a user's most recent pins.

    Fetches the user's feed, downloads every pin page in it and reads the
    page's ``pinterestapp:pinboard`` meta tag. This will not necessarily find
    all of the user's pinboards: only the pins present in the feed (the last
    25, per the original author's note) are inspected.

    Returns the distinct pinboards found for ``user`` in first-seen order.
    Every pinboard not already known is also appended to the module-level
    ``pinboards`` list, exactly as before. Pin pages without the meta tag are
    skipped; network errors from ``urllib2.urlopen`` still propagate.
    """
    found = []
    for pin_url in timeline(get_pinterest_timeline(user)):
        response = urllib2.urlopen(pin_url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()

        tags = select(soup, 'meta[property="pinterestapp:pinboard"]')
        pinboard = tags[0].get('content') if tags else None
        if pinboard is None:
            # This page does not expose its pinboard; nothing to record.
            continue

        if pinboard not in found:
            found.append(pinboard)
        if pinboard not in pinboards:
            pinboards.append(pinboard)

    # Parenthesised single argument prints the same text on Python 2 and 3.
    print("Done getting pinboards")
    # Previously nothing was returned, so callers iterating the result
    # (get_urls) received None.
    return found
def item_url(specific_pin):
    """Return the original URL of whatever was pinned, or ``'no url'``.

    ``specific_pin`` is the URL of a single pin page. The page is downloaded
    and the content of its ``pinterestapp:source`` meta tag is returned,
    which is useful for seeing where someone likes to pin from.

    This lookup is deliberately best-effort: any failure (bad URL, network
    error, missing meta tag) yields the sentinel string ``'no url'`` rather
    than an exception.
    """
    try:
        response = urllib2.urlopen(specific_pin)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()
        return select(soup, 'meta[property="pinterestapp:source"]')[0]['content']
    except Exception:
        # ``Exception`` instead of a bare ``except`` keeps the best-effort
        # contract while letting KeyboardInterrupt/SystemExit stop the run.
        return 'no url'
# (result key, <meta property="..."> name) pairs read from a pin page.
_PIN_META_PROPERTIES = (
    ("url", "og:url"),
    ("title", "og:title"),
    ("description", "og:description"),
    ("image", "og:image"),
    ("pinboard", "pinterestapp:pinboard"),
    ("pinner", "pinterestapp:pinner"),
    ("source", "pinterestapp:source"),
    ("likes", "pinterestapp:likes"),
    ("repins", "pinterestapp:repins"),
    ("comments", "pinterestapp:comments"),
    ("actions", "pinterestapp:actions"),
)


def grab_pin(specific_pin):
    """Return the metadata published on a single pin page.

    ``specific_pin`` is the URL of a pin. The page is downloaded once and the
    ``content`` of each Open Graph / ``pinterestapp:`` meta tag listed in
    ``_PIN_META_PROPERTIES`` is collected.

    Returns a dict with the keys ``url``, ``title``, ``description``,
    ``image``, ``pinboard``, ``pinner``, ``source``, ``likes``, ``repins``,
    ``comments`` and ``actions``. A tag that is absent from the page maps to
    ``None`` instead of aborting the whole lookup with ``IndexError``.
    Network errors from ``urllib2.urlopen`` propagate to the caller.
    """
    response = urllib2.urlopen(specific_pin)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    pin = {}
    for key, prop in _PIN_META_PROPERTIES:
        tags = select(soup, 'meta[property="%s"]' % prop)
        pin[key] = tags[0].get('content') if tags else None
    return pin
def get_pinboard_timeline(pinboard):
    """Return the RSS feed URL for a single pinboard.

    Mirrors ``get_pinterest_timeline()`` for boards: the feed of a pinboard
    lives at ``<pinboard url>/rss``.

    NOTE(review): assumes ``pinboard`` is the board's full URL, as read from
    the ``pinterestapp:pinboard`` meta tag - confirm against a live page.
    """
    return pinboard.rstrip('/') + '/rss'


def get_urls(user, target='your url'):
    """Find the source URLs of a user's pins that come from a given site.

    Gets the user's pinboards, walks the feed of each pinboard, resolves
    every pin to its original source URL and keeps those containing
    ``target``.

    ``target`` is the substring to look for (for example a retailer's
    domain). It defaults to the original placeholder ``'your url'`` so
    existing calls behave as before.

    Returns the list of matching source URLs, in discovery order.
    """
    user_pinboards = get_pinboards(user)
    if user_pinboards is None:
        # get_pinboards() may only fill the module-level list and return
        # nothing; fall back to that list so the loop below has data.
        user_pinboards = pinboards

    matches = []
    for pinboard in user_pinboards:
        for pin in timeline(get_pinboard_timeline(pinboard)):
            url = item_url(pin)
            if url == 'no url':
                # item_url()'s failure sentinel must never count as a match,
                # even when ``target`` happens to be a substring of it.
                continue
            if target in url:
                matches.append(url)
    return matches