forked from IBM/watson-online-store
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_data_ibm_store.py
111 lines (103 loc) · 3.41 KB
/
get_data_ibm_store.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import urllib
# Grab select items from IBM Logo Store
# Catalogue of the store products to download.  Each entry in ``items`` is a
# dict holding the product page "url", its "title" and a "category" string.
_PRODUCT_PAGE = "http://www.logostore-globalid.us/ProductDetail.aspx?pid="
_SWEATSHIRTS = "shirt/shirts/sweatshirts"
_TEES = "shirt/shirts/tees"
_POLOS = "shirt/shirts/polos"
_HATS = "cap/caps/hat/hats"
_MUGS = "mug/mugs/cup/cups"


def _product(pid, title, category):
    """Return the item record for the product page whose id is *pid*."""
    return {"url": _PRODUCT_PAGE + pid, "title": title, "category": category}


items = [
    # Shirts
    _product("206347", "Applique Crew Sweatshirt", _SWEATSHIRTS),
    _product("131644", "Be Essential T-Shirt", _TEES),
    _product("131636", "Eye-Bee-M Sweatshirt", _SWEATSHIRTS),
    _product("131634", "Eye-Bee-M T-Shirt", _TEES),
    _product("131622", "Fairway and Greene Polo Shirt", _POLOS),
    # Hats
    _product("131628", "Eye-Bee-M Cap", _HATS),
    _product("211897", "Performance Cap", _HATS),
    _product("132258", "Quadrant Logo Cap", _HATS),
    _product("131626", "THINK Cap", _HATS),
    _product("122465", "PureSystems Cap", _HATS),
    # Mugs
    _product("190450", "11oz Mug-Watson Health", _MUGS),
    _product("176572", "IBM C-Handle Mug 11oz.", _MUGS),
    # NOTE(review): "Wason" may be a typo for "Watson" -- confirm against the
    # store page before changing it; the title is written into the output.
    _product("190447", "Wason 11oz. C-Handle Mug", _MUGS),
    _product("132294", "Be Essential Mug", _MUGS),
    _product("132254", "THINK Mug", _MUGS),
]
# Build HTML files as input to Watson Discovery Service.
#
# For every entry in ``items`` the product page is downloaded, seeded with the
# product title and category, stripped of its "upsell" tab, and written to
# "<n>.html" in the current directory, where <n> is the 1-based position of
# the entry in ``items``.
#
# The page is handled as bytes from download to disk so that no character
# encoding has to be guessed and the script runs on both Python 2 and 3.

# ``urlopen`` lives in ``urllib.request`` on Python 3 and directly in
# ``urllib`` on Python 2.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

_UPSELL_START = b'<div id="tabs" class="Upselltabs">'
_UPSELL_END = b'<script type="text/javascript">'


def _to_bytes(text):
    """Return *text* as bytes, UTF-8 encoding it unless it already is bytes."""
    return text if isinstance(text, bytes) else text.encode("utf-8")


def _seed_product_info(page, title, category):
    """Append product and category lines after every "IBM Logostore".

    Args:
        page: Raw page content as bytes.
        title: Product title to insert.
        category: Category string to insert.

    Returns:
        The page bytes with each occurrence of "IBM Logostore" followed by
        "\\nProduct:<title>\\nCategory:<category>\\n".
    """
    seed = (b"IBM Logostore\nProduct:" + _to_bytes(title) +
            b"\nCategory:" + _to_bytes(category) + b"\n")
    return page.replace(b"IBM Logostore", seed)


def _strip_upsell_tab(page):
    """Remove the "upsell" tab, which contains references to other products.

    The removed span starts at the upsell ``<div>`` marker and ends just
    before the first ``<script type="text/javascript">`` that follows it.

    Args:
        page: Raw page content as bytes.

    Returns:
        The page bytes without the upsell span.  The page is returned
        unchanged when it has no upsell marker.
    """
    start = page.find(_UPSELL_START)
    if start == -1:
        # find() reports "not found" as -1; slicing with it would be wrong.
        return page
    end = page.find(_UPSELL_END, start)
    if end == -1:
        # No closing marker: drop everything from the upsell tab onward
        # rather than keeping a stray final byte via page[-1:].
        return page[:start]
    return page[:start] + page[end:]


for counter, item in enumerate(items, 1):
    url = item['url']
    title = item['title']
    category = item['category']
    print("Getting search results for: " + url)
    response = urlopen(url)
    try:
        page = response.read()
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()
    # Add product title and category to help seed Watson results
    page = _seed_product_info(page, title, category)
    page = _strip_upsell_tab(page)
    print(" title = " + title)
    # Binary mode writes the bytes exactly as processed; ``with`` closes the
    # file even if the write fails.
    with open(str(counter) + '.html', 'wb') as file_object:
        file_object.write(page)