#Exercises from Section 3/5 - Using Python to Access Web Data

#REGEX PRACTICE - regexsum.py
# Read a file: use a regex to find the numbers on each line,
# then print the sum of those numbers
import re

nfile = raw_input("file name please:")
handle = open(nfile, 'r')
#x = "a need 256 to add to 456 and give me a big number 23 in return"
sum = 0
for line in handle:
    #line = line.rstrip()
    #if re.search('[0-9]*', line):
    #    print line
    y = re.findall('([0-9]+)', line)
    for vals in y:
        sum = float(vals) + sum
print sum
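
# Quick sanity check on the commented-out sample string x above (my own
# example, not course data): findall returns each digit run as a string,
# so the loop would accumulate 256.0 + 456.0 + 23.0 = 735.0.
#print re.findall('([0-9]+)', x)   # -> ['256', '456', '23']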

#SCRAPING PRACTICE (1/2) - firstscrape.py
# open a web page, read all span tags, sum the counts in their contents
import urllib
from BeautifulSoup import *

url = raw_input('Enter - ')
html = urllib.urlopen(url).read()
soup = BeautifulSoup(html)
sum = 0
# Retrieve all of the span tags
tags = soup('span')
for tag in tags:
    text = tag.contents[0]
    sum = int(text) + sum
print sum
# Look at the parts of a tag
#print 'TAG:', tag
#print 'URL:', tag.get('href', None)
#print 'Contents:', tag.contents[0]
#print 'Attrs:', tag.attrs
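
# For reference, a hedged Python 3 equivalent of the fetch-and-parse step
# above, assuming the maintained bs4 package is installed
# (pip install beautifulsoup4); this file itself targets Python 2:
#   import urllib.request
#   from bs4 import BeautifulSoup
#   html = urllib.request.urlopen(url).read()
#   soup = BeautifulSoup(html, 'html.parser')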

#SCRAPING PRACTICE (2/2) - secscrape.py
# open a web page, extract the href values from its anchor tags,
# follow the link at a given position, repeat count times,
# then report the last name (anchor text) found
# Note - this code must run in Python 2.x and you must download
# http://www.pythonlearn.com/code/BeautifulSoup.py
# into the same folder as this program
import urllib
from BeautifulSoup import *

url = raw_input('Enter - ')
html = urllib.urlopen(url).read()
soup = BeautifulSoup(html)
count = int(raw_input('Count:'))
position = int(raw_input('Position:'))
print "Retrieving:", url
name = None
i = 0
while i < count:  # follow the chain of links count times
    tags = soup('a')  # find all anchor tags on the current page
    tag = tags[position - 1]  # Position is 1-based, list indexing 0-based
    name = tag.contents[0]  # anchor text - the name at this position
    new_url = tag.get('href', None)  # the link to follow next
    print "Retrieving:", new_url
    new_html = urllib.urlopen(new_url).read()
    soup = BeautifulSoup(new_html)  # make soup from the newly fetched page
    i = i + 1
print "Last name found:", name
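
# Example session (URL and values are hypothetical, not the graded data):
#   Enter - http://example.com/page1.html
#   Count:4
#   Position:3
# follows the 3rd link on each successive page, four hops deep, and then
# reports the anchor text of the last link followed.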

#XML Exercises - xmlex.py
# take in a URL, parse the XML, extract the comment counts,
# and compute their sum
import urllib
import xml.etree.ElementTree as ET

address = raw_input('Enter location: ')
print 'Retrieving', address
uh = urllib.urlopen(address)
data = uh.read()
print 'Retrieved', len(data), 'characters'
tree = ET.fromstring(data)
count = 0
sum = 0
results = tree.findall('.//count')
for result in results:
    sum = int(result.text) + sum
    count = count + 1
print 'Count:', count
print 'Sum:', sum
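
# The findall('.//count') query above assumes XML shaped roughly like
# this (structure inferred from the code, abridged):
#   <commentinfo>
#     <comments>
#       <comment><name>...</name><count>97</count></comment>
#     </comments>
#   </commentinfo>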

#JSON Exercises - jsonex.py
# take in a URL, parse the JSON, extract the comment counts,
# and compute their sum
import urllib
import json

address = raw_input("Please enter an address:")
print "Retrieving", address
url = urllib.urlopen(address)
data = url.read()
print "Retrieved", len(data), "characters"
js = json.loads(data)
#print json.dumps(js, indent=4)
temp_list = js["comments"]
#print temp_list
sum = 0
count = 0
for item in temp_list:
    sum = int(item["count"]) + sum
    count = count + 1
print count
print sum
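
# The lookups js["comments"] and item["count"] above assume JSON shaped
# roughly like this (structure inferred from the code, abridged):
#   {"comments": [{"name": "...", "count": 97}, ...]}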

#API PRACTICE - geolook.py
# prompt for a location, contact a web service, retrieve JSON,
# parse it, and extract the first 'place_id' from the result
# (place_id is a textual identifier that uniquely identifies a place
# within the Google Maps API)
import urllib
import json

# serviceurl = 'http://maps.googleapis.com/maps/api/geocode/json?'
serviceurl = 'http://python-data.dr-chuck.net/geojson?'

while True:
    address = raw_input('Enter location: ')
    if len(address) < 1: break
    url = serviceurl + urllib.urlencode({'sensor': 'false', 'address': address})
    print 'Retrieving', url
    uh = urllib.urlopen(url)
    data = uh.read()
    print 'Retrieved', len(data), 'characters'
    try:
        js = json.loads(str(data))
    except:
        js = None
    # guard against js being None so the membership test cannot blow up
    if js is None or 'status' not in js or js['status'] != 'OK':
        print '==== Failure To Retrieve ===='
        print data
        continue
    #print json.dumps(js, indent=4)
    place_id = js["results"][0]["place_id"]
    print "Place id:", place_id