-
Notifications
You must be signed in to change notification settings - Fork 6
/
grab.py
42 lines (37 loc) · 1.03 KB
/
grab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
## imports
import urllib2
import re
import os
from os.path import basename
from urlparse import urlsplit
from urlparse import urlparse
from posixpath import basename,dirname
## function that processes url, if there are any spaces it replaces with '%20' ##
def process_url(raw_url):
if ' ' not in raw_url[-1]:
raw_url=raw_url.replace(' ','%20')
return raw_url
elif ' ' in raw_url[-1]:
raw_url=raw_url[:-1]
raw_url=raw_url.replace(' ','%20')
return raw_url
url='' ## give the url here
parse_object=urlparse(url)
dirname=basename(parse_object.path)
if not os.path.exists('images'):
os.mkdir("images")
os.mkdir("images/"+dirname)
os.chdir("images/"+dirname)
urlcontent=urllib2.urlopen(url).read()
imgurls=re.findall('img .*?src="(.*?)"',urlcontent)
for imgurl in imgurls:
try:
imgurl=process_url(imgurl)
imgdata=urllib2.urlopen(imgurl).read()
filname=basename(urlsplit(imgurl)[2])
output=open(filname,'wb')
output.write(imgdata)
output.close()
os.remove(filename)
except:
pass