forked from libretro/libretro-database
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
40 lines (29 loc) · 1.25 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from lxml import html
import sys
import requests
import os
import re
from chtwrite import cheatwriter
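# Cheat scraper for http://bsfree.shadowflareindustries.com/ (the code below fetches from `baseurl`).
# Original usage note: navigate to the system and code type you want, copy the URL,
# then run the script with "python scraper.py 'url'".
# NOTE(review): nothing in this script reads a command-line URL; it crawls every
# system named in `supported` starting from `baseurl` - confirm and update this note.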
# Root URL of the cheat site; the hrefs scraped from its pages are appended
# to this string to build the next request.
baseurl = 'http://bsfree.org/'

# System names to scrape, matched exactly against the link text found on the
# index page.
supported = (
    "Gameboy",
    "Gameboy Advance",
    "Sega Game Gear",
    "Genesis",
    "Nintendo Entertainment System",
    "Sega Master System",
    "Playstation",
    "Super Nintendo",
    "Sega Saturn",
)

# Seconds to wait on the server per request; without a timeout requests.get
# can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Fetch the index page and collect, in document order, the link targets and
# the link texts of the system table.
page = requests.get(baseurl, timeout=REQUEST_TIMEOUT)
page.raise_for_status()  # fail loudly instead of parsing an HTTP error page
tree = html.fromstring(page.text)
sysurl = tree.xpath('//td[@class="codedescalt"]//a/@href')
system = tree.xpath('//td[@class="codedescalt"]/a[@href]/text()')
# NOTE(review): the loops below assume `sysurl` and `system` line up
# index-for-index even though they come from two different XPath queries -
# confirm against the live page markup.

# Positions (within `system`) of every supported system present on the index
# page.  A system that is not listed is reported and skipped so that one
# missing entry does not abort the whole run with ValueError.
supsys = []
for sup in supported:
    if sup in system:
        supsys.append(system.index(sup))
    else:
        sys.stderr.write("skipped (not listed on index page): " + sup + "\n")

for idx3 in supsys:
    # Each system page lists its code types: hrefs in `cdtype`, link texts in
    # `nmtype`.
    page2 = requests.get(baseurl + sysurl[idx3], timeout=REQUEST_TIMEOUT)
    page2.raise_for_status()
    tree2 = html.fromstring(page2.text)
    cdtype = tree2.xpath('//td[@class="codedescalt"]//a/@href')
    nmtype = tree2.xpath('//td[@class="codedescalt"]/a[@href]/text()')

    # One output directory per system, named after the system's link text and
    # created relative to the current working directory.
    outdir = system[idx3]
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    for idxnum, chttype in enumerate(cdtype):
        # One sub-directory per code type inside the system directory.
        contentdir = outdir + "/" + nmtype[idxnum]
        if not os.path.exists(contentdir):
            os.mkdir(contentdir)
            # Single-argument call form prints identically on Python 2 and 3.
            print("created: " + contentdir)
        # Hand the code-type page over to the project's writer, which is
        # given the site root, the code-type href and the target directory.
        cheatwriter(baseurl=baseurl, chttype=chttype, outdir=contentdir)