-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathancestries.py
149 lines (137 loc) · 5.02 KB
/
ancestries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import helpers
import datetime
import json
from bs4 import BeautifulSoup
from requests import get
from dateutil.parser import parse
class AncestryDescription:
    """Descriptive text sections of an ancestry, cut out of its page HTML.

    Only ``general``, ``you_might`` and ``others_probably`` are extracted
    from the page; the remaining attributes are set to empty placeholders
    and nothing in this class fills them in.
    """

    def __init__(self, full_page, name):
        """Extract the description sections from ``full_page``.

        Args:
            full_page: The ancestry page's HTML as a string.
            name: The ancestry's name. When it contains "half"
                (case-insensitive) the "Others Probably" section is
                delimited by the ``<name> Mechanics`` heading instead of
                the "Physical Description" heading.

        NOTE(review): ``helpers.trim_html`` presumably returns the HTML
        found between the two given markers, and ``helpers.ul_to_list``
        presumably turns an HTML list into a Python list -- confirm
        against the ``helpers`` module.
        """
        self.general = helpers.trim_html(
            full_page,
            '</i></a><br/>',
            '<h2 class="title">You Might...',
        )

        you_might_html = helpers.trim_html(
            full_page,
            'You Might...</h2>',
            '<h2 class="title">Others Probably...',
        )
        self.you_might = helpers.ul_to_list(you_might_html)

        # Pick the marker that ends the "Others Probably" section.
        others_end_marker = (
            f'<h1 class="title">{name} Mechanics'
            if 'half' in name.lower()
            else '<h2 class="title">Physical Description'
        )
        others_html = helpers.trim_html(
            full_page,
            'Others Probably...</h2>',
            others_end_marker,
        )
        self.others_probably = helpers.ul_to_list(others_html)

        # Placeholders; not populated by this class.
        self.physical_description = ''
        self.society = ''
        self.alignment_religion = ''
        self.names = ''
        self.sample_names = ['']

    def toJSON(self):
        """Return this object's attributes as a key-sorted, 4-space-indented JSON string."""

        def _attributes_of(obj):
            # Fallback for objects json cannot encode natively: use their
            # instance attribute dictionary.
            return obj.__dict__

        return json.dumps(self, default=_attributes_of, sort_keys=True, indent=4)
class Ancestry:
    """A single ancestry record plus the logic to scrape it from its page.

    ``last_hit`` is a class-level timestamp initialised when the class is
    created; this class itself never updates it.
    """

    last_hit = datetime.datetime.now()

    def __init__(self, name, url):
        """Create an unscraped record.

        Args:
            name: Display name of the ancestry.
            url: Address of the page that ``scrape`` will fetch.
        """
        self.name = name
        # Populated by scrape(). Initialised here so that to_jsonify() and
        # the throttle check in scrape() work on a record that has never
        # been scraped instead of raising AttributeError.
        self.traits = []
        self.description = None
        self.hp = 0
        self.size = ''
        self.speed = 0
        self.boosts = ['']
        self.flaws = ['']
        self.languages = ['']
        self.specials = ['']
        self.url = url
        self.last_updated = 'never'

    def to_jsonify(self):
        """Return the record as a plain dict ready for JSON serialisation.

        ``'Description'`` is ``None`` until a description has been scraped;
        afterwards it is a dict of the description's sections.
        """
        description = self.description
        if description is None:
            description_data = None
        else:
            description_data = {
                'General': description.general,
                'YouMight': description.you_might,
                'OthersProbably': description.others_probably,
                'PhysicalDescription': description.physical_description,
                'Society': description.society,
                'AlignmentReligion': description.alignment_religion,
                'Names': description.names,
                'SampleNames': description.sample_names,
            }

        # Named ``record`` rather than ``json`` so the imported json module
        # is not shadowed inside this method.
        record = {
            'Name': self.name,
            'Traits': self.traits,
            'Description': description_data,
            'HP': self.hp,
            'Size': self.size,
            'Speed': self.speed,
            'Boosts': self.boosts,
            'Flaws': self.flaws,
            'Languages': self.languages,
            'Specials': self.specials,
            'URL': self.url,
            'LastUpdated': self.last_updated,
        }
        return record

    def scrape(self):
        """Visit the URL of the Ancestry record and scrape its data.

        Returns:
            ``False`` when the record was already scraped less than 20
            minutes ago and the fetch is skipped, ``True`` after a fetch.
        """
        # don't scrape more than once every 20 minutes
        if (self.last_updated != 'never' and
                self.description is not None and
                self.hp is not None and
                self.size is not None and
                self.speed is not None and
                (datetime.datetime.now() - parse(self.last_updated)).total_seconds() < 1200):
            return False

        response = get(self.url)
        ancestral_soup = BeautifulSoup(response.text, 'html.parser')

        # scrape traits: the text of the link inside each trait span.
        # Spans without a link (or with an empty one) are skipped rather
        # than aborting the whole scrape.
        trait_spans = ancestral_soup.find_all("span", class_="trait")
        self.traits = [
            span.a.contents[0]
            for span in trait_spans
            if span.a is not None and span.a.contents
        ]

        # scrape description
        self.description = AncestryDescription(str(ancestral_soup), self.name)

        self.last_updated = str(datetime.datetime.now())
        return True
def get_all():
    """Return a list of all ancestries currently listed on AoN.

    Fetches the ancestry index page, records the time of the request in
    ``Ancestry.last_hit``, and builds one ``Ancestry`` (name and absolute
    URL only) per ``<h2 class="title">`` heading on the page.
    """
    index_page = get('http://2e.aonprd.com/Ancestries.aspx')
    Ancestry.last_hit = datetime.datetime.now()
    index_soup = BeautifulSoup(index_page.text, 'html.parser')

    # The second anchor inside each heading supplies both the display name
    # and the relative link of the ancestry.
    anchors = [
        heading.find_all("a")[1]
        for heading in index_soup.find_all("h2", class_="title")
    ]
    name_url_pairs = [
        (anchor.contents[0], 'http://2e.aonprd.com/' + anchor['href'])
        for anchor in anchors
    ]
    return [Ancestry(name, url) for name, url in name_url_pairs]
# test_ancestries = [
# {
# 'Name': 'Human',
# 'Traits': [
# 'Human',
# 'Humanoid'
# ],
# 'Description': {
# 'General': 'Human',
# 'YouMight': [
# 'Be a human',
# 'Do human things'
# ],
# 'OthersProbably': [
# 'Live longer than you',
# 'Are older than you'
# ],
# 'PhysicalDescription': 'Humanistic',
# 'Society': 'Humanitarian',
# 'AlignmentReligion': 'Everything',
# 'Names': 'Namey',
# 'SampleNames': [
# 'Bob',
# 'Alice'
# ]
# },
# 'HP': '8',
# 'Size': 'Medium',
# 'Speed': '25 feet',
# 'Boosts': [
# 'Free',
# 'Free'
# ],
# 'Flaws': [
# 'None'
# ],
# 'Languages': [
# 'Common'
# ],
# 'Specials': [],
# 'URL': '',
# 'LastUpdated': ''
# }
# ]