This repository has been archived by the owner on May 28, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxkcd.py
172 lines (153 loc) · 6.21 KB
/
xkcd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from flask import Markup
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import re
# Phrase -> replacement table taken from the xkcd "Substitutions" comics.
# Keys are written in their natural spelling here; they are normalised for
# lookup (lower-cased, hyphens turned into spaces, regex-escaped) further down,
# after the matching pattern has been built from the natural spellings.
subs = {
    # Substitutions (1)
    "witnesses": "these dudes I know",
    "allegedly": "kinda probably",
    "new study": "Tumblr post",
    "rebuild": "avenge",
    "space": "spaaace",
    "Google Glass": "Virtual Boy",
    "smartphone": u"Pok\u00e9dex",
    "electric": "atomic",
    "senator": "Elf-lord",
    "car": "cat",
    "election": "eating contest",
    "congressional leaders": "river spirits",
    "Homeland Security": "Homestar Runner",
    "could not be reached for comment": "is guilty and everyone knows it",
    # Substitutions 2
    "debate": "dance-off",
    "self driving": "uncontrollably swerving",
    "poll": "psychic reading",
    "candidate": "airbender",
    "drone": "dog",
    "vows to": "probably won't",
    "at large": "very large",
    "successfully": "suddenly",
    "expands": "physically expands",
    "first-degree": "friggin' awful",
    "second-degree": "friggin' awful",
    "third-degree": "friggin' awful",
    "an unknown number": "like hundreds",
    "front runner": "Blade Runner",
    "global": "spherical",
    "years": "minutes",
    "minutes": "years",
    "no indication": "lots of signs",
    "urged restraint by": "drunkenly egged on",
    "horsepower": "tons of horsemeat",
    # Substitutions 3
    "gaffe": "magic spell",
    "ancient": "haunted",
    "star-studded": "blood-soaked",
    "remains to be seen": "will never be known",
    "silver bullet": "way to kill werewolves",
    "subway system": "tunnels I found",
    "surprising": "surprising (but not to me)",
    "war of words": "interplanetary war",
    "tension": "sexual tension",
    "cautiously optimistic": "delusional",
    "Doctor Who": "The Big Bang Theory",
    "win votes": u"find Pok\u00e9mon",
    "behind the headlines": "beyond the grave",
    "email": "poem",
    "Facebook post": "poem",
    "tweet": "poem",
    "Facebook CEO": "this guy",
    "latest": "final",
    "disrupt": "destroy",
    "meeting": u"m\u00e9nage \u00e0 trois",
    "scientists": "Channing Tatum and his friends",
    "you won't believe": "I'm really sad about",
}

# Words inside a key may be joined by either a space or a hyphen in the text.
_WORD_SEPARATOR = "[ -]"


def _key_regex(key):
    """Return the regex alternative that matches one substitution key.

    Each word of the key is escaped so it is matched literally, words may be
    separated by a space or a hyphen, and an optional plural/possessive suffix
    is allowed: "s" or "'" for most keys, only "'" for keys already ending
    in "s".
    """
    words = re.split(_WORD_SEPARATOR, key)
    stem = _WORD_SEPARATOR.join(re.escape(word) for word in words)
    suffix = "'?" if key.endswith("s") else "(s|')?"
    return r"\b" + stem + suffix + r"\b"


# One case-insensitive alternation over every key. Keys are ordered longest
# first (ties broken alphabetically) so the pattern is the same on every run
# instead of depending on dict iteration order.
pattern = re.compile(
    "|".join(_key_regex(key)
             for key in sorted(subs, key=lambda k: (-len(k), k))),
    flags=re.IGNORECASE)

# For easier matching, lower-case, space (if hyphenated), and escape the
# dictionary keys. ``items()`` is used because ``iteritems()`` does not exist
# on Python 3.
subs = {re.escape(k.lower().replace("-", " ")): v for k, v in subs.items()}
# Sample headlines shown on the front page. Each entry carries the article
# URL and its original title.
examples = [
    dict(
        url="http://www.businessinsider.com/report-10-million-self-driving-cars-will-be-on-the-road-by-2020-2015-5-6",
        old_title="10 million self-driving cars will be on the road by 2020",
    ),
    dict(
        url="http://www.wired.com/2011/05/ucsd-skeleton-fight/",
        old_title="Scientists Fight University of California to Study Rare Ancient Skeletons",
    ),
    dict(
        url="http://www.npr.org/2016/01/28/464640980/the-last-candidate-to-skip-the-final-iowa-debate-ronald-reagan",
        old_title="The Last Candidate To Skip The Final Iowa Debate? Ronald Reagan",
    ),
]
def xkcdify(content):
    """
    Replace text within a string as specified by the xkcd Substitutions comics.

    This takes an HTML fragment and replaces the text accordingly, wrapping the
    resulting substitutions in span tags. Each substitution span has the class
    ``substitution`` and keeps the original text in its ``data-tooltip``
    attribute. Only plain text nodes are rewritten; tags and their attributes
    are left alone.

    :param content: Original content with text to be replaced.
    :returns: Resulting content after xkcd substitutions, as a unicode string.
    """

    def sub(matchobj):
        """Return the replacement for a pattern match, or "" if there is none."""
        # Normalise the matched text the same way the ``subs`` keys were
        # normalised: lower-case, hyphens as spaces, regex-escaped.
        key = matchobj.group().lower().replace("-", " ")

        # First, check if the match has a substitution as-is.
        exact = re.escape(key)
        if exact in subs:
            return subs[exact]

        # Otherwise treat the match as the plural or possessive form of a key.
        # Exactly one trailing character is removed: stripping every trailing
        # "s" and "'" would turn "google glass'" into "google gla" and miss
        # the key. The suffix is read from the lower-cased key so that
        # upper-case text ("CARS") keeps its plural too.
        suffix = key[-1:]
        if suffix in ("s", "'"):
            stem = re.escape(key[:-1])
            if stem in subs:
                return subs[stem] + suffix

        return ""

    soup = BeautifulSoup(content, 'html.parser')

    # Get all the plain text strings in the document without their tags.
    # The list is built up front because the tree is modified in the loop
    # below. The exact type check is deliberate: isinstance() would also
    # accept NavigableString subclasses such as comments.
    content_strings = [element for element in soup.descendants
                       if type(element) is NavigableString]

    for string in content_strings:
        # Use index to track where the current substring of plain text starts.
        index = 0
        # Use wrapper to string together plain text and span elements.
        wrapper_tag = soup.new_tag('span')

        # Upon each match, write to the wrapper the plain text preceding it
        # and then the substitution result. Then update index to the position
        # after the matched substring to mark the start of the next plain
        # text substring.
        for match in pattern.finditer(string):
            wrapper_tag.append(soup.new_string(string[index:match.start()]))

            original_text = match.group()
            substituted_text = sub(match)
            if substituted_text:
                replacement = soup.new_tag(
                    'span',
                    **{
                        'class': 'substitution',
                        'data-tooltip': original_text
                    })
                replacement.string = substituted_text
                wrapper_tag.append(replacement)
            else:
                # No substitution available: keep the matched text unchanged.
                wrapper_tag.append(soup.new_string(original_text))

            index = match.end()

        # Keep the original plain text unless substitutions were made.
        if wrapper_tag.contents:
            # Only append the rest of the string if substitutions were made,
            # because we would otherwise be left with the full original string.
            wrapper_tag.append(soup.new_string(string[index:]))
            string.replace_with(wrapper_tag)
            # The wrapper only existed to collect the pieces; drop the tag
            # itself and leave its children in place.
            wrapper_tag.unwrap()

    # decode() yields the same unicode serialisation as unicode(soup) on
    # Python 2 and also works on Python 3, where ``unicode`` does not exist.
    return soup.decode()
# Pre-compute the substituted title of every front-page example when the
# module is imported. The result is wrapped in Markup so that the generated
# span tags are treated as HTML rather than plain text.
for example in examples:
    example.update(new_title=Markup(xkcdify(example['old_title'])))