-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwmk_mako_filters.py
321 lines (283 loc) · 9.03 KB
/
wmk_mako_filters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import datetime
import time
import re
import os
import hashlib
import shutil
import json
from email.utils import formatdate # rfc822
import markdown
from wmk_utils import slugify
# Names exported by `from wmk_mako_filters import *`.
# `slugify` is re-exported from wmk_utils; the factory functions
# url_filter_gen and fingerprint_gen defined in this file are not included.
__all__ = [
'date_to_iso',
'date_to_rfc822',
'date',
'date_short',
'date_short_us',
'date_long',
'date_long_us',
'slugify',
'markdownify',
'truncate',
'truncatewords',
'p_unwrap',
'strip_html',
'cleanurl',
'to_json',
]
def _ensure_datetime(d):
    """
    Converts dates, unix time stamps and ISO strings to datetime.
    Also handles the special strings 'now' and 'today'.

    Returns None for anything that cannot be interpreted as a point in
    time (unknown types, unparseable strings, out-of-range timestamps).
    """
    if isinstance(d, datetime.datetime):
        return d
    if isinstance(d, datetime.date):
        # Plain dates become midnight of that day.
        return datetime.datetime(d.year, d.month, d.day)
    if isinstance(d, str) and d.isdigit():
        # str.isdigit() also accepts characters such as superscript digits
        # which int() rejects; in that case keep treating d as a string.
        try:
            d = int(d)
        except ValueError:
            pass
    if isinstance(d, (int, float)):
        try:
            return datetime.datetime.fromtimestamp(d)
        except (OverflowError, OSError, ValueError):
            # Timestamp outside the range supported by the platform.
            return None
    if isinstance(d, str):
        keyword = d.lower()
        if keyword == 'now':
            return datetime.datetime.now()
        if keyword == 'today':
            today = datetime.date.today()
            return datetime.datetime(today.year, today.month, today.day)
        try:
            return datetime.datetime.fromisoformat(d)
        except ValueError:
            return None
    return None
def date_to_iso(s=None, sep='T', upto='sec', with_tz=False):
    """
    Similar to Jekyll's date_to_xmlschema but more flexible.

    Called without a value (s is None) it returns a one-argument filter
    bound to the given options; otherwise the formatted string is returned
    directly. Values that cannot be interpreted as a date are passed
    through unchanged.

    - sep: single character placed between the date and the time part.
    - upto: 'day', 'min' or 'sec' - where to cut the string off. Any other
      value leaves the string untruncated.
    - with_tz: append the UTC offset ('+00:00' if none is found); never
      applied when upto is 'day'.

    Examples:
    - 2008-11-07T13:07:54-08:00 (with sep='T', upto='sec', with_tz=True)
    - 2008-11-07 13:07 (with sep=' ', upto='min')
    """
    def inner(value):
        parsed = _ensure_datetime(value)
        if parsed is None:
            return value
        text = str(parsed)
        if sep and sep != ' ' and len(sep) == 1:
            text = text.replace(' ', sep, 1)
        # Remember the offset before the truncation below can remove it.
        tz_match = re.search(r'([-+]\d\d:\d\d)$', text)
        offset = tz_match.group(1) if tz_match else '+00:00'
        append_tz = with_tz
        if upto.startswith('day'):
            append_tz = False
            text = text[:10]
        elif upto.startswith('min'):
            text = text[:16]
        elif upto.startswith('sec'):
            text = text[:19]
        if append_tz:
            return text + offset
        return text

    if s is None:
        return inner
    return inner(s)
def date_to_rfc822(s):
    """
    Format a date as an RFC 822/2822 date string.
    Example: Thu, 05 Apr 2012 23:47:37 +0200

    Timezone-aware datetimes keep their own UTC offset; naive ones are
    interpreted as local time. Values that cannot be interpreted as a
    date are returned unchanged.
    """
    from email.utils import format_datetime
    d = _ensure_datetime(s)
    if d is None:
        return s
    if d.utcoffset() is not None:
        return format_datetime(d)
    # formatdate() expects a POSIX timestamp, not a datetime object.
    return formatdate(d.timestamp(), localtime=True)
def date_short(s):
    """
    E.g. 7 Nov 2008

    Values that cannot be interpreted as a date are returned unchanged.
    """
    d = _ensure_datetime(s)
    if d is None:
        return s
    # '%-d' is a platform-specific strftime extension; inserting the day
    # number directly gives the same unpadded result everywhere.
    return d.strftime(f'{d.day} %b %Y')
def date_short_us(s):
    """
    E.g. Nov 7th, 2008

    Values that cannot be interpreted as a date are returned unchanged.
    """
    d = _ensure_datetime(s)
    if d is None:
        return s
    if d.day in (1, 21, 31):
        suffix = 'st'
    elif d.day in (2, 22):
        suffix = 'nd'
    elif d.day in (3, 23):
        suffix = 'rd'
    else:
        suffix = 'th'
    # '%-d' is a platform-specific strftime extension; inserting the day
    # number directly gives the same unpadded result everywhere.
    return d.strftime(f'%b {d.day}{suffix}, %Y')
def date_long(s):
    """
    E.g. 7 November 2008

    Values that cannot be interpreted as a date are returned unchanged.
    """
    d = _ensure_datetime(s)
    if d is None:
        return s
    # '%-d' is a platform-specific strftime extension; inserting the day
    # number directly gives the same unpadded result everywhere.
    return d.strftime(f'{d.day} %B %Y')
def date_long_us(s):
    """
    E.g. November 7th, 2008

    Values that cannot be interpreted as a date are returned unchanged.
    """
    d = _ensure_datetime(s)
    if d is None:
        return s
    if d.day in (1, 21, 31):
        suffix = 'st'
    elif d.day in (2, 22):
        suffix = 'nd'
    elif d.day in (3, 23):
        suffix = 'rd'
    else:
        suffix = 'th'
    # '%-d' is a platform-specific strftime extension; inserting the day
    # number directly gives the same unpadded result everywhere.
    return d.strftime(f'%B {d.day}{suffix}, %Y')
def date(s=None, fmt=None):
    """
    Strftime filter. The default format is '%c'.

    Called without a value (s is None) it returns a one-argument filter
    bound to fmt; otherwise the formatted string is returned directly.
    Values that cannot be interpreted as a date are returned unchanged,
    like in the other date filters of this module.
    """
    if not fmt:
        fmt = '%c'

    def inner(s):
        d = _ensure_datetime(s)
        if d is None:
            # Pass unparseable input through instead of failing on
            # None.strftime.
            return s
        return d.strftime(fmt)

    return inner if s is None else inner(s)
def markdownify(s=None, extensions=None):
    """
    Convert markdown to HTML.

    Uses the 'extra' extension unless another list of extensions is given.
    Called without a value (s is None) it returns a one-argument filter
    bound to the chosen extensions.
    """
    active_extensions = ['extra'] if extensions is None else extensions

    def inner(text):
        return markdown.markdown(text, extensions=active_extensions)

    if s is None:
        return inner
    return inner(s)
def truncate(s=None, length=200, ellipsis='…'):
    """
    Truncate to given number of characters. If any shortening occurs,
    an ellipsis will be appended. HTML tags will be stripped.

    Text that already fits within `length` characters is returned as is
    (after tag stripping). Otherwise at most `length - 1` characters are
    kept, backing up to the previous space when the cut would fall inside
    a word. Called without a value (s is None) it returns a one-argument
    filter bound to the given options.
    """
    def inner(s):
        text = strip_html(s)
        if not text or len(text) <= length:
            return text
        cut = max(length - 1, 0)
        ret = text[:cut]
        # text[cut] is the first character that was cut off; unless it is a
        # word boundary the kept part ends mid-word, so drop that fragment.
        if text[cut] not in (' ', '.', '!', '?', ';', ':'):
            ret = re.sub(r' [^ ]*$', '', ret)
        return ret + ellipsis

    return inner if s is None else inner(s)
def truncatewords(s=None, length=25, ellipsis='…'):
    """
    Truncate to given number of words. If any shortening occurs,
    an ellipsis will be appended. HTML tags will be stripped.

    Words are separated by any run of whitespace; the result is joined
    with single spaces. Called without a value (s is None) it returns a
    one-argument filter bound to the given options.
    """
    def inner(s):
        # split() without arguments treats newlines, tabs and repeated
        # spaces as one separator, so they are not counted as words.
        words = strip_html(s).split()
        if len(words) <= length:
            return ' '.join(words)
        return ' '.join(words[:length]) + ellipsis

    return inner if s is None else inner(s)
def p_unwrap(s):
    """
    Remove wrapping <p> tag - iff there is only one.
    Typically used like this: `${ short_text | markdownify,p_unwrap }`,
    so as to keep inline tags inside the paragraph but not the wrapping
    p tag.

    Input that does not consist of exactly one paragraph is returned
    unchanged apart from surrounding whitespace being stripped.
    """
    s = s.strip()
    if s.startswith('<p>') and s.count('<p>') == 1:
        return s.replace('<p>', '').replace('</p>', '').strip()
    # Return the text rather than None when there is nothing to unwrap.
    return s
def strip_html(s):
    """
    Remove all html tags (converting markdown to html beforehand).

    Elements whose content is not visible text (script, style, ...) are
    removed together with their content, block-level tags are replaced by
    a space so that adjacent blocks do not run together, and all remaining
    tags are simply dropped.

    TODO: handle entity and character references.
    """
    ret = markdownify()(s)
    # hidden tags: DOTALL is needed so that content spanning several lines
    # is removed as well.
    for tag in ('script', 'style', 'template', 'iframe', 'object'):
        rx = r'<' + tag + r'[^>]*>.*?</' + tag + r'[^>]*>'
        ret = re.sub(rx, '', ret, flags=re.IGNORECASE | re.DOTALL)
    # block tags (at least for our purposes)
    blocktags = ['address', 'article', 'blockquote', 'details', 'dialog',
                 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure',
                 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                 'header', 'img', 'hgroup', 'hr', 'li', 'main', 'nav', 'ol',
                 'p', 'pre', 'section', 'table', 'td', 'th', 'ul']
    rx = r'<+/?(?:' + '|'.join(blocktags) + r')[^>]*>'
    ret = re.sub(rx, ' ', ret, flags=re.IGNORECASE)
    # Inline tags
    ret = re.sub(r'<[^>]+>', '', ret, flags=re.IGNORECASE)
    return ret.strip()
def to_json(d):
    """
    Serialize d to a JSON string.

    Non-ASCII characters are kept as they are, and objects which the json
    module cannot serialize natively are converted with str().
    """
    dump_options = {'default': str, 'ensure_ascii': False}
    return json.dumps(d, **dump_options)
def cleanurl(s):
    """
    Change /path/index.html to /path/.

    Only a trailing 'index.html' preceded by a slash is removed; the
    slash itself is kept. Any other string is returned unchanged.
    """
    index_name = 'index.html'
    if not s.endswith('/' + index_name):
        return s
    return s[:len(s) - len(index_name)]
def url_filter_gen(base_path):
    """
    Returns a simple local url filter which will be named 'url' in the Mako
    environment and which prefixes URLs that do not start with '/' with the
    leading path to the root of the wmk site. The default is merely to prefix
    '/' to the given path (and suffix '/' if it is deemed necessary).
    Relative urls such as '../this/' are not modified. If site.leading_path or
    site.base_url are defined, they will be used instead of '/', in that order.

    A query string or fragment is split off before the path is processed
    and re-attached unchanged afterwards.
    """
    if not base_path:
        base_path = '/'
    if not base_path.endswith('/'):
        base_path += '/'

    def url(s):
        # Split at whichever of '?' and '#' comes first; everything from
        # there on is kept verbatim, however many separators it contains.
        rest = ''
        positions = [pos for pos in (s.find('?'), s.find('#')) if pos != -1]
        if positions:
            cut = min(positions)
            s, rest = s[:cut], s[cut:]
        if s == '/':
            s = ''
        # Absolute paths, relative ('./', '../') paths and http(s) URLs are
        # not prefixed.
        if s.startswith(('/', '.', 'https:', 'http:')):
            return cleanurl(s) + rest
        maybe_slash = '' if s == '' else '/'
        # No trailing slash after something that looks like a file name.
        if s.endswith('/') or re.search(r'\.\w{1,5}$', s):
            maybe_slash = ''
        return cleanurl(base_path + s + maybe_slash) + rest

    return url
def fingerprint_gen(webroot=None, assets_map=None):
    """
    Returns a filtering def which will be named 'fingerprint' in the Mako
    environment.

    The filter takes the path to a file, starting with '/' and having a
    file extension, and looks it up (a) in the assets_map dict, returning
    the mapped name if there is one, or (b) under the webroot directory.
    In the latter case the first 12 hex digits of the SHA1 hash of the
    file contents are inserted before the file extension, and the original
    file is copied to that name unless something already exists there.
    Returns the modified name, or the input unchanged if it is not a
    string, does not look like such a path, or the file does not exist.
    """
    def fingerprint(s):
        nothing_configured = not (webroot or assets_map)
        if nothing_configured or not isinstance(s, str):
            return s
        mapped = assets_map.get(s) if assets_map else None
        if mapped:
            return mapped
        if not webroot:
            return s
        looks_like_asset = s.startswith('/') and re.search(r'\.\w{1,8}$', s)
        if not looks_like_asset:
            return s
        source_path = os.path.join(webroot, s.strip('/'))
        if not os.path.isfile(source_path):
            return s
        with open(source_path, 'rb') as f:
            digest = hashlib.sha1(f.read()).hexdigest()[:12]
        # Turns 'name.ext' into 'name.<digest>.ext'.
        replacement = '.' + digest + '.' + r'\1'
        target_path = re.sub(r'\.(\w+)$', replacement, source_path)
        if not os.path.exists(target_path):
            shutil.copyfile(source_path, target_path)
        return re.sub(r'\.(\w+)$', replacement, s)

    return fingerprint