-
Notifications
You must be signed in to change notification settings - Fork 4
/
bibliography.py
84 lines (72 loc) · 2.39 KB
/
bibliography.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python3
import re
import sys
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Title of the wiki page holding the bibliography (the URL quoted in START
# points at the same page).  Not used in this module itself; presumably read
# by the caller that fetches the page -- TODO confirm.
BIBLIO_PAGE = "Bibliographische Verweise"
# Relative path of the generated XML file.  Not used in this module itself;
# presumably the caller opens it and passes the handle to wiki_to_xml() --
# TODO confirm.
TARGET = "xslt/bibliography.xml"
# A line starting with exactly one asterisk followed (after optional
# whitespace) by a faust://bibliography/ URI; group 1 is the URI up to the
# next whitespace character.
URI = re.compile(r'^\*\s*(faust://bibliography/\S*)')
# A line starting with two asterisks; group 1 is the rest of the line (the
# short citation).
# NOTE(review): nothing excludes a third asterisk, so this also matches
# "***" lines (group 1 then starts with "*").  wiki_to_xml() only applies it
# to the line directly after a URI line.
SHORT = re.compile(r'^\*{2}\s*(.*)')
# A line starting with three asterisks; group 1 is the rest of the line (the
# full reference).
LONG = re.compile(r'^\*{3}\s*(.*)')
# Template for one bibliography entry.  Filled with str.format_map() using
# the keys "uri", "short" and "full".
ENTRY = """
<f:bib uri="{uri}">
<f:citation>{short}</f:citation>
<f:reference>{full}</f:reference>
</f:bib>
"""
# Document head: XML declaration, a "generated file" notice, the schema
# processing instruction and the opening root element with its namespaces.
START = """<?xml version="1.0" encoding="utf-8"?>
<!-- This file has been generated from:
https://faustedition.uni-wuerzburg.de/wiki/index.php/Bibliographische_Verweise
Please edit there
-->
<?xml-model href="bibliography.rnc"?>
<f:bibliography
xmlns:f="http://www.faustedition.net/ns"
xmlns="http://www.w3.org/1999/xhtml">
"""
# Document tail: closes the root element opened in START.
STOP = """
</f:bibliography>
"""
def cleanup(lines):
    """Turn lines of wiki markup into XML-safe lines with inline XML markup.

    Each input line is processed independently, in this order:

    1. ``<small>...</small>`` spans and ``<!-- ... -->`` comments are
       dropped (only when they open and close on the same line).
    2. ``&nbsp;`` entities are replaced by a plain space, because the entity
       is not predefined in XML.
    3. Every remaining ``&`` and ``<`` is escaped as ``&amp;`` / ``&lt;`` so
       that text taken from the wiki cannot break the generated XML.
    4. Wiki bold (``'''x'''``), italics (``''x''``) and external links
       (``[http://... label]``) are converted to ``<strong>``, ``<emph>``
       and ``<a href="...">`` elements.

    Escaping must happen after step 1 (which still needs the literal ``<``)
    and before step 4 (whose tags must stay unescaped).

    Args:
        lines: Iterable of strings, one wiki source line each.

    Yields:
        The converted line for every input line, in input order.
    """
    url = re.compile(r"\[(https?://\S+)\s+([^]]+)\]")
    small = re.compile(r"<small>.*?</small>")
    comment = re.compile(r"<!--.*?-->")
    # Bold must be handled before italics: ''' would otherwise be consumed
    # by the two-quote italics pattern.
    strong = re.compile(r"'''(.*?)'''")
    emph = re.compile(r"''(.*?)''")
    for line in lines:
        line = small.sub('', line)
        line = comment.sub('', line)
        # Resolve the entity before '&' is escaped, otherwise it would end
        # up as the literal text "&amp;nbsp;".
        line = line.replace('&nbsp;', ' ')
        # '&' first, so the ampersand introduced by '&lt;' is not re-escaped.
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = strong.sub(r'<strong>\1</strong>', line)
        line = emph.sub(r'<emph>\1</emph>', line)
        line = url.sub(r'<a href="\1">\2</a>', line)
        yield line
def wiki_to_xml(lines, outfile=sys.stdout):
    """Write the bibliography found in wiki source lines as XML to *outfile*.

    The input is first passed through ``cleanup()``.  An entry consists of
    three consecutive lines: a ``*`` line carrying a ``faust://bibliography/``
    URI, a ``**`` line with the short citation and a ``***`` line with the
    full reference.  Groups that are not complete in this form are skipped
    without a message; scanning resumes at the line that did not fit.

    The output is ``START``, one ``ENTRY`` per complete group, then ``STOP``.
    The number of entries written is logged at INFO level.

    Args:
        lines: Iterable of wiki source lines.
        outfile: Object with a ``write(str)`` method; defaults to stdout.
    """
    source = iter(cleanup(lines))
    written = 0
    try:
        outfile.write(START)
        current = next(source)
        # Runs until the source is exhausted; the StopIteration raised by
        # next() is what ends the loop.
        while True:
            uri_match = URI.match(current)
            # Whether or not this was a URI line, the next thing to look at
            # is the following line.
            current = next(source)
            if not uri_match:
                continue

            short_match = SHORT.match(current)
            if not short_match:
                # Re-examine this line as a possible URI line.
                continue

            current = next(source)
            long_match = LONG.match(current)
            if not long_match:
                # Re-examine this line as a possible URI line.
                continue

            fields = {
                "uri": uri_match.group(1),
                "short": short_match.group(1),
                "full": long_match.group(1),
            }
            outfile.write(ENTRY.format_map(fields))
            written += 1
            current = next(source)
    except StopIteration:
        outfile.write(STOP)
        logger.info("Wrote %d bibliography entries to %s", written, outfile)
# Public API of this module.  __all__ has to list attribute *names* as
# strings; putting the objects themselves here makes a star-import of this
# module fail.
__all__ = ["BIBLIO_PAGE", "TARGET", "wiki_to_xml"]