-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathCrunchyRoll_XmlToSrt.py
52 lines (34 loc) · 1.01 KB
/
CrunchyRoll_XmlToSrt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import re
import sys
from bs4 import BeautifulSoup
def toSrt(xml_string):
    """Convert subtitle XML into SubRip (SRT) text.

    Every ``event`` element found in *xml_string* becomes one SRT cue,
    numbered from 1 in document order. The cue's time range is taken from
    the element's ``start`` and ``end`` attributes (each passed through
    ``formatTime``) and the cue body from its ``text`` attribute.

    Args:
        xml_string: The XML document to convert.

    Returns:
        The SRT document as one string; an empty string when the input
        contains no ``event`` elements.

    Raises:
        KeyError: If an ``event`` element lacks a ``start``, ``end`` or
            ``text`` attribute.
    """
    soup = BeautifulSoup(xml_string, "lxml", from_encoding="utf8")

    # Collect the cues and join once at the end instead of growing a string
    # with += inside the loop.
    cues = []
    for number, event in enumerate(soup.find_all("event"), start=1):
        start = formatTime(event['start'])
        end = formatTime(event['end'])
        # An SRT cue is: index line, time-range line, text, blank line.
        cues.append("%d\n%s --> %s\n%s\n\n" % (number, start, end, event['text']))
    return "".join(cues)
def formatTime(time):
    """Rewrite a ``<clock>.<fraction>`` timestamp as ``<clock>,<millis>``.

    The part before the first "." is kept unchanged. The part between the
    first and second "." (if any) is right-padded with zeros or truncated to
    exactly three digits, and the two are joined with the comma that SRT
    uses as its decimal separator.

    Args:
        time: Timestamp string, e.g. ``"0:00:01.5"``. A timestamp without a
            fractional part is accepted and treated as zero milliseconds.

    Returns:
        The reformatted timestamp, e.g. ``"0:00:01,500"``.
    """
    pieces = time.split(".")
    clock = pieces[0]
    # A timestamp with no "." has no fractional part; previously this raised
    # IndexError, now it is treated as ",000".
    fraction = pieces[1] if len(pieces) > 1 else ""
    millis = fraction.ljust(3, "0")[:3]
    return "%s,%s" % (clock, millis)
def main():
    """Command-line entry point.

    Reads the XML file named by the first command-line argument, converts it
    with ``toSrt`` and prints the resulting SRT text to standard output.
    Exits with a usage message on stderr when no file argument is given.
    """
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: %s <subtitles.xml>" % sys.argv[0])

    # The context manager closes the file even if reading fails.
    with open(sys.argv[1], "r") as xml_file:
        xml_string = xml_file.read()

    print(toSrt(xml_string))


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()