Skip to content

Commit

Permalink
auto scroll and clickable messages
Browse files Browse the repository at this point in the history
  • Loading branch information
colobas committed Feb 7, 2024
1 parent 2f28057 commit 1ed7a01
Show file tree
Hide file tree
Showing 11 changed files with 1,683 additions and 6,447 deletions.
16 changes: 14 additions & 2 deletions process_debates.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
import webvtt


def convert_to_seconds(time):
    """
    Convert a WebVTT-style timestamp to a number of seconds.

    Accepts the full ``HH:MM:SS.mmm`` form (e.g. ``00:00:01.216``) and also
    the short ``MM:SS.mmm`` form, which is treated as having zero hours.

    Args:
        time: timestamp string with fields separated by ``:``.

    Returns:
        The timestamp expressed in seconds, as a float.

    Raises:
        ValueError: if the string does not have two or three ``:``-separated
            fields, or if a field is not numeric.
    """
    fields = time.split(":")

    # The hour field is optional in WebVTT timestamps; normalise the short
    # form so the arithmetic below only has to deal with one shape.
    if len(fields) == 2:
        fields.insert(0, "0")

    if len(fields) != 3:
        raise ValueError(
            f"invalid timestamp {time!r}: expected HH:MM:SS.mmm or MM:SS.mmm"
        )

    h, m, s = fields
    # Hours and minutes are whole numbers; the seconds field carries the
    # fractional (millisecond) part.
    return int(h) * 3600 + int(m) * 60 + float(s)


def webvtt_to_json(vtt_path, output_path):
# adapted from: https://github.com/simonw/webvtt-to-json/blob/main/webvtt_to_json/cli.py
with open(vtt_path, "r") as vtt_file:
Expand All @@ -35,8 +44,8 @@ def webvtt_to_json(vtt_path, output_path):
for d in dicts:
line = "\n".join(d.pop("lines"))
d["speaker"], d["text"] = [_.strip() for _ in line.split(":", 1)]
d["time"] = d.pop('end')
d.pop('start')
d["time"] = convert_to_seconds(d.pop('start'))
d.pop('end')

with open(output_path, "w") as output:
json.dump(dicts, output, indent=4, ensure_ascii=False)
Expand Down Expand Up @@ -176,6 +185,9 @@ def transcribe_audio(audio_path, output_root):
# convert the vtt to json
webvtt_to_json(f"{output_root}/transcriptions/{name}.vtt", f"{output_root}/transcriptions/{name}.json")

# remove the vtt
(output_root / f"transcriptions/{name}.vtt").unlink()


def process_debate(*, title, url, output_root):
"""
Expand Down
Loading

0 comments on commit 1ed7a01

Please sign in to comment.