-
Notifications
You must be signed in to change notification settings - Fork 0
/
audio_parser.py
32 lines (25 loc) · 895 Bytes
/
audio_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from vosk import Model, KaldiRecognizer
import wave
import json
from datetime import datetime
# Default location of the Vosk model; this value is handed to vosk.Model
# by new_audio_model(). It is a relative path (no leading slash).
vosk_model_path = "models/vosk-model-en-us-0.42-gigaspeech"
def new_audio_model(model_path=None):
    """Load and return a Vosk speech-recognition model.

    Args:
        model_path: Path of the Vosk model to load. When omitted (``None``),
            the module-level ``vosk_model_path`` is used, so existing
            zero-argument callers keep their current behavior.

    Returns:
        The ``vosk.Model`` instance constructed from the chosen path.
    """
    if model_path is None:
        model_path = vosk_model_path
    return Model(model_path)
def parse_file(model, out_file):
    """Transcribe a WAV file with Vosk and return the recognized phrases.

    The file is streamed to a ``KaldiRecognizer`` in chunks of 4000 frames.
    Every time the recognizer reports a completed utterance, its JSON result
    is parsed and collected. Progress messages and the elapsed time are
    printed to stdout.

    Args:
        model: A loaded ``vosk.Model`` (see ``new_audio_model``).
        out_file: WAV file to transcribe; passed directly to ``wave.open``.
            NOTE(review): Vosk's own examples require mono 16-bit PCM input;
            this function does not validate the format -- confirm callers
            provide it.

    Returns:
        A list with one parsed JSON result per recognized utterance, in the
        order they were produced. Word-level output is requested through
        ``SetWords(True)``.
    """
    phrases = []
    start_time = datetime.now()
    print("Starting Transcription")
    with wave.open(out_file, 'rb') as wf:
        recognizer = KaldiRecognizer(model, wf.getframerate())
        recognizer.SetWords(True)
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            if recognizer.AcceptWaveform(data):
                phrases.append(json.loads(recognizer.Result()))
        # AcceptWaveform only signals utterances that ended inside the
        # stream; whatever is still buffered once the file is exhausted must
        # be flushed with FinalResult(), otherwise the trailing phrase is
        # silently lost.
        final_result = json.loads(recognizer.FinalResult())
        # Skip an empty flush so callers do not get a blank trailing entry.
        if final_result.get("text"):
            phrases.append(final_result)
    print("Transcription Finished. Elapsed Time:" + str(datetime.now() - start_time))
    return phrases