-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataLabeling_ali_EN.py
73 lines (60 loc) · 2.35 KB
/
DataLabeling_ali_EN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#auto_DataLabeling_EN.py
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import os
'''
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='./Model/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
)
rec_result = inference_pipeline(audio_in='ge_1570_2.wav')
print(rec_result)
# {'text': '欢迎大家来体验达摩院推出的语音识别模型'}
'''
# Directory whose top-level files are the raw audio clips to transcribe.
parent_dir = "./raw/"
# Local directory of the ASR model that is handed to `pipeline(model=...)`.
local_dir_root = "./Model/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline"
# NOTE(review): not referenced anywhere in the visible script; kept in case
# other code relies on it.
target_sr = 44100


def load_completed_paths(list_path):
    """Return the audio paths recorded in a '|'-separated annotation list.

    Each non-blank line is expected to start with the audio path, followed by
    '|' and further fields. Only the first field is read, so the number of
    remaining fields (and any '|' inside the transcribed text) is irrelevant.

    Args:
        list_path: Path of the list file to read.

    Returns:
        The first field of every non-blank line, in file order. An empty
        list when the file does not exist.
    """
    if not os.path.exists(list_path):
        return []
    with open(list_path, 'r', encoding='utf-8') as f:
        return [
            line.strip().split('|', 1)[0]
            for line in f
            if line.strip()
        ]


# Paths that were already transcribed in an earlier run.  Besides the legacy
# "transcribe.txt", the two list files this script appends to are consulted,
# so that an interrupted run can actually resume instead of redoing (and
# duplicating) every entry.
complete_list = []
for _list_path in ("./transcribe.txt", "./fast_vits.list", "./esd.list"):
    complete_list.extend(load_completed_paths(_list_path))

# os.walk() yields nothing for a missing directory; fall back to an empty
# file list instead of failing with an opaque IndexError.
if not os.path.isdir(parent_dir):
    print(f"{parent_dir} does not exist, nothing to transcribe.\n")
filelist = next(os.walk(parent_dir), (parent_dir, [], []))[2]
# Build the speech-recognition pipeline once, before the loop, and reuse it
# for every clip.
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model=local_dir_root,
)

# Language marker written into both annotation files.  The recognizer loaded
# from `local_dir_root` is the English ("-en-") model, so the text is tagged
# as English; the opening and closing markers must also be identical.
# NOTE(review): confirm "EN" is the tag the downstream training code expects.
lang_tag = "EN"

# Set copy of the resume list for O(1) membership tests inside the loop.
completed_paths = set(complete_list)

for file in filelist:
    if not file.endswith('wav'):
        print(f"{file} not supported, ignoring...\n")
        continue
    print(f"transcribing {parent_dir + file}...\n")

    # The speaker name is the part of the file name before the first "_".
    # os.path.splitext() removes only the extension; str.rstrip(".wav") would
    # strip any trailing run of the characters '.', 'w', 'a', 'v'
    # (e.g. "eva.wav" -> "e").
    character_name = os.path.splitext(file)[0].split("_")[0]

    # Paths under which the clip is referenced in the two annotation files.
    savepth1 = "./dataset/" + character_name + "/" + file
    savepth2 = "./data/" + character_name + "/" + "wavs" + "/" + file
    if savepth1 in completed_paths or savepth2 in completed_paths:
        print(f'{file} is already done, skip!')
        continue

    rec_result = inference_pipeline(audio_in=parent_dir + file)
    if 'text' not in rec_result:
        print("Text is not recognized,ignoring...\n")
        continue

    # Each annotation is a single line, so drop newlines from the text
    # before writing it to either file.
    text = rec_result['text'].replace("\n", "")
    line1 = f"{savepth1}|{character_name}|[{lang_tag}]{text}[{lang_tag}]\n"
    line2 = f"{savepth2}|{character_name}|{lang_tag}|{text}\n"

    # Append and close after every clip so finished work is on disk even if
    # a later clip makes the run fail.
    with open("./fast_vits.list", 'a', encoding='utf-8') as f:
        f.write(line1)
    with open("./esd.list", 'a', encoding='utf-8') as f:
        f.write(line2)
    print(rec_result)
print("Done!\n")