forked from aalto-speech/speaker-diarization
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvad-performance.py
224 lines (212 loc) · 9.51 KB
/
vad-performance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/usr/bin/env python2
import argparse
import sys
import re
def parse_recipe(rfile):
    """Parse a recipe into per-audio-file lists of speech turns.

    Each usable line must contain ``audio=<name>``, ``start-time=<number>``
    and ``end-time=<number>``.  Lines missing any of the three are reported
    on stdout and skipped.

    Consecutive lines naming the same audio file are grouped together; a
    file that reappears after a different one starts a new group.

    Args:
        rfile: iterable of recipe lines (an open text file works).

    Returns:
        A ``(recipe, total_entries)`` tuple.  ``recipe`` is a list of
        ``(audio_name, [(start, end), ...])`` tuples in input order and
        ``total_entries`` is the number of lines that were parsed.  A recipe
        without any usable line yields ``([], 0)``.
    """
    audio_file = re.compile(r'audio=(\S+)')
    # The decimal point is escaped so it no longer matches arbitrary
    # characters; the fractional part is optional so whole seconds parse too.
    start_time = re.compile(r'start-time=(\d+(?:\.\d+)?)')
    end_time = re.compile(r'end-time=(\d+(?:\.\d+)?)')

    recipe = []
    segments = []
    this_file = ''
    total_entries = 0
    for line in rfile:
        audio_match = audio_file.search(line)
        start_match = start_time.search(line)
        end_match = end_time.search(line)
        if audio_match is None or start_match is None or end_match is None:
            print('Recipe line without recognizable data:')
            print(line)
            continue

        audio = audio_match.group(1)
        if audio != this_file:
            if this_file != '':
                # Close the group under the name of the file it belongs to,
                # not the name of the file that is just starting.
                recipe.append((this_file, segments))
                segments = []
            this_file = audio
        segments.append((float(start_match.group(1)),
                         float(end_match.group(1))))
        total_entries += 1

    # Flush the last group; skipped when nothing was parsed at all.
    if this_file != '':
        recipe.append((this_file, segments))
    return recipe, total_entries
def _emit(outf, *fields):
    """Write *fields* on one line of *outf*, separated by single spaces."""
    outf.write(' '.join(str(field) for field in fields) + '\n')


class _ErrorTally(object):
    """Running totals of both error kinds, with optional per-interval log."""

    def __init__(self, outf, show_ns, show_sn):
        self.outf = outf
        self.show_ns = show_ns
        self.show_sn = show_sn
        self.noise_as_speech = 0.0
        self.speech_as_noise = 0.0

    def noise(self, label, start, end, duration):
        """Count proposed speech that has no baseline speech behind it."""
        if duration <= 0.0:
            # Empty or inverted remainders carry no error time.
            return
        self.noise_as_speech += duration
        if self.show_ns:
            _emit(self.outf, label + 'Noise as speech, from:', start, 'to:',
                  end, 'missed time:', duration,
                  'total noise as speech:', self.noise_as_speech)

    def missed(self, label, start, end, duration):
        """Count baseline speech that the proposed recipe did not cover."""
        if duration <= 0.0:
            return
        self.speech_as_noise += duration
        if self.show_sn:
            _emit(self.outf, label + 'Speech as Noise, from:', start, 'to:',
                  end, 'missed time:', duration,
                  'total missed:', self.speech_as_noise)


def _sweep_file(base_segments, prop_segments, threshold, tally):
    """Walk one file's baseline and proposed turns in parallel.

    Every step compares the current baseline turn with the current proposed
    turn, records any error in *tally* and advances whichever side has been
    consumed.  When a turn outlasts its counterpart, only its remaining part
    is carried into the next step.

    Assumes both turn lists are in chronological order; this is not checked.
    """
    b_iter = iter(base_segments)
    p_iter = iter(prop_segments)
    base = next(b_iter, None)
    prop = next(p_iter, None)
    margin = 2.0 * threshold

    while base is not None and prop is not None:
        bstart, bend = base
        pstart, pend = prop
        if pstart + threshold < bstart - threshold:
            # Proposed turn starts clearly before the baseline turn.
            if pend + threshold < bstart - threshold:
                # ...and also ends before it: completely out.
                tally.noise('1.- ', pstart, pend, pend - pstart)
                prop = next(p_iter, None)
            else:
                if bstart - pstart > margin:
                    tally.noise('2.- ', pstart, bstart,
                                bstart - pstart - margin)
                if pend - threshold > bend + threshold:
                    # Proposed outlasts baseline: keep its tail and fetch
                    # the next baseline turn.
                    prop = (bend, pend)
                    base = next(b_iter, None)
                else:
                    base = (pend, bend)
                    prop = next(p_iter, None)
        else:
            # Baseline turn starts first (or within the tolerance).
            if bend + threshold < pstart - threshold:
                # Baseline turn ends before the proposed one starts.
                tally.missed('3.- ', bstart, bend, bend - bstart)
                base = next(b_iter, None)
            else:
                if pstart - bstart > margin:
                    tally.missed('4.- ', bstart, pstart,
                                 pstart - bstart - margin)
                if bend - threshold > pend + threshold:
                    # Baseline outlasts proposed: keep its tail and fetch
                    # the next proposed turn.
                    base = (pend, bend)
                    prop = next(p_iter, None)
                else:
                    prop = (bend, pend)
                    base = next(b_iter, None)

    # One side ran out.  The pending turn (or remainder) on the other side
    # and everything after it has nothing left to match, so it is all error.
    while prop is not None:
        tally.noise('', prop[0], prop[1], prop[1] - prop[0])
        prop = next(p_iter, None)
    while base is not None:
        tally.missed('', base[0], base[1], base[1] - base[0])
        base = next(b_iter, None)


def benchmark(baseline, proposed, threshold, outf, show_ns=None, show_sn=None):
    """Benchmark a proposed VAD recipe against a baseline recipe.

    Args:
        baseline: list of ``(audio_name, [(start, end), ...])`` entries, the
            shape returned by ``parse_recipe``.
        proposed: same shape.  Entries are paired with *baseline* by
            position; the audio names are not compared.
        threshold: tolerance in seconds used in the boundary comparisons.
        outf: writable text stream that receives the per-interval log and
            the final report.  ``None`` means ``sys.stdout``.
        show_ns: log every noise-as-speech interval.  When left as ``None``
            the ``ns`` attribute of a module-level ``args`` object is used
            if one exists, otherwise ``False``.
        show_sn: same, for speech-as-noise intervals and ``args.sn``.

    Raises:
        IndexError: if *proposed* has fewer entries than *baseline*.  Extra
            entries in *proposed* are ignored.
    """
    if outf is None:
        outf = sys.stdout
    # Keep working when imported as a module, where no CLI `args` exists.
    cli_args = globals().get('args')
    if show_ns is None:
        show_ns = bool(getattr(cli_args, 'ns', False))
    if show_sn is None:
        show_sn = bool(getattr(cli_args, 'sn', False))

    tally = _ErrorTally(outf, show_ns, show_sn)
    baseline_total_time = 0.0
    baseline_turns = 0
    proposed_total_time = 0.0
    proposed_turns = 0

    for index, base_entry in enumerate(baseline):
        base_segments = list(base_entry[1])
        prop_segments = list(proposed[index][1])

        # Totals are taken over every turn up front, so turns left over
        # after the sweep are included as well.
        for start, end in base_segments:
            baseline_turns += 1
            baseline_total_time += end - start
        for start, end in prop_segments:
            proposed_turns += 1
            proposed_total_time += end - start

        _sweep_file(base_segments, prop_segments, threshold, tally)

    print_results(tally.speech_as_noise, tally.noise_as_speech,
                  baseline_total_time, baseline_turns,
                  proposed_total_time, proposed_turns, outf)


def print_results(missed, noise, baseline_total, baseline_turns,
                  proposed_total, proposed_turns, outf=None):
    """Write the benchmark summary to *outf* (``sys.stdout`` when ``None``).

    Args:
        missed: total speech-as-noise time.
        noise: total noise-as-speech time.
        baseline_total: summed duration of all baseline turns.
        baseline_turns: number of baseline turns.
        proposed_total: summed duration of all proposed turns.
        proposed_turns: number of proposed turns.
        outf: writable text stream for the report.

    Averages are reported as 0.0 when there are no turns, and the error rate
    as ``n/a`` when there is no baseline speech, instead of dividing by zero.
    """
    if outf is None:
        outf = sys.stdout

    number = '{0:<36} {1:^5.3f}'
    plain = '{0:<36} {1}'

    baseline_average = baseline_total / baseline_turns if baseline_turns else 0.0
    proposed_average = proposed_total / proposed_turns if proposed_turns else 0.0
    incorrect = noise + missed
    correct = baseline_total - incorrect
    error_rate = incorrect / baseline_total if baseline_total else 'n/a'

    lines = [
        '{0:>48}'.format('Benchmark results:'),
        '{0:>48}'.format('------------------'),
        number.format('Speech as Noise time:', missed),
        number.format('Noise as Speech time:', noise),
        number.format('Total baseline speech time:', baseline_total),
        plain.format('Total baseline turns:', baseline_turns),
        number.format('Average baseline turn time:', baseline_average),
        number.format('Total proposed speech time:', proposed_total),
        plain.format('Total proposed turns:', proposed_turns),
        number.format('Average proposed turn time:', proposed_average),
        number.format('Incorrect time:', incorrect),
        number.format('Correct time:', correct),
        plain.format('VAD Error Rate:', error_rate),
    ]
    outf.write('\n'.join(lines) + '\n')
if __name__ == '__main__':
    # Command-line entry point: parse both recipes, then benchmark the
    # proposed one against the baseline.
    parser = argparse.ArgumentParser(
        description='Rate a Voice Activity Detection recipe against a '
                    'baseline recipe to ensure all real speech parts are '
                    'inside VAD detected speech parts.')
    parser.add_argument('baseline', type=str,
                        help='Especifies the baseline recipe file.')
    parser.add_argument('proposed', type=str,
                        help='Especifies the proposed recipe file, to benchmark.')
    # None (instead of the sys.stdout object) marks "no file requested".
    parser.add_argument('-o', dest='outfile', type=str, default=None,
                        help='Especifies an output file, default stdout.')
    parser.add_argument('-t', dest='threshold', type=float, default=0.25,
                        help='Especifies threshold to determine when a time '
                             'is incorrect, defaults to 0.25 seconds '
                             'before-after.')
    parser.add_argument('-sn', action='store_true',
                        help='If set, shows each speech as noise time interval')
    parser.add_argument('-ns', action='store_true',
                        help='If set, shows each noise as speech time interval')
    # `args` must remain a module-level name: benchmark() looks it up to
    # honour the -sn / -ns switches.
    args = parser.parse_args()

    # Process arguments
    print('Reading baseline recipe from: {0}'.format(args.baseline))
    with open(args.baseline, 'r') as recfile:
        parsed_baseline, totalb = parse_recipe(recfile)
    print('Reading proposed recipe from: {0}'.format(args.proposed))
    with open(args.proposed, 'r') as recfile:
        parsed_proposed, totalp = parse_recipe(recfile)

    # benchmark() pairs audio files by position, so both recipes have to
    # describe the same number of files.
    if len(parsed_baseline) != len(parsed_proposed):
        sys.exit('Recipes do not match: baseline has {0} audio files '
                 '({1} turns), proposed has {2} audio files ({3} turns)'
                 .format(len(parsed_baseline), totalb,
                         len(parsed_proposed), totalp))

    if args.outfile is None:
        print('Writing output to: stdout')
    else:
        print('Writing output to: {0}'.format(args.outfile))
    print('Threshold: {0}'.format(args.threshold))
    threshold = args.threshold

    # Do the real work
    if args.outfile is None:
        benchmark(parsed_baseline, parsed_proposed, threshold, sys.stdout)
    else:
        with open(args.outfile, 'w') as outf:
            benchmark(parsed_baseline, parsed_proposed, threshold, outf)