-
Notifications
You must be signed in to change notification settings - Fork 4
/
python_vad.py
163 lines (129 loc) · 4.44 KB
/
python_vad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
'''
#--- Steve Cox --- 1/10/19
# Copyright (c) Stef van der Struijk
# License: GNU Lesser General Public License
# Modified code to play sound directly from the recorded buffer
# Added code to wait until the sound has finished playing so no echo occurs
# Modification of:
# https://github.com/wiseman/py-webrtcvad (MIT Copyright (c) 2016 John Wiseman)
# https://github.com/wangshub/python-vad (MIT Copyright (c) 2017 wangshub)
Requirements:
+ pyaudio - `pip install pyaudio`
+ py-webrtcvad - `pip install webrtcvad`
'''
import webrtcvad
import collections
import sys
import signal
import pyaudio
from array import array
from struct import pack
import wave
import time
# ---------------------------------------------------------------------------
# Capture configuration. All durations are in milliseconds; sizes derived
# from them are in samples unless the name says otherwise.
# ---------------------------------------------------------------------------
FORMAT = pyaudio.paInt16  # sample format handed to PyAudio when opening the stream
CHANNELS = 1
RATE = 16000  # sample rate passed to PyAudio, the VAD and the pygame mixer
CHUNK_DURATION_MS = 30 # supports 10, 20 and 30 (ms)
PADDING_DURATION_MS = 1500 # 1.5 s of padding (the old "1 sec" note did not match the value)
CHUNK_SIZE = int(RATE * CHUNK_DURATION_MS / 1000) # samples per read: 480 with the values above
CHUNK_BYTES = CHUNK_SIZE * 2 # 16bit = 2 bytes, PCM (not referenced elsewhere in this file)
# Capacity of the chunk deque created by the main loop (50 with the values above).
NUM_PADDING_CHUNKS = int(PADDING_DURATION_MS / CHUNK_DURATION_MS)
#--- Steve Cox
# Length, in chunks, of the sliding window used for start-of-speech detection
# (240 ms -> 8 chunks with the values above).
NUM_WINDOW_CHUNKS = int(240 / CHUNK_DURATION_MS)
# Earlier setting, kept for reference (400 ms window):
#NUM_WINDOW_CHUNKS = int(400 / CHUNK_DURATION_MS)
# The end-of-speech window is twice as long as the start window.
NUM_WINDOW_CHUNKS_END = NUM_WINDOW_CHUNKS * 2
# NOTE(review): START_OFFSET is not referenced anywhere else in this file, and
# the expression multiplies milliseconds by samples-per-second without dividing
# by 1000 - confirm whether it is still needed.
START_OFFSET = int(NUM_WINDOW_CHUNKS * CHUNK_DURATION_MS * 0.5 * RATE)
# Voice-activity detector, constructed with mode 1.
vad = webrtcvad.Vad(1)
#------ Steve Cox
# One-time pygame init. pre_init() is called before mixer.init()/pygame.init()
# so the mixer is created with the capture rate and channel count.
import pygame
pygame.mixer.pre_init(RATE, -16, CHANNELS, 2048) # setup mixer to avoid sound lag
pygame.mixer.init()
pygame.init()
#--------------------------
pa = pyaudio.PyAudio()
# The input stream is opened with start=False; the main loop calls
# start_stream()/stop_stream() around each recording.
stream = pa.open(format=FORMAT,
                 channels=CHANNELS,
                 rate=RATE,
                 input=True,
                 start=False,
                 # input_device_index=2,
                 frames_per_buffer=CHUNK_SIZE)
# Set to True by the main loop when end-of-speech is detected, then reset to
# False before the next recording.
got_a_sentence = False
def normalize(snd_data):
    """Scale samples so the loudest one reaches full 16-bit amplitude.

    Args:
        snd_data: A re-iterable sequence of integer samples (for example an
            ``array('h')`` or a list).

    Returns:
        A new ``array('h')`` in which every sample has been multiplied by
        ``32767 / peak`` (``peak`` being the largest absolute sample value)
        and truncated toward zero. Empty or all-zero input has no peak to
        scale against and is returned as an unscaled copy instead of raising.
    """
    MAXIMUM = 32767  # 16384
    # default=0 covers empty input; a peak of 0 also covers pure silence,
    # which would otherwise divide by zero below.
    peak = max((abs(sample) for sample in snd_data), default=0)
    if peak == 0:
        return array('h', snd_data)
    times = float(MAXIMUM) / peak
    return array('h', (int(sample * times) for sample in snd_data))
# Audio kept from before the start trigger so the first syllable is not
# clipped: the 20 most recent chunks, counting the chunk that fired the trigger.
PRE_ROLL_SAMPLES = CHUNK_SIZE * 20
# Hard cap, in seconds, on one utterance once speech has started.
MAX_UTTERANCE_SECONDS = 10


def _record_utterance():
    """Record from the microphone until one utterance has been captured.

    Reads CHUNK_SIZE-sample chunks from the module-level ``stream`` and runs
    each through ``vad``. Two sliding windows of per-chunk voiced flags drive
    the state machine:

    * start: more than 80% of the last NUM_WINDOW_CHUNKS chunks are voiced;
    * end: more than 90% of the last NUM_WINDOW_CHUNKS_END chunks are
      unvoiced, or MAX_UTTERANCE_SECONDS have passed since the start trigger.

    Progress is echoed to stdout ('1' voiced, '_' unvoiced, ' Open ' and
    ' Close ' at the triggers).

    Returns:
        ``array('h')`` holding up to PRE_ROLL_SAMPLES of pre-trigger audio
        followed by everything recorded until the end trigger.
    """
    start_flags = [0] * NUM_WINDOW_CHUNKS
    start_pos = 0
    end_flags = [0] * NUM_WINDOW_CHUNKS_END
    end_pos = 0
    samples = array('h')
    triggered = False
    speech_started_at = 0.0

    print("* recording: ")
    stream.start_stream()
    try:
        while True:
            chunk = stream.read(CHUNK_SIZE)
            samples.extend(array('h', chunk))

            active = vad.is_speech(chunk, RATE)
            sys.stdout.write('1' if active else '_')

            flag = 1 if active else 0
            start_flags[start_pos] = flag
            start_pos = (start_pos + 1) % NUM_WINDOW_CHUNKS
            end_flags[end_pos] = flag
            end_pos = (end_pos + 1) % NUM_WINDOW_CHUNKS_END

            if not triggered:
                # While idle keep only the pre-roll, so memory stays bounded
                # however long we wait for speech.
                surplus = len(samples) - PRE_ROLL_SAMPLES
                if surplus > 0:
                    del samples[:surplus]
                # start point detection
                if sum(start_flags) > 0.8 * NUM_WINDOW_CHUNKS:
                    sys.stdout.write(' Open ')
                    triggered = True
                    # Time the utterance from speech onset, not from when
                    # listening began; otherwise a long idle period would
                    # close the utterance on the very next chunk.
                    speech_started_at = time.time()
            else:
                # end point detection
                num_unvoiced = NUM_WINDOW_CHUNKS_END - sum(end_flags)
                timed_out = (time.time() - speech_started_at
                             > MAX_UTTERANCE_SECONDS)
                if num_unvoiced > 0.90 * NUM_WINDOW_CHUNKS_END or timed_out:
                    sys.stdout.write(' Close ')
                    sys.stdout.flush()
                    break
            sys.stdout.flush()
    finally:
        # Always pause capture, even if reading from the device failed.
        sys.stdout.write('\n')
        stream.stop_stream()
    print("* done recording")
    return samples


def _play_blocking(samples):
    """Play raw 16-bit samples through the pygame mixer and wait for the end.

    The samples are raw PCM straight from the microphone, so there is no WAV
    header to strip; the buffer is handed to the mixer unchanged. Blocking
    until playback ends keeps the microphone from re-recording the speaker
    output (echo).
    """
    sound = pygame.mixer.Sound(buffer=samples)
    sound.play()
    while pygame.mixer.get_busy():
        # Sleep instead of spinning so the wait does not burn a CPU core.
        time.sleep(0.01)


# Main loop: record one utterance, normalise it, play it back, repeat.
# Ctrl+C ends the loop; the audio devices are released on any exit path.
try:
    while True:
        raw_data = normalize(_record_utterance())
        _play_blocking(raw_data)
except KeyboardInterrupt:
    sys.stdout.write('\n')
finally:
    stream.close()
    pa.terminate()
    pygame.mixer.quit()