From 84bf6aa284d605214997593211a47bc2c06d61ea Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 7 Aug 2024 18:32:04 +0800 Subject: [PATCH] Fix python two pass ASR examples --- .../two-pass-speech-recognition-from-microphone.py | 13 +++++++++---- sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python-api-examples/two-pass-speech-recognition-from-microphone.py b/python-api-examples/two-pass-speech-recognition-from-microphone.py index d83359e06..aa2245c5c 100755 --- a/python-api-examples/two-pass-speech-recognition-from-microphone.py +++ b/python-api-examples/two-pass-speech-recognition-from-microphone.py @@ -335,11 +335,10 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer: def run_second_pass( recognizer: sherpa_onnx.OfflineRecognizer, - sample_buffers: List[np.ndarray], + samples: np.ndarray, sample_rate: int, ): stream = recognizer.create_stream() - samples = np.concatenate(sample_buffers) stream.accept_waveform(sample_rate, samples) recognizer.decode_stream(stream) @@ -407,14 +406,20 @@ def main(): if is_endpoint: if result: + samples = np.concatenate(sample_buffers) + # There are internal sample buffers inside the streaming + # feature extractor, so we cannot send all samples to + # the 2nd pass. Here 8000 is just an empirical value + # that should work for most streaming models in sherpa-onnx + sample_buffers = [samples[-8000:]] + samples = samples[:-8000] result = run_second_pass( recognizer=second_recognizer, - sample_buffers=sample_buffers, + samples=samples, sample_rate=sample_rate, ) result = result.lower().strip() - sample_buffers = [] print( "\r{}:{}".format(segment_id, " " * len(last_result)), end="", diff --git a/sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc b/sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc index aef469e5d..ea83cfaaf 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc @@ -18,8 +18,8 @@ The input text can contain English words. Usage: -Please download the model from: -https://huggingface.co/frankyoujian/Edge-Punct-Casing/resolve/main/sherpa-onnx-cnn-bilstm-unigram-bpe-en.7z +Please download the model from: +https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 ./bin/Release/sherpa-onnx-online-punctuation \ --cnn-bilstm=/path/to/model.onnx \