diff --git a/java-api-examples/.gitignore b/java-api-examples/.gitignore
index 91c35d7ae..93941e6af 100644
--- a/java-api-examples/.gitignore
+++ b/java-api-examples/.gitignore
@@ -1,3 +1,4 @@
 lib
 hs_err*
 !run-*.sh
+/hotwords_cn.txt
diff --git a/java-api-examples/NonStreamingDecodeFileTransducerHotwords.java b/java-api-examples/NonStreamingDecodeFileTransducerHotwords.java
new file mode 100644
index 000000000..c2996652f
--- /dev/null
+++ b/java-api-examples/NonStreamingDecodeFileTransducerHotwords.java
@@ -0,0 +1,74 @@
+// Copyright 2024 Xiaomi Corporation
+
+// This file shows how to use an offline transducer, i.e., non-streaming transducer,
+// to decode files with hotwords support.
+//
+// See also
+// https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html#modeling-unit-is-cjkchar
+import com.k2fsa.sherpa.onnx.*;
+
+/** Decodes one wave file with an offline transducer and a hotwords file. */
+public class NonStreamingDecodeFileTransducerHotwords {
+  /**
+   * Configures the recognizer, decodes the test wave file and prints the recognized text.
+   *
+   * @param args unused
+   */
+  public static void main(String[] args) {
+    // please refer to
+    // https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html#modeling-unit-is-cjkchar
+    // to download model files
+    String encoder =
+        "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/encoder-epoch-99-avg-1.int8.onnx";
+    String decoder = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/decoder-epoch-99-avg-1.onnx";
+    String joiner = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/joiner-epoch-99-avg-1.onnx";
+    String tokens = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/tokens.txt";
+
+    String waveFilename = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/test_wavs/6.wav";
+
+    WaveReader reader = new WaveReader(waveFilename);
+
+    OfflineTransducerModelConfig transducer =
+        OfflineTransducerModelConfig.builder()
+            .setEncoder(encoder)
+            .setDecoder(decoder)
+            .setJoiner(joiner)
+            .build();
+
+    OfflineModelConfig modelConfig =
+        OfflineModelConfig.builder()
+            .setTransducer(transducer)
+            .setTokens(tokens)
+            .setNumThreads(1)
+            .setDebug(true)
+            .setModelingUnit("cjkchar")
+            .build();
+
+    OfflineRecognizerConfig config =
+        OfflineRecognizerConfig.builder()
+            .setOfflineModelConfig(modelConfig)
+            .setDecodingMethod("modified_beam_search")
+            .setHotwordsFile("./hotwords_cn.txt")
+            .setHotwordsScore(2.0f)
+            .build();
+
+    // Release the recognizer and the stream even if decoding throws.
+    OfflineRecognizer recognizer = new OfflineRecognizer(config);
+    try {
+      OfflineStream stream = recognizer.createStream();
+      try {
+        stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
+
+        recognizer.decode(stream);
+
+        String text = recognizer.getResult(stream).getText();
+
+        System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
+      } finally {
+        stream.release();
+      }
+    } finally {
+      recognizer.release();
+    }
+  }
+}
diff --git a/java-api-examples/run-non-streaming-decode-file-transducer-hotwords.sh b/java-api-examples/run-non-streaming-decode-file-transducer-hotwords.sh
new file mode 100755
index 000000000..14c54848c
--- /dev/null
+++ b/java-api-examples/run-non-streaming-decode-file-transducer-hotwords.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+if [ ! -f ./sherpa-onnx-conformer-zh-stateless2-2023-05-23/tokens.txt ]; then
+  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
+  tar xvf sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
+  rm sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
+fi
+
+if [ ! -f hotwords_cn.txt ]; then
+  cat > hotwords_cn.txt <