Skip to content

Commit

Permalink
Add Kotlin and Java API for Moonshine models (#1474)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Oct 26, 2024
1 parent 669f5ef commit bd4b223
Show file tree
Hide file tree
Showing 15 changed files with 480 additions and 25 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/apk-vad-asr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
total: ["5"]
index: ["0", "1", "2", "3", "4"]
total: ["10"]
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -165,6 +165,7 @@ jobs:
git clone https://csukuangfj:[email protected]/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
du -h -d1 .
git fetch
git pull
git merge -m "merge remote" --ff origin main
Expand Down
47 changes: 25 additions & 22 deletions .github/workflows/run-java-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,31 @@ jobs:
make -j4
ls -lh lib
- name: Run java test (Non-Streaming ASR)
shell: bash
run: |
cd ./java-api-examples
./run-non-streaming-decode-file-moonshine.sh
rm -rf sherpa-onnx-moonshine-*
./run-non-streaming-decode-file-sense-voice.sh
rm -rf sherpa-onnx-sense-voice-*
./run-inverse-text-normalization-paraformer.sh
./run-non-streaming-decode-file-paraformer.sh
rm -rf sherpa-onnx-paraformer-zh-*
./run-non-streaming-decode-file-transducer.sh
rm -rf sherpa-onnx-zipformer-*
./run-non-streaming-decode-file-whisper.sh
rm -rf sherpa-onnx-whisper-*
./run-non-streaming-decode-file-nemo.sh
rm -rf sherpa-onnx-nemo-*
- name: Run java test (speaker diarization)
shell: bash
run: |
Expand Down Expand Up @@ -206,28 +231,6 @@ jobs:
./run-streaming-decode-file-transducer.sh
rm -rf sherpa-onnx-streaming-*
- name: Run java test (Non-Streaming ASR)
shell: bash
run: |
cd ./java-api-examples
./run-non-streaming-decode-file-sense-voice.sh
rm -rf sherpa-onnx-sense-voice-*
./run-inverse-text-normalization-paraformer.sh
./run-non-streaming-decode-file-paraformer.sh
rm -rf sherpa-onnx-paraformer-zh-*
./run-non-streaming-decode-file-transducer.sh
rm -rf sherpa-onnx-zipformer-*
./run-non-streaming-decode-file-whisper.sh
rm -rf sherpa-onnx-whisper-*
./run-non-streaming-decode-file-nemo.sh
rm -rf sherpa-onnx-nemo-*
- name: Run java test (Non-Streaming TTS)
shell: bash
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,5 @@ sherpa-onnx-online-punct-en-2024-08-06
*.mp4
*.mp3
sherpa-onnx-pyannote-segmentation-3-0
sherpa-onnx-moonshine-tiny-en-int8
sherpa-onnx-moonshine-base-en-int8
60 changes: 60 additions & 0 deletions java-api-examples/NonStreamingDecodeFileMoonshine.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline (i.e., non-streaming)
// Moonshine model to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileMoonshine {
  /**
   * Decodes a single test wave file with the int8 Moonshine tiny model and
   * prints the file name together with the recognized text.
   *
   * <p>The model files must already exist on disk; see
   * https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
   * for how to download them.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    String waveFilename = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";

    // Load the audio before any model configuration is built.
    WaveReader wave = new WaveReader(waveFilename);

    // The four ONNX files that make up a Moonshine model.
    OfflineMoonshineModelConfig moonshineConfig =
        OfflineMoonshineModelConfig.builder()
            .setPreprocessor("./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx")
            .setEncoder("./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx")
            .setUncachedDecoder("./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx")
            .setCachedDecoder("./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx")
            .build();

    OfflineModelConfig offlineModelConfig =
        OfflineModelConfig.builder()
            .setMoonshine(moonshineConfig)
            .setTokens("./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt")
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig recognizerConfig =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(offlineModelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer asr = new OfflineRecognizer(recognizerConfig);
    OfflineStream input = asr.createStream();

    // Feed the whole file in one go, then decode it.
    input.acceptWaveform(wave.getSamples(), wave.getSampleRate());
    asr.decode(input);

    String transcript = asr.getResult(input).getText();
    System.out.printf("filename:%s\nresult:%s\n", waveFilename, transcript);

    // Explicitly release the stream first, then the recognizer.
    input.release();
    asr.release();
  }
}
152 changes: 152 additions & 0 deletions java-api-examples/VadFromMicWithNonStreamingMoonshine.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model together with a non-streaming
// Moonshine tiny model to recognize speech captured from a microphone.

import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class VadFromMicNonStreamingMoonshine {
  // Sample rate (Hz) used for the microphone, the VAD and the recognizer.
  private static final int sampleRate = 16000;
  // Number of samples read from the microphone and passed to the VAD per iteration.
  private static final int windowSize = 512;

  /**
   * Creates a silero VAD instance.
   *
   * <p>Requires ./silero_vad.onnx, which can be downloaded from
   * https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
   *
   * @return a new {@code Vad}; the caller must call {@code release()} on it
   */
  public static Vad createVad() {
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(windowSize)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(sampleRate)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  /**
   * Creates a non-streaming recognizer backed by the int8 Moonshine tiny model.
   *
   * <p>Please refer to https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
   * to download the model files.
   *
   * @return a new {@code OfflineRecognizer}; the caller must call {@code release()} on it
   */
  public static OfflineRecognizer createOfflineRecognizer() {
    String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
    String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
    String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
    String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";

    String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

    OfflineMoonshineModelConfig moonshine =
        OfflineMoonshineModelConfig.builder()
            .setPreprocessor(preprocessor)
            .setEncoder(encoder)
            .setUncachedDecoder(uncachedDecoder)
            .setCachedDecoder(cachedDecoder)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setMoonshine(moonshine)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  /**
   * Converts signed 16-bit little-endian PCM bytes to floats by dividing by 32768.
   *
   * <p>Only the first {@code numBytes} bytes of {@code bytes} are decoded. Any
   * remaining entries of {@code out} are set to 0 so that a short read never
   * re-feeds stale audio left over from a previous read.
   *
   * @param bytes raw PCM data, two bytes per sample, low byte first
   * @param numBytes number of valid bytes in {@code bytes}
   * @param out destination buffer for the decoded samples
   */
  private static void pcm16LeToFloat(byte[] bytes, int numBytes, float[] out) {
    int numSamples = Math.min(numBytes / 2, out.length);
    for (int i = 0; i < numSamples; ++i) {
      // The low byte carries no sign: mask it, otherwise Java sign-extends
      // values >= 0x80 and the decoded sample ends up 256 too low.
      int low = bytes[2 * i] & 0xff;
      // The high byte carries the sign, so its sign extension is intended.
      int high = bytes[2 * i + 1];
      out[i] = ((high << 8) | low) / 32768.0f;
    }
    for (int i = numSamples; i < out.length; ++i) {
      out[i] = 0.0f;
    }
  }

  /**
   * Reads audio from the default microphone, splits it into speech segments
   * with the VAD and prints the recognition result of every segment. The loop
   * stops once a recognized segment contains the text "exit the program".
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      vad.release();
      recognizer.release();
      return;
    }

    try {
      boolean printed = false;
      byte[] buffer = new byte[windowSize * 2]; // 2 bytes per 16-bit sample
      float[] samples = new float[windowSize];

      System.out.println("Started. Please speak");
      boolean running = true;
      while (targetDataLine.isOpen() && running) {
        int n = targetDataLine.read(buffer, 0, buffer.length);
        if (n <= 0) {
          System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
          continue;
        }
        pcm16LeToFloat(buffer, n, samples);

        vad.acceptWaveform(samples);

        // Print "Detected speech" only once per stretch of detected speech.
        if (vad.isSpeechDetected() && !printed) {
          System.out.println("Detected speech");
          printed = true;
        }

        if (!vad.isSpeechDetected()) {
          printed = false;
        }

        // Decode every speech segment the VAD has queued so far.
        while (!vad.empty()) {
          SpeechSegment segment = vad.front();
          float startTime = segment.getStart() / (float) sampleRate;
          float duration = segment.getSamples().length / (float) sampleRate;

          OfflineStream stream = recognizer.createStream();
          stream.acceptWaveform(segment.getSamples(), sampleRate);
          recognizer.decode(stream);
          String text = recognizer.getResult(stream).getText();
          stream.release();

          if (!text.isEmpty()) {
            System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
          }

          if (text.contains("exit the program")) {
            running = false;
          }

          vad.pop();
        }
      }
    } finally {
      // Give the microphone back and release the VAD and recognizer even if
      // the loop above throws.
      targetDataLine.stop();
      targetDataLine.close();
      vad.release();
      recognizer.release();
    }
  }
}
37 changes: 37 additions & 0 deletions java-api-examples/run-non-streaming-decode-file-moonshine.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar if they are missing,
# downloads the int8 Moonshine tiny model if it is missing, and then runs
# NonStreamingDecodeFileMoonshine.java.

set -ex

# Build the JNI shared library unless a .dylib or a .so already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model unless it is already present.
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as if it were the archive.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileMoonshine.java
41 changes: 41 additions & 0 deletions java-api-examples/run-vad-from-mic-non-streaming-moonshine.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar if they are missing,
# downloads silero_vad.onnx and the int8 Moonshine tiny model if they are
# missing, and then runs VadFromMicWithNonStreamingMoonshine.java.

set -ex

# Build the JNI shared library unless a .dylib or a .so already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# -f makes curl exit non-zero on an HTTP error. Without it an error page could
# be saved as silero_vad.onnx, and the existence check below would then accept
# that file on every later run.
if [ ! -f ./silero_vad.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download and unpack the model unless it is already present.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingMoonshine.java
6 changes: 6 additions & 0 deletions kotlin-api-examples/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,12 @@ function testSpokenLanguageIdentification() {
}

function testOfflineAsr() {
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
Expand Down
Loading

0 comments on commit bd4b223

Please sign in to comment.