-
Notifications
You must be signed in to change notification settings - Fork 477
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Kotlin and Java API for Moonshine models (#1474)
- Loading branch information
1 parent
669f5ef
commit bd4b223
Showing
15 changed files
with
480 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,8 +23,8 @@ jobs: | |
fail-fast: false | ||
matrix: | ||
os: [ubuntu-latest] | ||
total: ["5"] | ||
index: ["0", "1", "2", "3", "4"] | ||
total: ["10"] | ||
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] | ||
|
||
steps: | ||
- uses: actions/checkout@v4 | ||
|
@@ -165,6 +165,7 @@ jobs: | |
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface | ||
cd huggingface | ||
du -h -d1 . | ||
git fetch | ||
git pull | ||
git merge -m "merge remote" --ff origin main | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright 2024 Xiaomi Corporation | ||
|
||
// This file shows how to use an offline Moonshine, | ||
// i.e., non-streaming Moonshine model, | ||
// to decode files. | ||
import com.k2fsa.sherpa.onnx.*; | ||
|
||
public class NonStreamingDecodeFileMoonshine { | ||
public static void main(String[] args) { | ||
// please refer to | ||
// https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html | ||
// to download model files | ||
|
||
String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx"; | ||
String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx"; | ||
String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx"; | ||
String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx"; | ||
|
||
String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt"; | ||
|
||
String waveFilename = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"; | ||
|
||
WaveReader reader = new WaveReader(waveFilename); | ||
|
||
OfflineMoonshineModelConfig moonshine = | ||
OfflineMoonshineModelConfig.builder() | ||
.setPreprocessor(preprocessor) | ||
.setEncoder(encoder) | ||
.setUncachedDecoder(uncachedDecoder) | ||
.setCachedDecoder(cachedDecoder) | ||
.build(); | ||
|
||
OfflineModelConfig modelConfig = | ||
OfflineModelConfig.builder() | ||
.setMoonshine(moonshine) | ||
.setTokens(tokens) | ||
.setNumThreads(1) | ||
.setDebug(true) | ||
.build(); | ||
|
||
OfflineRecognizerConfig config = | ||
OfflineRecognizerConfig.builder() | ||
.setOfflineModelConfig(modelConfig) | ||
.setDecodingMethod("greedy_search") | ||
.build(); | ||
|
||
OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
OfflineStream stream = recognizer.createStream(); | ||
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
|
||
recognizer.decode(stream); | ||
|
||
String text = recognizer.getResult(stream).getText(); | ||
|
||
System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
|
||
stream.release(); | ||
recognizer.release(); | ||
} | ||
} |
152 changes: 152 additions & 0 deletions
152
java-api-examples/VadFromMicWithNonStreamingMoonshine.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
// Copyright 2024 Xiaomi Corporation | ||
|
||
// This file shows how to use a silero_vad model with a non-streaming | ||
// Moonshine tiny for speech recognition. | ||
|
||
import com.k2fsa.sherpa.onnx.*; | ||
import javax.sound.sampled.*; | ||
|
||
public class VadFromMicNonStreamingMoonshine { | ||
private static final int sampleRate = 16000; | ||
private static final int windowSize = 512; | ||
|
||
public static Vad createVad() { | ||
// please download ./silero_vad.onnx from | ||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
String model = "./silero_vad.onnx"; | ||
SileroVadModelConfig sileroVad = | ||
SileroVadModelConfig.builder() | ||
.setModel(model) | ||
.setThreshold(0.5f) | ||
.setMinSilenceDuration(0.25f) | ||
.setMinSpeechDuration(0.5f) | ||
.setWindowSize(windowSize) | ||
.build(); | ||
|
||
VadModelConfig config = | ||
VadModelConfig.builder() | ||
.setSileroVadModelConfig(sileroVad) | ||
.setSampleRate(sampleRate) | ||
.setNumThreads(1) | ||
.setDebug(true) | ||
.setProvider("cpu") | ||
.build(); | ||
|
||
return new Vad(config); | ||
} | ||
|
||
public static OfflineRecognizer createOfflineRecognizer() { | ||
// please refer to | ||
// https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html | ||
// to download model files | ||
|
||
String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx"; | ||
String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx"; | ||
String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx"; | ||
String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx"; | ||
|
||
String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt"; | ||
|
||
OfflineMoonshineModelConfig moonshine = | ||
OfflineMoonshineModelConfig.builder() | ||
.setPreprocessor(preprocessor) | ||
.setEncoder(encoder) | ||
.setUncachedDecoder(uncachedDecoder) | ||
.setCachedDecoder(cachedDecoder) | ||
.build(); | ||
|
||
OfflineModelConfig modelConfig = | ||
OfflineModelConfig.builder() | ||
.setMoonshine(moonshine) | ||
.setTokens(tokens) | ||
.setNumThreads(1) | ||
.setDebug(true) | ||
.build(); | ||
|
||
OfflineRecognizerConfig config = | ||
OfflineRecognizerConfig.builder() | ||
.setOfflineModelConfig(modelConfig) | ||
.setDecodingMethod("greedy_search") | ||
.build(); | ||
|
||
return new OfflineRecognizer(config); | ||
} | ||
|
||
public static void main(String[] args) { | ||
Vad vad = createVad(); | ||
OfflineRecognizer recognizer = createOfflineRecognizer(); | ||
|
||
// https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html | ||
// Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian | ||
AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false); | ||
|
||
// https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int- | ||
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); | ||
TargetDataLine targetDataLine; | ||
try { | ||
targetDataLine = (TargetDataLine) AudioSystem.getLine(info); | ||
targetDataLine.open(format); | ||
targetDataLine.start(); | ||
} catch (LineUnavailableException e) { | ||
System.out.println("Failed to open target data line: " + e.getMessage()); | ||
vad.release(); | ||
recognizer.release(); | ||
return; | ||
} | ||
|
||
boolean printed = false; | ||
byte[] buffer = new byte[windowSize * 2]; | ||
float[] samples = new float[windowSize]; | ||
|
||
System.out.println("Started. Please speak"); | ||
boolean running = true; | ||
while (targetDataLine.isOpen() && running) { | ||
int n = targetDataLine.read(buffer, 0, buffer.length); | ||
if (n <= 0) { | ||
System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length); | ||
continue; | ||
} | ||
for (int i = 0; i != windowSize; ++i) { | ||
short low = buffer[2 * i]; | ||
short high = buffer[2 * i + 1]; | ||
int s = (high << 8) + low; | ||
samples[i] = (float) s / 32768; | ||
} | ||
|
||
vad.acceptWaveform(samples); | ||
if (vad.isSpeechDetected() && !printed) { | ||
System.out.println("Detected speech"); | ||
printed = true; | ||
} | ||
|
||
if (!vad.isSpeechDetected()) { | ||
printed = false; | ||
} | ||
|
||
while (!vad.empty()) { | ||
SpeechSegment segment = vad.front(); | ||
float startTime = segment.getStart() / (float) sampleRate; | ||
float duration = segment.getSamples().length / (float) sampleRate; | ||
|
||
OfflineStream stream = recognizer.createStream(); | ||
stream.acceptWaveform(segment.getSamples(), sampleRate); | ||
recognizer.decode(stream); | ||
String text = recognizer.getResult(stream).getText(); | ||
stream.release(); | ||
|
||
if (!text.isEmpty()) { | ||
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); | ||
} | ||
|
||
if (text.contains("exit the program")) { | ||
running = false; | ||
} | ||
|
||
vad.pop(); | ||
} | ||
} | ||
|
||
vad.release(); | ||
recognizer.release(); | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
java-api-examples/run-non-streaming-decode-file-moonshine.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI library, Java API jar, Moonshine model files) and then runs
# NonStreamingDecodeFileMoonshine.java. All paths are relative to the current directory.

set -ex

# Build the JNI shared library if neither the .dylib nor the .so variant exists yet.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model; tokens.txt is used as the marker that it is already present.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# Quote the library path so a working directory containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileMoonshine.java
41 changes: 41 additions & 0 deletions
41
java-api-examples/run-vad-from-mic-non-streaming-moonshine.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI library, Java API jar, silero VAD model, Moonshine model files)
# and then runs VadFromMicWithNonStreamingMoonshine.java. All paths are relative to the current
# directory.

set -ex

# Build the JNI shared library if neither the .dylib nor the .so variant exists yet.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the VAD model if it is missing.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download and unpack the ASR model; tokens.txt is used as the marker that it is already present.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# Quote the library path so a working directory containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingMoonshine.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.