From df4150dc5d9fc055e52b937ac33fac3f0f859e3b Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 14 Oct 2024 16:20:00 +0800 Subject: [PATCH] Upload speaker embedding models to huggingface (#1428) See also https://huggingface.co/spaces/k2-fsa/speaker-diarization --- .../workflows/export-3dspeaker-to-onnx.yaml | 27 ++++++++++++++++++- ...ort-nemo-speaker-verification-to-onnx.yaml | 27 ++++++++++++++++++- .../workflows/export-wespeaker-to-onnx.yaml | 25 +++++++++++++++++ README.md | 18 +++++++------ scripts/3dspeaker/run.sh | 4 +-- scripts/nemo/speaker-verification/run.sh | 9 ++++--- 6 files changed, 95 insertions(+), 15 deletions(-) diff --git a/.github/workflows/export-3dspeaker-to-onnx.yaml b/.github/workflows/export-3dspeaker-to-onnx.yaml index 42c965c90..a3fa98760 100644 --- a/.github/workflows/export-3dspeaker-to-onnx.yaml +++ b/.github/workflows/export-3dspeaker-to-onnx.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-latest] + os: [ubuntu-latest] python-version: ["3.8"] steps: @@ -43,3 +43,28 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: speaker-recongition-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=speaker-embedding-models + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://huggingface.co/csukuangfj/$d huggingface + mv -v ./*.onnx ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml index 180c3dc12..a9bd5a788 100644 --- a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml +++ b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest] + os: [macos-latest] python-version: ["3.10"] steps: @@ -43,3 +43,28 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: speaker-recongition-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=speaker-embedding-models + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://huggingface.co/csukuangfj/$d huggingface + mv -v ./*.onnx ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-wespeaker-to-onnx.yaml b/.github/workflows/export-wespeaker-to-onnx.yaml index fd167ab21..764f77ca7 100644 --- a/.github/workflows/export-wespeaker-to-onnx.yaml +++ b/.github/workflows/export-wespeaker-to-onnx.yaml @@ -48,3 +48,28 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: speaker-recongition-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=speaker-embedding-models + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://huggingface.co/csukuangfj/$d huggingface + mv -v ./*.onnx ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/README.md b/README.md index 32d141f90..df29225f1 100644 --- a/README.md +++ b/README.md @@ -88,14 +88,15 @@ with the following APIs You can visit the following Huggingface spaces to try sherpa-onnx without installing anything. All you need is a browser. -| Description | URL | -|-------------------------------------------------------|------------------------------------| -| Speech recognition | [Click me][hf-space-asr] | -| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] | -| Speech synthesis | [Click me][hf-space-tts] | -| Generate subtitles | [Click me][hf-space-subtitle] | -| Audio tagging | [Click me][hf-space-audio-tagging] | -| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] | +| Description | URL | +|-------------------------------------------------------|-----------------------------------------| +| Speaker diarization | [Click me][hf-space-speaker-diarization]| +| Speech recognition | [Click me][hf-space-asr] | +| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] | +| Speech synthesis | [Click me][hf-space-tts] | +| Generate subtitles | [Click me][hf-space-subtitle] | +| Audio tagging | [Click me][hf-space-audio-tagging] | +| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] | We also have spaces built using WebAssembly. They are listed below: @@ -240,6 +241,7 @@ Video demo in Chinese: [爆了!炫神教你开打字挂!真正影响胜率 [VisionFive 2]: https://www.starfivetech.com/en/site/boards [旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html [爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html +[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization [hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition [Whisper]: https://github.com/openai/whisper [hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper diff --git a/scripts/3dspeaker/run.sh b/scripts/3dspeaker/run.sh index 6dda2c96e..9504e98ed 100755 --- a/scripts/3dspeaker/run.sh +++ b/scripts/3dspeaker/run.sh @@ -4,10 +4,10 @@ set -e function install_3d_speaker() { echo "Install 3D-Speaker" - git clone https://github.com/alibaba-damo-academy/3D-Speaker.git + git clone https://github.com/modelscope/3D-Speaker pushd 3D-Speaker pip install -q -r ./requirements.txt - pip install -q modelscope onnx onnxruntime kaldi-native-fbank + pip install -q modelscope==1.14.0 onnx onnxruntime kaldi-native-fbank popd } diff --git a/scripts/nemo/speaker-verification/run.sh b/scripts/nemo/speaker-verification/run.sh index f5a228019..16cf43ae2 100755 --- a/scripts/nemo/speaker-verification/run.sh +++ b/scripts/nemo/speaker-verification/run.sh @@ -7,14 +7,17 @@ function install_nemo() { curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py python3 get-pip.py - pip install torch==2.1.0+cpu torchaudio==2.1.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + pip install torch==2.1.0 torchaudio==2.1.0 -f https://download.pytorch.org/whl/torch_stable.html - pip install wget text-unidecode matplotlib>=3.3.2 onnx onnxruntime pybind11 Cython einops kaldi-native-fbank soundfile + pip install -qq wget text-unidecode matplotlib>=3.3.2 onnx onnxruntime pybind11 Cython einops kaldi-native-fbank soundfile + pip install -qq ipython - sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip + # sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip ipython BRANCH='main' python3 -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr] + + pip install numpy==1.26.4 } install_nemo