From df4150dc5d9fc055e52b937ac33fac3f0f859e3b Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Mon, 14 Oct 2024 16:20:00 +0800
Subject: [PATCH] Upload speaker embedding models to huggingface (#1428)

See also
https://huggingface.co/spaces/k2-fsa/speaker-diarization
---
 .../workflows/export-3dspeaker-to-onnx.yaml   | 27 ++++++++++++++++++-
 ...ort-nemo-speaker-verification-to-onnx.yaml | 27 ++++++++++++++++++-
 .../workflows/export-wespeaker-to-onnx.yaml   | 25 +++++++++++++++++
 README.md                                     | 18 +++++++------
 scripts/3dspeaker/run.sh                      |  4 +--
 scripts/nemo/speaker-verification/run.sh      |  9 ++++---
 6 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/export-3dspeaker-to-onnx.yaml b/.github/workflows/export-3dspeaker-to-onnx.yaml
index 42c965c90..a3fa98760 100644
--- a/.github/workflows/export-3dspeaker-to-onnx.yaml
+++ b/.github/workflows/export-3dspeaker-to-onnx.yaml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [macos-latest]
+        os: [ubuntu-latest]
         python-version: ["3.8"]
 
     steps:
@@ -43,3 +43,28 @@ jobs:
           repo_name: k2-fsa/sherpa-onnx
           repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
           tag: speaker-recongition-models
+
+      # Publish the exported *.onnx models to the Hugging Face model repo.
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            set -e  # stop this attempt at the first failure so the retry starts over
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+            d=speaker-embedding-models
+            export GIT_LFS_SKIP_SMUDGE=1 GIT_CLONE_PROTECTION_ACTIVE=false
+            rm -rf huggingface  # drop the clone left behind by a previous attempt
+            git clone https://huggingface.co/csukuangfj/$d huggingface
+            cp -v ./*.onnx ./huggingface  # copy (not move) so a retry still finds the models
+            cd huggingface
+            git lfs track "*.onnx"
+            git add .
+            git status
+            git diff --cached --quiet || git commit -m "add models"  # skip empty commits
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
index 180c3dc12..a9bd5a788 100644
--- a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
+++ b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest]
+        os: [macos-latest]
         python-version: ["3.10"]
 
     steps:
@@ -43,3 +43,28 @@ jobs:
           repo_name: k2-fsa/sherpa-onnx
           repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
           tag: speaker-recongition-models
+
+      # Publish the exported *.onnx models to the Hugging Face model repo.
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            set -e  # stop this attempt at the first failure so the retry starts over
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+            d=speaker-embedding-models
+            export GIT_LFS_SKIP_SMUDGE=1 GIT_CLONE_PROTECTION_ACTIVE=false
+            rm -rf huggingface  # drop the clone left behind by a previous attempt
+            git clone https://huggingface.co/csukuangfj/$d huggingface
+            cp -v ./*.onnx ./huggingface  # copy (not move) so a retry still finds the models
+            cd huggingface
+            git lfs track "*.onnx"
+            git add .
+            git status
+            git diff --cached --quiet || git commit -m "add models"  # skip empty commits
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-wespeaker-to-onnx.yaml b/.github/workflows/export-wespeaker-to-onnx.yaml
index fd167ab21..764f77ca7 100644
--- a/.github/workflows/export-wespeaker-to-onnx.yaml
+++ b/.github/workflows/export-wespeaker-to-onnx.yaml
@@ -48,3 +48,28 @@ jobs:
           repo_name: k2-fsa/sherpa-onnx
           repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
           tag: speaker-recongition-models
+
+      # Publish the exported *.onnx models to the Hugging Face model repo.
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            set -e  # stop this attempt at the first failure so the retry starts over
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+            d=speaker-embedding-models
+            export GIT_LFS_SKIP_SMUDGE=1 GIT_CLONE_PROTECTION_ACTIVE=false
+            rm -rf huggingface  # drop the clone left behind by a previous attempt
+            git clone https://huggingface.co/csukuangfj/$d huggingface
+            cp -v ./*.onnx ./huggingface  # copy (not move) so a retry still finds the models
+            cd huggingface
+            git lfs track "*.onnx"
+            git add .
+            git status
+            git diff --cached --quiet || git commit -m "add models"  # skip empty commits
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/README.md b/README.md
index 32d141f90..df29225f1 100644
--- a/README.md
+++ b/README.md
@@ -88,14 +88,15 @@ with the following APIs
 <summary>You can visit the following Huggingface spaces to try sherpa-onnx without
 installing anything. All you need is a browser.</summary>
 
-| Description                                           | URL                                |
-|-------------------------------------------------------|------------------------------------|
-| Speech recognition                                    | [Click me][hf-space-asr]           |
-| Speech recognition with [Whisper][Whisper]            | [Click me][hf-space-asr-whisper]   |
-| Speech synthesis                                      | [Click me][hf-space-tts]           |
-| Generate subtitles                                    | [Click me][hf-space-subtitle]      |
-| Audio tagging                                         | [Click me][hf-space-audio-tagging] |
-| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper]  |
+| Description                                           | URL                                     |
+|-------------------------------------------------------|-----------------------------------------|
+| Speaker diarization                                   | [Click me][hf-space-speaker-diarization]|
+| Speech recognition                                    | [Click me][hf-space-asr]                |
+| Speech recognition with [Whisper][Whisper]            | [Click me][hf-space-asr-whisper]        |
+| Speech synthesis                                      | [Click me][hf-space-tts]                |
+| Generate subtitles                                    | [Click me][hf-space-subtitle]           |
+| Audio tagging                                         | [Click me][hf-space-audio-tagging]      |
+| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper]       |
 
 We also have spaces built using WebAssembly. They are listed below:
 
@@ -240,6 +241,7 @@ Video demo in Chinese: [爆了！炫神教你开打字挂！真正影响胜率
 [VisionFive 2]: https://www.starfivetech.com/en/site/boards
 [旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
 [爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
+[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization
 [hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
 [Whisper]: https://github.com/openai/whisper
 [hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
diff --git a/scripts/3dspeaker/run.sh b/scripts/3dspeaker/run.sh
index 6dda2c96e..9504e98ed 100755
--- a/scripts/3dspeaker/run.sh
+++ b/scripts/3dspeaker/run.sh
@@ -4,10 +4,10 @@ set -e
 
 function install_3d_speaker() {
   echo "Install 3D-Speaker"
-  git clone https://github.com/alibaba-damo-academy/3D-Speaker.git
+  git clone https://github.com/modelscope/3D-Speaker
   pushd 3D-Speaker
   pip install -q -r ./requirements.txt
-  pip install -q modelscope onnx onnxruntime kaldi-native-fbank
+  pip install -q modelscope==1.14.0 onnx onnxruntime kaldi-native-fbank
   popd
 }
 
diff --git a/scripts/nemo/speaker-verification/run.sh b/scripts/nemo/speaker-verification/run.sh
index f5a228019..16cf43ae2 100755
--- a/scripts/nemo/speaker-verification/run.sh
+++ b/scripts/nemo/speaker-verification/run.sh
@@ -7,14 +7,17 @@ function install_nemo() {
   curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
   python3 get-pip.py
 
-  pip install torch==2.1.0+cpu torchaudio==2.1.0+cpu   -f https://download.pytorch.org/whl/torch_stable.html
+  pip install torch==2.1.0 torchaudio==2.1.0 -f https://download.pytorch.org/whl/torch_stable.html
 
-  pip install wget text-unidecode matplotlib>=3.3.2 onnx onnxruntime pybind11 Cython einops kaldi-native-fbank soundfile
+  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime pybind11 Cython einops kaldi-native-fbank soundfile  # quoted so ">=" is not a shell redirection
+  pip install -qq ipython
 
-  sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip
+  # sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip ipython
 
   BRANCH='main'
   python3 -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
+
+  pip install numpy==1.26.4
 }
 
 install_nemo