Merge branch 'main' into 214-task-word-level-alignment

sensein · Jan 2, 2025 · 0429de0
2 parents 754b1fe + dd2c18f
commit 0429de0
Show file tree

Hide file tree

Showing 19 changed files with 305 additions and 52 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -64,7 +64,7 @@ jobs:
           --verbose
       shell: bash
     - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v4
+      uses: codecov/codecov-action@v5
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,38 @@
+# 0.26.1 (Mon Dec 30 2024)
+
+#### 🐛 Bug Fix
+
+- Update model.py [#228](https://github.com/sensein/senselab/pull/228) ([@fabiocat93](https://github.com/fabiocat93))
+
+#### 📝 Documentation
+
+- Updating tutorial files [#210](https://github.com/sensein/senselab/pull/210) ([@900miles](https://github.com/900miles) [@fabiocat93](https://github.com/fabiocat93))
+
+#### 🔩 Dependency Updates
+
+- Bump codecov/codecov-action from 4 to 5 [#211](https://github.com/sensein/senselab/pull/211) ([@dependabot[bot]](https://github.com/dependabot[bot]) [@fabiocat93](https://github.com/fabiocat93))
+
+#### Authors: 3
+
+- [@900miles](https://github.com/900miles)
+- [@dependabot[bot]](https://github.com/dependabot[bot])
+- Fabio Catania ([@fabiocat93](https://github.com/fabiocat93))
+
+---
+
+# 0.26.0 (Mon Dec 23 2024)
+
+#### 🚀 Enhancement
+
+- Bump the production-dependencies group across 1 directory with 3 updates [#218](https://github.com/sensein/senselab/pull/218) ([@dependabot[bot]](https://github.com/dependabot[bot]) [@fabiocat93](https://github.com/fabiocat93))
+
+#### Authors: 2
+
+- [@dependabot[bot]](https://github.com/dependabot[bot])
+- Fabio Catania ([@fabiocat93](https://github.com/fabiocat93))
+
+---
+
 # 0.25.0 (Tue Dec 10 2024)
 
 #### 🚀 Enhancement

diff --git a/pyproject.toml b/pyproject.toml
@@ -32,7 +32,7 @@ datasets = "~=3"
 torch = "~=2.5"
 torchvision = "~=0.20"
 torchaudio = "~=2.5"
-transformers = "~=4.46.2"
+transformers = "~=4.47"
 pydra = "~=0.25"
 pydantic = "~=2.7"
 accelerate = "*"
@@ -43,7 +43,7 @@ torch-audiomentations = "~=0.11"
 sentence-transformers = "~=3.1"
 jiwer = "~=3.0"
 speechbrain = "~=1"
-pyav = "~=13"
+pyav = "~=14.0"
 pyannote-audio = "~=3.3"
 pycountry = "~=24.6"
 types-requests = "~=2.32"

diff --git a/src/senselab/utils/data_structures/model.py b/src/senselab/utils/data_structures/model.py
@@ -153,7 +153,10 @@ def check_hf_repo_exists(repo_id: str, revision: str = "main", repo_type: str =
     """Private function to check if a Hugging Face repository exists."""
     api = HfApi()
     try:
-        api.list_repo_commits(repo_id=repo_id, revision=revision, repo_type=repo_type)
+        if repo_type == "model":
+            api.model_info(repo_id=repo_id, revision=revision)
+        else:
+            api.list_repo_commits(repo_id=repo_id, revision=revision, repo_type=repo_type)
         return True
     except Exception:
         # raise RuntimeError(f"An error occurred: {e}")

diff --git a/src/tests/utils/data_structures/model_test.py b/src/tests/utils/data_structures/model_test.py
@@ -9,7 +9,7 @@
 
 def test_check_hf_repo_exists_true() -> None:
     """Test HF repo exists."""
-    with patch("huggingface_hub.HfApi.list_repo_commits") as mock_list_repo_commits:
+    with patch("huggingface_hub.HfApi.model_info") as mock_list_repo_commits:
         mock_list_repo_commits.return_value = True
         assert check_hf_repo_exists("valid_repo") is True
 

diff --git a/tutorials/audio/00_getting_started.ipynb b/tutorials/audio/00_getting_started.ipynb
@@ -6,7 +6,7 @@
             "source": [
                 "# Getting Started with ```senselab```\n",
                 "\n",
-                "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sensein/senselab/blob/main/tutorials/audiogetting_started.ipynb)\n",
+                "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sensein/senselab/blob/main/tutorials/audio/00_getting_started.ipynb)\n",
                 "\n",
                 "\n",
                 "Welcome to the `senselab` quick start tutorial! \n",
@@ -35,7 +35,7 @@
             },
             "outputs": [],
             "source": [
-                "pip install senselab"
+                "%pip install senselab"
             ]
         },
         {
@@ -48,14 +48,17 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 2,
+            "execution_count": null,
             "metadata": {},
             "outputs": [],
             "source": [
                 "from senselab.audio.data_structures import Audio\n",
+                "!mkdir -p tutorial_audio_files\n",
+                "!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
+                "!wget -O tutorial_audio_files/audio_48khz_stereo_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\n",
                 "\n",
-                "MONO_AUDIO_PATH = \"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\"\n",
-                "STEREO_AUDIO_PATH = \"../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\"\n",
+                "MONO_AUDIO_PATH = \"tutorial_audio_files/audio_48khz_mono_16bits.wav\"\n",
+                "STEREO_AUDIO_PATH = \"tutorial_audio_files/audio_48khz_stereo_16bits.wav\"\n",
                 "\n",
                 "audio1 = Audio.from_filepath(MONO_AUDIO_PATH)\n",
                 "audio2 = Audio.from_filepath(STEREO_AUDIO_PATH)"
@@ -71,9 +74,18 @@
         },
         {
             "cell_type": "code",
-            "execution_count": null,
+            "execution_count": 3,
             "metadata": {},
-            "outputs": [],
+            "outputs": [
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "The original audio has 2 channels.\n",
+                        "The downmixed audio has 1 channels.\n"
+                    ]
+                }
+            ],
             "source": [
                 "from senselab.audio.tasks.preprocessing import downmix_audios_to_mono\n",
                 "\n",
@@ -331,7 +343,7 @@
             "name": "python",
             "nbconvert_exporter": "python",
             "pygments_lexer": "ipython3",
-            "version": "3.10.10"
+            "version": "3.12.0"
         }
     },
     "nbformat": 4,

diff --git a/tutorials/audio/audio_data_augmentation.ipynb b/tutorials/audio/audio_data_augmentation.ipynb
@@ -16,7 +16,23 @@
             "cell_type": "markdown",
             "metadata": {},
             "source": [
-                "We start by importing the modules required for the augmentation, plotting, and audio processing tasks."
+                "First, we should install senselab if it has not already been installed."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "%pip install senselab"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Now, we start by importing the modules required for the augmentation, plotting, and audio processing tasks."
             ]
         },
         {
@@ -78,7 +94,10 @@
             "outputs": [],
             "source": [
                 "# Load an audio file\n",
-                "audio = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\")\n",
+                "!mkdir -p tutorial_audio_files\n",
+                "!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
+                "\n",
+                "audio = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_mono_16bits.wav\")\n",
                 "\n",
                 "# Play the audio\n",
                 "play_audio(audio)\n",
@@ -145,7 +164,7 @@
             "name": "python",
             "nbconvert_exporter": "python",
             "pygments_lexer": "ipython3",
-            "version": "3.10.10"
+            "version": "3.12.0"
         }
     },
     "nbformat": 4,

diff --git a/tutorials/audio/extract_speaker_embeddings.ipynb b/tutorials/audio/extract_speaker_embeddings.ipynb
@@ -17,6 +17,15 @@
                 "First, let's import the necessary libraries and the function we'll be using."
             ]
         },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "%pip install senselab"
+            ]
+        },
         {
             "cell_type": "code",
             "execution_count": null,
@@ -45,12 +54,16 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 2,
+            "execution_count": null,
             "metadata": {},
             "outputs": [],
             "source": [
-                "audio1 = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\")\n",
-                "audio2 = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\")\n",
+                "!mkdir -p tutorial_audio_files\n",
+                "!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
+                "!wget -O tutorial_audio_files/audio_48khz_stereo_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\n",
+                "\n",
+                "audio1 = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_mono_16bits.wav\")\n",
+                "audio2 = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_stereo_16bits.wav\")\n",
                 "\n",
                 "# Downmix to mono\n",
                 "audio2 = downmix_audios_to_mono([audio2])[0]\n",

diff --git a/tutorials/audio/features_extraction.ipynb b/tutorials/audio/features_extraction.ipynb
@@ -12,6 +12,15 @@
                 "In this tutorial, we will explore how to extract some audio descriptors with the `senselab` package. Descriptors include acoustic and quality measures and are extracted with different libraries. "
             ]
         },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "%pip install senselab"
+            ]
+        },
         {
             "cell_type": "code",
             "execution_count": null,
@@ -30,8 +39,11 @@
             "metadata": {},
             "outputs": [],
             "source": [
+                "!mkdir -p tutorial_audio_files\n",
+                "!wget -O tutorial_audio_files/audio_48khz_stereo_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\n",
+                "\n",
                 "# Load audio\n",
-                "audio2 = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\")\n",
+                "audio2 = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_stereo_16bits.wav\")\n",
                 "\n",
                 "# Downmix to mono\n",
                 "audio2 = downmix_audios_to_mono([audio2])[0]\n",

diff --git a/tutorials/audio/speaker_diarization.ipynb b/tutorials/audio/speaker_diarization.ipynb
@@ -11,6 +11,15 @@
                 "This tutorial demonstrates how to use the `diarize_audios` function to perform speaker diarization on some audio files, which means to segment the audio into multiple speakers."
             ]
         },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "%pip install senselab"
+            ]
+        },
         {
             "cell_type": "code",
             "execution_count": null,
@@ -46,7 +55,10 @@
             "outputs": [],
             "source": [
                 "# Load an audio file from the specified file path into an Audio object.\n",
-                "audio = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\")\n",
+                "!mkdir -p tutorial_audio_files\n",
+                "!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
+                "\n",
+                "audio = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_mono_16bits.wav\")\n",
                 "\n",
                 "# Resample the audio to 16kHz, as this is the expected input format for the model.\n",
                 "# The resample_audios function returns a list, so we take the first (and only) element.\n",