Merge "add: Enabling an option to provide user's audio file." into main

google-marketing-solutions · Nov 21, 2024 · 8814341 · 8814341
2 parents c7b7403 + 07c05c9
commit 8814341
Show file tree

Hide file tree

Showing 6 changed files with 58 additions and 16 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -34,8 +34,7 @@ jobs:
         python-version: '3.10'
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install setuptools wheel twine
+        python -m pip install --upgrade pip setuptools wheel twine
     - name: Build and publish package
       env:
         TWINE_USERNAME: __token__

diff --git a/ariel/__init__.py b/ariel/__init__.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 
 """Ariel library for end-to-end video ad dubbing using AI."""
-__version__ = "0.0.24"
+__version__ = "0.0.25"
diff --git a/ariel/colab_utils.py b/ariel/colab_utils.py
@@ -331,8 +331,8 @@ def get_folder_id_by_path(path: str) -> str:
 
   Args:
     path: The full path of the folder in Google Drive, starting from
-    '/content/drive/My Drive/...'. For example: '/content/drive/My
-    Drive/parent_folder/sub_folder'.
+      '/content/drive/My Drive/...'. For example: '/content/drive/My
+      Drive/parent_folder/sub_folder'.
 
   Raises:
     FileNotFoundError: If any part of the specified path does not exist in
@@ -444,6 +444,10 @@ class ColabPaths:
     input_file_google_drive_path: The Google Drive path of the input file.
     input_file_colab_path: The path to the input file in Colab after copying
       from Google Drive.
+    audio_file_colab_path: An optional path to a file with the audio part only.
+      It should be vocals + background audio or just background audio. It will
+      be used instead of the audio track from the input video. Must be an MP3
+      file.
     vocals_file_colab_path: The path to the vocals file in Colab after copying
       from Google Drive, or None if not provided.
     background_file_colab_path: The path to the background file in Colab after
@@ -452,20 +456,24 @@ class ColabPaths:
 
   input_file_google_drive_path: str
   input_file_colab_path: str | None
+  audio_file_colab_path: str | None = None
   vocals_file_colab_path: str | None = None
   background_file_colab_path: str | None = None
 
 
 def generate_colab_file_paths(
     *,
     video_google_drive_link: str,
+    audio_google_drive_link: str | None = None,
     vocals_google_drive_link: str | None = None,
     background_google_drive_link: str | None = None,
 ) -> ColabPaths:
   """Generates Colab file paths for the specified Google Drive links and copies files to Colab.
 
   Args:
       video_google_drive_link: The Google Drive link to the main input file.
+      audio_google_drive_link: The Google Drive link to the audio file, if
+        available. Defaults to None.
       vocals_google_drive_link: The Google Drive link to the vocals file, if
         available. Defaults to None.
       background_google_drive_link: The Google Drive link to the background
@@ -481,6 +489,14 @@ def generate_colab_file_paths(
   input_file_colab_path = copy_file_to_colab(
       source_file_path=input_file_google_drive_path
   )
+  audio_file_colab_path = None
+  if audio_google_drive_link:
+    audio_file_google_drive_path = get_file_path_from_sharable_link(
+        audio_google_drive_link
+    )
+    audio_file_colab_path = copy_file_to_colab(
+        source_file_path=audio_file_google_drive_path
+    )
   vocals_file_colab_path = None
   if vocals_google_drive_link:
     vocals_file_google_drive_path = get_file_path_from_sharable_link(
@@ -500,6 +516,7 @@ def generate_colab_file_paths(
   return ColabPaths(
       input_file_google_drive_path=input_file_google_drive_path,
       input_file_colab_path=input_file_colab_path,
+      audio_file_colab_path=audio_file_colab_path,
       vocals_file_colab_path=vocals_file_colab_path,
       background_file_colab_path=background_file_colab_path,
   )

diff --git a/ariel/dubbing.py b/ariel/dubbing.py
@@ -587,8 +587,9 @@ def __init__(
       vocals_volume_adjustment: float = 5.0,
       background_volume_adjustment: float = 0.0,
       voice_separation_rounds: int = 2,
-      vocals_audio_file: str | None,
-      background_audio_file: str | None,
+      audio_file: str | None = None,
+      vocals_audio_file: str | None = None,
+      background_audio_file: str | None = None,
       clean_up: bool = True,
       pyannote_model: str = _DEFAULT_PYANNOTE_MODEL,
       gemini_model_name: str = _DEFAULT_GEMINI_MODEL,
@@ -660,6 +661,10 @@ def __init__(
         voice_separation_rounds: The number of times the background audio file
           should be processed for voice detection and removal. It helps with the
           old voice artifacts being present in the dubbed ad.
+        audio_file: An optional path to a file with the audio part
+          only. It should be vocals + background audio or just background audio.
+          It will be used instead of the audio track from the input video.
+          Must be an MP3 file.
         vocals_audio_file: An optional path to a file with the speaking part
           only. It will be used instead of AI splitting the entire audio track
           into vocals and background audio files. If this is provided then also
@@ -712,6 +717,7 @@ def __init__(
     self.vocals_volume_adjustment = vocals_volume_adjustment
     self.background_volume_adjustment = background_volume_adjustment
     self.voice_separation_rounds = voice_separation_rounds
+    self.audio_file = audio_file
     self.vocals_audio_file = vocals_audio_file
     self.background_audio_file = background_audio_file
     self.clean_up = clean_up
@@ -962,7 +968,9 @@ def run_preprocessing(self) -> None:
     """
     if self.is_video:
       video_file, audio_file = video_processing.split_audio_video(
-          video_file=self.input_file, output_directory=self.output_directory
+          video_file=self.input_file,
+          output_directory=self.output_directory,
+          audio_file_override=self.audio_file,
       )
     else:
       video_file = None
@@ -1020,7 +1028,9 @@ def run_preprocessing_for_dubbing_from_script(self) -> None:
     """
     if self.is_video:
       video_file, audio_file = video_processing.split_audio_video(
-          video_file=self.input_file, output_directory=self.output_directory
+          video_file=self.input_file,
+          output_directory=self.output_directory,
+          audio_file_override=self.audio_file,
       )
     else:
       video_file = None

diff --git a/ariel/video_processing.py b/ariel/video_processing.py
@@ -28,13 +28,20 @@
 
 
 def split_audio_video(
-    *, video_file: str, output_directory: str
+    *,
+    video_file: str,
+    output_directory: str,
+    audio_file_override: str | None = None,
 ) -> tuple[str, str]:
   """Splits an audio/video file into separate audio and video files.
 
   Args:
       video_file: The full path to the input video file.
       output_directory: The full path to the output directory.
+      audio_file_override: An optional path to a file with the audio part only.
+        It should be vocals + background audio or just background audio. It will
+        be used instead of the audio track from the input video. Must be an MP3
+        file.
 
   Returns:
     A tuple with a path to a video ad file with no audio and the second path to
@@ -58,8 +65,11 @@ def split_audio_video(
     )
     return video_output_file, audio_output_file
   with VideoFileClip(video_file) as video_clip:
-    audio_clip = video_clip.audio
-    audio_clip.write_audiofile(audio_output_file, verbose=False, logger=None)
+    if audio_file_override:
+      tf.io.gfile.copy(audio_file_override, audio_output_file, overwrite=True)
+    else:
+      audio_clip = video_clip.audio
+      audio_clip.write_audiofile(audio_output_file, verbose=False, logger=None)
     video_clip_without_audio = video_clip.set_audio(None)
     fps = video_clip.fps or _DEFAULT_FPS
     video_clip_without_audio.write_videofile(

diff --git a/examples/dubbing_workflow.ipynb b/examples/dubbing_workflow.ipynb
@@ -111,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "id": "Fv5uA2wwijCi"
    },
@@ -127,7 +127,7 @@
     "except ModuleNotFoundError:\n",
     "    pass\n",
     "\n",
-    "# @markdown **original_language** GCP region to use when making API calls and where a temporary bucket will be created for Gemini to analyze the video / audio ad. The bucket with all its contents will be removed immediately afterwards.\n",
+    "# @markdown **gcp_region** GCP region to use when making API calls and where a temporary bucket will be created for Gemini to analyze the video / audio ad. The bucket with all its contents will be removed immediately afterwards.\n",
     "gcp_region = \"Iowa - us-central1\" # @param ['Santiago - southamerica-west1', 'Iowa - us-central1', 'Oklahoma - us-central2', 'South Carolina - us-east1', 'Northern Virginia - us-east4', 'Columbus - us-east5', 'Unknown - us-east7',  'Dallas - us-south1', 'Oregon - us-west1', 'Los Angeles - us-west2', 'Salt Lake City - us-west3', 'Las Vegas - us-west4', 'Phoenix - us-west8', 'Taiwan - asia-east1', 'Tokyo - asia-northeast1', 'Singapore - asia-southeast1', 'Sydney - australia-southeast1', 'Finland - europe-north1', 'Belgium - europe-west1', 'London - europe-west2', 'Frankfurt - europe-west3', 'Netherlands - europe-west4']\n",
     "gcp_region = gcp_region.split(\" - \")[1]\n",
     "\n",
@@ -187,14 +187,18 @@
     "if script_google_drive_link and metadata_google_drive_link:\n",
     "    raise ValueError(\"You can't specify both `script_google_drive_link` and `metadata_google_drive_link`. Please choose one of them only.\")\n",
     "\n",
+    "# @markdown **audio_google_drive_link** **[OPTIONAL]** The **shareable link** to the file with the audio track only in an MP3 format. It will be used instead of the audio track from the input video. **Leave it empty otherwise.**\n",
+    "audio_google_drive_link = \"\" # @param {type:\"string\"}\n",
+    "\n",
     "# @markdown **vocals_google_drive_link** **[OPTIONAL]** The **shareable link** to the file with the vocals track only in an MP3 format. You would use it only if you have pre-recorded the vocals track with an actor and have it as a separate file. **Leave it empty otherwise.** If you use it then you must also specify `background_google_drive_link` below.\n",
     "vocals_google_drive_link = \"\" # @param {type:\"string\"}\n",
     "\n",
     "# @markdown **background_google_drive_link** **[OPTIONAL]** The **shareable link** to the file with the background track only in an MP3 format. You would use it only if you have pre-recorded the background track and have it as a separate file. **Leave it empty otherwise.** If you use it then you must also specify `vocals_google_drive_link` above.\n",
     "background_google_drive_link = \"\" # @param {type:\"string\"}\n",
     "\n",
-    "if (vocals_google_drive_link and not background_google_drive_link) or (not vocals_google_drive_link and background_google_drive_link):\n",
-    "    raise ValueError(\"You need to specify both `vocals_google_drive_link` and `background_google_drive_link`. Or leave them both blank.\")\n",
+    "if not script_google_drive_link:\n",
+    "    if (vocals_google_drive_link and not background_google_drive_link) or (not vocals_google_drive_link and background_google_drive_link):\n",
+    "        raise ValueError(\"You need to specify both `vocals_google_drive_link` and `background_google_drive_link`. Or leave them both blank.\")\n",
     "\n",
     "# @markdown **adjust_speed** Whether to either speed up speech chunks produced by ElevenLabs API or Google's Text-To-Speech when using the \"Journey\" voice to match the duration of the speech chunks in the source language. It might distort the quality slightly.\n",
     "adjust_speed = \"False\" # @param [\"True\", \"False\"]\n",
@@ -336,6 +340,7 @@
     "video_google_drive_link = \"\" # @param {type:\"string\"}\n",
     "colab_filepaths = colab_utils.generate_colab_file_paths(\n",
     "   video_google_drive_link=video_google_drive_link,\n",
+    "   audio_google_drive_link=audio_google_drive_link,\n",
     "   vocals_google_drive_link=vocals_google_drive_link,\n",
     "   background_google_drive_link=background_google_drive_link,\n",
     "   )\n",
@@ -371,6 +376,7 @@
     "      vocals_volume_adjustment=vocals_volume_adjustment,\n",
     "      background_volume_adjustment=background_volume_adjustment,\n",
     "      voice_separation_rounds=voice_separation_rounds,\n",
+    "      audio_file=colab_filepaths.audio_file_colab_path,\n",
     "      vocals_audio_file=colab_filepaths.vocals_file_colab_path,\n",
     "      background_audio_file=colab_filepaths.background_file_colab_path,\n",
     "      clean_up=clean_up,\n",