Skip to content

Commit

Permalink
Merge "add: Enabling an option to provide user's audio file." into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Kacper Krasowiak authored and Gerrit Code Review committed Nov 21, 2024
2 parents c7b7403 + 07c05c9 commit 8814341
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 16 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ jobs:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
python -m pip install --upgrade pip setuptools wheel twine
- name: Build and publish package
env:
TWINE_USERNAME: __token__
Expand Down
2 changes: 1 addition & 1 deletion ariel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# limitations under the License.

"""Ariel library for end-to-end video ad dubbing using AI."""
__version__ = "0.0.24"
__version__ = "0.0.25"
21 changes: 19 additions & 2 deletions ariel/colab_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,8 @@ def get_folder_id_by_path(path: str) -> str:
Args:
path: The full path of the folder in Google Drive, starting from
'/content/drive/My Drive/...'. For example: '/content/drive/My
Drive/parent_folder/sub_folder'.
'/content/drive/My Drive/...'. For example: '/content/drive/My
Drive/parent_folder/sub_folder'.
Raises:
FileNotFoundError: If any part of the specified path does not exist in
Expand Down Expand Up @@ -444,6 +444,10 @@ class ColabPaths:
input_file_google_drive_path: The Google Drive path of the input file.
input_file_colab_path: The path to the input file in Colab after copying
from Google Drive.
audio_file_colab_path: An optional path to a file with the audio part only.
It should be vocals + background audio or just background audio. It will
be used instead of the audio track from the input video. Must be an MP3
file.
vocals_file_colab_path: The path to the vocals file in Colab after copying
from Google Drive, or None if not provided.
background_file_colab_path: The path to the background file in Colab after
Expand All @@ -452,20 +456,24 @@ class ColabPaths:

input_file_google_drive_path: str
input_file_colab_path: str | None
audio_file_colab_path: str | None = None
vocals_file_colab_path: str | None = None
background_file_colab_path: str | None = None


def generate_colab_file_paths(
*,
video_google_drive_link: str,
audio_google_drive_link: str | None = None,
vocals_google_drive_link: str | None = None,
background_google_drive_link: str | None = None,
) -> ColabPaths:
"""Generates Colab file paths for the specified Google Drive links and copies files to Colab.
Args:
video_google_drive_link: The Google Drive link to the main input file.
audio_google_drive_link: The Google Drive link to the audio file, if
available. Defaults to None.
vocals_google_drive_link: The Google Drive link to the vocals file, if
available. Defaults to None.
background_google_drive_link: The Google Drive link to the background
Expand All @@ -481,6 +489,14 @@ def generate_colab_file_paths(
input_file_colab_path = copy_file_to_colab(
source_file_path=input_file_google_drive_path
)
audio_file_colab_path = None
if audio_google_drive_link:
audio_file_google_drive_path = get_file_path_from_sharable_link(
audio_google_drive_link
)
audio_file_colab_path = copy_file_to_colab(
source_file_path=audio_file_google_drive_path
)
vocals_file_colab_path = None
if vocals_google_drive_link:
vocals_file_google_drive_path = get_file_path_from_sharable_link(
Expand All @@ -500,6 +516,7 @@ def generate_colab_file_paths(
return ColabPaths(
input_file_google_drive_path=input_file_google_drive_path,
input_file_colab_path=input_file_colab_path,
audio_file_colab_path=audio_file_colab_path,
vocals_file_colab_path=vocals_file_colab_path,
background_file_colab_path=background_file_colab_path,
)
Expand Down
18 changes: 14 additions & 4 deletions ariel/dubbing.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,8 +587,9 @@ def __init__(
vocals_volume_adjustment: float = 5.0,
background_volume_adjustment: float = 0.0,
voice_separation_rounds: int = 2,
vocals_audio_file: str | None,
background_audio_file: str | None,
audio_file: str | None = None,
vocals_audio_file: str | None = None,
background_audio_file: str | None = None,
clean_up: bool = True,
pyannote_model: str = _DEFAULT_PYANNOTE_MODEL,
gemini_model_name: str = _DEFAULT_GEMINI_MODEL,
Expand Down Expand Up @@ -660,6 +661,10 @@ def __init__(
voice_separation_rounds: The number of times the background audio file
should be processed for voice detection and removal. It helps remove
old voice artifacts that would otherwise remain in the dubbed ad.
audio_file: An optional path to a file with the audio part
only. It should be vocals + background audio or just background audio.
It will be used instead of the audio track from the input video.
Must be an MP3 file.
vocals_audio_file: An optional path to a file with the speaking part
only. It will be used instead of AI splitting the entire audio track
into vocals and background audio files. If this is provided then also
Expand Down Expand Up @@ -712,6 +717,7 @@ def __init__(
self.vocals_volume_adjustment = vocals_volume_adjustment
self.background_volume_adjustment = background_volume_adjustment
self.voice_separation_rounds = voice_separation_rounds
self.audio_file = audio_file
self.vocals_audio_file = vocals_audio_file
self.background_audio_file = background_audio_file
self.clean_up = clean_up
Expand Down Expand Up @@ -962,7 +968,9 @@ def run_preprocessing(self) -> None:
"""
if self.is_video:
video_file, audio_file = video_processing.split_audio_video(
video_file=self.input_file, output_directory=self.output_directory
video_file=self.input_file,
output_directory=self.output_directory,
audio_file_override=self.audio_file,
)
else:
video_file = None
Expand Down Expand Up @@ -1020,7 +1028,9 @@ def run_preprocessing_for_dubbing_from_script(self) -> None:
"""
if self.is_video:
video_file, audio_file = video_processing.split_audio_video(
video_file=self.input_file, output_directory=self.output_directory
video_file=self.input_file,
output_directory=self.output_directory,
audio_file_override=self.audio_file,
)
else:
video_file = None
Expand Down
16 changes: 13 additions & 3 deletions ariel/video_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@


def split_audio_video(
*, video_file: str, output_directory: str
*,
video_file: str,
output_directory: str,
audio_file_override: str | None = None,
) -> tuple[str, str]:
"""Splits an audio/video file into separate audio and video files.
Args:
video_file: The full path to the input video file.
output_directory: The full path to the output directory.
audio_file_override: An optional path to a file with the audio part only.
It should be vocals + background audio or just background audio. It will
be used instead of the audio track from the input video. Must be an MP3
file.
Returns:
A tuple with a path to a video ad file with no audio and the second path to
Expand All @@ -58,8 +65,11 @@ def split_audio_video(
)
return video_output_file, audio_output_file
with VideoFileClip(video_file) as video_clip:
audio_clip = video_clip.audio
audio_clip.write_audiofile(audio_output_file, verbose=False, logger=None)
if audio_file_override:
tf.io.gfile.copy(audio_file_override, audio_output_file, overwrite=True)
else:
audio_clip = video_clip.audio
audio_clip.write_audiofile(audio_output_file, verbose=False, logger=None)
video_clip_without_audio = video_clip.set_audio(None)
fps = video_clip.fps or _DEFAULT_FPS
video_clip_without_audio.write_videofile(
Expand Down
14 changes: 10 additions & 4 deletions examples/dubbing_workflow.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"id": "Fv5uA2wwijCi"
},
Expand All @@ -127,7 +127,7 @@
"except ModuleNotFoundError:\n",
" pass\n",
"\n",
"# @markdown **original_language** GCP region to use when making API calls and where a temporary bucket will be created for Gemini to analyze the video / audio ad. The bucket with all its contents will be removed immediately afterwards.\n",
"# @markdown **gcp_region** GCP region to use when making API calls and where a temporary bucket will be created for Gemini to analyze the video / audio ad. The bucket with all its contents will be removed immediately afterwards.\n",
"gcp_region = \"Iowa - us-central1\" # @param ['Santiago - southamerica-west1', 'Iowa - us-central1', 'Oklahoma - us-central2', 'South Carolina - us-east1', 'Northern Virginia - us-east4', 'Columbus - us-east5', 'Unknown - us-east7', 'Dallas - us-south1', 'Oregon - us-west1', 'Los Angeles - us-west2', 'Salt Lake City - us-west3', 'Las Vegas - us-west4', 'Phoenix - us-west8', 'Taiwan - asia-east1', 'Tokyo - asia-northeast1', 'Singapore - asia-southeast1', 'Sydney - australia-southeast1', 'Finland - europe-north1', 'Belgium - europe-west1', 'London - europe-west2', 'Frankfurt - europe-west3', 'Netherlands - europe-west4']\n",
"gcp_region = gcp_region.split(\" - \")[1]\n",
"\n",
Expand Down Expand Up @@ -187,14 +187,18 @@
"if script_google_drive_link and metadata_google_drive_link:\n",
" raise ValueError(\"You can't specify both `script_google_drive_link` and `metadata_google_drive_link`. Please choose one of them only.\")\n",
"\n",
"# @markdown **audio_google_drive_link** **[OPTIONAL]** The **shareable link** to the file with the audio track only in an MP3 format. It will be used instead of the audio track from the input video. **Leave it empty otherwise.**\n",
"audio_google_drive_link = \"\" # @param {type:\"string\"}\n",
"\n",
"# @markdown **vocals_google_drive_link** **[OPTIONAL]** The **shareable link** to the file with the vocals track only in an MP3 format. You would use it only if you have pre-recorded the vocals track with an actor and have it as a separate file. **Leave it empty otherwise.** If you use it then you must also specify `background_google_drive_link` below.\n",
"vocals_google_drive_link = \"\" # @param {type:\"string\"}\n",
"\n",
"# @markdown **background_google_drive_link** **[OPTIONAL]** The **shareable link** to the file with the background track only in an MP3 format. You would use it only if you have pre-recorded the background track and have it as a separate file. **Leave it empty otherwise.** If you use it then you must also specify `vocals_google_drive_link` above.\n",
"background_google_drive_link = \"\" # @param {type:\"string\"}\n",
"\n",
"if (vocals_google_drive_link and not background_google_drive_link) or (not vocals_google_drive_link and background_google_drive_link):\n",
" raise ValueError(\"You need to specify both `vocals_google_drive_link` and `background_google_drive_link`. Or leave them both blank.\")\n",
"if not script_google_drive_link:\n",
" if (vocals_google_drive_link and not background_google_drive_link) or (not vocals_google_drive_link and background_google_drive_link):\n",
" raise ValueError(\"You need to specify both `vocals_google_drive_link` and `background_google_drive_link`. Or leave them both blank.\")\n",
"\n",
"# @markdown **adjust_speed** Whether to either speed up speech chunks produced by ElevenLabs API or Google's Text-To-Speech when using the \"Journey\" voice to match the duration of the speech chunks in the source language. It might distort the quality slightly.\n",
"adjust_speed = \"False\" # @param [\"True\", \"False\"]\n",
Expand Down Expand Up @@ -336,6 +340,7 @@
"video_google_drive_link = \"\" # @param {type:\"string\"}\n",
"colab_filepaths = colab_utils.generate_colab_file_paths(\n",
" video_google_drive_link=video_google_drive_link,\n",
" audio_google_drive_link=audio_google_drive_link,\n",
" vocals_google_drive_link=vocals_google_drive_link,\n",
" background_google_drive_link=background_google_drive_link,\n",
" )\n",
Expand Down Expand Up @@ -371,6 +376,7 @@
" vocals_volume_adjustment=vocals_volume_adjustment,\n",
" background_volume_adjustment=background_volume_adjustment,\n",
" voice_separation_rounds=voice_separation_rounds,\n",
" audio_file=colab_filepaths.audio_file_colab_path,\n",
" vocals_audio_file=colab_filepaths.vocals_file_colab_path,\n",
" background_audio_file=colab_filepaths.background_file_colab_path,\n",
" clean_up=clean_up,\n",
Expand Down

0 comments on commit 8814341

Please sign in to comment.