Skip to content

Commit

Permalink
Fixed output for large files by writing with soundfile if duration lo…
Browse files Browse the repository at this point in the history
…nger than 1 hour
  • Loading branch information
beveradb committed Sep 15, 2024
1 parent 4cd059e commit b5b72bb
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 3 deletions.
59 changes: 57 additions & 2 deletions audio_separator/separator/common_separator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import librosa
import torch
from pydub import AudioSegment
import soundfile as sf
from audio_separator.separator.uvr_lib_v5 import spec_utils


Expand Down Expand Up @@ -217,9 +218,28 @@ def prepare_mix(self, mix):

def write_audio(self, stem_path: str, stem_source):
    """
    Writes the separated audio source to a file using pydub or soundfile.

    Pydub supports a much wider range of audio formats and produces better encoded lossy files for some formats.
    Soundfile is used for very large files (1 hour or longer), as pydub has memory issues with large files:
    https://github.com/jiaaro/pydub/issues/135

    The writer is chosen from the duration of the *input* file (self.audio_file_path),
    not from the length of stem_source.

    Args:
        stem_path: Destination path, handed unchanged to the selected writer.
        stem_source: Audio data for the stem, handed unchanged to the selected writer.
    """
    self.logger.debug(f"Entering write_audio with stem_path: {stem_path}")

    # Get the duration of the input audio file.
    # librosa 0.10 renamed the ``filename`` keyword to ``path`` and deprecated the old
    # spelling; try the current name first and fall back for older releases.
    try:
        duration_seconds = librosa.get_duration(path=self.audio_file_path)
    except TypeError:
        duration_seconds = librosa.get_duration(filename=self.audio_file_path)

    duration_hours = duration_seconds / 3600
    self.logger.info(f"Audio duration is {duration_hours:.2f} hours ({duration_seconds:.2f} seconds).")

    if duration_hours >= 1:
        self.logger.warning("Using soundfile for writing.")
        self.write_audio_soundfile(stem_path, stem_source)
    else:
        self.logger.info("Using pydub for writing.")
        self.write_audio_pydub(stem_path, stem_source)

def write_audio_pydub(self, stem_path: str, stem_source):
"""
Writes the separated audio source to a file using pydub (ffmpeg)
"""
self.logger.debug(f"Entering write_audio_pydub with stem_path: {stem_path}")

stem_source = spec_utils.normalize(wave=stem_source, max_peak=self.normalization_threshold)

Expand Down Expand Up @@ -275,6 +295,41 @@ def write_audio(self, stem_path: str, stem_source):
except (IOError, ValueError) as e:
self.logger.error(f"Error exporting audio file: {e}")

def write_audio_soundfile(self, stem_path: str, stem_source):
    """
    Writes the separated audio source to a file using the soundfile library.

    Args:
        stem_path: Destination path; passed to soundfile.write as the output file.
        stem_source: Audio samples as a NumPy array, either 1-D (mono) or
            2-D shaped (frames, channels).

    Export failures are logged via self.logger.error rather than raised.
    """
    self.logger.debug(f"Entering write_audio_soundfile with stem_path: {stem_path}")

    # soundfile takes multi-channel audio as a 2-D (frames, channels) array and does the
    # channel interleaving itself. Flattening the channels into a 1-D buffer by hand would
    # make it write a mono file with twice as many frames, and forcing int16 would truncate
    # floating point samples. So only normalise the memory layout to C order (this also
    # covers Fortran-ordered input such as a transposed array) and keep shape and dtype.
    stem_source = np.ascontiguousarray(stem_source)

    self.logger.debug(f"Interleaved audio data shape: {stem_source.shape}")

    # Save audio using soundfile
    try:
        # No subtype is passed, so soundfile uses its default subtype for the output format
        sf.write(stem_path, stem_source, self.sample_rate)
        self.logger.debug(f"Exported audio file successfully to {stem_path}")
    except Exception as e:
        # Best-effort export: report the failure instead of aborting the caller
        self.logger.error(f"Error exporting audio file: {e}")

def clear_gpu_cache(self):
"""
This method clears the GPU cache to free up memory.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "audio-separator"
version = "0.19.3"
version = "0.19.4"
description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit b5b72bb

Please sign in to comment.