Skip to content

Commit

Permalink
Add random-length silences.
Browse files Browse the repository at this point in the history
Insert silences of length between 0.1 and 1.1 seconds between each speaker segment.
  • Loading branch information
daavoo committed Jan 3, 2025
1 parent 8ebde29 commit f0798df
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
6 changes: 4 additions & 2 deletions demo/app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
from pathlib import Path

import numpy as np
import soundfile as sf
import streamlit as st

Expand All @@ -13,6 +12,7 @@
from document_to_podcast.config import DEFAULT_PROMPT, DEFAULT_SPEAKERS, Speaker
from document_to_podcast.inference.text_to_speech import text_to_speech
from document_to_podcast.inference.text_to_text import text_to_text_stream
from document_to_podcast.utils import stack_audio_segments


@st.cache_resource
Expand Down Expand Up @@ -160,7 +160,9 @@ def gen_button_clicked():

if st.session_state[gen_button]:
if st.button("Save Podcast to audio file"):
st.session_state.audio = np.concatenate(st.session_state.audio)
st.session_state.audio = stack_audio_segments(
st.session_state.audio, speech_model.audio_codec.sr
)
sf.write(
"podcast.wav",
st.session_state.audio,
Expand Down
4 changes: 2 additions & 2 deletions src/document_to_podcast/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
from pathlib import Path

import numpy as np
import soundfile as sf
import yaml
from fire import Fire
Expand All @@ -22,6 +21,7 @@
from document_to_podcast.inference.text_to_text import text_to_text_stream
from document_to_podcast.inference.text_to_speech import text_to_speech
from document_to_podcast.preprocessing import DATA_CLEANERS, DATA_LOADERS
from document_to_podcast.utils import stack_audio_segments


@logger.catch(reraise=True)
Expand Down Expand Up @@ -158,7 +158,7 @@ def document_to_podcast(
logger.info("Saving Podcast...")
sf.write(
str(output_folder / "podcast.wav"),
np.concatenate(podcast_audio),
stack_audio_segments(podcast_audio, sample_rate),
samplerate=sample_rate,
)
(output_folder / "podcast.txt").write_text(podcast_script)
Expand Down
9 changes: 9 additions & 0 deletions src/document_to_podcast/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import numpy as np


def stack_audio_segments(
    audio_segments, sample_rate, *, min_silence=0.1, max_silence=1.1
):
    """Concatenate audio segments, separated by random-length silences.

    A run of zero-valued samples is inserted *between* consecutive segments
    (never before the first or after the last one). The duration of each
    silence is drawn uniformly from ``[min_silence, max_silence)`` seconds
    using NumPy's global random state, so ``np.random.seed`` makes the
    output reproducible.

    Args:
        audio_segments: Iterable of array-likes holding audio samples. All
            segments must agree on every dimension except the first, since
            they are joined along axis 0.
        sample_rate: Number of samples per second; used to convert the
            silence duration into a sample count.
        min_silence: Lower bound, in seconds, of each inserted silence.
        max_silence: Upper bound, in seconds, of each inserted silence.

    Returns:
        A single array with all segments joined along axis 0. An empty
        1-D array is returned when ``audio_segments`` is empty.

    Raises:
        ValueError: If the silence bounds are negative or inverted.
    """
    if not 0 <= min_silence <= max_silence:
        raise ValueError(
            "Expected 0 <= min_silence <= max_silence, got "
            f"min_silence={min_silence} and max_silence={max_silence}"
        )

    segments = [np.asarray(segment) for segment in audio_segments]
    if not segments:
        # np.concatenate refuses an empty list; an empty track is the
        # natural result of stacking nothing.
        return np.zeros(0)

    silence_span = max_silence - min_silence
    stacked = [segments[0]]
    for segment in segments[1:]:
        duration = min_silence + np.random.rand() * silence_span
        n_silent = int(duration * sample_rate)
        # Match the dtype and trailing dimensions of the audio so the
        # silence neither upcasts the result nor breaks concatenation of
        # multi-dimensional segments.
        silence = np.zeros((n_silent, *segment.shape[1:]), dtype=segment.dtype)
        stacked.append(silence)
        stacked.append(segment)
    return np.concatenate(stacked)

0 comments on commit f0798df

Please sign in to comment.