-
Notifications
You must be signed in to change notification settings - Fork 0
/
video2text.py
122 lines (102 loc) · 3.99 KB
/
video2text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from requests import get
import os
import yt_dlp as youtube_dl
from yt_dlp import YoutubeDL
from youtube_search import YoutubeSearch
import speech_recognition as sr
from pydub import AudioSegment
import io
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
# yt-dlp options for the video download: prefer an MP4 video stream merged
# with an M4A audio stream, then a single-file MP4, then whatever is best.
YDL_OPTIONS_VIDEO = {
    "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
    # A real boolean: the previous string "True" only worked because any
    # non-empty string is truthy (so "False" would have enabled it as well).
    "noplaylist": True,
}
# yt-dlp options for the audio download: take the best audio stream and have
# the FFmpegExtractAudio post-processor convert it to WAV.
YDL_OPTIONS_AUDIO = {
    "format": "bestaudio/best",
    "postprocessors": [
        {
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }
    ],
    # A real boolean: the previous string "True" only worked because any
    # non-empty string is truthy (so "False" would have enabled it as well).
    "noplaylist": True,
}
def _duration_to_seconds(duration):
    """Convert an ``"M:SS"`` or ``"H:MM:SS"`` duration to whole seconds.

    Returns ``None`` when the value does not have two or three
    colon-separated integer fields, so callers can skip the entry instead
    of crashing on it.
    """
    # str() guards against non-string values; presumably the search library
    # can report a numeric placeholder for entries without a length.
    fields = str(duration).split(":")
    if not 2 <= len(fields) <= 3:
        return None
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        return None
    seconds = 0
    for number in numbers:
        seconds = seconds * 60 + number
    return seconds


def download_video_and_audio(song, time_length):
    """Download the first search hit whose length matches ``time_length``.

    Searches YouTube for ``song`` (up to 15 results), picks the first result
    whose duration is within 3 seconds of ``time_length``, and downloads it
    twice: once with ``YDL_OPTIONS_VIDEO`` and once with ``YDL_OPTIONS_AUDIO``.

    Args:
        song: Search query.
        time_length: Expected duration in seconds (anything ``int()`` accepts).

    Returns:
        A ``(video_path, audio_path)`` tuple: the names yt-dlp prepared for
        each download with their extensions replaced by ``.mp4`` and ``.wav``.

    Raises:
        LookupError: If no search result has a matching duration.
    """
    target_seconds = int(time_length)
    results = YoutubeSearch(song, max_results=15).to_dict()

    for data in results:
        seconds = _duration_to_seconds(data.get("duration"))
        if seconds is None or abs(seconds - target_seconds) >= 3:
            continue

        # Download the exact result that matched; searching again by title
        # could resolve to a different upload with another duration.
        video_id = data.get("id")
        if video_id:
            query = f"https://www.youtube.com/watch?v={video_id}"
        else:
            query = f"ytsearch:{data['title']}"

        with YoutubeDL(YDL_OPTIONS_VIDEO) as ydl_video, YoutubeDL(
            YDL_OPTIONS_AUDIO
        ) as ydl_audio:
            video_info = ydl_video.extract_info(query, download=True)
            audio_info = ydl_audio.extract_info(query, download=True)
            # A "ytsearch:" query is returned wrapped in an "entries" list.
            if "entries" in video_info:
                video_info = video_info["entries"][0]
            if "entries" in audio_info:
                audio_info = audio_info["entries"][0]
            video_base = os.path.splitext(ydl_video.prepare_filename(video_info))[0]
            audio_base = os.path.splitext(ydl_audio.prepare_filename(audio_info))[0]

        return video_base + ".mp4", audio_base + ".wav"

    raise LookupError(
        f"No search result for {song!r} is within 3 seconds of {target_seconds}s"
    )
def speech_to_text(audio_file):
    """Transcribe an entire audio file with Google speech recognition.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile``.

    Returns:
        The recognised text, or a human-readable message string when the
        audio could not be understood or the recognition request failed.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as src:
        captured = engine.record(src)

    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as err:
        return f"Error: {str(err)}"
    else:
        return transcript
def transcribe_audio_to_text(audio_data):
    """Transcribe one audio source with Google speech recognition.

    Args:
        audio_data: Audio source accepted by ``sr.AudioFile`` (a path or a
            file-like object).

    Returns:
        The recognised text, or ``None`` after printing a diagnostic when the
        audio could not be understood or the recognition request failed.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_data) as src:
        # Load the complete source rather than a fixed-length slice.
        captured = engine.record(src)

    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return None
    except sr.RequestError as err:
        print("Could not request results from Google Speech Recognition service; {0}".format(err))
        return None
    return transcript
def split_audio_into_chunks(audio_file, chunk_length=3000):
    """Cut a WAV file into consecutive pieces and return each as WAV bytes.

    Args:
        audio_file: WAV source passed to ``AudioSegment.from_wav``.
        chunk_length: Size of each slice of the loaded segment (pydub slices
            by milliseconds, so the default is 3 seconds). The last piece may
            be shorter.

    Returns:
        A list of ``bytes`` objects, one WAV-encoded export per piece, in
        playback order.
    """
    recording = AudioSegment.from_wav(audio_file)
    wav_blobs = []
    for start in range(0, len(recording), chunk_length):
        buffer = io.BytesIO()
        recording[start:start + chunk_length].export(buffer, format="wav")
        wav_blobs.append(buffer.getvalue())
    return wav_blobs
def generate_subtitles(audio_file):
    """Transcribe an audio file chunk by chunk into one comma-separated string.

    Args:
        audio_file: WAV source handed to ``split_audio_into_chunks``.

    Returns:
        The transcripts of all recognised chunks in order, each followed by a
        comma. Chunks for which ``transcribe_audio_to_text`` returns ``None``
        are left out.
    """
    pieces = []
    for chunk in split_audio_into_chunks(audio_file):
        text = transcribe_audio_to_text(io.BytesIO(chunk))
        if text is None:
            # Transcription failed for this chunk; skipping it keeps the
            # literal word "None" out of the subtitle text.
            continue
        pieces.append("{0},".format(text))
    return "".join(pieces)
# Script body: download the clip (152 is the expected duration in seconds),
# transcribe its audio track, print the transcript and save it to disk.
video_file, audio_file = download_video_and_audio("America Ferrera's Iconic Barbie Speech", 152)
text = generate_subtitles(audio_file)
print("Converted Text:")
print(text)
# Explicit UTF-8 so a transcript containing non-ASCII characters cannot fail
# with UnicodeEncodeError on platforms whose default encoding is not UTF-8.
with open('subtitles.txt', 'w', encoding='utf-8') as file:
    file.write(text)