-
Notifications
You must be signed in to change notification settings - Fork 2
/
script.py
42 lines (33 loc) · 1.46 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from pytube import YouTube
import whisper
import os
import re
import warnings
def sanitize_filename(filename):
    """Return *filename* made safe to use as a single path component.

    Characters reserved on common filesystems (backslash, slash, asterisk,
    question mark, colon, double quote, angle brackets, pipe) and ASCII
    control characters such as newlines and tabs are each replaced with an
    underscore. Surrounding whitespace and trailing dots are then removed,
    since Windows does not preserve trailing dots or spaces in a path
    component.

    Args:
        filename: Proposed file or directory name (for example a video title).

    Returns:
        The cleaned name, or ``'untitled'`` if nothing usable is left.
    """
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f]', '_', filename)
    cleaned = cleaned.strip().rstrip('. ')
    # An empty component would make os.path.join() return the parent directory.
    return cleaned or 'untitled'
def save_transcription(transcription, folder_path, file_name='transcription.txt'):
    """Write *transcription* to a UTF-8 text file and return the file's path.

    Args:
        transcription: Text to write.
        folder_path: Directory that will hold the file; created if missing.
        file_name: Name of the file inside *folder_path*.

    Returns:
        The path of the written file (*folder_path* joined with *file_name*).
    """
    # An empty folder_path means "current directory"; os.makedirs('') would raise.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, file_name)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(transcription)
    return file_path
def download_youtube_audio(url, folder_path):
    """Download the audio track of a YouTube video into its own sub-folder.

    A directory named after the sanitized video title is created inside
    *folder_path*, and the first audio-only stream reported by pytube is
    saved there as ``audio.mp3``.

    Args:
        url: URL of the YouTube video.
        folder_path: Base directory under which the per-video folder is made.

    Returns:
        An ``(audio_path, folder_path)`` tuple: the path of the saved audio
        file and the per-video directory that contains it.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    video_title = sanitize_filename(yt.title)
    folder_path = os.path.join(folder_path, video_title)

    # Resolve the stream before touching the filesystem so a video without
    # audio fails with a clear error and leaves no empty directory behind.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")

    os.makedirs(folder_path, exist_ok=True)
    audio_path = os.path.join(folder_path, 'audio.mp3')
    # NOTE(review): the stream is saved under an .mp3 name without conversion;
    # presumably downstream tools detect the real container from the file
    # content -- confirm.
    audio_stream.download(filename=audio_path)
    return audio_path, folder_path
def transcribe_audio(audio_path, model_name='base'):
    """Transcribe an audio file with Whisper and return the recognized text.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_name: Name of the Whisper model to load.

    Returns:
        The ``'text'`` entry of the result returned by ``model.transcribe``.
    """
    # Scope the filter to this call: catch_warnings() restores the
    # process-wide warning configuration on exit instead of leaving the
    # "ignore" rule installed for the rest of the program.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
        model = whisper.load_model(model_name)
        result = model.transcribe(audio_path)
    return result['text']
# Script body: ask for a video URL, download its audio, transcribe it, then
# save and print the transcription.
# NOTE: there is no __main__ guard, so these statements also run on import.
video_url = input("Enter the YouTube video URL: ")
# Base directory; the download step creates one sub-folder per video inside it.
folder_path = 'Downloaded_Audios'
# folder_path is rebound to the per-video directory returned by the download step.
audio_path, folder_path = download_youtube_audio(video_url, folder_path)
transcription = transcribe_audio(audio_path)
# Written into the per-video directory under the default name transcription.txt.
transcription_file_path = save_transcription(transcription, folder_path)
print(f"Transcription saved to: {transcription_file_path}")
print("\nTranscription:\n")
print(transcription)