-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgoogle-podcast-downloader.py
61 lines (45 loc) · 1.84 KB
/
google-podcast-downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""
Created: Aug 25, 2022
Last Modified: Sep 01, 2023
Description: Script to download the entire podcast library for the given URL
Github: https://github.com/VaasuDevanS/google-podcast-downloader
"""
import argparse
from pathlib import Path
import re
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm
def main(url: str, out_dir: Path, timeout: float = 30.0) -> None:
    """Download every episode listed on a podcast page into ``out_dir``.

    The page at ``url`` is fetched and parsed, one ``.mp3`` file is written
    per episode div found, and files are named
    ``EP <index> - <name> (<date>).mp3``.

    Args:
        url: Address of the podcast page to scrape.
        out_dir: Destination folder; created (with parents) if missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the podcast page itself cannot be
            fetched. Failures on individual episodes are reported and
            skipped so one bad episode does not abort the whole run.
    """
    # Create directory if it doesn't exist
    out_dir.mkdir(parents=True, exist_ok=True)

    # Read the url and create soup object; fail loudly on an HTTP error
    # instead of silently parsing an error page that contains no episodes.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Get all the divs corresponding to each podcast episode
    divs = soup.find_all("div", attrs={'class': 'oD3fme'})

    # Characters that are not allowed in file names on common file systems.
    # Compiled once, outside the loop.
    invalid_chars = re.compile(r'[\\/:*?"<>|]+')

    # Iterate through each div (episode) in reverse page order.
    # NOTE(review): presumably the page lists newest first, so reversing
    # makes EP 000 the oldest episode - confirm against the live page.
    for ix, div in tqdm(enumerate(divs[::-1]), total=len(divs)):
        date_div = div.find("div", attrs={'class': 'OTz6ee'})
        name_div = div.find("div", attrs={'class': 'e3ZUqe'})
        data_div = div.find("div", attrs={"jsname": "fvi9Ef"})
        if date_div is None or name_div is None or data_div is None:
            tqdm.write(f'Skipping episode {ix:03d}: unexpected page markup')
            continue

        # The media URL is the second ';'-separated field of "jsdata".
        fields = (data_div.get("jsdata") or "").split(";")
        if len(fields) < 2 or not fields[1]:
            tqdm.write(f'Skipping episode {ix:03d}: no download URL found')
            continue
        episode_url = fields[1]

        # Sanitise both name and date: either may contain characters that
        # are illegal in file names or would be read as path separators.
        name = invalid_chars.sub('', name_div.text)
        date = invalid_chars.sub('', date_div.text)
        episode_path = out_dir / f'EP {ix:03d} - {name} ({date}).mp3'

        # Download to a temporary ".part" file and rename on success so an
        # interrupted or failed transfer never leaves a truncated .mp3.
        part_path = episode_path.with_name(episode_path.name + '.part')
        try:
            with requests.get(episode_url, stream=True,
                              timeout=timeout) as podcast:
                podcast.raise_for_status()
                with part_path.open("wb") as out:
                    # Stream in chunks rather than holding the whole
                    # audio file in memory.
                    for chunk in podcast.iter_content(chunk_size=1 << 16):
                        out.write(chunk)
        except requests.RequestException as err:
            part_path.unlink(missing_ok=True)
            tqdm.write(f'Skipping episode {ix:03d}: download failed ({err})')
            continue
        part_path.replace(episode_path)
if __name__ == "__main__":
    # Command-line entry point. Both options are mandatory: without
    # ``required=True`` a missing value arrives as None and the script dies
    # with an opaque TypeError instead of an argparse usage message.
    parser = argparse.ArgumentParser(
        prog='Google Podcast Downloader',
        description='Script to download entire Podcast Library',
    )
    parser.add_argument('--url', required=True, help='URL of the podcast')
    parser.add_argument('--out-dir', required=True, type=Path,
                        help='Folder to download')
    args = parser.parse_args()
    main(url=args.url, out_dir=args.out_dir)