Skip to content

Commit

Permalink
host all media on gdrive
Browse files Browse the repository at this point in the history
Former-commit-id: 64b685a1a8f62f3e5003bf0e5c1b4560b1485896
Former-commit-id: ea0ba3d
  • Loading branch information
colobas committed Feb 14, 2024
1 parent 6de31a2 commit 3cd7ce9
Show file tree
Hide file tree
Showing 19 changed files with 64 additions and 53 deletions.
13 changes: 9 additions & 4 deletions process_debates.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from googleapiclient.discovery import build
from google.oauth2.credentials import Credentials
from video_utils import upload_to_gdrive
from video_utils import upload_to_gdrive, get_file_ids, direct_link

def convert_to_seconds(time):
"""
Expand Down Expand Up @@ -144,9 +144,12 @@ def get_audio_and_video(url, audio_path, headers=None, gdrive_service=None):
subprocess.run(cmd)

# upload the media to Google Drive
upload_to_gdrive(audio_path.parent, audio_path.stem, gdrive_service)
mp3_direct_link = upload_to_gdrive(audio_path.parent, audio_path.stem, gdrive_service)
else:
file_ids = get_file_ids(audio_path.stem)
mp3_direct_link = direct_link(file_ids[f"{audio_path.stem}.mp3"], with_proxy=False)

return f"debates/media/{audio_path.stem}.m3u8"
return f"debates/media/{audio_path.stem}.m3u8", mp3_direct_link


def slugify(title):
Expand Down Expand Up @@ -236,11 +239,13 @@ def process_debate(*, title, url, output_root, gdrive_service, skip_transcriptio
headers = None

if not skip_upload:
get_audio_and_video(m3u8_url, audio_path, headers=headers, gdrive_service=gdrive_service)
_, mp3_direct_link = get_audio_and_video(m3u8_url, audio_path, headers=headers,
gdrive_service=gdrive_service)
out = {
"slug": slug,
"title": title,
"original_url": url,
"audio_url": mp3_direct_link,
}

with open(output_root / f"{slug}.json", "w") as f:
Expand Down
3 changes: 2 additions & 1 deletion public/debates/be-vs-chega.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "be-vs-chega",
"title": "BE vs Chega",
"original_url": "https://www.rtp.pt/play/p12900/e747851/debates-legislativas-2024"
"original_url": "https://www.rtp.pt/play/p12900/e747851/debates-legislativas-2024",
"audio_url": "https://drive.google.com/uc?id=1Tk9gV0e_r_H638ZQsB0ume7r8K-ZzwRP"
}
3 changes: 2 additions & 1 deletion public/debates/be-vs-livre.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "be-vs-livre",
"title": "BE vs Livre",
"original_url": "https://www.rtp.pt/play/p12899/e746909/debates-legislativas-2024-sicsic-noticias"
"original_url": "https://www.rtp.pt/play/p12899/e746909/debates-legislativas-2024-sicsic-noticias",
"audio_url": "https://drive.google.com/uc?id=18VRopVy-VbjNLKYkUU9TRtyo2JdGowv0"
}
3 changes: 2 additions & 1 deletion public/debates/be-vs-pcp.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "be-vs-pcp",
"title": "BE vs PCP",
"original_url": "https://www.rtp.pt/play/p12899/e747442/debates-legislativas-2024-sicsic-noticias"
"original_url": "https://www.rtp.pt/play/p12899/e747442/debates-legislativas-2024-sicsic-noticias",
"audio_url": "https://drive.google.com/uc?id=1-A5Q4Jc6FY4TN2kKbN16gKlh2SV1tfX5"
}
3 changes: 2 additions & 1 deletion public/debates/be-vs-psd.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "be-vs-psd",
"title": "BE vs PSD",
"original_url": "https://sicnoticias.pt/especiais/eleicoes-legislativas/2024-02-06-Debate-entre-BE-e-PSD-Quem-e-que-sabe-salvar-o-SNS--a252ab7c"
"original_url": "https://sicnoticias.pt/especiais/eleicoes-legislativas/2024-02-06-Debate-entre-BE-e-PSD-Quem-e-que-sabe-salvar-o-SNS--a252ab7c",
"audio_url": "https://drive.google.com/uc?id=1EIbaHmM2Xk-RFsIgYVf9BsH-xgrnntTt"
}
3 changes: 2 additions & 1 deletion public/debates/chega-vs-il.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "chega-vs-il",
"title": "Chega vs IL",
"original_url": "https://www.rtp.pt/play/p12899/e746368/debates-legislativas-2024-sicsic-noticias"
"original_url": "https://www.rtp.pt/play/p12899/e746368/debates-legislativas-2024-sicsic-noticias",
"audio_url": "https://drive.google.com/uc?id=1Z_VuncGFLxSYo_pV2irq-9hPydvN1o8-"
}
3 changes: 2 additions & 1 deletion public/debates/chega-vs-pan.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "chega-vs-pan",
"title": "Chega vs PAN",
"original_url": "https://www.rtp.pt/play/p12900/e746061/debates-legislativas-2024"
"original_url": "https://www.rtp.pt/play/p12900/e746061/debates-legislativas-2024",
"audio_url": "https://drive.google.com/uc?id=16V6_OfgkVBkZl4cVW29QUbPZtfg5jNiQ"
}
3 changes: 2 additions & 1 deletion public/debates/chega-vs-pcp.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "chega-vs-pcp",
"title": "Chega vs PCP",
"original_url": "https://www.rtp.pt/play/p12901/e747268/debates-legislativas-2024-tvicnn"
"original_url": "https://www.rtp.pt/play/p12901/e747268/debates-legislativas-2024-tvicnn",
"audio_url": "https://drive.google.com/uc?id=1aa_ln4ZOb6gDvh3iorstQ-GS2lZE6Hyj"
}
3 changes: 2 additions & 1 deletion public/debates/il-vs-pan.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "il-vs-pan",
"title": "IL vs PAN",
"original_url": "https://www.rtp.pt/play/p12899/e747269/debates-legislativas-2024-sicsic-noticias"
"original_url": "https://www.rtp.pt/play/p12899/e747269/debates-legislativas-2024-sicsic-noticias",
"audio_url": "https://drive.google.com/uc?id=1tWrPr9c-oQz202wU0jjnsqoMkxIrnLtW"
}
3 changes: 2 additions & 1 deletion public/debates/livre-vs-il.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "livre-vs-il",
"title": "Livre vs IL",
"original_url": "https://www.rtp.pt/play/p12901/e746631/debates-legislativas-2024-tvicnn"
"original_url": "https://www.rtp.pt/play/p12901/e746631/debates-legislativas-2024-tvicnn",
"audio_url": "https://drive.google.com/uc?id=1ExCAfb6cx3Cm3LyduuC3F9F1HBUHGjVL"
}
3 changes: 2 additions & 1 deletion public/debates/livre-vs-ps.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "livre-vs-ps",
"title": "Livre vs PS",
"original_url": "https://www.rtp.pt/play/p12900/e747215/debates-legislativas-2024"
"original_url": "https://www.rtp.pt/play/p12900/e747215/debates-legislativas-2024",
"audio_url": "https://drive.google.com/uc?id=1204EOcVfCSOwsm_W1BMIV-LTJtuFWI7F"
}
3 changes: 2 additions & 1 deletion public/debates/pcp-vs-pan.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "pcp-vs-pan",
"title": "PCP vs PAN",
"original_url": "https://www.rtp.pt/play/p12900/e746296/debates-legislativas-2024"
"original_url": "https://www.rtp.pt/play/p12900/e746296/debates-legislativas-2024",
"audio_url": "https://drive.google.com/uc?id=146tb4dYnPThDMOCAmQkHzcWIyzR5moqB"
}
3 changes: 2 additions & 1 deletion public/debates/ps-vs-il.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "ps-vs-il",
"title": "PS vs IL",
"original_url": "https://sicnoticias.pt/pais/2024-02-05-Debate-PS--IL-na-integra-dc65b6a5"
"original_url": "https://sicnoticias.pt/pais/2024-02-05-Debate-PS--IL-na-integra-dc65b6a5",
"audio_url": "https://drive.google.com/uc?id=1Pnc9e-zgUmcAaYxyVh4OxldU8dC7PLHm"
}
3 changes: 2 additions & 1 deletion public/debates/ps-vs-pan.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "ps-vs-pan",
"title": "PS vs PAN",
"original_url": "https://www.rtp.pt/play/p12901/e747338/debates-legislativas-2024-tvicnn"
"original_url": "https://www.rtp.pt/play/p12901/e747338/debates-legislativas-2024-tvicnn",
"audio_url": "https://drive.google.com/uc?id=1xY2LBaY8H9WthtUEG7h4S_sQN7LjxVfJ"
}
3 changes: 2 additions & 1 deletion public/debates/psd-vs-chega.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "psd-vs-chega",
"title": "PSD vs Chega",
"original_url": "https://www.rtp.pt/play/p12900/e747692/debates-legislativas-2024"
"original_url": "https://www.rtp.pt/play/p12900/e747692/debates-legislativas-2024",
"audio_url": "https://drive.google.com/uc?id=1-21NOlgRcOMDBbCG1oDCzm26RWlZoY8h"
}
3 changes: 2 additions & 1 deletion public/debates/psd-vs-pan.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "psd-vs-pan",
"title": "PSD vs PAN",
"original_url": "https://www.rtp.pt/play/p12899/e747423/debates-legislativas-2024-sicsic-noticias"
"original_url": "https://www.rtp.pt/play/p12899/e747423/debates-legislativas-2024-sicsic-noticias",
"audio_url": "https://drive.google.com/uc?id=1WHxFtmYjukmONNtaUaLXkcUlyMgcZQ9v"
}
3 changes: 2 additions & 1 deletion public/debates/psd-vs-pcp.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"slug": "psd-vs-pcp",
"title": "PSD vs PCP",
"original_url": "https://www.rtp.pt/play/p12900/e747327/debates-legislativas-2024"
"original_url": "https://www.rtp.pt/play/p12900/e747327/debates-legislativas-2024",
"audio_url": "https://drive.google.com/uc?id=1gb46XcJOGBmo5r8PkTA6Tw0YylDZv2BA"
}
9 changes: 1 addition & 8 deletions src/Debate.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,6 @@
console.log(video_url);
if (Hls.isSupported()) {
/*const config = {
xhrSetup: function (xhr, url) {
xhr.withCredentials = true;
},
};
const hls = new Hls(config);*/
const hls = new Hls();
hls.loadSource(video_url);
hls.attachMedia(video);
Expand Down Expand Up @@ -162,4 +155,4 @@

<a href="/">Voltar à página inicial</a>
<a href={debateData.original_url} target="_blank">Link para o vídeo original</a>
<a href="/debates/media/{params.slug}.mp3" download={debateData.title}>Link para o áudio</a>
<a href={debateData.audio_url} download={debateData.title}>Link para o áudio</a>
47 changes: 22 additions & 25 deletions video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@ def set_public_permission(file_id, service):
).execute()


def direct_link(file_id):
def direct_link(file_id, with_proxy=True):
video_url = f"https://drive.google.com/uc?id={file_id}"

#return "https://api.allorigins.win/raw?url=" + quote(video_url, safe="")
#return "https://corsproxy.io/?" + quote(video_url, safe="")
return "https://worker-little-base-2714.mail-2e4.workers.dev/?" + quote(video_url, safe="")

if with_proxy:
return "https://worker-little-base-2714.mail-2e4.workers.dev/?" + quote(video_url, safe="")
return video_url

def get_segment_file_ids(slug):

def get_file_ids(slug):
# get file ids (segments and mp3): rclone lsjson remoteName:targetFolder --files-only | jq '.[] | {name, id}'
cmd = [
"rclone", "lsjson",
Expand All @@ -50,23 +51,6 @@ def get_segment_file_ids(slug):
return file_ids


def set_folder_permissions(slug, service):
folder_name = f"debates2024/{slug}"

# get folder id
result = service.files().list(q=f"name='{folder_name}'").execute()
folder_id = result["files"][0]["id"]

# set folder permissions
anyone_permission = {
'type': 'anyone',
'role': 'reader',
}
service.permissions().create(
fileId=folder_id,
body=anyone_permission,
).execute()

def get_segment_duration(segment_path):
cmd = [
"ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1",
Expand All @@ -91,8 +75,21 @@ def upload_to_gdrive(root_path, slug, service):
]
subprocess.run(cmd, check=True)

# copy mp3 to gdrive
cmd = [
"rclone", "copy", "--check-first", "--progress", str(root_path),
"--include", f"{slug}.mp3",
"--ignore-existing",
f"debates:debates2024/{slug}",
]
subprocess.run(cmd, check=True)

# get segment file ids
file_ids = get_segment_file_ids(slug)
file_ids = get_file_ids(slug)

# get mp3 file id
mp3_id = file_ids[f"{slug}.mp3"]
set_public_permission(mp3_id, service)

# make m3u8
with open(f"{root_path}/{slug}.m3u8", "w") as f:
Expand All @@ -105,7 +102,7 @@ def upload_to_gdrive(root_path, slug, service):
for i in trange(n_segments, desc="Creating m3u8..."):
segment_path = f"{slug}_segment_{i:0{n_digits}d}.ts"
file_id = file_ids[segment_path]
#set_public_permission(file_id, service)
set_public_permission(file_id, service)

# get duration
duration = get_segment_duration(root_path / segment_path)
Expand All @@ -114,4 +111,4 @@ def upload_to_gdrive(root_path, slug, service):
f.write(direct_link(file_id) + "\n")
f.write("#EXT-X-ENDLIST\n")

return
return direct_link(mp3_id, with_proxy=False)

0 comments on commit 3cd7ce9

Please sign in to comment.