diff --git a/process_debates.py b/process_debates.py index 8a15137..f35ae5a 100644 --- a/process_debates.py +++ b/process_debates.py @@ -16,7 +16,7 @@ from googleapiclient.discovery import build from google.oauth2.credentials import Credentials -from video_utils import upload_to_gdrive +from video_utils import upload_to_gdrive, get_file_ids, direct_link def convert_to_seconds(time): """ @@ -144,9 +144,12 @@ def get_audio_and_video(url, audio_path, headers=None, gdrive_service=None): subprocess.run(cmd) # upload the video to youtube - upload_to_gdrive(audio_path.parent, audio_path.stem, gdrive_service) + mp3_direct_link = upload_to_gdrive(audio_path.parent, audio_path.stem, gdrive_service) + else: + file_ids = get_file_ids(audio_path.stem) + mp3_direct_link = direct_link(file_ids[f"{audio_path.stem}.mp3"], with_proxy=False) - return f"debates/media/{audio_path.stem}.m3u8" + return f"debates/media/{audio_path.stem}.m3u8", mp3_direct_link def slugify(title): @@ -236,11 +239,13 @@ def process_debate(*, title, url, output_root, gdrive_service, skip_transcriptio headers = None if not skip_upload: - get_audio_and_video(m3u8_url, audio_path, headers=headers, gdrive_service=gdrive_service) + _, mp3_direct_link = get_audio_and_video(m3u8_url, audio_path, headers=headers, + gdrive_service=gdrive_service) out = { "slug": slug, "title": title, "original_url": url, + "audio_url": mp3_direct_link, } with open(output_root / f"{slug}.json", "w") as f: diff --git a/public/debates/be-vs-chega.json b/public/debates/be-vs-chega.json index 31b57fe..7fa9e8b 100644 --- a/public/debates/be-vs-chega.json +++ b/public/debates/be-vs-chega.json @@ -1,5 +1,6 @@ { "slug": "be-vs-chega", "title": "BE vs Chega", - "original_url": "https://www.rtp.pt/play/p12900/e747851/debates-legislativas-2024" + "original_url": "https://www.rtp.pt/play/p12900/e747851/debates-legislativas-2024", + "audio_url": "https://drive.google.com/uc?id=1Tk9gV0e_r_H638ZQsB0ume7r8K-ZzwRP" } \ No newline at end of file diff --git a/public/debates/be-vs-livre.json b/public/debates/be-vs-livre.json index a0f44a4..e505cb8 100644 --- a/public/debates/be-vs-livre.json +++ b/public/debates/be-vs-livre.json @@ -1,5 +1,6 @@ { "slug": "be-vs-livre", "title": "BE vs Livre", - "original_url": "https://www.rtp.pt/play/p12899/e746909/debates-legislativas-2024-sicsic-noticias" + "original_url": "https://www.rtp.pt/play/p12899/e746909/debates-legislativas-2024-sicsic-noticias", + "audio_url": "https://drive.google.com/uc?id=18VRopVy-VbjNLKYkUU9TRtyo2JdGowv0" } \ No newline at end of file diff --git a/public/debates/be-vs-pcp.json b/public/debates/be-vs-pcp.json index 0cb6025..f3fa94a 100644 --- a/public/debates/be-vs-pcp.json +++ b/public/debates/be-vs-pcp.json @@ -1,5 +1,6 @@ { "slug": "be-vs-pcp", "title": "BE vs PCP", - "original_url": "https://www.rtp.pt/play/p12899/e747442/debates-legislativas-2024-sicsic-noticias" + "original_url": "https://www.rtp.pt/play/p12899/e747442/debates-legislativas-2024-sicsic-noticias", + "audio_url": "https://drive.google.com/uc?id=1-A5Q4Jc6FY4TN2kKbN16gKlh2SV1tfX5" } \ No newline at end of file diff --git a/public/debates/be-vs-psd.json b/public/debates/be-vs-psd.json index 926c9b8..3f8c436 100644 --- a/public/debates/be-vs-psd.json +++ b/public/debates/be-vs-psd.json @@ -1,5 +1,6 @@ { "slug": "be-vs-psd", "title": "BE vs PSD", - "original_url": "https://sicnoticias.pt/especiais/eleicoes-legislativas/2024-02-06-Debate-entre-BE-e-PSD-Quem-e-que-sabe-salvar-o-SNS--a252ab7c" + "original_url": "https://sicnoticias.pt/especiais/eleicoes-legislativas/2024-02-06-Debate-entre-BE-e-PSD-Quem-e-que-sabe-salvar-o-SNS--a252ab7c", + "audio_url": "https://drive.google.com/uc?id=1EIbaHmM2Xk-RFsIgYVf9BsH-xgrnntTt" } \ No newline at end of file diff --git a/public/debates/chega-vs-il.json b/public/debates/chega-vs-il.json index 79c1573..9d70650 100644 --- a/public/debates/chega-vs-il.json +++ b/public/debates/chega-vs-il.json @@ -1,5 +1,6 @@ { "slug": "chega-vs-il", "title": "Chega vs IL", - "original_url": "https://www.rtp.pt/play/p12899/e746368/debates-legislativas-2024-sicsic-noticias" + "original_url": "https://www.rtp.pt/play/p12899/e746368/debates-legislativas-2024-sicsic-noticias", + "audio_url": "https://drive.google.com/uc?id=1Z_VuncGFLxSYo_pV2irq-9hPydvN1o8-" } \ No newline at end of file diff --git a/public/debates/chega-vs-pan.json b/public/debates/chega-vs-pan.json index f71c7dd..8c12872 100644 --- a/public/debates/chega-vs-pan.json +++ b/public/debates/chega-vs-pan.json @@ -1,5 +1,6 @@ { "slug": "chega-vs-pan", "title": "Chega vs PAN", - "original_url": "https://www.rtp.pt/play/p12900/e746061/debates-legislativas-2024" + "original_url": "https://www.rtp.pt/play/p12900/e746061/debates-legislativas-2024", + "audio_url": "https://drive.google.com/uc?id=16V6_OfgkVBkZl4cVW29QUbPZtfg5jNiQ" } \ No newline at end of file diff --git a/public/debates/chega-vs-pcp.json b/public/debates/chega-vs-pcp.json index 73e614e..c2de781 100644 --- a/public/debates/chega-vs-pcp.json +++ b/public/debates/chega-vs-pcp.json @@ -1,5 +1,6 @@ { "slug": "chega-vs-pcp", "title": "Chega vs PCP", - "original_url": "https://www.rtp.pt/play/p12901/e747268/debates-legislativas-2024-tvicnn" + "original_url": "https://www.rtp.pt/play/p12901/e747268/debates-legislativas-2024-tvicnn", + "audio_url": "https://drive.google.com/uc?id=1aa_ln4ZOb6gDvh3iorstQ-GS2lZE6Hyj" } \ No newline at end of file diff --git a/public/debates/il-vs-pan.json b/public/debates/il-vs-pan.json index b322bec..a428bba 100644 --- a/public/debates/il-vs-pan.json +++ b/public/debates/il-vs-pan.json @@ -1,5 +1,6 @@ { "slug": "il-vs-pan", "title": "IL vs PAN", - "original_url": "https://www.rtp.pt/play/p12899/e747269/debates-legislativas-2024-sicsic-noticias" + "original_url": "https://www.rtp.pt/play/p12899/e747269/debates-legislativas-2024-sicsic-noticias", + "audio_url": "https://drive.google.com/uc?id=1tWrPr9c-oQz202wU0jjnsqoMkxIrnLtW" } \ No newline at end of file diff --git a/public/debates/livre-vs-il.json b/public/debates/livre-vs-il.json index 06dda08..15a14fe 100644 --- a/public/debates/livre-vs-il.json +++ b/public/debates/livre-vs-il.json @@ -1,5 +1,6 @@ { "slug": "livre-vs-il", "title": "Livre vs IL", - "original_url": "https://www.rtp.pt/play/p12901/e746631/debates-legislativas-2024-tvicnn" + "original_url": "https://www.rtp.pt/play/p12901/e746631/debates-legislativas-2024-tvicnn", + "audio_url": "https://drive.google.com/uc?id=1ExCAfb6cx3Cm3LyduuC3F9F1HBUHGjVL" } \ No newline at end of file diff --git a/public/debates/livre-vs-ps.json b/public/debates/livre-vs-ps.json index e467e0d..ef18179 100644 --- a/public/debates/livre-vs-ps.json +++ b/public/debates/livre-vs-ps.json @@ -1,5 +1,6 @@ { "slug": "livre-vs-ps", "title": "Livre vs PS", - "original_url": "https://www.rtp.pt/play/p12900/e747215/debates-legislativas-2024" + "original_url": "https://www.rtp.pt/play/p12900/e747215/debates-legislativas-2024", + "audio_url": "https://drive.google.com/uc?id=1204EOcVfCSOwsm_W1BMIV-LTJtuFWI7F" } \ No newline at end of file diff --git a/public/debates/pcp-vs-pan.json b/public/debates/pcp-vs-pan.json index d985f6f..0e84e39 100644 --- a/public/debates/pcp-vs-pan.json +++ b/public/debates/pcp-vs-pan.json @@ -1,5 +1,6 @@ { "slug": "pcp-vs-pan", "title": "PCP vs PAN", - "original_url": "https://www.rtp.pt/play/p12900/e746296/debates-legislativas-2024" + "original_url": "https://www.rtp.pt/play/p12900/e746296/debates-legislativas-2024", + "audio_url": "https://drive.google.com/uc?id=146tb4dYnPThDMOCAmQkHzcWIyzR5moqB" } \ No newline at end of file diff --git a/public/debates/ps-vs-il.json b/public/debates/ps-vs-il.json index fd379a5..e574aa4 100644 --- a/public/debates/ps-vs-il.json +++ b/public/debates/ps-vs-il.json @@ -1,5 +1,6 @@ { "slug": "ps-vs-il", "title": "PS vs IL", - "original_url": "https://sicnoticias.pt/pais/2024-02-05-Debate-PS--IL-na-integra-dc65b6a5" + "original_url": "https://sicnoticias.pt/pais/2024-02-05-Debate-PS--IL-na-integra-dc65b6a5", + "audio_url": "https://drive.google.com/uc?id=1Pnc9e-zgUmcAaYxyVh4OxldU8dC7PLHm" } \ No newline at end of file diff --git a/public/debates/ps-vs-pan.json b/public/debates/ps-vs-pan.json index d8a3d4d..7986bc6 100644 --- a/public/debates/ps-vs-pan.json +++ b/public/debates/ps-vs-pan.json @@ -1,5 +1,6 @@ { "slug": "ps-vs-pan", "title": "PS vs PAN", - "original_url": "https://www.rtp.pt/play/p12901/e747338/debates-legislativas-2024-tvicnn" + "original_url": "https://www.rtp.pt/play/p12901/e747338/debates-legislativas-2024-tvicnn", + "audio_url": "https://drive.google.com/uc?id=1xY2LBaY8H9WthtUEG7h4S_sQN7LjxVfJ" } \ No newline at end of file diff --git a/public/debates/psd-vs-chega.json b/public/debates/psd-vs-chega.json index ad19655..20d1774 100644 --- a/public/debates/psd-vs-chega.json +++ b/public/debates/psd-vs-chega.json @@ -1,5 +1,6 @@ { "slug": "psd-vs-chega", "title": "PSD vs Chega", - "original_url": "https://www.rtp.pt/play/p12900/e747692/debates-legislativas-2024" + "original_url": "https://www.rtp.pt/play/p12900/e747692/debates-legislativas-2024", + "audio_url": "https://drive.google.com/uc?id=1-21NOlgRcOMDBbCG1oDCzm26RWlZoY8h" } \ No newline at end of file diff --git a/public/debates/psd-vs-pan.json b/public/debates/psd-vs-pan.json index a89f430..c0cfb8f 100644 --- a/public/debates/psd-vs-pan.json +++ b/public/debates/psd-vs-pan.json @@ -1,5 +1,6 @@ { "slug": "psd-vs-pan", "title": "PSD vs PAN", - "original_url": "https://www.rtp.pt/play/p12899/e747423/debates-legislativas-2024-sicsic-noticias" + "original_url": "https://www.rtp.pt/play/p12899/e747423/debates-legislativas-2024-sicsic-noticias", + "audio_url": "https://drive.google.com/uc?id=1WHxFtmYjukmONNtaUaLXkcUlyMgcZQ9v" } \ No newline at end of file diff --git a/public/debates/psd-vs-pcp.json b/public/debates/psd-vs-pcp.json index 1590af0..b8aca52 100644 --- a/public/debates/psd-vs-pcp.json +++ b/public/debates/psd-vs-pcp.json @@ -1,5 +1,6 @@ { "slug": "psd-vs-pcp", "title": "PSD vs PCP", - "original_url": "https://www.rtp.pt/play/p12900/e747327/debates-legislativas-2024" + "original_url": "https://www.rtp.pt/play/p12900/e747327/debates-legislativas-2024", + "audio_url": "https://drive.google.com/uc?id=1gb46XcJOGBmo5r8PkTA6Tw0YylDZv2BA" } \ No newline at end of file diff --git a/src/Debate.svelte b/src/Debate.svelte index eccd56d..4572bc8 100644 --- a/src/Debate.svelte +++ b/src/Debate.svelte @@ -39,13 +39,6 @@ console.log(video_url); if (Hls.isSupported()) { - /*const config = { - xhrSetup: function (xhr, url) { - xhr.withCredentials = true; - }, - }; - - const hls = new Hls(config);*/ const hls = new Hls(); hls.loadSource(video_url); hls.attachMedia(video); @@ -162,4 +155,4 @@ Voltar à página inicial Link para o vídeo original -Link para o áudio +Link para o áudio diff --git a/video_utils.py b/video_utils.py index 1cb689a..26e5104 100644 --- a/video_utils.py +++ b/video_utils.py @@ -16,15 +16,16 @@ def set_public_permission(file_id, service): ).execute() -def direct_link(file_id): +def direct_link(file_id, with_proxy=True): video_url = f"https://drive.google.com/uc?id={file_id}" - #return "https://api.allorigins.win/raw?url=" + quote(video_url, safe="") - #return "https://corsproxy.io/?" + quote(video_url, safe="") - return "https://worker-little-base-2714.mail-2e4.workers.dev/?" + quote(video_url, safe="") + if with_proxy: + return "https://worker-little-base-2714.mail-2e4.workers.dev/?" + quote(video_url, safe="") + return video_url -def get_segment_file_ids(slug): + +def get_file_ids(slug): # get segment file ids: rclone lsjson remoteName:targetFolder --files-only | jq '.[] | {name, id}' cmd = [ "rclone", "lsjson", @@ -50,23 +51,6 @@ def get_segment_file_ids(slug): return file_ids -def set_folder_permissions(slug, service): - folder_name = f"debates2024/{slug}" - - # get folder id - result = service.files().list(q=f"name='{folder_name}'").execute() - folder_id = result["files"][0]["id"] - - # set folder permissions - anyone_permission = { - 'type': 'anyone', - 'role': 'reader', - } - service.permissions().create( - fileId=folder_id, - body=anyone_permission, - ).execute() - def get_segment_duration(segment_path): cmd = [ "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", @@ -91,8 +75,21 @@ def upload_to_gdrive(root_path, slug, service): ] subprocess.run(cmd, check=True) + # copy mp3 to gdrive + cmd = [ + "rclone", "copy", "--check-first", "--progress", str(root_path), + "--include", f"{slug}.mp3", + "--ignore-existing", + f"debates:debates2024/{slug}", + ] + subprocess.run(cmd, check=True) + # get segment file ids - file_ids = get_segment_file_ids(slug) + file_ids = get_file_ids(slug) + + # get mp3 file id + mp3_id = file_ids[f"{slug}.mp3"] + set_public_permission(mp3_id, service) # make m3u8 with open(f"{root_path}/{slug}.m3u8", "w") as f: @@ -105,7 +102,7 @@ def upload_to_gdrive(root_path, slug, service): for i in trange(n_segments, desc="Creating m3u8..."): segment_path = f"{slug}_segment_{i:0{n_digits}d}.ts" file_id = file_ids[segment_path] - #set_public_permission(file_id, service) + set_public_permission(file_id, service) # get duration duration = get_segment_duration(root_path / segment_path) @@ -114,4 +111,4 @@ def upload_to_gdrive(root_path, slug, service): f.write(direct_link(file_id) + "\n") f.write("#EXT-X-ENDLIST\n") - return + return direct_link(mp3_id, with_proxy=False)