diff --git a/analysis/detailed_data_collection/page_processing/process_video_id/main.py b/analysis/detailed_data_collection/page_processing/process_video_id/main.py deleted file mode 100644 index e598d8a6..00000000 --- a/analysis/detailed_data_collection/page_processing/process_video_id/main.py +++ /dev/null @@ -1,399 +0,0 @@ -import base64 -import json -from json import JSONDecodeError -from random import randint -import requests -import numpy as np -from bs4 import BeautifulSoup -import time -from google.cloud import pubsub_v1 - -from youtube_transcript_api import YouTubeTranscriptApi -# from youtube_transcript_api.formatters import TextFormatter -from youtube_transcript_api import TranscriptsDisabled, NoTranscriptFound, TooManyRequests -import logging -import traceback - -PROJECT_ID = "regrets-reporter-dev" -VIDEO_DATA_TOPIC = "video-data" -ERROR_VIDEO_ID_TOPIC = "error-video-id" -logging.basicConfig(encoding='utf-8', level=logging.INFO, format='%(asctime)s %(levelname)s %(funcName)s(%(lineno)d) %(message)s') - - -class PageParseError(Exception): - pass - - -def parse_youtube_metadata_response(response, video_id, dict_result): - json_response = json.loads(response.text.split('ytInitialData =')[1].split(';