Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for network-related Trakt requests and IMDB page load errors + fix syntax error #111

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion IMDBTraktSyncer/IMDBTraktSyncer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class PageLoadException(Exception):
service = Service(executable_path=binary_path)
try:
driver = webdriver.Chrome(service=service, options=options)
driver.set_page_load_timeout(60)
except SessionNotCreatedException as e:
error_message = str(e)
if "This version of ChromeDriver only supports Chrome version" in error_message:
Expand Down Expand Up @@ -409,7 +410,7 @@ def filter_by_comment_length(lst, min_comment_length=None):
for i, item in enumerate(imdb_ratings_to_set, 1):

year_str = f' ({item["Year"]})' if item["Year"] is not None else '' # sometimes year is None for episodes from trakt so remove it from the print string
print(f' - Rating {item["Type"]}: ({i} of {len(imdb_ratings_to_set)}) {item["Title"]}{year_str}: {item["Rating"]}/10 on IMDB ({item['IMDB_ID']})')
print(f' - Rating {item["Type"]}: ({i} of {len(imdb_ratings_to_set)}) {item["Title"]}{year_str}: {item["Rating"]}/10 on IMDB ({item["IMDB_ID"]})')

try:
# Load page
Expand Down
91 changes: 75 additions & 16 deletions IMDBTraktSyncer/errorHandling.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import traceback
import requests
from requests.exceptions import RequestException, ConnectionError, Timeout
import time
import inspect
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
from selenium.common.exceptions import WebDriverException, NoSuchElementException, TimeoutException, StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Expand Down Expand Up @@ -40,21 +42,22 @@ def make_trakt_request(url, headers=None, params=None, payload=None, max_retries
'trakt-api-key': VC.trakt_client_id,
'Authorization': f'Bearer {VC.trakt_access_token}'
}

retry_delay = 1 # seconds between retries
retry_delay = 1 # Initial seconds between retries
retry_attempts = 0
connection_timeout = 20

while retry_attempts < max_retries:
response = None
try:
if payload is None:
if params:
response = requests.get(url, headers=headers, params=params)
response = requests.get(url, headers=headers, params=params, timeout=connection_timeout)
else:
response = requests.get(url, headers=headers)
response = requests.get(url, headers=headers, timeout=connection_timeout)
else:
response = requests.post(url, headers=headers, json=payload)

response = requests.post(url, headers=headers, json=payload, timeout=connection_timeout)
if response.status_code in [200, 201, 204]:
return response # Request succeeded, return response
elif response.status_code in [429, 500, 502, 503, 504, 520, 521, 522]:
Expand All @@ -69,13 +72,21 @@ def make_trakt_request(url, headers=None, params=None, payload=None, max_retries
print(f" - {error_message}")
EL.logger.error(f"{error_message}. URL: {url}")
return None

except requests.exceptions.RequestException as e:
error_message = f"Request failed with exception: {e}"
except (ConnectionError, Timeout) as conn_err:
# Handle connection reset and timeout
retry_attempts += 1
print(f" - Connection error: {conn_err}. Retrying ({retry_attempts}/{max_retries})...")
EL.logger.warning(f"Connection error: {conn_err}. Retrying ({retry_attempts}/{max_retries})...")
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
except RequestException as req_err:
# Handle other request-related exceptions
error_message = f"Request failed with exception: {req_err}"
print(f" - {error_message}")
EL.logger.error(error_message, exc_info=True)
return None

# If all retries fail
error_message = "Max retry attempts reached with Trakt API, request failed."
print(f" - {error_message}")
EL.logger.error(error_message)
Expand Down Expand Up @@ -106,18 +117,23 @@ def get_trakt_message(status_code):
521: "Service Unavailable - Cloudflare error",
522: "Service Unavailable - Cloudflare error"
}

return error_messages.get(status_code, "Unknown error")

# Custom exception for page load errors.
# Raised inside get_page_with_retries when the detected status code is >= 400,
# and caught there to decide whether the page load should be retried.
class PageLoadException(Exception):
pass

# Function to get page with retries and adjusted wait time
def get_page_with_retries(url, driver, wait, total_wait_time=180, initial_wait_time=5):
num_retries = total_wait_time // initial_wait_time
wait_time = total_wait_time / num_retries
max_retries = num_retries
status_code = None
was_retry = False # Flag to track if a retry occurred

for retry in range(max_retries):
try:
# Attempt to load the page using Selenium driver
driver.get(url)

# Wait until the status code becomes available
Expand All @@ -132,23 +148,66 @@ def get_page_with_retries(url, driver, wait, total_wait_time=180, initial_wait_t

# Check for any error codes
if status_code is None:
if was_retry:
print("Retry successful! Continuing...")
was_retry = False # Reset flag
return True, status_code, url # Unable to determine page loaded status
elif status_code >= 400:
raise PageLoadException(f'Failed to load page. Status code: {status_code}. URL: {url}')
else:
if was_retry:
print("Retry successful! Continuing...")
was_retry = False # Reset flag
return True, status_code, url # Page loaded successfully

except (PageLoadException) as e:
print(f" - Error: {str(e)}")
except TimeoutException as e:
# Handle page load timeout explicitly
frame = inspect.currentframe() # Get the current frame
lineno = frame.f_lineno # Get the line number where the exception occurred
filename = inspect.getfile(frame) # Get the file name where the exception occurred
print(f" - TimeoutException: Page load timed out. Retrying... {str(e).splitlines()[0]} URL: {url} (File: {filename}, Line: {lineno})")
if retry + 1 < max_retries:
seconds_left = int((max_retries - retry) * wait_time)
print(f" - Retrying ({retry + 1}/{max_retries}) {seconds_left} seconds remaining...")
time.sleep(wait_time)
was_retry = True # Set flag to indicate a retry occurred
continue
else:
print(" - Max retries reached or not retrying after timeout.")
return False, status_code, url

except WebDriverException as e:
# Handle Selenium-related network errors
frame = inspect.currentframe() # Get the current frame
lineno = frame.f_lineno # Get the line number where the exception occurred
filename = inspect.getfile(frame) # Get the file name where the exception occurred
print(f" - Selenium WebDriver Error: {str(e).splitlines()[0]} URL: {url} (File: {filename}, Line: {lineno})")
if "Connection reset by peer" in str(e):
print(" - Connection was reset by the server. Retrying...")
elif retry + 1 < max_retries:
seconds_left = int((max_retries - retry) * wait_time)
print(f" - Retrying ({retry + 1}/{max_retries}) {seconds_left} seconds remaining...")
time.sleep(wait_time)
was_retry = True # Set flag to indicate a retry occurred
continue
else:
print(" - Max retries reached or not retrying.")
return False, status_code, url

except PageLoadException as e:
frame = inspect.currentframe() # Get the current frame
lineno = frame.f_lineno # Get the line number where the exception occurred
filename = inspect.getfile(frame) # Get the file name where the exception occurred
print(f" - Error: {str(e).splitlines()[0]} URL: {url} (File: {filename}, Line: {lineno})")
retryable_error_codes = [408, 425, 429, 500, 502, 503, 504]
if retry + 1 < max_retries and status_code in retryable_error_codes:
seconds_left = int((max_retries - retry) * wait_time)
print(f" - Retrying ({retry + 1}/{max_retries}) {seconds_left} seconds remaining...")
time.sleep(wait_time)
was_retry = True # Set flag to indicate a retry occurred
continue
else:
error_message = f"Max retries reached or not retrying: {e}"
EL.logger.error(error_message, exc_info=True)
# Max retries reached or not retrying
print(" - Max retries reached. PageLoadException.")
return False, status_code, url

# All retries failed and page was not loaded successfully, return False
Expand Down
14 changes: 12 additions & 2 deletions IMDBTraktSyncer/imdbData.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ def check_in_progress(summary_items):
if button:
csv_link = button
break

# Clear any previous csv files
for file in os.listdir(directory):
if file.endswith('.csv'):
os.remove(os.path.join(directory, file))

# Check if the csv_link was found and then perform the actions
if csv_link:
Expand All @@ -115,7 +120,7 @@ def check_in_progress(summary_items):
here = os.path.abspath(os.path.dirname(__file__))
here = directory

try:
try:
# Find any CSV file in the directory
csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
if not csv_files:
Expand Down Expand Up @@ -197,6 +202,11 @@ def check_in_progress(summary_items):
if button:
csv_link = button
break

# Clear any previous csv files
for file in os.listdir(directory):
if file.endswith('.csv'):
os.remove(os.path.join(directory, file))

# Check if the csv_link was found and then perform the actions
if csv_link:
Expand All @@ -214,7 +224,7 @@ def check_in_progress(summary_items):
here = os.path.abspath(os.path.dirname(__file__))
directory = here

try:
try:
# Find any CSV file in the directory
csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
if not csv_files:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
with codecs.open(os.path.join(here, "README.md"), 'r', encoding="utf-8") as fh:
long_description = "\n" + fh.read()

VERSION = '2.9.4'
VERSION = '2.9.5'
DESCRIPTION = 'A python script that syncs user watchlist, ratings and reviews for Movies, TV Shows and Episodes both ways between Trakt and IMDB.'

# Setting up
Expand Down