RileyXX · RileyXX · Dec 13, 2024 · Dec 13, 2024
diff --git a/IMDBTraktSyncer/IMDBTraktSyncer.py b/IMDBTraktSyncer/IMDBTraktSyncer.py
@@ -65,6 +65,7 @@ class PageLoadException(Exception):
         service = Service(executable_path=binary_path)
         try:
             driver = webdriver.Chrome(service=service, options=options)
+            driver.set_page_load_timeout(60)
         except SessionNotCreatedException as e:
             error_message = str(e)
             if "This version of ChromeDriver only supports Chrome version" in error_message:
@@ -409,7 +410,7 @@ def filter_by_comment_length(lst, min_comment_length=None):
                 for i, item in enumerate(imdb_ratings_to_set, 1):
 
                     year_str = f' ({item["Year"]})' if item["Year"] is not None else '' # sometimes year is None for episodes from trakt so remove it from the print string
-                    print(f' - Rating {item["Type"]}: ({i} of {len(imdb_ratings_to_set)}) {item["Title"]}{year_str}: {item["Rating"]}/10 on IMDB ({item['IMDB_ID']})')
+                    print(f' - Rating {item["Type"]}: ({i} of {len(imdb_ratings_to_set)}) {item["Title"]}{year_str}: {item["Rating"]}/10 on IMDB ({item["IMDB_ID"]})')
 
                     try:
                         # Load page

diff --git a/IMDBTraktSyncer/errorHandling.py b/IMDBTraktSyncer/errorHandling.py
@@ -1,10 +1,12 @@
 import traceback
 import requests
+from requests.exceptions import RequestException, ConnectionError, Timeout
 import time
+import inspect
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
-from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
+from selenium.common.exceptions import WebDriverException, NoSuchElementException, TimeoutException, StaleElementReferenceException
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
@@ -40,21 +42,22 @@ def make_trakt_request(url, headers=None, params=None, payload=None, max_retries
             'trakt-api-key': VC.trakt_client_id,
             'Authorization': f'Bearer {VC.trakt_access_token}'
         }
-
-    retry_delay = 1  # seconds between retries
+    
+    retry_delay = 1  # Initial seconds between retries
     retry_attempts = 0
+    connection_timeout = 20
 
     while retry_attempts < max_retries:
         response = None
         try:
             if payload is None:
                 if params:
-                    response = requests.get(url, headers=headers, params=params)
+                    response = requests.get(url, headers=headers, params=params, timeout=connection_timeout)
                 else:
-                    response = requests.get(url, headers=headers)
+                    response = requests.get(url, headers=headers, timeout=connection_timeout)
             else:
-                response = requests.post(url, headers=headers, json=payload)
-
+                response = requests.post(url, headers=headers, json=payload, timeout=connection_timeout)
+            
             if response.status_code in [200, 201, 204]:
                 return response  # Request succeeded, return response
             elif response.status_code in [429, 500, 502, 503, 504, 520, 521, 522]:
@@ -69,13 +72,21 @@ def make_trakt_request(url, headers=None, params=None, payload=None, max_retries
                 print(f"   - {error_message}")
                 EL.logger.error(f"{error_message}. URL: {url}")
                 return None
-
-        except requests.exceptions.RequestException as e:
-            error_message = f"Request failed with exception: {e}"
+        except (ConnectionError, Timeout) as conn_err:
+            # Handle connection reset and timeout
+            retry_attempts += 1
+            print(f"   - Connection error: {conn_err}. Retrying ({retry_attempts}/{max_retries})...")
+            EL.logger.warning(f"Connection error: {conn_err}. Retrying ({retry_attempts}/{max_retries})...")
+            time.sleep(retry_delay)
+            retry_delay *= 2  # Exponential backoff
+        except RequestException as req_err:
+            # Handle other request-related exceptions
+            error_message = f"Request failed with exception: {req_err}"
             print(f"   - {error_message}")
             EL.logger.error(error_message, exc_info=True)
             return None
 
+    # If all retries fail
     error_message = "Max retry attempts reached with Trakt API, request failed."
     print(f"   - {error_message}")
     EL.logger.error(error_message)
@@ -106,18 +117,23 @@ def get_trakt_message(status_code):
         521: "Service Unavailable - Cloudflare error",
         522: "Service Unavailable - Cloudflare error"
     }
-
     return error_messages.get(status_code, "Unknown error")
+
+# Custom exception for page load errors
+class PageLoadException(Exception):
+    pass
 
 # Function to get page with retries and adjusted wait time
 def get_page_with_retries(url, driver, wait, total_wait_time=180, initial_wait_time=5):
     num_retries = total_wait_time // initial_wait_time
     wait_time = total_wait_time / num_retries
     max_retries = num_retries
     status_code = None
+    was_retry = False  # Flag to track if a retry occurred
 
     for retry in range(max_retries):
         try:
+            # Attempt to load the page using Selenium driver
             driver.get(url)
 
             # Wait until the status code becomes available
@@ -132,23 +148,66 @@ def get_page_with_retries(url, driver, wait, total_wait_time=180, initial_wait_t
 
             # Check for any error codes
             if status_code is None:
+                if was_retry:
+                    print("Retry successful! Continuing...")
+                    was_retry = False  # Reset flag
                 return True, status_code, url  # Unable to determine page loaded status
             elif status_code >= 400:
                 raise PageLoadException(f'Failed to load page. Status code: {status_code}. URL: {url}')
             else:
+                if was_retry:
+                    print("Retry successful! Continuing...")
+                    was_retry = False  # Reset flag
                 return True, status_code, url  # Page loaded successfully
 
-        except (PageLoadException) as e:
-            print(f"   - Error: {str(e)}")
+        except TimeoutException as e:
+            # Handle page load timeout explicitly
+            frame = inspect.currentframe()  # Frame of this function (the handler), not the frame that raised
+            lineno = frame.f_lineno  # Line currently executing in this handler, not the line that raised
+            filename = inspect.getfile(frame)  # Source file of this function's own frame
+            print(f"   - TimeoutException: Page load timed out. Retrying... {str(e).splitlines()[0]} URL: {url} (File: {filename}, Line: {lineno})")
+            if retry + 1 < max_retries:
+                seconds_left = int((max_retries - retry) * wait_time)
+                print(f"   - Retrying ({retry + 1}/{max_retries}) {seconds_left} seconds remaining...")
+                time.sleep(wait_time)
+                was_retry = True  # Set flag to indicate a retry occurred
+                continue
+            else:
+                print("   - Max retries reached or not retrying after timeout.")
+                return False, status_code, url
+
+        except WebDriverException as e:
+            # Handle Selenium-related network errors
+            frame = inspect.currentframe()  # Frame of this function (the handler), not the frame that raised
+            lineno = frame.f_lineno  # Line currently executing in this handler, not the line that raised
+            filename = inspect.getfile(frame)  # Source file of this function's own frame
+            print(f"   - Selenium WebDriver Error: {str(e).splitlines()[0]} URL: {url} (File: {filename}, Line: {lineno})")
+            if "Connection reset by peer" in str(e):
+                print("   - Connection was reset by the server. Retrying...")
+            elif retry + 1 < max_retries:
+                seconds_left = int((max_retries - retry) * wait_time)
+                print(f"   - Retrying ({retry + 1}/{max_retries}) {seconds_left} seconds remaining...")
+                time.sleep(wait_time)
+                was_retry = True  # Set flag to indicate a retry occurred
+                continue
+            else:
+                print("   - Max retries reached or not retrying.")
+                return False, status_code, url
+
+        except PageLoadException as e:
+            frame = inspect.currentframe()  # Frame of this function (the handler), not the frame that raised
+            lineno = frame.f_lineno  # Line currently executing in this handler, not the line that raised
+            filename = inspect.getfile(frame)  # Source file of this function's own frame
+            print(f"   - Error: {str(e).splitlines()[0]} URL: {url} (File: {filename}, Line: {lineno})")
             retryable_error_codes = [408, 425, 429, 500, 502, 503, 504]
             if retry + 1 < max_retries and status_code in retryable_error_codes:
                 seconds_left = int((max_retries - retry) * wait_time)
                 print(f"   - Retrying ({retry + 1}/{max_retries}) {seconds_left} seconds remaining...")
                 time.sleep(wait_time)
+                was_retry = True  # Set flag to indicate a retry occurred
+                continue
             else:
-                error_message = f"Max retries reached or not retrying: {e}"
-                EL.logger.error(error_message, exc_info=True)
-                # Max retries reached or not retrying
+                print("   - Max retries reached. PageLoadException.")
                 return False, status_code, url
 
     # All retries failed and page was not loaded successfully, return False

diff --git a/IMDBTraktSyncer/imdbData.py b/IMDBTraktSyncer/imdbData.py
@@ -98,6 +98,11 @@ def check_in_progress(summary_items):
                 if button:
                     csv_link = button
                     break
+
+        # Clear any previous csv files
+        for file in os.listdir(directory):
+            if file.endswith('.csv'):
+                os.remove(os.path.join(directory, file))
 
         # Check if the csv_link was found and then perform the actions
         if csv_link:
@@ -115,7 +120,7 @@ def check_in_progress(summary_items):
         here = os.path.abspath(os.path.dirname(__file__))
         here = directory
 
-        try:
+        try:          
             # Find any CSV file in the directory
             csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
             if not csv_files:
@@ -197,6 +202,11 @@ def check_in_progress(summary_items):
                 if button:
                     csv_link = button
                     break
+
+        # Clear any previous csv files
+        for file in os.listdir(directory):
+            if file.endswith('.csv'):
+                os.remove(os.path.join(directory, file))
 
         # Check if the csv_link was found and then perform the actions
         if csv_link:
@@ -214,7 +224,7 @@ def check_in_progress(summary_items):
         here = os.path.abspath(os.path.dirname(__file__))
         directory = here
 
-        try:
+        try:          
             # Find any CSV file in the directory
             csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
             if not csv_files:

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 with codecs.open(os.path.join(here, "README.md"), 'r', encoding="utf-8") as fh:
     long_description = "\n" + fh.read()
 
-VERSION = '2.9.4'
+VERSION = '2.9.5'
 DESCRIPTION = 'A python script that syncs user watchlist, ratings and reviews for Movies, TV Shows and Episodes both ways between Trakt and IMDB.'
 
 # Setting up