Merge pull request #143 from RileyXX/changes-to-chrome-and-chromedriver-download-handling-bug-fixes-and-other-minor-improvements

Changes to chrome/chromedriver download handling, bug fixes and other minor improvements
RileyXX authored Jan 22, 2025
2 parents 71db67a + 4e7329c commit a1a0def
Showing 6 changed files with 317 additions and 95 deletions.
197 changes: 167 additions & 30 deletions IMDBTraktSyncer/IMDBTraktSyncer.py

Large diffs are not rendered by default.

61 changes: 18 additions & 43 deletions IMDBTraktSyncer/checkChrome.py
@@ -6,7 +6,6 @@
 import sys
 import time
 import subprocess
-import tempfile
 import stat
 from pathlib import Path
 from selenium import webdriver
@@ -267,49 +266,37 @@ def is_chromedriver_up_to_date(main_directory, current_version):
print(f"Chromedriver binary not found under {chromedriver_dir}.")
return False

def download_and_extract_chrome(download_url, main_directory, version, max_wait_time=300, wait_interval=5):
temp_dir = tempfile.gettempdir() # Use a temporary directory for the download
temp_zip_path = Path(temp_dir) / f"chrome-{version}.zip"
def download_and_extract_chrome(download_url, main_directory, version, max_wait_time=300, wait_interval=60):
zip_path = Path(main_directory) / f"chrome-{version}.zip"
extract_path = Path(main_directory) / "Chrome" / version

# Ensure the main directory exists
Path(main_directory).mkdir(parents=True, exist_ok=True)

try:
# Download the zip file
# Download the zip file directly to the main directory
response = EH.make_request_with_retries(download_url, stream=True)
response.raise_for_status()

# Get the expected file size from the response headers (if available)
expected_file_size = int(response.headers.get('Content-Length', 0))
print(f" - Expected file size: {expected_file_size} bytes")

# Write the zip file to a temporary location
with open(temp_zip_path, "wb") as temp_file:
# Write the zip file to the final location
with open(zip_path, "wb") as zip_file:
for chunk in response.iter_content(chunk_size=8192):
temp_file.write(chunk)
zip_file.write(chunk)

# Final wait to ensure the download is complete before extracting
time.sleep(wait_interval)

# Validate the downloaded file size
actual_file_size = temp_zip_path.stat().st_size
actual_file_size = zip_path.stat().st_size
print(f" - Downloaded file size: {actual_file_size} bytes")

# Retry until file sizes match or timeout occurs
time_waited = 0
while expected_file_size and actual_file_size != expected_file_size and time_waited < max_wait_time:
print(f" - File size mismatch. Waiting for {wait_interval} seconds before checking again...")
time.sleep(wait_interval)
time_waited += wait_interval
actual_file_size = temp_zip_path.stat().st_size
print(f" - Downloaded file size (after waiting): {actual_file_size} bytes")

if expected_file_size and actual_file_size != expected_file_size:
raise RuntimeError(f" - Downloaded file size mismatch: expected {expected_file_size} bytes, got {actual_file_size} bytes")

# Move the temp file to the final location
shutil.move(str(temp_zip_path), str(zip_path))
print(f" - Download complete. File moved to: {zip_path}")

# Verify the integrity of the ZIP file before extraction
if not zipfile.is_zipfile(zip_path):
raise RuntimeError(f" - The downloaded file is not a valid ZIP archive: {zip_path}")
@@ -349,49 +336,37 @@ def download_and_extract_chrome(download_url, main_directory, version, max_wait_

     return extract_path

-def download_and_extract_chromedriver(download_url, main_directory, version, max_wait_time=300, wait_interval=5):
-    temp_dir = tempfile.gettempdir() # Use a temporary directory for the download
-    temp_zip_path = Path(temp_dir) / f"chromedriver-{version}.zip"
+def download_and_extract_chromedriver(download_url, main_directory, version, max_wait_time=300, wait_interval=60):
     zip_path = Path(main_directory) / f"chromedriver-{version}.zip"
     extract_path = Path(main_directory) / "Chromedriver" / version

     # Ensure the main directory exists
     Path(main_directory).mkdir(parents=True, exist_ok=True)

     try:
-        # Download the zip file
+        # Download the zip file directly to the main directory
         response = EH.make_request_with_retries(download_url, stream=True)
         response.raise_for_status()

         # Get the expected file size from the response headers (if available)
         expected_file_size = int(response.headers.get('Content-Length', 0))
         print(f" - Expected file size: {expected_file_size} bytes")

-        # Write the zip file to a temporary location
-        with open(temp_zip_path, "wb") as temp_file:
+        # Write the zip file to the final location
+        with open(zip_path, "wb") as zip_file:
             for chunk in response.iter_content(chunk_size=8192):
-                temp_file.write(chunk)
+                zip_file.write(chunk)

+        # Final wait to ensure the download is complete before extracting
+        time.sleep(wait_interval)
+
         # Validate the downloaded file size
-        actual_file_size = temp_zip_path.stat().st_size
+        actual_file_size = zip_path.stat().st_size
         print(f" - Downloaded file size: {actual_file_size} bytes")

-        # Retry until file sizes match or timeout occurs
-        time_waited = 0
-        while expected_file_size and actual_file_size != expected_file_size and time_waited < max_wait_time:
-            print(f" - File size mismatch. Waiting for {wait_interval} seconds before checking again...")
-            time.sleep(wait_interval)
-            time_waited += wait_interval
-            actual_file_size = temp_zip_path.stat().st_size
-            print(f" - Downloaded file size (after waiting): {actual_file_size} bytes")
-
         if expected_file_size and actual_file_size != expected_file_size:
             raise RuntimeError(f" - Downloaded file size mismatch: expected {expected_file_size} bytes, got {actual_file_size} bytes")

-        # Move the temp file to the final location
-        shutil.move(str(temp_zip_path), str(zip_path))
-        print(f" - Download complete. File moved to: {zip_path}")
-
         # Verify the integrity of the ZIP file before extraction
         if not zipfile.is_zipfile(zip_path):
             raise RuntimeError(f" - The downloaded file is not a valid ZIP archive: {zip_path}")
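Taken together, the two hunks above replace the temp-directory download, the size-check retry loop, and the final shutil.move with a simpler flow: stream the archive straight into main_directory, wait once (wait_interval now defaults to 60 seconds instead of 5), then do a single size check against Content-Length before validating the ZIP. A minimal standalone sketch of the new flow, with requests standing in for the project's EH.make_request_with_retries wrapper (the name download_zip and the direct requests call are illustrative, not part of the commit):

import time
import zipfile
from pathlib import Path

import requests  # stand-in for the project's EH.make_request_with_retries wrapper

def download_zip(download_url, main_directory, name, wait_interval=60):
    zip_path = Path(main_directory) / f"{name}.zip"
    Path(main_directory).mkdir(parents=True, exist_ok=True)

    # Stream the archive straight to its final location (no temp dir, no move)
    response = requests.get(download_url, stream=True, timeout=60)
    response.raise_for_status()
    expected = int(response.headers.get("Content-Length", 0))

    with open(zip_path, "wb") as zip_file:
        for chunk in response.iter_content(chunk_size=8192):
            zip_file.write(chunk)

    # Single fixed wait, then a one-shot size check (replaces the old retry loop)
    time.sleep(wait_interval)
    actual = zip_path.stat().st_size
    if expected and actual != expected:
        raise RuntimeError(f"Size mismatch: expected {expected}, got {actual} bytes")
    if not zipfile.is_zipfile(zip_path):
        raise RuntimeError(f"Not a valid ZIP archive: {zip_path}")
    return zip_path
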
127 changes: 112 additions & 15 deletions IMDBTraktSyncer/errorHandling.py
@@ -5,6 +5,7 @@
 import os
 import inspect
 import json
+import re
 from datetime import datetime
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
@@ -354,6 +355,10 @@ def make_request_with_retries(url, method="GET", headers=None, params=None, payl
print(f"Max retries reached. Request to {url} failed.")
return None

# Function to clean a title by removing non-alphanumeric characters
def clean_title(title):
return re.sub(r'[^a-zA-Z0-9. ]', '', title).lower()

# Function to resolve IMDB_ID redirection using the driver
def resolve_imdb_id_with_driver(imdb_id, driver, wait):
try:
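
The new clean_title helper keeps only letters, digits, periods, and spaces, then lowercases, so punctuation differences between Trakt and IMDB titles no longer defeat the title-based matching used below. Illustrative behavior (sample titles are arbitrary):

print(clean_title("Spider-Man: No Way Home"))  # -> "spiderman no way home"
print(clean_title("M*A*S*H"))                  # -> "mash"
print(clean_title("Mr. Robot"))                # -> "mr. robot"
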
@@ -377,29 +382,33 @@ def resolve_imdb_id_with_driver(imdb_id, driver, wait):
 def update_outdated_imdb_ids_from_trakt(trakt_list, imdb_list, driver, wait):
     comparison_keys = ['Title', 'Type', 'IMDB_ID'] # Only compare Title and Type

-    # Group items by (Title, Type)
+    # Group items by (Title, Type), cleaning the Title
     trakt_grouped = {}
     for item in trakt_list:
         if all(key in item for key in comparison_keys):
-            key = (item['Title'], item['Type'])
+            # Clean Title before creating the key
+            cleaned_title = clean_title(item['Title'])
+            key = (cleaned_title, item['Type'])
             trakt_grouped.setdefault(key, set()).add(item['IMDB_ID'])

     imdb_grouped = {}
     for item in imdb_list:
         if all(key in item for key in comparison_keys):
-            key = (item['Title'], item['Type'])
+            # Clean Title before creating the key
+            cleaned_title = clean_title(item['Title'])
+            key = (cleaned_title, item['Type'])
             imdb_grouped.setdefault(key, set()).add(item['IMDB_ID'])

     # Find conflicting items based on Title and Type where IMDB_IDs are different
     conflicting_items = {
         key for key in trakt_grouped.keys() & imdb_grouped.keys()
         if trakt_grouped[key] != imdb_grouped[key]
     }

     '''
     print(f"Initial Conflicting Items: {conflicting_items}")
     '''

     # Resolve conflicts by checking IMDB_ID redirection using the driver
     for key in conflicting_items:
         trakt_ids = trakt_grouped[key]
@@ -418,48 +427,50 @@ def update_outdated_imdb_ids_from_trakt(trakt_list, imdb_list, driver, wait):

# Skip resolving IMDB_IDs in imdb_list as they're already current
resolved_imdb_ids = imdb_ids

'''
# If resolved trakt IDs match imdb IDs, the conflict is considered resolved
if resolved_trakt_ids == resolved_imdb_ids:
print(f"Resolved conflict for: {key}")
else:
print(f"Conflict not resolved for: {key}")
'''

return trakt_list, imdb_list, driver, wait

# Function to filter out items that share the same Title, Year, and Type
# AND have non-matching IMDB_ID values
# AND have non-matching IMDB_ID values, using cleaned titles for comparison
def filter_out_mismatched_items(trakt_list, IMDB_list):
# Define the keys to be used for comparison
comparison_keys = ['Title', 'Year', 'Type', 'IMDB_ID']

# Group items by (Title, Year, Type)
# Group items by (Title, Year, Type), cleaning the Title for comparison
trakt_grouped = {}
for item in trakt_list:
if all(key in item for key in comparison_keys):
key = (item['Title'], item['Year'], item['Type'])
cleaned_title = clean_title(item['Title']) # Clean the Title for comparison
key = (cleaned_title, item['Year'], item['Type'])
trakt_grouped.setdefault(key, set()).add(item['IMDB_ID'])

IMDB_grouped = {}
for item in IMDB_list:
if all(key in item for key in comparison_keys):
key = (item['Title'], item['Year'], item['Type'])
cleaned_title = clean_title(item['Title']) # Clean the Title for comparison
key = (cleaned_title, item['Year'], item['Type'])
IMDB_grouped.setdefault(key, set()).add(item['IMDB_ID'])

# Find conflicting items (same Title, Year, Type but different IMDB_IDs)
conflicting_items = {
key for key in trakt_grouped.keys() & IMDB_grouped.keys() # Only consider shared keys
if trakt_grouped[key] != IMDB_grouped[key] # Check if IMDB_IDs differ
}

# Filter out conflicting items from both lists
filtered_trakt_list = [
item for item in trakt_list if (item['Title'], item['Year'], item['Type']) not in conflicting_items
item for item in trakt_list if (clean_title(item['Title']), item['Year'], item['Type']) not in conflicting_items
]
filtered_IMDB_list = [
item for item in IMDB_list if (item['Title'], item['Year'], item['Type']) not in conflicting_items
item for item in IMDB_list if (clean_title(item['Title']), item['Year'], item['Type']) not in conflicting_items
]

return filtered_trakt_list, filtered_IMDB_list
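
A small self-contained illustration of the cleaned-title conflict detection (the data is invented, and 'tt9999999' is a deliberately fake ID): with the hyphen stripped, the two entries below collide on the same (title, year, type) key while carrying different IMDB_IDs, so filter_out_mismatched_items drops both rather than letting a mismatched pair sync.

trakt = [{'Title': 'Spider-Man', 'Year': 2002, 'Type': 'movie', 'IMDB_ID': 'tt0145487'}]
imdb = [{'Title': 'Spiderman', 'Year': 2002, 'Type': 'movie', 'IMDB_ID': 'tt9999999'}]
filtered_trakt, filtered_imdb = filter_out_mismatched_items(trakt, imdb)
print(filtered_trakt, filtered_imdb)  # -> [] []
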
@@ -513,7 +524,7 @@ def parse_date(item):

     return sorted(items, key=parse_date, reverse=descending)

-def check_and_update_watch_history(list):
+def check_if_watch_history_limit_reached(list):
     """
     Checks if the list has 10,000 or more items.
     If true, updates the sync_watch_history in credentials.txt to False
Expand Down Expand Up @@ -553,5 +564,91 @@ def check_and_update_watch_history(list):
print("Failed to write to credentials file.", exc_info=True)
return False # Return False if there was an error while updating the file

# Return False if the limit hasn't been reached
return False

def check_if_watchlist_limit_reached(list):
"""
Checks if the list has 10,000 or more items.
If true, updates the sync_watchlist in credentials.txt to False
and marks the watchlist limit as reached.
Args:
list (list): List of the user's watchlist.
Returns:
bool: True if the watchlist limit has been reached, False otherwise.
"""
# Define the file path for credentials.txt
here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(here, 'credentials.txt')

# Load the credentials file
credentials = {}
try:
with open(file_path, 'r', encoding='utf-8') as file:
credentials = json.load(file)
except FileNotFoundError:
print("Credentials file not found. A new file will be created if needed.", exc_info=True)
return False # Return False if the file doesn't exist

# Check if list has 10,000 or more items
if len(list) >= 9999:
# Update sync_watchlist to False
credentials['sync_watchlist'] = False

# Mark that the watchlist limit has been reached
try:
with open(file_path, 'w', encoding='utf-8') as file:
json.dump(credentials, file, indent=4, separators=(', ', ': '))
print("IMDB watchlist has reached the 10,000 item limit. sync_watchlist value set to False. Watchlist will no longer be synced.")
return True # Return True indicating limit reached and updated the credentials
except Exception as e:
print("Failed to write to credentials file.", exc_info=True)
return False # Return False if there was an error while updating the file

# Return False if the limit hasn't been reached
return False

def check_if_ratings_limit_reached(list):
"""
Checks if the list has 10,000 or more items.
If true, updates the sync_ratings in credentials.txt to False
and marks the ratings limit as reached.
Args:
list (list): List of the user's ratings.
Returns:
bool: True if the ratings limit has been reached, False otherwise.
"""
# Define the file path for credentials.txt
here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(here, 'credentials.txt')

# Load the credentials file
credentials = {}
try:
with open(file_path, 'r', encoding='utf-8') as file:
credentials = json.load(file)
except FileNotFoundError:
print("Credentials file not found. A new file will be created if needed.", exc_info=True)
return False # Return False if the file doesn't exist

# Check if list has 10,000 or more items
if len(list) >= 9999:
# Update sync_ratings to False
credentials['sync_ratings'] = False

# Mark that the ratings limit has been reached
try:
with open(file_path, 'w', encoding='utf-8') as file:
json.dump(credentials, file, indent=4, separators=(', ', ': '))
print("IMDB ratings have reached the 10,000 item limit. sync_ratings value set to False. Ratings will no longer be synced.")
return True # Return True indicating limit reached and updated the credentials
except Exception as e:
print("Failed to write to credentials file.", exc_info=True)
return False # Return False if there was an error while updating the file

# Return False if the limit hasn't been reached
return False
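
A hedged usage sketch for the three limit checkers (the list variables are invented; each helper returns True once it has flipped the corresponding flag in credentials.txt):

imdb_watch_history, imdb_watchlist, imdb_ratings = [], [], []  # populated elsewhere by the IMDB export steps

if check_if_watch_history_limit_reached(imdb_watch_history):
    print("Watch history sync disabled.")
if check_if_watchlist_limit_reached(imdb_watchlist):
    print("Watchlist sync disabled.")
if check_if_ratings_limit_reached(imdb_ratings):
    print("Ratings sync disabled.")

Two caveats in the committed helpers: len(list) >= 9999 trips at 9,999 items, one short of the documented 10,000 limit, and print() does not accept the logging-style exc_info keyword used in the except branches, so those error paths would raise a TypeError if reached.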