Merge pull request #143 from RileyXX/changes-to-chrome-and-chromedriver-download-handling-bug-fixes-and-other-minor-improvements

Changes to chrome/chromedriver download handling, bug fixes and other minor improvements
RileyXX authored Jan 22, 2025
2 parents 71db67a + 4e7329c commit a1a0def
Showing 6 changed files with 317 additions and 95 deletions.
197 changes: 167 additions & 30 deletions IMDBTraktSyncer/IMDBTraktSyncer.py

Large diffs are not rendered by default.

61 changes: 18 additions & 43 deletions IMDBTraktSyncer/checkChrome.py
@@ -6,7 +6,6 @@
 import sys
 import time
 import subprocess
-import tempfile
 import stat
 from pathlib import Path
 from selenium import webdriver
@@ -267,49 +266,37 @@ def is_chromedriver_up_to_date(main_directory, current_version):
print(f"Chromedriver binary not found under {chromedriver_dir}.")
return False

def download_and_extract_chrome(download_url, main_directory, version, max_wait_time=300, wait_interval=5):
temp_dir = tempfile.gettempdir() # Use a temporary directory for the download
temp_zip_path = Path(temp_dir) / f"chrome-{version}.zip"
def download_and_extract_chrome(download_url, main_directory, version, max_wait_time=300, wait_interval=60):
zip_path = Path(main_directory) / f"chrome-{version}.zip"
extract_path = Path(main_directory) / "Chrome" / version

# Ensure the main directory exists
Path(main_directory).mkdir(parents=True, exist_ok=True)

try:
# Download the zip file
# Download the zip file directly to the main directory
response = EH.make_request_with_retries(download_url, stream=True)
response.raise_for_status()

# Get the expected file size from the response headers (if available)
expected_file_size = int(response.headers.get('Content-Length', 0))
print(f" - Expected file size: {expected_file_size} bytes")

# Write the zip file to a temporary location
with open(temp_zip_path, "wb") as temp_file:
# Write the zip file to the final location
with open(zip_path, "wb") as zip_file:
for chunk in response.iter_content(chunk_size=8192):
temp_file.write(chunk)
zip_file.write(chunk)

# Final wait to ensure the download is complete before extracting
time.sleep(wait_interval)

# Validate the downloaded file size
actual_file_size = temp_zip_path.stat().st_size
actual_file_size = zip_path.stat().st_size
print(f" - Downloaded file size: {actual_file_size} bytes")

# Retry until file sizes match or timeout occurs
time_waited = 0
while expected_file_size and actual_file_size != expected_file_size and time_waited < max_wait_time:
print(f" - File size mismatch. Waiting for {wait_interval} seconds before checking again...")
time.sleep(wait_interval)
time_waited += wait_interval
actual_file_size = temp_zip_path.stat().st_size
print(f" - Downloaded file size (after waiting): {actual_file_size} bytes")

if expected_file_size and actual_file_size != expected_file_size:
raise RuntimeError(f" - Downloaded file size mismatch: expected {expected_file_size} bytes, got {actual_file_size} bytes")

# Move the temp file to the final location
shutil.move(str(temp_zip_path), str(zip_path))
print(f" - Download complete. File moved to: {zip_path}")

# Verify the integrity of the ZIP file before extraction
if not zipfile.is_zipfile(zip_path):
raise RuntimeError(f" - The downloaded file is not a valid ZIP archive: {zip_path}")
@@ -349,49 +336,37 @@ def download_and_extract_chrome(download_url, main_directory, version, max_wait_

     return extract_path

-def download_and_extract_chromedriver(download_url, main_directory, version, max_wait_time=300, wait_interval=5):
-    temp_dir = tempfile.gettempdir() # Use a temporary directory for the download
-    temp_zip_path = Path(temp_dir) / f"chromedriver-{version}.zip"
+def download_and_extract_chromedriver(download_url, main_directory, version, max_wait_time=300, wait_interval=60):
     zip_path = Path(main_directory) / f"chromedriver-{version}.zip"
     extract_path = Path(main_directory) / "Chromedriver" / version

     # Ensure the main directory exists
     Path(main_directory).mkdir(parents=True, exist_ok=True)

     try:
-        # Download the zip file
+        # Download the zip file directly to the main directory
         response = EH.make_request_with_retries(download_url, stream=True)
         response.raise_for_status()

         # Get the expected file size from the response headers (if available)
         expected_file_size = int(response.headers.get('Content-Length', 0))
         print(f" - Expected file size: {expected_file_size} bytes")

-        # Write the zip file to a temporary location
-        with open(temp_zip_path, "wb") as temp_file:
+        # Write the zip file to the final location
+        with open(zip_path, "wb") as zip_file:
             for chunk in response.iter_content(chunk_size=8192):
-                temp_file.write(chunk)
+                zip_file.write(chunk)

+        # Final wait to ensure the download is complete before extracting
+        time.sleep(wait_interval)
+
         # Validate the downloaded file size
-        actual_file_size = temp_zip_path.stat().st_size
+        actual_file_size = zip_path.stat().st_size
         print(f" - Downloaded file size: {actual_file_size} bytes")

-        # Retry until file sizes match or timeout occurs
-        time_waited = 0
-        while expected_file_size and actual_file_size != expected_file_size and time_waited < max_wait_time:
-            print(f" - File size mismatch. Waiting for {wait_interval} seconds before checking again...")
-            time.sleep(wait_interval)
-            time_waited += wait_interval
-            actual_file_size = temp_zip_path.stat().st_size
-            print(f" - Downloaded file size (after waiting): {actual_file_size} bytes")
-
         if expected_file_size and actual_file_size != expected_file_size:
             raise RuntimeError(f" - Downloaded file size mismatch: expected {expected_file_size} bytes, got {actual_file_size} bytes")

-        # Move the temp file to the final location
-        shutil.move(str(temp_zip_path), str(zip_path))
-        print(f" - Download complete. File moved to: {zip_path}")
-
         # Verify the integrity of the ZIP file before extraction
         if not zipfile.is_zipfile(zip_path):
             raise RuntimeError(f" - The downloaded file is not a valid ZIP archive: {zip_path}")
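Taken together, the two hunks above replace the temp-directory download, the size-check retry loop, and the final shutil.move with a simpler flow: stream the archive straight into main_directory, wait once (wait_interval now defaults to 60 seconds instead of 5), then do a single size check against Content-Length before validating the ZIP. A minimal standalone sketch of the new flow, with requests standing in for the project's EH.make_request_with_retries wrapper (the name download_zip and the direct requests call are illustrative, not part of the commit):

import time
import zipfile
from pathlib import Path

import requests  # stand-in for the project's EH.make_request_with_retries wrapper

def download_zip(download_url, main_directory, name, wait_interval=60):
    zip_path = Path(main_directory) / f"{name}.zip"
    Path(main_directory).mkdir(parents=True, exist_ok=True)

    # Stream the archive straight to its final location (no temp dir, no move)
    response = requests.get(download_url, stream=True, timeout=60)
    response.raise_for_status()
    expected = int(response.headers.get("Content-Length", 0))

    with open(zip_path, "wb") as zip_file:
        for chunk in response.iter_content(chunk_size=8192):
            zip_file.write(chunk)

    # Single fixed wait, then a one-shot size check (replaces the old retry loop)
    time.sleep(wait_interval)
    actual = zip_path.stat().st_size
    if expected and actual != expected:
        raise RuntimeError(f"Size mismatch: expected {expected}, got {actual} bytes")
    if not zipfile.is_zipfile(zip_path):
        raise RuntimeError(f"Not a valid ZIP archive: {zip_path}")
    return zip_path
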
127 changes: 112 additions & 15 deletions IMDBTraktSyncer/errorHandling.py
@@ -5,6 +5,7 @@
 import os
 import inspect
 import json
+import re
 from datetime import datetime
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
@@ -354,6 +355,10 @@ def make_request_with_retries(url, method="GET", headers=None, params=None, payl
print(f"Max retries reached. Request to {url} failed.")
return None

# Function to clean a title by removing non-alphanumeric characters
def clean_title(title):
return re.sub(r'[^a-zA-Z0-9. ]', '', title).lower()

# Function to resolve IMDB_ID redirection using the driver
def resolve_imdb_id_with_driver(imdb_id, driver, wait):
try:
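
The new clean_title helper keeps only letters, digits, periods, and spaces, then lowercases, so punctuation differences between Trakt and IMDB titles no longer defeat the title-based matching used below. Illustrative behavior (sample titles are arbitrary):

print(clean_title("Spider-Man: No Way Home"))  # -> "spiderman no way home"
print(clean_title("M*A*S*H"))                  # -> "mash"
print(clean_title("Mr. Robot"))                # -> "mr. robot"
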
@@ -377,29 +382,33 @@ def resolve_imdb_id_with_driver(imdb_id, driver, wait):
 def update_outdated_imdb_ids_from_trakt(trakt_list, imdb_list, driver, wait):
     comparison_keys = ['Title', 'Type', 'IMDB_ID'] # Only compare Title and Type

-    # Group items by (Title, Type)
+    # Group items by (Title, Type), cleaning the Title
     trakt_grouped = {}
     for item in trakt_list:
         if all(key in item for key in comparison_keys):
-            key = (item['Title'], item['Type'])
+            # Clean Title before creating the key
+            cleaned_title = clean_title(item['Title'])
+            key = (cleaned_title, item['Type'])
             trakt_grouped.setdefault(key, set()).add(item['IMDB_ID'])

     imdb_grouped = {}
     for item in imdb_list:
         if all(key in item for key in comparison_keys):
-            key = (item['Title'], item['Type'])
+            # Clean Title before creating the key
+            cleaned_title = clean_title(item['Title'])
+            key = (cleaned_title, item['Type'])
             imdb_grouped.setdefault(key, set()).add(item['IMDB_ID'])

     # Find conflicting items based on Title and Type where IMDB_IDs are different
     conflicting_items = {
         key for key in trakt_grouped.keys() & imdb_grouped.keys()
         if trakt_grouped[key] != imdb_grouped[key]
     }

     '''
     print(f"Initial Conflicting Items: {conflicting_items}")
     '''

     # Resolve conflicts by checking IMDB_ID redirection using the driver
     for key in conflicting_items:
         trakt_ids = trakt_grouped[key]
@@ -418,48 +427,50 @@ def update_outdated_imdb_ids_from_trakt(trakt_list, imdb_list, driver, wait):

# Skip resolving IMDB_IDs in imdb_list as they're already current
resolved_imdb_ids = imdb_ids

'''
# If resolved trakt IDs match imdb IDs, the conflict is considered resolved
if resolved_trakt_ids == resolved_imdb_ids:
print(f"Resolved conflict for: {key}")
else:
print(f"Conflict not resolved for: {key}")
'''

return trakt_list, imdb_list, driver, wait

# Function to filter out items that share the same Title, Year, and Type
# AND have non-matching IMDB_ID values
# AND have non-matching IMDB_ID values, using cleaned titles for comparison
def filter_out_mismatched_items(trakt_list, IMDB_list):
# Define the keys to be used for comparison
comparison_keys = ['Title', 'Year', 'Type', 'IMDB_ID']

# Group items by (Title, Year, Type)
# Group items by (Title, Year, Type), cleaning the Title for comparison
trakt_grouped = {}
for item in trakt_list:
if all(key in item for key in comparison_keys):
key = (item['Title'], item['Year'], item['Type'])
cleaned_title = clean_title(item['Title']) # Clean the Title for comparison
key = (cleaned_title, item['Year'], item['Type'])
trakt_grouped.setdefault(key, set()).add(item['IMDB_ID'])

IMDB_grouped = {}
for item in IMDB_list:
if all(key in item for key in comparison_keys):
key = (item['Title'], item['Year'], item['Type'])
cleaned_title = clean_title(item['Title']) # Clean the Title for comparison
key = (cleaned_title, item['Year'], item['Type'])
IMDB_grouped.setdefault(key, set()).add(item['IMDB_ID'])

# Find conflicting items (same Title, Year, Type but different IMDB_IDs)
conflicting_items = {
key for key in trakt_grouped.keys() & IMDB_grouped.keys() # Only consider shared keys
if trakt_grouped[key] != IMDB_grouped[key] # Check if IMDB_IDs differ
}

# Filter out conflicting items from both lists
filtered_trakt_list = [
item for item in trakt_list if (item['Title'], item['Year'], item['Type']) not in conflicting_items
item for item in trakt_list if (clean_title(item['Title']), item['Year'], item['Type']) not in conflicting_items
]
filtered_IMDB_list = [
item for item in IMDB_list if (item['Title'], item['Year'], item['Type']) not in conflicting_items
item for item in IMDB_list if (clean_title(item['Title']), item['Year'], item['Type']) not in conflicting_items
]

return filtered_trakt_list, filtered_IMDB_list
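
A small self-contained illustration of the cleaned-title conflict detection (the data is invented, and 'tt9999999' is a deliberately fake ID): with the hyphen stripped, the two entries below collide on the same (title, year, type) key while carrying different IMDB_IDs, so filter_out_mismatched_items drops both rather than letting a mismatched pair sync.

trakt = [{'Title': 'Spider-Man', 'Year': 2002, 'Type': 'movie', 'IMDB_ID': 'tt0145487'}]
imdb = [{'Title': 'Spiderman', 'Year': 2002, 'Type': 'movie', 'IMDB_ID': 'tt9999999'}]
filtered_trakt, filtered_imdb = filter_out_mismatched_items(trakt, imdb)
print(filtered_trakt, filtered_imdb)  # -> [] []
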
@@ -513,7 +524,7 @@ def parse_date(item):

     return sorted(items, key=parse_date, reverse=descending)

-def check_and_update_watch_history(list):
+def check_if_watch_history_limit_reached(list):
     """
     Checks if the list has 10,000 or more items.
     If true, updates the sync_watch_history in credentials.txt to False
Expand Down Expand Up @@ -553,5 +564,91 @@ def check_and_update_watch_history(list):
print("Failed to write to credentials file.", exc_info=True)
return False # Return False if there was an error while updating the file

# Return False if the limit hasn't been reached
return False

def check_if_watchlist_limit_reached(list):
"""
Checks if the list has 10,000 or more items.
If true, updates the sync_watchlist in credentials.txt to False
and marks the watchlist limit as reached.
Args:
list (list): List of the user's watchlist.
Returns:
bool: True if the watchlist limit has been reached, False otherwise.
"""
# Define the file path for credentials.txt
here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(here, 'credentials.txt')

# Load the credentials file
credentials = {}
try:
with open(file_path, 'r', encoding='utf-8') as file:
credentials = json.load(file)
except FileNotFoundError:
print("Credentials file not found. A new file will be created if needed.", exc_info=True)
return False # Return False if the file doesn't exist

# Check if list has 10,000 or more items
if len(list) >= 9999:
# Update sync_watchlist to False
credentials['sync_watchlist'] = False

# Mark that the watchlist limit has been reached
try:
with open(file_path, 'w', encoding='utf-8') as file:
json.dump(credentials, file, indent=4, separators=(', ', ': '))
print("IMDB watchlist has reached the 10,000 item limit. sync_watchlist value set to False. Watchlist will no longer be synced.")
return True # Return True indicating limit reached and updated the credentials
except Exception as e:
print("Failed to write to credentials file.", exc_info=True)
return False # Return False if there was an error while updating the file

# Return False if the limit hasn't been reached
return False

def check_if_ratings_limit_reached(list):
"""
Checks if the list has 10,000 or more items.
If true, updates the sync_ratings in credentials.txt to False
and marks the ratings limit as reached.
Args:
list (list): List of the user's ratings.
Returns:
bool: True if the ratings limit has been reached, False otherwise.
"""
# Define the file path for credentials.txt
here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(here, 'credentials.txt')

# Load the credentials file
credentials = {}
try:
with open(file_path, 'r', encoding='utf-8') as file:
credentials = json.load(file)
except FileNotFoundError:
print("Credentials file not found. A new file will be created if needed.", exc_info=True)
return False # Return False if the file doesn't exist

# Check if list has 10,000 or more items
if len(list) >= 9999:
# Update sync_ratings to False
credentials['sync_ratings'] = False

# Mark that the ratings limit has been reached
try:
with open(file_path, 'w', encoding='utf-8') as file:
json.dump(credentials, file, indent=4, separators=(', ', ': '))
print("IMDB ratings have reached the 10,000 item limit. sync_ratings value set to False. Ratings will no longer be synced.")
return True # Return True indicating limit reached and updated the credentials
except Exception as e:
print("Failed to write to credentials file.", exc_info=True)
return False # Return False if there was an error while updating the file

# Return False if the limit hasn't been reached
return False
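
A hedged usage sketch for the three limit checkers (the list variables are invented; each helper returns True once it has flipped the corresponding flag in credentials.txt):

imdb_watch_history, imdb_watchlist, imdb_ratings = [], [], []  # populated elsewhere by the IMDB export steps

if check_if_watch_history_limit_reached(imdb_watch_history):
    print("Watch history sync disabled.")
if check_if_watchlist_limit_reached(imdb_watchlist):
    print("Watchlist sync disabled.")
if check_if_ratings_limit_reached(imdb_ratings):
    print("Ratings sync disabled.")

Two caveats in the committed helpers: len(list) >= 9999 trips at 9,999 items, one short of the documented 10,000 limit, and print() does not accept the logging-style exc_info keyword used in the except branches, so those error paths would raise a TypeError if reached.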