From 43adf1c64bbe43e497b0d946b47caa0bb6755174 Mon Sep 17 00:00:00 2001 From: Alexandre Pulido Date: Wed, 24 Jul 2024 19:10:13 +0200 Subject: [PATCH] implemented watchdog to monitor changes to the library --- app/app.py | 54 ++++++++++++++++- app/db.py | 72 ++++++++++++++++++++++- app/file_watcher.py | 140 ++++++++++++++++++++++++++++++++++++++++++++ app/settings.py | 1 - requirements.txt | 1 + 5 files changed, 264 insertions(+), 4 deletions(-) create mode 100644 app/file_watcher.py diff --git a/app/app.py b/app/app.py index 0f19b9f..33325d6 100644 --- a/app/app.py +++ b/app/app.py @@ -2,6 +2,7 @@ from flask_login import LoginManager from functools import wraps import yaml +from file_watcher import Watcher import threading from markupsafe import escape from constants import * @@ -13,6 +14,13 @@ import titledb def init(): + global watcher + # Create and start the file watcher + watcher = Watcher([], on_library_change) + watcher_thread = threading.Thread(target=watcher.run) + watcher_thread.daemon = True + watcher_thread.start() + global app_settings # load initial configuration reload_conf() @@ -138,6 +146,7 @@ def set_titles_api(): @app.route('/api/settings/library/paths', methods=['GET', 'POST', 'DELETE']) @access_required('admin') def library_paths_api(): + global watcher if request.method == 'POST': data = request.json success, errors = add_library_path_to_settings(data['path']) @@ -155,6 +164,10 @@ def library_paths_api(): } elif request.method == 'DELETE': data = request.json + if watcher.remove_directory(data['path']): + print(f"Removed {data['path']} from watchdog monitoring") + else: + print(f"Failed to remove {data['path']} from watchdog monitoring") success, errors = delete_library_path_from_settings(data['path']) if success: reload_conf() @@ -272,6 +285,9 @@ def scan_library(): for library_path in library_paths: scan_library_path(library_path, update_library=False) + # remove missing files + remove_missing_files() + # update library generate_library() @@ -318,6 +334,8 @@ def scan_library_path(library_path, update_library=True): scan_in_progress = False if update_library: + # remove missing files + remove_missing_files() # update library generate_library() @@ -350,7 +368,14 @@ def scan_library_api(): def reload_conf(): global app_settings + global watcher app_settings = load_settings() + # add library paths to watchdog if necessary + library_paths = app_settings['library']['paths'] + if library_paths: + for dir in library_paths: + if os.path.exists(dir): + watcher.add_directory(dir) def get_library_status(title_id): has_base = False @@ -388,10 +413,37 @@ def get_library_status(title_id): } return library_status +def on_library_change(events): + libraries_changed = set() + with app.app_context(): + # handle moved files + for moved_event in events['moved']: + # if the file has been moved outside of the library + if not moved_event["dest_path"].startswith(moved_event["directory"]): + # remove it from the db + print(delete_file_by_filepath(moved_event["src_path"])) + else: + # update the paths + print(update_file_path(moved_event["src_path"], moved_event["dest_path"])) + + for deleted_event in events['deleted']: + # delete the file from library if it exists + print(delete_file_by_filepath(deleted_event["src_path"])) + + for created_event in events['created']: + libraries_changed.add(created_event["directory"]) + + for library_to_scan in libraries_changed: + scan_library_path(library_to_scan, update_library=False) + + # remove missing files + remove_missing_files() + generate_library() + if __name__ == '__main__': init() - app.run(debug=True, host="0.0.0.0", port=8465) + app.run(debug=False, host="0.0.0.0", port=8465) # with app.app_context(): # get_library_status('0100646009FBE000') diff --git a/app/db.py b/app/db.py index 4dda296..e52295b 100644 --- a/app/db.py +++ b/app/db.py @@ -1,6 +1,7 @@ from flask_sqlalchemy import SQLAlchemy +from sqlalchemy.orm.exc import NoResultFound from flask_login import UserMixin -import json +import json, os db = SQLAlchemy() @@ -86,6 +87,29 @@ def add_to_titles_db(library, file_info): db.session.commit() +def update_file_path(old_path, new_path): + try: + # Find the file entry in the database using the old_path + file_entry = Files.query.filter_by(filepath=old_path).one() + + # Extract the new folder and root_dir from the new_path + new_folder = "/" + os.path.basename(os.path.dirname(new_path)) + + # Update the file entry with the new path values + file_entry.filepath = new_path + file_entry.folder = new_folder + + # Commit the changes to the database + db.session.commit() + + return f"File path updated successfully from {old_path} to {new_path}." + + except NoResultFound: + return f"No file entry found for the path: {old_path}." + except Exception as e: + db.session.rollback() # Roll back the session in case of an error + return f"An error occurred while updating the file path: {str(e)}" + def get_all_titles_from_db(): # results = db.session.query(Files.title_id).distinct() # return [row[0] for row in results] @@ -126,4 +150,48 @@ def delete_files_by_library(library_path): 'path': 'library/paths', 'error': f"An error occurred: {e}" }) - return success, errors \ No newline at end of file + return success, errors + +def delete_file_by_filepath(filepath): + try: + # Find file with the given filepath + file_to_delete = Files.query.filter_by(filepath=filepath).one() + + # Delete file + db.session.delete(file_to_delete) + + # Commit the changes + db.session.commit() + + return f"File '{filepath}' has been deleted." + except Exception as e: + # If there's an error, rollback the session + db.session.rollback() + return f"An error occurred while deleting the file path: {str(e)}" + +def remove_missing_files(): + try: + # Query all entries in the Files table + files = Files.query.all() + + # List to keep track of IDs to be deleted + ids_to_delete = [] + + for file_entry in files: + # Check if the file exists on disk + if not os.path.exists(file_entry.filepath): + # If the file does not exist, mark this entry for deletion + ids_to_delete.append(file_entry.id) + print(f"File not found, marking file for deletion: {file_entry.filepath}") + + # Delete all marked entries from the database + if ids_to_delete: + Files.query.filter(Files.id.in_(ids_to_delete)).delete(synchronize_session=False) + db.session.commit() + print(f"Deleted {len(ids_to_delete)} files from the database.") + else: + print("No files were deleted. All files are present on disk.") + + except Exception as e: + db.session.rollback() # Rollback in case of an error + print(f"An error occurred while removing missing files: {str(e)}") \ No newline at end of file diff --git a/app/file_watcher.py b/app/file_watcher.py new file mode 100644 index 0000000..807c712 --- /dev/null +++ b/app/file_watcher.py @@ -0,0 +1,140 @@ +from constants import * +import time, os +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +import threading +from functools import wraps + + +def is_dict_in_list(dict_list, dictionary): + for item in dict_list: + if item == dictionary: + return True + return False + +def debounce(wait): + """Decorator that postpones a function's execution until after `wait` seconds + have elapsed since the last time it was invoked.""" + def decorator(fn): + @wraps(fn) + def debounced(*args, **kwargs): + def call_it(): + fn(*args, **kwargs) + if hasattr(debounced, '_timer'): + debounced._timer.cancel() + debounced._timer = threading.Timer(wait, call_it) + debounced._timer.start() + return debounced + return decorator + +class Watcher: + def __init__(self, directories, callback): + self.directories = set(directories) # Use a set to store directories + self.callback = callback + self.event_handler = Handler(self.callback) + self.observer = Observer() + self.scheduler_map = {} + + def run(self): + for directory in self.directories: + task = self.observer.schedule(self.event_handler, directory, recursive=True) + self.scheduler_map[directory] = task + self.observer.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + self.observer.stop() + self.observer.join() + + def add_directory(self, directory): + if directory not in self.directories: + print(f'add directory {directory} to watchdog') + task = self.observer.schedule(self.event_handler, directory, recursive=True) + self.scheduler_map[directory] = task + self.directories.add(directory) + self.event_handler.add_directory(directory) + return True + return False + + def remove_directory(self, directory): + if directory in self.directories: + if directory in self.scheduler_map: + self.observer.unschedule(self.scheduler_map[directory]) + del self.scheduler_map[directory] + self.directories.remove(directory) + return True + return False + +class Handler(FileSystemEventHandler): + def __init__(self, callback, debounce_time=5): + self._raw_callback = callback # The actual callback passed to the handler + self.directories = [] + self.debounce_time = debounce_time + self.events_to_process = { + 'modified': [], + 'created': [], + 'deleted': [], + 'moved': [] + } + self.debounced_process_events = self.debounce_callback(self._process_collected_events, debounce_time) + + def add_directory(self, directory): + if directory not in self.directories: + self.directories.append(directory) + + def debounce_callback(self, callback, wait): + @debounce(wait) + def debounced_callback(): + callback() + return debounced_callback + + def _process_collected_events(self): + if any(self.events_to_process.values()): # Check if any list has events + self._raw_callback(self.events_to_process) + # Reset the events_to_process dictionary + self.events_to_process = { + 'modified': [], + 'created': [], + 'deleted': [], + 'moved': [] + } + + def collect_event(self, event, directory): + if event.is_directory: + return + + if event.event_type in ['deleted', 'moved', 'created']: + file_extension = os.path.splitext(event.src_path)[1][1:] + if file_extension not in ALLOWED_EXTENSIONS: + return + + event_slim = { + 'directory': directory, + 'dest_path': event.dest_path, + 'src_path': event.src_path + } + if not is_dict_in_list(self.events_to_process[event.event_type], event_slim): + self.events_to_process[event.event_type].append(event_slim) + self.debounced_process_events() # Trigger the debounce mechanism + + def on_modified(self, event): + for directory in self.directories: + if event.src_path.startswith(directory): + self.collect_event(event, directory) + + def on_created(self, event): + for directory in self.directories: + if event.src_path.startswith(directory): + self.collect_event(event, directory) + + def on_deleted(self, event): + for directory in self.directories: + if event.src_path.startswith(directory): + self.collect_event(event, directory) + + def on_moved(self, event): + for directory in self.directories: + if event.src_path.startswith(directory): + self.collect_event(event, directory) diff --git a/app/settings.py b/app/settings.py index da100d6..b93292f 100644 --- a/app/settings.py +++ b/app/settings.py @@ -83,7 +83,6 @@ def delete_library_path_from_settings(path): settings = load_settings() library_paths = settings['library']['paths'] if library_paths: - print(library_paths) if path in library_paths: library_paths.remove(path) settings['library']['paths'] = library_paths diff --git a/requirements.txt b/requirements.txt index e7f3010..155eb52 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ flask-sqlalchemy==3.1.1 PyYAML==6.0.1 requests==2.31.0 unzip_http==0.4 +watchdog==4.0.1 Werkzeug==3.0.1 # NSTools