diff --git a/database.py b/database.py index 8f79161..1f96660 100644 --- a/database.py +++ b/database.py @@ -6,7 +6,7 @@ import cv2 import logging -FACE_SIMILARITY_TOLARANCE = 0.6 +FACE_SIMILARITY_TOLARANCE = 0.55 # Configure logging logging.basicConfig( diff --git a/face_detection.py b/face_detection.py index 5375795..02156a9 100644 --- a/face_detection.py +++ b/face_detection.py @@ -1,3 +1,4 @@ +import hashlib import cv2 import os import numpy as np @@ -27,6 +28,14 @@ def save_unique_face(face_location, image, unique_faces_folder, face_id): cv2.imwrite(face_path, cv2.cvtColor(face_image, cv2.COLOR_RGB2BGR)) return face_path +def hash_image_filename(filename): + """Generate a hash of the image filename.""" + return hashlib.sha256(filename.encode()).hexdigest() + +def is_image_processed(collection, filename_hash): + """Check if the image has already been processed by looking it up in the database.""" + return collection.find_one({"occurrences.filename_hash": filename_hash}) is not None + def process_images(image_folder, unique_faces_folder, collection): unique_face_count = 0 face_occurrences = {} @@ -35,6 +44,13 @@ def process_images(image_folder, unique_faces_folder, collection): if filename.lower().endswith(('.png', '.jpg', '.jpeg')): logging.info(f"Processing image: {filename}") image_path = os.path.join(image_folder, filename) + filename_hash = hash_image_filename(filename) + + # Check if the image has already been processed + if is_image_processed(collection, filename_hash): + logging.info(f"Skipping already processed image: {filename}") + continue + face_locations, image = extract_faces(image_path) for face_location in face_locations: @@ -43,6 +59,12 @@ def process_images(image_folder, unique_faces_folder, collection): existing_face_id = find_similar_face(collection, face_encoding) + occurrence_data = { + 'filename': filename, + 'filename_hash': filename_hash, # Add the filename hash here + 'bounding_box': face_location + } + if existing_face_id is None: 
face_id = unique_face_count face_path = save_unique_face(face_location, image, unique_faces_folder, face_id) @@ -51,40 +73,25 @@ def process_images(image_folder, unique_faces_folder, collection): 'face_id': face_id, 'image_filename': face_path, 'face_encoding': encoded_face, - 'occurrences': [{ - 'filename': filename, - 'bounding_box': face_location - }] + 'occurrences': [occurrence_data] } save_face_to_db(collection, face_data) logging.info(f"Saved unique face ID {face_id} to database with path: {face_path}") - face_occurrences[face_id] = [{ - 'filename': filename, - 'bounding_box': face_location - }] + face_occurrences[face_id] = [occurrence_data] unique_face_count += 1 else: logging.info(f"Found existing face ID {existing_face_id} in image: {filename}") # Update the occurrences for the existing face collection.update_one( {'face_id': existing_face_id}, - {'$push': {'occurrences': { - 'filename': filename, - 'bounding_box': face_location - }}} + {'$push': {'occurrences': occurrence_data}} ) if existing_face_id in face_occurrences: - face_occurrences[existing_face_id].append({ - 'filename': filename, - 'bounding_box': face_location - }) + face_occurrences[existing_face_id].append(occurrence_data) else: - face_occurrences[existing_face_id] = [{ - 'filename': filename, - 'bounding_box': face_location - }] + face_occurrences[existing_face_id] = [occurrence_data] return unique_face_count, face_occurrences @@ -99,4 +106,4 @@ def process_images(image_folder, unique_faces_folder, collection): unique_faces_count, face_occurrences = process_images(image_folder, unique_faces_folder, collection) logging.info(f"Unique faces extracted and saved to database: {unique_faces_count}") - logging.info(f"Face occurrences: {face_occurrences}") \ No newline at end of file + logging.info(f"Face occurrences: {face_occurrences}")