import sys import cv2 # My Custom Library called delibs.py import delibs # coordinates is an OPTIONAL module!!!!!!!!! Feel free to commit it out. import coordinates """ Copyright (c) 2025 by Robert Strutts License: MIT Key Optimizations: Multi-Scale Processing: First alignment at low resolution (faster) Final refinement at full resolution (accurate) Matrix Scaling: The translation components of the transformation matrix are scaled up Rotation and scaling components remain the same Smart Downscaling: Uses INTER_AREA interpolation which is ideal for size reduction Maintains aspect ratio Performance Benefits: Processing time scales with area, so 4x downscale = ~16x faster initial alignment Memory usage significantly reduced """ within_feet_check = True withinFeet = 10 # < 10 feet # Photo File Size Limits: too_small = 1024 # 1KB too_large = 10 * 1024 * 1024 # 10MB def not_a_dup(): print("👌Not a Duplicate") delibs.exit_timer(0) def is_same_location(point1, point2): if point1 == point2: return True elif within_feet_check == True: return coordinates.haversine_distance_feet(point1, point2) else: return False def is_same_camera(cam1, cam2): return cam1 == cam2 def handle_GPS(location1, location2): camera_info1, latitude1, longitude1 = location1 camera_info2, latitude2, longitude2 = location2 point1 = (latitude1, longitude1) point2 = (latitude2, longitude2) camera1 = (camera_info1['Make'], camera_info1['Model']) camera2 = (camera_info2['Make'], camera_info2['Model']) same_cams = is_same_camera(camera1, camera2) the_location = is_same_location(point1, point2) match the_location: case True: print("Images are both from same exact Location") if same_cams: print("Cameras are the same.") delibs.exit_timer(5) case False: print("Images are from different Locations") case float() if isinstance(the_location, float): # Checks if it's a float print(f"Images distance in feet: {the_location:.2f}") if the_location < withinFeet: print(f"With in requirements of {withinFeet}") if is_same_camera(camera1, camera2): print("Cameras are the same.") delibs.exit_timer(6) if same_cams == False: print("Different Cameras detected.") not_a_dup() def is_module_imported(module_name): return module_name in sys.modules def main(): if len(sys.argv) < 3: print("Usage: python3 dedup.py file1.jpg file2.jpg") sys.exit(3) if "-noansi" in sys.argv: delibs.disable_ansi() elif "-ansi" in sys.argv: delibs.enable_ansi() file1 = sys.argv[1] file2 = sys.argv[2] with delibs.Timer("Getting File Size"): size1 = delibs.check_file_size_bytes(file1, too_small, too_large) size2 = delibs.check_file_size_bytes(file2, too_small, too_large) if size1 != None: print(f"ERROR: {size1}") delibs.exit_timer(9) if size2 != None: print(f"ERROR: {size2}") delibs.exit_timer(3) # Mark as Skipped with delibs.Timer("Hashing"): # Quick hashes hash1 = delibs.quick_file_hash(file1) hash2 = delibs.quick_file_hash(file2) if (hash1 == hash2): print("xxHash found duplicates") print("❌ Perfect match - images are identical - Duplicate Found!") delibs.exit_timer(1) else: print("Done hashing...") if is_module_imported('coordinates'): print("Using Pillow GPS Coordinates module") coordinates1 = coordinates.get_coordinates_from_image(file1) if coordinates1 != None: coordinates2 = coordinates.get_coordinates_from_image(file2) if coordinates2 != None: handle_GPS(coordinates1, coordinates2) else: print("Not using Coordinates module") with delibs.Timer("Loading Images"): # Load large images large_img1 = cv2.imread(file1) # e.g., 4000x3000 pixels large_img2 = cv2.imread(file2) # e.g., 4000x3000 pixels w, h = delibs.get_image_dimensions_cv(large_img1) w2, h2 = delibs.get_image_dimensions_cv(large_img2) if w == None or h == None: print("❌Aborting❌...Invalid Image!") delibs.exit_timer(8) if w2 == None or h2 == None: print("❌Aborting❌...Invalid Image!") delibs.exit_timer(4) # Mark as Skipped if w != w2 and w != h2 and h != h2 and h != w2: print("Diffent Resolutions") not_a_dup() print("Done loading images...") with delibs.Timer("Module - Aligning with downscaling 1/4 size - Total Time"): # Align with downscaling (initially process at 1/4 size) aligned, matrix, angle = delibs.align_with_downscaling( large_img1, large_img2, downscale_factor=4, try_common_rotations=True ) # Save result # cv2.imwrite('aligned_large.jpg', aligned) # Print debug info print(f"Detected rotation: {angle}°") print(f"Final transformation matrix:\n{matrix}") # Calculate scores matrix_score = delibs.matrix_similarity_score(matrix) if "-scores" in sys.argv: is_score = True else: is_score = False if matrix_score == 1.0 and is_score == False: print("❌ Perfect match score, images should be identical - Duplicate Found!") delibs.exit_timer(1) if matrix_score < 0.3 and is_score != False: print("👌Not a Duplicate, best guess!") delibs.exit_timer(0) if is_score == True: score = delibs.find_duplicate_with_rotation(large_img1, aligned) print(f"Score: {score}") decomposed_score = delibs.decomposed_similarity_score(matrix, large_img1.shape[1]) combined_score = delibs.comprehensive_similarity(large_img1, aligned, matrix) # Check for perfect alignment if matrix_score == 1.0 and decomposed_score == 1.0 and combined_score == 1.0: print("No transformation needed") print("❌ Perfect match - images are identical - Duplicate Found!") exit_code = 1 elif matrix_score > 0.9 and decomposed_score > 0.9 and combined_score > 0.7: print("✅ Near-perfect alignment - minor differences detected") exit_code = 2 else: print("👌Not a Duplicate") exit_code = 0 print(f"Matrix deviation score: {matrix_score:.4f}") print(f"Decomposed similarity: {decomposed_score:.4f}") print(f"Combined similarity: {combined_score:.4f}") delibs.exit_timer(exit_code) # --- Example usage --- if __name__ == "__main__": main() """ Matrix-based scores are fast but don't consider image content Decomposed analysis gives more interpretable results (separate rotation/scale/translation) Combined approaches with pixel comparison are most accurate but slower Normalization is crucial - translation should be relative to image size """