"""
Copyright (c) 2025 by Robert Strutts
License: MIT

Key Optimizations:
    Multi-Scale Processing:
        First alignment at low resolution (faster)
        Final refinement at full resolution (accurate)
    Matrix Scaling:
        The translation components of the transformation matrix are scaled up
        Rotation and scaling components remain the same
    Smart Downscaling:
        Uses INTER_AREA interpolation which is ideal for size reduction
        Maintains aspect ratio
    Performance Benefits:
        Processing time scales with area, so 4x downscale = ~16x faster initial alignment
        Memory usage significantly reduced
"""
import sys

import cv2

# My Custom Library called delibs.py
import delibs

# coordinates is an OPTIONAL module. If it cannot be imported the GPS check is
# skipped automatically; feel free to comment the import out as well.
try:
    import coordinates
except ImportError:
    # A failed import leaves no 'coordinates' entry in sys.modules, so
    # is_module_imported('coordinates') reports False and main() skips GPS.
    coordinates = None


def handle_GPS(location1, location2) -> None:
    """Report whether two GPS locations are equal and finish via delibs.exit_timer.

    Args:
        location1: Location extracted from the first image.
        location2: Location extracted from the second image (may be None).

    Calls ``delibs.exit_timer(5)`` when the locations compare equal and
    ``delibs.exit_timer(0)`` otherwise.
    NOTE(review): exit_timer presumably terminates the process with that
    status code - confirm in delibs.
    """
    if location1 == location2:
        print("Images are both from same exact Location")
        print("✅Possible duplicate")
        delibs.exit_timer(5)
    else:
        print("Images are from different Locations")
        print("👌Not a Duplicate")
        delibs.exit_timer(0)


def is_module_imported(module_name: str) -> bool:
    """Return True when *module_name* is present in ``sys.modules``."""
    return module_name in sys.modules


def _resolutions_differ(w, h, w2, h2) -> bool:
    """Return True when the sizes match neither directly nor rotated by 90 degrees."""
    # Compare the dimensions as a pair. Checking each side on its own lets a
    # square image (4x4) slip through against a non-square one sharing a side
    # (4x3), because every individual side finds *some* equal counterpart.
    return (w, h) != (w2, h2) and (w, h) != (h2, w2)


def main() -> None:
    """Compare two image files given on the command line and report duplicates.

    Usage: ``python3 dedup.py file1.jpg file2.jpg [scores]``

    Steps, in order:
        1. Quick file hash comparison (code 1 when the hashes are equal).
        2. Optional GPS comparison when the ``coordinates`` module is loaded
           and the first image yields coordinates (codes 5 / 0).
        3. Load both images; code 8 for an unreadable image, code 0 when the
           resolutions differ (a 90 degree swap of width/height is accepted).
        4. Align the second image onto the first at 1/4 scale and score the
           resulting transformation matrix.
        5. Without the ``scores`` argument: code 1 for a matrix score of
           exactly 1.0, code 0 for a score below 0.3. Otherwise (or when
           ``scores`` is given) the decomposed and combined scores decide
           between codes 1, 2 (near-perfect alignment) and 0.

    Codes are handed to ``delibs.exit_timer``; a usage error exits directly
    with status 3 through ``sys.exit``.
    NOTE(review): the flow relies on exit_timer ending the process - confirm.
    """
    if len(sys.argv) < 3:
        print("Usage: python3 dedup.py file1.jpg file2.jpg")
        sys.exit(3)

    file1 = sys.argv[1]
    file2 = sys.argv[2]

    with delibs.Timer("Hashing"):
        # Quick hashes
        hash1 = delibs.quick_file_hash(file1)
        hash2 = delibs.quick_file_hash(file2)
        if hash1 == hash2:
            print("xxHash found duplicates")
            print("❌ Perfect match - images are identical - Duplicate Found!")
            print("No transformation needed")
            delibs.exit_timer(1)
        else:
            print("Done hashing...")

    if is_module_imported('coordinates'):
        print("Using Pillow GPS Coordinates module")
        coordinates1 = coordinates.get_coordinates_from_image(file1)
        # Only consult the second image when the first one carries GPS data.
        if coordinates1 is not None:
            coordinates2 = coordinates.get_coordinates_from_image(file2)
            handle_GPS(coordinates1, coordinates2)
    else:
        print("Not using Coordinates module")

    with delibs.Timer("Loading Images"):
        # Load large images
        large_img1 = cv2.imread(file1)  # e.g., 4000x3000 pixels
        large_img2 = cv2.imread(file2)  # e.g., 4000x3000 pixels

        w, h = delibs.get_image_dimensions_cv(large_img1)
        w2, h2 = delibs.get_image_dimensions_cv(large_img2)
        if w is None or w2 is None or h is None or h2 is None:
            print("❌Aborting❌...Invalid Image!")
            delibs.exit_timer(8)

        if _resolutions_differ(w, h, w2, h2):
            print("Diffent Resolutions")
            print("👌Not a Duplicate")
            delibs.exit_timer(0)

        print("Done loading images...")

    with delibs.Timer("Module - Aligning with downscaling 1/4 size - Total Time"):
        # Align with downscaling (initially process at 1/4 size)
        aligned, matrix, angle = delibs.align_with_downscaling(
            large_img1,
            large_img2,
            downscale_factor=4,
            try_common_rotations=True,
        )

    # Save result
    # cv2.imwrite('aligned_large.jpg', aligned)

    # Print debug info
    print(f"Detected rotation: {angle}°")
    print(f"Final transformation matrix:\n{matrix}")

    # Calculate scores
    matrix_score = delibs.matrix_similarity_score(matrix)

    # Optional third argument; only the literal "scores" changes behaviour.
    is_score = sys.argv[3] if len(sys.argv) > 3 else ""

    # Fast verdicts from the matrix score alone, skipped in "scores" mode so
    # that every score gets computed and printed.
    if matrix_score == 1.0 and is_score != "scores":
        print("❌ Perfect match score, images should be identical - Duplicate Found!")
        delibs.exit_timer(1)

    if matrix_score < 0.3 and is_score != "scores":
        print("👌Not a Duplicate, best guess!")
        delibs.exit_timer(0)

    if is_score == "scores":
        score = delibs.find_duplicate_with_rotation(large_img1, aligned)
        print(f"Score: {score}")

    decomposed_score = delibs.decomposed_similarity_score(matrix, large_img1.shape[1])
    combined_score = delibs.comprehensive_similarity(large_img1, aligned, matrix)

    # Check for perfect alignment
    if matrix_score == 1.0 and decomposed_score == 1.0 and combined_score == 1.0:
        print("❌ Perfect match - images are identical - Duplicate Found!")
        print("No transformation needed")
        exit_code = 1
    elif matrix_score > 0.9 and decomposed_score > 0.9 and combined_score > 0.7:
        print("✅ Near-perfect alignment - minor differences detected")
        exit_code = 2
    else:
        print("👌Not a Duplicate")
        exit_code = 0

    print(f"Matrix deviation score: {matrix_score:.4f}")
    print(f"Decomposed similarity: {decomposed_score:.4f}")
    print(f"Combined similarity: {combined_score:.4f}")
    delibs.exit_timer(exit_code)


# --- Example usage ---
if __name__ == "__main__":
    main()

"""
Matrix-based scores are fast but don't consider image content
Decomposed analysis gives more interpretable results (separate rotation/scale/translation)
Combined approaches with pixel comparison are most accurate but slower
Normalization is crucial - translation should be relative to image size
"""