You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
215 lines
7.0 KiB
215 lines
7.0 KiB
import sys
|
|
import cv2
|
|
# My Custom Library called delibs.py
|
|
import delibs
|
|
# coordinates is an OPTIONAL module! Feel free to comment it out.
|
|
import coordinates
|
|
|
|
"""
|
|
Copyright (c) 2025 by Robert Strutts
License: MIT

Key Optimizations:

Multi-Scale Processing:
    First alignment at low resolution (faster)
    Final refinement at full resolution (accurate)

Matrix Scaling:
    The translation components of the transformation matrix are scaled up
    Rotation and scaling components remain the same

Smart Downscaling:
    Uses INTER_AREA interpolation which is ideal for size reduction
    Maintains aspect ratio

Performance Benefits:
    Processing time scales with area, so 4x downscale = ~16x faster initial alignment
    Memory usage significantly reduced
|
|
"""
|
|
|
|
# GPS proximity settings: is_same_location() only measures the distance between
# two photos when the check is switched on, and handle_GPS() compares that
# distance against the threshold below.
within_feet_check = True
withinFeet = 10  # threshold in feet (strictly less than)

# Photo file size limits, in bytes.
too_small = 1 << 10  # 1 KiB
too_large = 10 * (1 << 20)  # 10 MiB
|
|
|
|
def not_a_dup():
    """Announce that the images are not duplicates and finish with status 0.

    The status is handed to ``delibs.exit_timer``; presumably that call ends
    the process (confirm in delibs).
    """
    verdict = "👌Not a Duplicate"
    print(verdict)
    delibs.exit_timer(0)
|
|
|
|
def is_same_location(point1, point2):
    """Compare two GPS points.

    Args:
        point1: First point, passed through to
            ``coordinates.haversine_distance_feet`` when a distance is needed.
        point2: Second point, same form as ``point1``.

    Returns:
        ``True`` when the points compare equal; otherwise, if the module flag
        ``within_feet_check`` is set, whatever
        ``coordinates.haversine_distance_feet`` returns (callers treat it as a
        distance in feet); otherwise ``False``.
    """
    if point1 == point2:
        return True
    if not within_feet_check:
        # Distance checking is switched off: unequal points are simply different.
        return False
    return coordinates.haversine_distance_feet(point1, point2)
|
|
|
|
def is_same_camera(cam1, cam2):
    """Return the result of comparing the two camera descriptors for equality."""
    cameras_match = cam1 == cam2
    return cameras_match
|
|
|
|
def handle_GPS(location1, location2):
|
|
camera_info1, latitude1, longitude1 = location1
|
|
camera_info2, latitude2, longitude2 = location2
|
|
point1 = (latitude1, longitude1)
|
|
point2 = (latitude2, longitude2)
|
|
camera1 = (camera_info1['Make'], camera_info1['Model'])
|
|
camera2 = (camera_info2['Make'], camera_info2['Model'])
|
|
same_cams = is_same_camera(camera1, camera2)
|
|
|
|
the_location = is_same_location(point1, point2)
|
|
match the_location:
|
|
case True:
|
|
print("Images are both from same exact Location")
|
|
if same_cams:
|
|
print("Cameras are the same.")
|
|
delibs.exit_timer(5)
|
|
|
|
case False:
|
|
print("Images are from different Locations")
|
|
|
|
case float() if isinstance(the_location, float): # Checks if it's a float
|
|
print(f"Images distance in feet: {the_location:.2f}")
|
|
if the_location < withinFeet:
|
|
print(f"With in requirements of {withinFeet}")
|
|
if is_same_camera(camera1, camera2):
|
|
print("Cameras are the same.")
|
|
delibs.exit_timer(6)
|
|
|
|
if same_cams == False:
|
|
print("Different Cameras detected.")
|
|
not_a_dup()
|
|
|
|
def is_module_imported(module_name):
    """Tell whether *module_name* is currently a key of ``sys.modules``."""
    loaded_modules = sys.modules
    return module_name in loaded_modules
|
|
|
|
def main():
    """Compare two image files and report whether they are duplicates.

    Reads the two file paths from ``sys.argv[1]`` and ``sys.argv[2]``.
    Optional flags anywhere in ``sys.argv``: ``-noansi`` / ``-ansi`` toggle
    ANSI output through delibs, ``-scores`` also prints a rotation score.

    Steps, in order: file-size check, quick file hash, optional GPS/camera
    check (when the ``coordinates`` module is loaded), image loading and
    dimension check, alignment with 1/4 downscaling, and similarity scoring.

    Status codes used here (passed to ``delibs.exit_timer`` unless noted):
        3      too few arguments (via ``sys.exit``), or the second file
               fails the size check
        9      the first file fails the size check
        1      identical quick hashes, or perfect alignment scores
        8 / 4  first / second image has no usable dimensions
        0      not a duplicate
        2      near-perfect alignment
    """
    if len(sys.argv) < 3:
        print("Usage: python3 dedup.py file1.jpg file2.jpg")
        sys.exit(3)

    if "-noansi" in sys.argv:
        delibs.disable_ansi()
    elif "-ansi" in sys.argv:
        delibs.enable_ansi()

    file1 = sys.argv[1]
    file2 = sys.argv[2]

    with delibs.Timer("Getting File Size"):
        size1 = delibs.check_file_size_bytes(file1, too_small, too_large)
        size2 = delibs.check_file_size_bytes(file2, too_small, too_large)

    # A non-None result is printed as the error for that file.
    if size1 is not None:
        print(f"ERROR: {size1}")
        delibs.exit_timer(9)
    if size2 is not None:
        print(f"ERROR: {size2}")
        delibs.exit_timer(3)  # Mark as Skipped

    with delibs.Timer("Hashing"):
        # Quick hashes
        hash1 = delibs.quick_file_hash(file1)
        hash2 = delibs.quick_file_hash(file2)

    if hash1 == hash2:
        print("xxHash found duplicates")
        print("❌ Perfect match - images are identical - Duplicate Found!")
        delibs.exit_timer(1)
    else:
        print("Done hashing...")

    if is_module_imported('coordinates'):
        print("Using Pillow GPS Coordinates module")
        coordinates1 = coordinates.get_coordinates_from_image(file1)
        # Only read the second file when the first one produced a result.
        if coordinates1 is not None:
            coordinates2 = coordinates.get_coordinates_from_image(file2)
            if coordinates2 is not None:
                handle_GPS(coordinates1, coordinates2)
    else:
        print("Not using Coordinates module")

    with delibs.Timer("Loading Images"):
        # Load large images
        large_img1 = cv2.imread(file1)  # e.g., 4000x3000 pixels
        large_img2 = cv2.imread(file2)  # e.g., 4000x3000 pixels

    w, h = delibs.get_image_dimensions_cv(large_img1)
    w2, h2 = delibs.get_image_dimensions_cv(large_img2)
    if w is None or h is None:
        print("❌Aborting❌...Invalid Image!")
        delibs.exit_timer(8)
    if w2 is None or h2 is None:
        print("❌Aborting❌...Invalid Image!")
        delibs.exit_timer(4)  # Mark as Skipped

    # NOTE(review): this fires only when NO dimension of one image equals any
    # dimension of the other, so e.g. 4000x3000 vs 4000x2000 passes through.
    # Confirm whether a strict "same size, possibly rotated" test was intended.
    if w != w2 and w != h2 and h != h2 and h != w2:
        print("Diffent Resolutions")
        not_a_dup()

    print("Done loading images...")
    with delibs.Timer("Module - Aligning with downscaling 1/4 size - Total Time"):
        # Align with downscaling (initially process at 1/4 size)
        aligned, matrix, angle = delibs.align_with_downscaling(
            large_img1, large_img2,
            downscale_factor=4,
            try_common_rotations=True,
        )

    # To inspect the aligned result while debugging:
    # cv2.imwrite('aligned_large.jpg', aligned)

    # Print debug info
    print(f"Detected rotation: {angle}°")
    print(f"Final transformation matrix:\n{matrix}")

    # Calculate scores
    matrix_score = delibs.matrix_similarity_score(matrix)

    is_score = "-scores" in sys.argv

    if matrix_score == 1.0 and not is_score:
        print("❌ Perfect match score, images should be identical - Duplicate Found!")
        delibs.exit_timer(1)
    # NOTE(review): this early exit runs only when -scores IS given, which
    # skips the score output that flag asks for. Confirm the intended polarity.
    if matrix_score < 0.3 and is_score:
        print("👌Not a Duplicate, best guess!")
        delibs.exit_timer(0)
    if is_score:
        score = delibs.find_duplicate_with_rotation(large_img1, aligned)
        print(f"Score: {score}")

    decomposed_score = delibs.decomposed_similarity_score(matrix, large_img1.shape[1])
    combined_score = delibs.comprehensive_similarity(large_img1, aligned, matrix)

    # Check for perfect alignment
    if matrix_score == 1.0 and decomposed_score == 1.0 and combined_score == 1.0:
        print("No transformation needed")
        print("❌ Perfect match - images are identical - Duplicate Found!")
        exit_code = 1
    elif matrix_score > 0.9 and decomposed_score > 0.9 and combined_score > 0.7:
        print("✅ Near-perfect alignment - minor differences detected")
        exit_code = 2
    else:
        print("👌Not a Duplicate")
        exit_code = 0

    print(f"Matrix deviation score: {matrix_score:.4f}")
    print(f"Decomposed similarity: {decomposed_score:.4f}")
    print(f"Combined similarity: {combined_score:.4f}")
    delibs.exit_timer(exit_code)
|
|
|
|
# Script entry point: run the comparison only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
|
|
|
"""
|
|
Matrix-based scores are fast but don't consider image content
Decomposed analysis gives more interpretable results (separate rotation/scale/translation)
Combined approaches with pixel comparison are most accurate but slower
Normalization is crucial - translation should be relative to image size
|
|
"""
|
|
|