more profiling

main
Robert 8 months ago
parent 3a1946516b
commit e853019cf3
  1. 2
      README.md
  2. BIN
      Screenshot2025-04-26.png
  3. 144
      dedup.py

@ -16,3 +16,5 @@ cd dedup
source myenv/bin/activate source myenv/bin/activate
python dedup.py 0.jpg 1.jpg python dedup.py 0.jpg 1.jpg
``` ```
[![Image of ScreenShot](Screenshot2025-04-26.png)](Screenshot2025-04-26.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

@ -40,9 +40,41 @@ def exit_handler(signum, frame):
# CTRL+C will Exit NOW!!! # CTRL+C will Exit NOW!!!
signal.signal(signal.SIGINT, exit_handler) signal.signal(signal.SIGINT, exit_handler)
def exit_timer(level):
    """Print the total execution time, then terminate the process.

    Args:
        level: Exit status passed on to the interpreter.

    Raises:
        SystemExit: Always, carrying ``level`` as its code.
    """
    end = time.perf_counter()
    # NOTE(review): `start` is a module-level name set outside this block;
    # presumably a time.perf_counter() reading taken at startup - confirm.
    print(f"Execution took {end - start:.4f} seconds")
    # Use sys.exit() rather than the bare exit(): the latter is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. `python -S`, frozen executables).
    sys.exit(level)
class Timer:
    """Simple stopwatch based on ``time.perf_counter()``.

    Can be driven manually with ``start()`` / ``stop()`` or used as a
    context manager, in which case the elapsed time is printed when the
    ``with`` block is left::

        with Timer("Hashing"):
            ...

    Attributes are plain and public:
        name: Label printed in front of the result ("Timer" if none given).
        start_time: ``perf_counter`` reading at the last ``start()``, or None.
        end_time: ``perf_counter`` reading at the last ``stop()``, or None.
    """

    def __init__(self, name=None):
        # Any falsy name (None, "") falls back to the generic label.
        self.name = name if name else "Timer"
        self.start_time = None
        self.end_time = None

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the `with` body is
        # still propagated after the timing line has been printed.
        self.stop()
        self.print_result()

    def start(self) -> None:
        """Begin (or restart) timing."""
        self.start_time = time.perf_counter()
        # Drop any end mark left over from a previous run; otherwise a
        # restarted timer would report `old end - new start`, i.e. a
        # negative duration, until stop() is called again.
        self.end_time = None

    def stop(self) -> None:
        """Record the end of the timed interval."""
        self.end_time = time.perf_counter()

    def elapsed(self) -> float:
        """Return the measured duration in seconds.

        While the timer is still running (started but not stopped) the
        time elapsed so far is returned.

        Raises:
            ValueError: If the timer has never been started.
        """
        if self.start_time is None:
            raise ValueError("Timer has not been started")
        if self.end_time is None:
            return time.perf_counter() - self.start_time
        return self.end_time - self.start_time

    def print_result(self) -> None:
        """Print ``"<name>: <seconds> seconds"`` with microsecond precision."""
        elapsed = self.elapsed()
        print(f"{self.name}: {elapsed:.6f} seconds")
def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=True): def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=True):
""" """
@ -60,14 +92,17 @@ def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=
rotation_angle: Detected simple rotation (None if not found) rotation_angle: Detected simple rotation (None if not found)
""" """
# 1. First alignment at low resolution # 1. First alignment at low resolution
small1 = downscale_image(img1, downscale_factor) with Timer("1st alignment at Low Res-Downsaling"):
small2 = downscale_image(img2, downscale_factor) small1 = downscale_image(img1, downscale_factor)
small2 = downscale_image(img2, downscale_factor)
print("Done downscaling...")
print("Please wait...Rotation starting.")
# Get initial alignment at low resolution # Get initial alignment at low resolution
_, init_matrix, rotation_angle = align_with_ecc_and_rotation( with Timer("2nd alignment at Low Res-Rotations"):
small1, small2, try_common_rotations _, init_matrix, rotation_angle = align_with_ecc_and_rotation(
) small1, small2, try_common_rotations
)
print("Done rotating low res image...")
if init_matrix is None: if init_matrix is None:
return img2, None, None # Alignment failed return img2, None, None # Alignment failed
@ -76,28 +111,32 @@ def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=
if rotation_angle is not None: if rotation_angle is not None:
img2 = rotate_image(img2, rotation_angle) img2 = rotate_image(img2, rotation_angle)
# Scale up the transformation matrix with Timer("Scaling translation components"):
full_matrix = init_matrix.copy() # Scale up the transformation matrix
full_matrix[:2, 2] *= downscale_factor # Scale translation components full_matrix = init_matrix.copy()
full_matrix[:2, 2] *= downscale_factor # Scale translation components
# Convert images to grayscale for final alignment print("Done scale-up/transform...")
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) with Timer("Convert images to grayscale"):
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) # Convert images to grayscale for final alignment
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
print("Done greyscale alignment...")
# Set criteria for final alignment # Set criteria for final alignment
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 500, 1e-6) criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 500, 1e-6)
print("Please wait...ECC initial estimate.")
try: try:
# Run ECC with initial estimate with Timer("ECC init"):
cc, full_matrix = cv2.findTransformECC( # Run ECC with initial estimate
gray1, gray2, full_matrix, cv2.MOTION_AFFINE, criteria cc, full_matrix = cv2.findTransformECC(
) gray1, gray2, full_matrix, cv2.MOTION_AFFINE, criteria
)
# Apply final transformation to color image with Timer("Apply final transformation to color image"):
aligned_img = cv2.warpAffine( # Apply final transformation to color image
img2, full_matrix, (img1.shape[1], img1.shape[0]), aligned_img = cv2.warpAffine(
flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP img2, full_matrix, (img1.shape[1], img1.shape[0]),
) flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP
)
return aligned_img, full_matrix, rotation_angle return aligned_img, full_matrix, rotation_angle
except: except:
@ -395,47 +434,47 @@ if __name__ == "__main__":
file1 = sys.argv[1] file1 = sys.argv[1]
file2 = sys.argv[2] file2 = sys.argv[2]
# Quick hashes
hash1 = quick_file_hash(file1) with Timer("Hashing"):
hash2 = quick_file_hash(file2) # Quick hashes
hash1 = quick_file_hash(file1)
hash2 = quick_file_hash(file2)
if (hash1 == hash2): if (hash1 == hash2):
print("xxHash found duplicates") print("xxHash found duplicates")
print("✅ Perfect match - images are identical") print("✅ Perfect match - images are identical")
print("No transformation needed") print("No transformation needed")
timer() exit_timer(1)
exit(1)
else: else:
print("Done hashing...") print("Done hashing...")
# Load large images with Timer("Loading Images"):
large_img1 = cv2.imread(file1) # e.g., 4000x3000 pixels # Load large images
large_img2 = cv2.imread(file2) # e.g., 4000x3000 pixels large_img1 = cv2.imread(file1) # e.g., 4000x3000 pixels
large_img2 = cv2.imread(file2) # e.g., 4000x3000 pixels
w, h = get_image_dimensions_cv(large_img1) w, h = get_image_dimensions_cv(large_img1)
w2, h2 = get_image_dimensions_cv(large_img2) w2, h2 = get_image_dimensions_cv(large_img2)
if w == None or w2 == None or h == None or h2 == None: if w == None or w2 == None or h == None or h2 == None:
print("Aborting...Invalid Image!") print("Aborting...Invalid Image!")
timer() exit_timer(8)
exit(8)
if w != w2 and w != h2: if w != w2 and w != h2:
print("Diffent Resolutions") print("Diffent Resolutions")
timer() exit_timer(0)
exit(0)
if h != h2 and h != w2: if h != h2 and h != w2:
print("Diffent Resolutions") print("Diffent Resolutions")
timer() exit_timer(0)
exit(0)
print("Done loading images...") print("Done loading images...")
# Align with downscaling (initially process at 1/4 size) with Timer("Aligning with downscaling 1/4 size"):
aligned, matrix, angle = align_with_downscaling( # Align with downscaling (initially process at 1/4 size)
large_img1, large_img2, aligned, matrix, angle = align_with_downscaling(
downscale_factor=4, large_img1, large_img2,
try_common_rotations=True downscale_factor=4,
) try_common_rotations=True
print("Done downscaling...") )
# Save result # Save result
# cv2.imwrite('aligned_large.jpg', aligned) # cv2.imwrite('aligned_large.jpg', aligned)
@ -453,12 +492,10 @@ if __name__ == "__main__":
if matrix_score == 1.0 and is_score != "scores": if matrix_score == 1.0 and is_score != "scores":
print("✅ Perfect Matrix score, should be identical") print("✅ Perfect Matrix score, should be identical")
timer() exit_timer(1)
exit(1) if matrix_score < 0.3 and is_score != "scores":
if matrix_score == 0.0 and is_score != "scores":
print("❌ Significant transformation required") print("❌ Significant transformation required")
timer() exit_timer(0)
exit(0)
if is_score == "scores": if is_score == "scores":
score = find_duplicate_with_rotation(large_img1, aligned) score = find_duplicate_with_rotation(large_img1, aligned)
print(f"Score: {score}") print(f"Score: {score}")
@ -481,8 +518,7 @@ if __name__ == "__main__":
print(f"Matrix deviation score: {matrix_score:.4f}") print(f"Matrix deviation score: {matrix_score:.4f}")
print(f"Decomposed similarity: {decomposed_score:.4f}") print(f"Decomposed similarity: {decomposed_score:.4f}")
print(f"Combined similarity: {combined_score:.4f}") print(f"Combined similarity: {combined_score:.4f}")
timer() exit_timer(exit_code)
exit(exit_code)
""" """
Matrix-based scores are fast but don't consider image content Matrix-based scores are fast but don't consider image content

Loading…
Cancel
Save