more profiling

main
Robert 8 months ago
parent 3a1946516b
commit e853019cf3
  1. 2
      README.md
  2. BIN
      Screenshot2025-04-26.png
  3. 80
      dedup.py

@ -16,3 +16,5 @@ cd dedup
source myenv/bin/activate
python dedup.py 0.jpg 1.jpg
```
[![Image of ScreenShot](Screenshot2025-04-26.png)](Screenshot2025-04-26.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

@ -40,9 +40,41 @@ def exit_handler(signum, frame):
# CTRL+C will Exit NOW!!!
signal.signal(signal.SIGINT, exit_handler)
def exit_timer(level):
    """Print the total execution time, then terminate with exit status *level*.

    The elapsed time is measured from the module-level ``start`` value, which
    is defined outside this block (presumably a ``time.perf_counter()``
    reading taken at startup -- confirm against the rest of the file).

    Args:
        level: Exit status handed to the interpreter.

    Raises:
        SystemExit: Always; this function does not return.
    """
    end = time.perf_counter()
    print(f"Execution took {end - start:.4f} seconds")
    # Use sys.exit instead of the exit() helper: the latter is injected by the
    # site module for interactive use and is missing under `python -S` or in
    # frozen builds. Both raise SystemExit, so callers see no difference.
    sys.exit(level)
class Timer:
    """Stopwatch based on ``time.perf_counter``, usable as a context manager.

    Used in a ``with`` statement, the timer starts on entry and, on exit,
    stops and prints ``"<name>: <elapsed> seconds"``. It can also be driven
    manually through ``start()``, ``stop()`` and ``elapsed()``.
    """

    def __init__(self, name=None):
        """Create an unstarted timer.

        Args:
            name: Label used when printing the result; falls back to
                ``"Timer"`` when omitted or empty.
        """
        self.name = name if name else "Timer"
        self.start_time = None
        self.end_time = None

    def __enter__(self):
        """Start timing and return the timer itself."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop timing and print the result.

        Returns ``None``, so an exception raised inside the ``with`` body
        still propagates after the timing line has been printed.
        """
        self.stop()
        self.print_result()

    def start(self):
        """Begin (or restart) timing from now."""
        self.start_time = time.perf_counter()
        # Drop any stop mark left over from a previous run; otherwise a
        # restarted timer would report old_end - new_start (a negative value)
        # until stop() is called again.
        self.end_time = None

    def stop(self):
        """Record the current time as the end of the measured interval."""
        self.end_time = time.perf_counter()

    def elapsed(self):
        """Return the measured time in seconds.

        While the timer is still running, this is the time since ``start()``;
        after ``stop()``, it is the fixed start-to-stop interval.

        Raises:
            ValueError: If the timer was never started.
        """
        if self.start_time is None:
            raise ValueError("Timer has not been started")
        if self.end_time is None:
            return time.perf_counter() - self.start_time
        return self.end_time - self.start_time

    def print_result(self):
        """Print the timer's name and elapsed seconds with 6 decimal places."""
        elapsed = self.elapsed()
        print(f"{self.name}: {elapsed:.6f} seconds")
def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=True):
"""
@ -60,14 +92,17 @@ def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=
rotation_angle: Detected simple rotation (None if not found)
"""
# 1. First alignment at low resolution
with Timer("1st alignment at Low Res-Downsaling"):
small1 = downscale_image(img1, downscale_factor)
small2 = downscale_image(img2, downscale_factor)
print("Done downscaling...")
print("Please wait...Rotation starting.")
# Get initial alignment at low resolution
with Timer("2nd alignment at Low Res-Rotations"):
_, init_matrix, rotation_angle = align_with_ecc_and_rotation(
small1, small2, try_common_rotations
)
print("Done rotating low res image...")
if init_matrix is None:
return img2, None, None # Alignment failed
@ -76,23 +111,27 @@ def align_with_downscaling(img1, img2, downscale_factor=4, try_common_rotations=
if rotation_angle is not None:
img2 = rotate_image(img2, rotation_angle)
with Timer("Scaling translation components"):
# Scale up the transformation matrix
full_matrix = init_matrix.copy()
full_matrix[:2, 2] *= downscale_factor # Scale translation components
print("Done scale-up/transform...")
with Timer("Convert images to grayscale"):
# Convert images to grayscale for final alignment
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
print("Done greyscale alignment...")
# Set criteria for final alignment
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 500, 1e-6)
print("Please wait...ECC initial estimate.")
try:
with Timer("ECC init"):
# Run ECC with initial estimate
cc, full_matrix = cv2.findTransformECC(
gray1, gray2, full_matrix, cv2.MOTION_AFFINE, criteria
)
with Timer("Apply final transformation to color image"):
# Apply final transformation to color image
aligned_img = cv2.warpAffine(
img2, full_matrix, (img1.shape[1], img1.shape[0]),
@ -395,6 +434,8 @@ if __name__ == "__main__":
file1 = sys.argv[1]
file2 = sys.argv[2]
with Timer("Hashing"):
# Quick hashes
hash1 = quick_file_hash(file1)
hash2 = quick_file_hash(file2)
@ -403,11 +444,11 @@ if __name__ == "__main__":
print("xxHash found duplicates")
print("✅ Perfect match - images are identical")
print("No transformation needed")
timer()
exit(1)
exit_timer(1)
else:
print("Done hashing...")
with Timer("Loading Images"):
# Load large images
large_img1 = cv2.imread(file1) # e.g., 4000x3000 pixels
large_img2 = cv2.imread(file2) # e.g., 4000x3000 pixels
@ -416,26 +457,24 @@ if __name__ == "__main__":
w2, h2 = get_image_dimensions_cv(large_img2)
if w == None or w2 == None or h == None or h2 == None:
print("Aborting...Invalid Image!")
timer()
exit(8)
exit_timer(8)
if w != w2 and w != h2:
print("Diffent Resolutions")
timer()
exit(0)
exit_timer(0)
if h != h2 and h != w2:
print("Diffent Resolutions")
timer()
exit(0)
exit_timer(0)
print("Done loading images...")
with Timer("Aligning with downscaling 1/4 size"):
# Align with downscaling (initially process at 1/4 size)
aligned, matrix, angle = align_with_downscaling(
large_img1, large_img2,
downscale_factor=4,
try_common_rotations=True
)
print("Done downscaling...")
# Save result
# cv2.imwrite('aligned_large.jpg', aligned)
@ -453,12 +492,10 @@ if __name__ == "__main__":
if matrix_score == 1.0 and is_score != "scores":
print("✅ Perfect Matrix score, should be identical")
timer()
exit(1)
if matrix_score == 0.0 and is_score != "scores":
exit_timer(1)
if matrix_score < 0.3 and is_score != "scores":
print("❌ Significant transformation required")
timer()
exit(0)
exit_timer(0)
if is_score == "scores":
score = find_duplicate_with_rotation(large_img1, aligned)
print(f"Score: {score}")
@ -481,8 +518,7 @@ if __name__ == "__main__":
print(f"Matrix deviation score: {matrix_score:.4f}")
print(f"Decomposed similarity: {decomposed_score:.4f}")
print(f"Combined similarity: {combined_score:.4f}")
timer()
exit(exit_code)
exit_timer(exit_code)
"""
Matrix-based scores are fast but don't consider image content

Loading…
Cancel
Save