Check for tooSmall <1KB and tooLarge >10MB files.

main
Robert 8 months ago
parent 781221091a
commit 6475ca2069
  1. 4
      .gitignore
  2. BIN
      BadImageFormat.jpg
  3. 2
      README.md
  4. 1
      TooSmall.jpg
  5. 26
      dedup.py
  6. 18
      delibs.py
  7. 8
      get_dups.bat
  8. 12
      get_dups.sh

4
.gitignore vendored

@ -1,5 +1,3 @@
myenv
__pycache__
dups.txt
alike.txt
invalid.txt
*.txt

Binary file not shown.

After

Width:  |  Height:  |  Size: 553 KiB

@ -66,6 +66,6 @@ python dedup.py 0.jpg 1.jpg scores
```
### Files made by get_dups Scripts:
error level: 0 = NOT a Dup, 1 = Duplicate, 2 = Close Match, 5 = Same GPS GEO-location, 8 = Invalid Image. Possible files: dups.txt, alike.txt, sameGPS.txt, and invalid.txt
error level: 0 = NOT a Dup, 1 = Duplicate, 2 = Close Match, 4 = Skipped (second image is invalid or outside the size limits; nothing is recorded), 5 = Same GPS GEO-location, 8 = Invalid Image, 9 = File too small/too large. Possible files: dups.txt, alike.txt, sameGPS.txt, invalid.txt, size.txt.
![Image of ScreenShot](Screenshot2025-04-26.png)

@ -0,0 +1 @@
adsfkjnhdsfljdkfsljdsklfdsjflds

@ -29,6 +29,8 @@ Memory usage significantly reduced
"""
# NOTE(review): presumably toggles the "within one mile" GPS comparison; its
# use is outside this view -- confirm against handle_GPS / main.
within_one_mile_check = True
# File-size limits in bytes. main() passes both to
# delibs.check_file_size_bytes for each input file before any hashing is done.
too_small = 1024 # 1KB
too_large = 10 * 1024 * 1024 # 10MB
def handle_GPS(location1, location2):
camera_info1, latitude1, longitude1 = location1
@ -71,6 +73,17 @@ def main():
file1 = sys.argv[1]
file2 = sys.argv[2]
with delibs.Timer("Getting File Size"):
size1 = delibs.check_file_size_bytes(file1, too_small, too_large)
size2 = delibs.check_file_size_bytes(file2, too_small, too_large)
if size1 != None:
print(f"ERROR: {size1}")
delibs.exit_timer(9)
if size2 != None:
print(f"ERROR: {size2}")
delibs.exit_timer(4) # Mark as Skipped
with delibs.Timer("Hashing"):
# Quick hashes
hash1 = delibs.quick_file_hash(file1)
@ -102,15 +115,14 @@ def main():
w, h = delibs.get_image_dimensions_cv(large_img1)
w2, h2 = delibs.get_image_dimensions_cv(large_img2)
if w == None or w2 == None or h == None or h2 == None:
if w == None or h == None:
print("❌Aborting❌...Invalid Image!")
delibs.exit_timer(8)
if w != w2 and w != h2:
print("Diffent Resolutions")
print("👌Not a Duplicate")
delibs.exit_timer(0)
if h != h2 and h != w2:
if w2 == None or h2 == None:
print("❌Aborting❌...Invalid Image!")
delibs.exit_timer(4) # Mark as Skipped
if w != w2 and w != h2 and h != h2 and h != w2:
print("Diffent Resolutions")
print("👌Not a Duplicate")
delibs.exit_timer(0)

@ -392,7 +392,23 @@ def get_image_dimensions_cv(img):
if img is not None:
height, width = img.shape[:2]
return width, height
return None, None
return None, None
def check_file_size_bytes(file_path, too_small, too_large):
    """Validate that a file's size lies within ``[too_small, too_large]``.

    Args:
        file_path: Path of the file to inspect.
        too_small: Minimum acceptable size in bytes (inclusive).
        too_large: Maximum acceptable size in bytes (inclusive).

    Returns:
        ``None`` when the size is acceptable; otherwise a human-readable
        message describing the problem (file too small, file too large,
        file missing, or any other error raised while checking).
    """
    try:
        n_bytes = os.path.getsize(file_path)
        # Guard clauses: report the first limit that is violated.
        if n_bytes < too_small:
            return f"File is ({n_bytes} bytes) must be over {too_small}"
        if n_bytes > too_large:
            return f"File is ({n_bytes} bytes) must be less than {too_large}"
    except FileNotFoundError:
        return "File not found"
    except Exception as err:
        # Best-effort by design: every other failure is reported as text
        # rather than propagated to the caller.
        return f"Error checking file size: {err!s}"
    # Size is within both limits.
    return None
"""
xxhash is about 510x faster than SHA256, non-cryptographic.

@ -46,6 +46,9 @@ for /l %%i in (1,1,%count%) do (
echo %~1\!outer_image!>> alike.txt
goto :break_inner
)
if !errorlevel! equ 4 (
goto :break_inner
)
if !errorlevel! equ 5 (
echo %~1\!outer_image!>> sameGPS.txt
goto :break_inner
@ -54,11 +57,14 @@ for /l %%i in (1,1,%count%) do (
echo %~1\!outer_image!>> sameGPSmile.txt
goto :break_inner
)
if !errorlevel! equ 8 (
echo %~1\!outer_image!>> invalid.txt
goto :break_inner
)
if !errorlevel! equ 9 (
echo %~1\!outer_image!>> size.txt
goto :break_inner
)
)
:break_inner
)

@ -29,7 +29,7 @@ for ((i = 0; i < ${#images[@]}; i++)); do
for ((j = i + 1; j < ${#images[@]}; j++)); do
inner_image="${images[$j]}"
echo -e "Compairing files: $outer_image TO $inner_image \n"
echo -e "\nCompairing files: $outer_image TO $inner_image"
python3 dedup.py "$1/$outer_image" "$1/$inner_image" "$2"
exit_code=$?
if [ $exit_code -eq 1 ]; then
@ -40,6 +40,9 @@ for ((i = 0; i < ${#images[@]}; i++)); do
echo "$1/$outer_image # $inner_image" >> alike.txt
break # No need to check more once found close match to duplicate
fi
if [ $exit_code -eq 4 ]; then
break # Skip Invaild inner Image
fi
if [ $exit_code -eq 5 ]; then
echo "$1/$outer_image # $inner_image" >> sameGPS.txt
break # No need to check more once found matching GPS image
@ -48,11 +51,14 @@ for ((i = 0; i < ${#images[@]}; i++)); do
echo "$1/$outer_image # $inner_image" >> sameGPSmile.txt
break # No need to check more once found matching GPS image
fi
if [ $exit_code -eq 8 ]; then
echo "$1/$outer_image # $inner_image" >> invalid.txt
echo "$1/$outer_image" >> invalid.txt
break # No need to check more once found bad image
fi
if [ $exit_code -eq 9 ]; then
echo "$1/$outer_image" >> size.txt
break # No need to check more once found image too Small or Large
fi
done
done

Loading…
Cancel
Save