Photo De-Duplication
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
dedup/get_dups.sh

95 lines
2.7 KiB

#!/bin/bash
# Copyright (c) 2025 by Robert Strutts
# License: MIT
# Enter the Python virtual environment (looked up relative to the current
# working directory).
source myenv/bin/activate

# Wall-clock start, used for the elapsed-time report at the end of the run.
start_time=$(date +%s)

# Directory to scan: first argument with one trailing slash stripped.
path="${1%/}"

# Refuse to continue unless the target really is a directory.
if [[ ! -d "$path" ]]; then
  echo "Error: '$path' is not a directory."
  exit 1
fi

# A second argument of "forreal" means duplicates get moved, so make sure
# their destination folder exists.
[[ "$2" == "forreal" ]] && mkdir -p "$path/dups"

# Collect image file names without a directory prefix by globbing from inside
# the target directory. nullglob makes an unmatched pattern expand to nothing
# instead of staying in the list as a literal "*.jpg" / "*.png".
shopt -s nullglob
pushd "$path" || exit 1
images=(*.jpg *.png)
popd || exit 1

# Nothing to compare: stop here.
if (( ${#images[@]} == 0 )); then
  echo "No images found."
  exit 1
fi
#######################################
# Record dedup.py's verdict for one image pair in the matching report file.
# Globals:   path (read) - directory being scanned
# Arguments: $1 - exit status returned by dedup.py
#            $2 - outer image file name (relative to $path)
#            $3 - inner image file name (relative to $path)
#            $4 - run mode; "forreal" also moves duplicates to "$path/dups"
# Outputs:   appends a line to dups.txt, alike.txt, sameGPS.txt,
#            sameGPSmile.txt, invalid.txt or size.txt in the current directory
# Returns:   0 when the outer image is settled and needs no more comparisons,
#            1 when it should still be compared with the remaining images
#######################################
_record_verdict() {
  local code=$1 outer=$2 inner=$3 mode=$4
  case "$code" in
    1) # Duplicate: log the pair; in "forreal" mode move the outer image away.
      echo "$path/$outer # $inner" >> dups.txt
      if [[ "$mode" == "forreal" ]]; then
        mv -- "$path/$outer" "$path/dups"
      fi
      ;;
    2) # Close match to a duplicate.
      echo "$path/$outer # $inner" >> alike.txt
      ;;
    4) # Invalid inner image: skip just that image. The outer image must
       # still be compared with the rest, otherwise duplicates listed after
       # a corrupt file would never be detected.
      return 1
      ;;
    5) # Matching GPS image.
      echo "$path/$outer # $inner" >> sameGPS.txt
      ;;
    6) # Matching GPS image, second category.
       # NOTE(review): presumably "within a mile" - confirm against dedup.py.
      echo "$path/$outer # $inner" >> sameGPSmile.txt
      ;;
    8) # Bad image: the outer file is logged; deletion is left to the user.
      echo "$path/$outer" >> invalid.txt
      if [[ "$mode" == "forreal" ]]; then
        echo "To remove bad image run: rm $path/$outer"
      fi
      ;;
    9) # Image too small or too large.
      echo "$path/$outer" >> size.txt
      ;;
    *) # Any other status: nothing to record, keep comparing.
      return 1
      ;;
  esac
  return 0
}

# Compare every image with each later image in the list (j > i), so no pair
# is checked twice. The first conclusive verdict ends the inner loop.
for ((i = 0; i < ${#images[@]}; i++)); do
  outer_image="${images[$i]}"
  for ((j = i + 1; j < ${#images[@]}; j++)); do
    inner_image="${images[$j]}"
    echo -e "\nCompairing files: $outer_image TO $inner_image"
    python3 dedup.py "$path/$outer_image" "$path/$inner_image" "$2"
    exit_code=$?
    # The script's own $2 is passed explicitly: inside the helper, $2 would
    # refer to the helper's second argument instead.
    if _record_verdict "$exit_code" "$outer_image" "$inner_image" "$2"; then
      break
    fi
  done
done
# Render a duration given in whole seconds as zero-padded HH:MM:SS.
_format_elapsed() {
  local total=$1
  printf '%02d:%02d:%02d' \
    "$((total / 3600))" "$(((total % 3600) / 60))" "$((total % 60))"
}

# Print the closing duplicate summary for the given run mode ($1).
# dups.txt is looked up in the current directory.
_report_dups() {
  local mode=$1
  if [[ ! -f dups.txt ]]; then
    echo "No Dups found"
  elif [[ "$mode" == "forreal" ]]; then
    # Previously this case fell through to "No Dups found" even though
    # duplicates had been recorded.
    echo "Dups are listed in dups.txt"
  else
    # Dry run: page through the recorded duplicate pairs.
    more dups.txt
  fi
}

end_time=$(date +%s)
elapsed=$((end_time - start_time))
printf 'Total time to dedup everything: %s\n' "$(_format_elapsed "$elapsed")"
_report_dups "$2"