2019-08-24 14:35:45 +00:00
|
|
|
#!/usr/bin/env sh
|
|
|
|
|
|
|
|
# Description: List non-empty duplicate files in the current directory (based on size followed by MD5)
|
|
|
|
#
|
|
|
|
# Source: https://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
|
|
|
|
#
|
2020-11-06 00:07:01 +00:00
|
|
|
# Dependencies: find md5sum sort uniq xargs gsed
|
2019-08-24 14:35:45 +00:00
|
|
|
#
|
2020-10-21 17:03:03 +00:00
|
|
|
# Note: a bash-compatible shell is required for mktemp
|
|
|
|
#
|
|
|
|
# Shell: bash
|
2020-05-06 05:12:29 +00:00
|
|
|
# Authors: syssyphus, KlzXS
|
2019-08-24 14:35:45 +00:00
|
|
|
|
2020-10-21 17:03:03 +00:00
|
|
|
# If the size of a file has more than $size_digits digits the file will be misplaced
|
|
|
|
# 12 digits fit files up to 931GiB
|
|
|
|
|
|
|
|
# Editor used to review the overview and base directory for the scratch file;
# both honour the caller's environment and fall back to vi and /tmp.
EDITOR="${EDITOR:-vi}"
TMPDIR="${TMPDIR:-/tmp}"

# Field width that file sizes are padded to, so that sort/uniq can treat the
# size as a fixed-width prefix of every line.
size_digits=12

# Scratch file holding the overview the user edits. Stop right away if it
# cannot be created: every later step reads from or appends to it.
tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") || {
    printf 'Could not create a temporary file in %s\n' "$TMPDIR" >&2
    exit 1
}

# Remove the scratch file on every exit path. The signal traps turn an
# interrupt into a regular exit so that the EXIT trap also runs in shells that
# would otherwise die without executing it.
trap 'rm -f -- "$tmpfile"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Instructions shown at the top of the overview. They are passed as printf
# arguments (not as the format string) so the text is always written verbatim.
# The trailing empty argument produces the blank line that separates the
# instructions from the first group of duplicates.
printf '%s\n' \
    "## This is an overview of all duplicate files found." \
    "## After editing this file you will be prompted to remove some of them." \
    "## You can choose between removing all the commented out files, all the uncommented ones or none at all." \
    "## All the lines beginning with '##','#md5sum' or 'md5sum' will be ignored either way." \
    "## If you choose to remove, you will be given a choice between removing with force or interactively for each file." \
    "" > "$tmpfile"
|
|
|
|
|
2020-10-21 17:03:03 +00:00
|
|
|
# Read "<padded size> <path>" lines on stdin and print one
# "<md5sum output> d<size>" line for each of them.
# The path is handed to md5sum as a plain argument; it is never pasted into a
# command string, so characters such as $, ` or " in a file name are harmless.
_hash_candidates() {
    while IFS= read -r _hc_line; do
        # Drop the left padding, then split at the first space:
        # everything before it is the size, everything after it is the path.
        _hc_line=${_hc_line#"${_hc_line%%[! ]*}"}
        _hc_size=${_hc_line%% *}
        _hc_path=${_hc_line#* }
        printf '%s %s\n' "$(md5sum "$_hc_path")" "d$_hc_size"
    done
}

# Stages:
#   find            - non-empty regular files as "<size padded to $size_digits> <path>"
#   sort | uniq -D  - keep only lines whose size field occurs more than once
#   _hash_candidates- prefix each remaining file with its md5sum output
#   sort | uniq     - group lines sharing the same first 32 characters (the hash),
#                     groups separated by an empty line; echo terminates the last group
#   sed -n          - print a "#md5sum: ... size: ... bytes" header before each group,
#                     then the group itself (lines are collected until the empty separator)
#   sed             - strip the hash prefix and the " d<size>" suffix from the file lines
find . -size +0 -type f -printf "%${size_digits}s %p\n" | sort -rn | uniq -w"${size_digits}" -D |
    _hash_candidates | sort | { uniq -w32 --all-repeated=separate; echo; } | sed -nE '
h
s/^(.{32}).* d([0-9]*)$/#md5sum: \1 size: \2 bytes/p
g

:loop
N
/.*\n$/!b loop
p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' >> "$tmpfile"
|
2020-10-21 17:03:03 +00:00
|
|
|
|
|
|
# Let the user review the overview and comment out ('#') individual files.
"$EDITOR" "$tmpfile"

printf "Remove commented files? (yes/no/abort) [default=a]: "
read -r commented

# sedcmd is the sed -E program that reduces the edited overview to the list of
# paths to delete:
#   y - drop '##' lines, md5sum headers and every line not starting with '#',
#       drop empty lines, then strip the leading '#' (and spaces) from the rest
#   n - drop every line starting with '#' and md5sum headers, drop empty
#       lines, then strip leading spaces from the rest
# Any other answer aborts without deleting any of the listed files.
case "$commented" in
    y)
        sedcmd="/^(##|#?md5sum|[^#]).*/d; /^$/d; s/^# *(.*)$/\1/"
        ;;
    n)
        sedcmd="/^(#|#?md5sum).*/d; /^$/d; s/^ *(.*)$/\1/"
        ;;
    *)
        # The overview is no longer needed; do not leave it behind in $TMPDIR.
        rm -f -- "$tmpfile"
        printf "Press any key to exit"
        read -r _
        exit
        ;;
esac
|
|
|
|
|
|
|
|
printf "Remove with force or interactive? (f/i) [default=i]: "
read -r force

# Only an explicit "f" selects forced removal; every other answer keeps the
# interactive default announced in the prompt.
if [ "$force" = "f" ]; then
    rm_flag=-f
else
    rm_flag=-i
fi

# Feed the selected paths to rm, NUL-separated so that spaces in names survive.
#  - -r: do not run rm at all when nothing was selected
#  - the literal "sh" fills $0 of the inner shell, so every path arrives in "$@"
#    (otherwise an empty selection would make $0 default to "sh" and rm would
#    be pointed at a file of that name)
#  - "--" keeps rm from treating a path as an option
#  - stdin is taken from the terminal so that rm -i can ask its questions
# shellcheck disable=SC2016
sed -E "$sedcmd" "$tmpfile" | tr '\n' '\0' |
    xargs -0 -r sh -c 'flag=$1; shift; rm "$flag" -- "$@" </dev/tty' sh "$rm_flag"
|
2020-10-21 17:03:03 +00:00
|
|
|
|
|
|
|
# The overview has served its purpose; delete the scratch file.
rm -- "$tmpfile"

# Keep the output on screen until the user confirms with a line of input.
printf '%s' 'Press any key to exit'
read -r _
|