From 782df96080a397d9bccc20fd023c692f86d79371 Mon Sep 17 00:00:00 2001 From: KlzXS Date: Wed, 21 Oct 2020 17:03:03 +0000 Subject: [PATCH 1/7] Modified dups to allow selecting files for removal --- plugins/dups | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/plugins/dups b/plugins/dups index 4f31fc6d..aef39230 100755 --- a/plugins/dups +++ b/plugins/dups @@ -6,10 +6,39 @@ # # Dependencies: find md5sum sort uniq xargs # -# Shell: POSIX compliant +# Note: bash compatible required for mktemp +# +# Shell: bash # Authors: syssyphus, KlzXS -find . -size +0 -type f -printf "%s %p\n" | sort -rn | sed -n 'N; /^\([0-9]*\) .*\n\1.*$/p;$d;D' | awk '{printf("%s\0", substr($0, index($0, $2)))}' | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate +# If the size of a file has more than $size_digits digits the file will be misplaced +# 12 digits fit files up to 931GiB + +EDITOR="${EDITOR:-vi}" +TMPDIR="${TMPDIR:-/tmp}" + +size_digits=12 +tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") + +# shellcheck disable=SC2016 +find . 
-size +0 -type f -printf "%${size_digits}s %p\n" | sort -rn | uniq -w"${size_digits}" -D | tr '\n' '\0' | xargs -0 -n1 sh -c 'printf "%s %s\n" "$(md5sum $@)" "d$0"' | sort | { uniq -w32 --all-repeated=separate; echo; } | sed -nE ' +h +s/^(.{32}).* d([0-9]*)$/md5sum: \1 size: \2 bytes/p +g + +:loop +N +/.*\n$/!b loop +p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' > "$tmpfile" + +"$EDITOR" "$tmpfile" + +cat "$tmpfile" + +# shellcheck disable=SC2016 +sed -e 's/md5sum.*//' "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c 'rm -i $0 $@ < /dev/tty' + +rm "$tmpfile" printf "Press any key to exit" read -r _ From afdba193e47b5413962f2fc56e28e9e8947be955 Mon Sep 17 00:00:00 2001 From: KlzXS Date: Wed, 21 Oct 2020 17:37:33 +0000 Subject: [PATCH 2/7] Fix filenames with spaces not working --- plugins/dups | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/dups b/plugins/dups index aef39230..7ce405c8 100755 --- a/plugins/dups +++ b/plugins/dups @@ -21,7 +21,9 @@ size_digits=12 tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") # shellcheck disable=SC2016 -find . -size +0 -type f -printf "%${size_digits}s %p\n" | sort -rn | uniq -w"${size_digits}" -D | tr '\n' '\0' | xargs -0 -n1 sh -c 'printf "%s %s\n" "$(md5sum $@)" "d$0"' | sort | { uniq -w32 --all-repeated=separate; echo; } | sed -nE ' +find . 
-size +0 -type f -printf "%${size_digits}s %p\n" | sort -rn | uniq -w"${size_digits}" -D | sed -E ' +s/^ {,12}([0-9]{,12}) (.*)$/printf "%s %s\\n" "$(md5sum "\2")" "d\1"/ +' | tr '\n' '\0' | xargs -0 -n1 sh -c | sort | { uniq -w32 --all-repeated=separate; echo; } | sed -nE ' h s/^(.{32}).* d([0-9]*)$/md5sum: \1 size: \2 bytes/p g @@ -36,7 +38,7 @@ p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' > "$tmpfile" cat "$tmpfile" # shellcheck disable=SC2016 -sed -e 's/md5sum.*//' "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c 'rm -i $0 $@ < /dev/tty' +sed -e 's/md5sum.*//' "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c 'rm -i "$0" "$@" < /dev/tty' rm "$tmpfile" From 05bf019fbb42d85f94d56daf24c5f37fe4302f94 Mon Sep 17 00:00:00 2001 From: KlzXS Date: Fri, 6 Nov 2020 00:07:01 +0000 Subject: [PATCH 3/7] Add some prompts --- plugins/dups | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/plugins/dups b/plugins/dups index 7ce405c8..a8864127 100755 --- a/plugins/dups +++ b/plugins/dups @@ -4,7 +4,7 @@ # # Source: https://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash # -# Dependencies: find md5sum sort uniq xargs +# Dependencies: find md5sum sort uniq xargs gsed # # Note: bash compatible required for mktemp # @@ -20,12 +20,20 @@ TMPDIR="${TMPDIR:-/tmp}" size_digits=12 tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") +printf "\ +## This is an overview of all duplicate files found. +## After editiing this file you will be prompted to remove some of them. +## You can choose between removing all the commented out files, all the uncommented ones or none at all. +## All the lines begining with '##','#md5sum' or 'md5sum' will be ignored either way. +## If you choose to remove, you will be given a choice between removing with force or interactively for each file. +" > "$tmpfile" + # shellcheck disable=SC2016 find . 
-size +0 -type f -printf "%${size_digits}s %p\n" | sort -rn | uniq -w"${size_digits}" -D | sed -E ' s/^ {,12}([0-9]{,12}) (.*)$/printf "%s %s\\n" "$(md5sum "\2")" "d\1"/ ' | tr '\n' '\0' | xargs -0 -n1 sh -c | sort | { uniq -w32 --all-repeated=separate; echo; } | sed -nE ' h -s/^(.{32}).* d([0-9]*)$/md5sum: \1 size: \2 bytes/p +s/^(.{32}).* d([0-9]*)$/#md5sum: \1 size: \2 bytes/p g :loop @@ -35,10 +43,26 @@ p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' > "$tmpfile" "$EDITOR" "$tmpfile" -cat "$tmpfile" +printf "Remove commented files? (yes/no/abort) [default=a]: " +read -r commented + +if [ "$commented" = "y" ]; then + sedcmd="/^(##|#?md5sum|[^#]).*/d" +elif [ "$commented" = "n" ]; then + sedcmd="/^(#|#?md5sum).*/d" +else + printf "Press any key to exit" + read -r _ + exit +fi + +printf "Remove with force or interactive? (f/i) [default=i]: " +read -r force + +rmcmd="'rm -$force \"\$0\" \"\$@\" < /dev/tty'" # shellcheck disable=SC2016 -sed -e 's/md5sum.*//' "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c 'rm -i "$0" "$@" < /dev/tty' +sed -e $sedcmd "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c "$rmcmd" rm "$tmpfile" From e8803b8b6452a5e62e4c1f7910144a803241362d Mon Sep 17 00:00:00 2001 From: KlzXS Date: Sat, 7 Nov 2020 12:58:48 +0000 Subject: [PATCH 4/7] Fix shellcheck error --- plugins/dups | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dups b/plugins/dups index a8864127..e409a5e4 100755 --- a/plugins/dups +++ b/plugins/dups @@ -62,7 +62,7 @@ read -r force rmcmd="'rm -$force \"\$0\" \"\$@\" < /dev/tty'" # shellcheck disable=SC2016 -sed -e $sedcmd "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c "$rmcmd" +sed -e "$sedcmd" "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c "$rmcmd" rm "$tmpfile" From d4c1986a95bc6441c7e749f7ea0697add7dc94fa Mon Sep 17 00:00:00 2001 From: KlzXS Date: Sat, 7 Nov 2020 22:39:34 +0000 Subject: [PATCH 5/7] sed fixes --- plugins/dups | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/plugins/dups 
b/plugins/dups index e409a5e4..5aacbeb9 100755 --- a/plugins/dups +++ b/plugins/dups @@ -25,7 +25,7 @@ printf "\ ## After editiing this file you will be prompted to remove some of them. ## You can choose between removing all the commented out files, all the uncommented ones or none at all. ## All the lines begining with '##','#md5sum' or 'md5sum' will be ignored either way. -## If you choose to remove, you will be given a choice between removing with force or interactively for each file. +## If you choose to remove, you will be given a choice between removing with force or interactively for each file.\n " > "$tmpfile" # shellcheck disable=SC2016 @@ -39,7 +39,7 @@ g :loop N /.*\n$/!b loop -p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' > "$tmpfile" +p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' >> "$tmpfile" "$EDITOR" "$tmpfile" @@ -47,9 +47,9 @@ printf "Remove commented files? (yes/no/abort) [default=a]: " read -r commented if [ "$commented" = "y" ]; then - sedcmd="/^(##|#?md5sum|[^#]).*/d" + sedcmd="/^(##|#?md5sum|[^#]).*/d; /^$/d; s/^# *(.*)$/\1/" elif [ "$commented" = "n" ]; then - sedcmd="/^(#|#?md5sum).*/d" + sedcmd="/^(#|#?md5sum).*/d; /^$/d; s/^ *(.*)$/\1/" else printf "Press any key to exit" read -r _ @@ -59,10 +59,13 @@ fi printf "Remove with force or interactive? (f/i) [default=i]: " read -r force -rmcmd="'rm -$force \"\$0\" \"\$@\" < /dev/tty'" - -# shellcheck disable=SC2016 -sed -e "$sedcmd" "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c "$rmcmd" +if [ "$force" = "f" ]; then + #shellcheck disable=SC2016 + sed -E "$sedcmd" "$tmpfile" | tr '\n' '\0' | xargs -0 sh -c 'rm -f "$0" "$@" Date: Sun, 8 Nov 2020 11:28:28 +0000 Subject: [PATCH 6/7] Wording --- plugins/dups | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/dups b/plugins/dups index 5aacbeb9..a3b1c5d6 100755 --- a/plugins/dups +++ b/plugins/dups @@ -24,7 +24,7 @@ printf "\ ## This is an overview of all duplicate files found. 
## After editiing this file you will be prompted to remove some of them. ## You can choose between removing all the commented out files, all the uncommented ones or none at all. -## All the lines begining with '##','#md5sum' or 'md5sum' will be ignored either way. +## Lines with double comments (##) are always ignored. ## If you choose to remove, you will be given a choice between removing with force or interactively for each file.\n " > "$tmpfile" @@ -33,7 +33,7 @@ find . -size +0 -type f -printf "%${size_digits}s %p\n" | sort -rn | uniq -w"${s s/^ {,12}([0-9]{,12}) (.*)$/printf "%s %s\\n" "$(md5sum "\2")" "d\1"/ ' | tr '\n' '\0' | xargs -0 -n1 sh -c | sort | { uniq -w32 --all-repeated=separate; echo; } | sed -nE ' h -s/^(.{32}).* d([0-9]*)$/#md5sum: \1 size: \2 bytes/p +s/^(.{32}).* d([0-9]*)$/## md5sum: \1 size: \2 bytes/p g :loop @@ -47,9 +47,9 @@ printf "Remove commented files? (yes/no/abort) [default=a]: " read -r commented if [ "$commented" = "y" ]; then - sedcmd="/^(##|#?md5sum|[^#]).*/d; /^$/d; s/^# *(.*)$/\1/" + sedcmd="/^(##|[^#]).*/d; /^$/d; s/^# *(.*)$/\1/" elif [ "$commented" = "n" ]; then - sedcmd="/^(#|#?md5sum).*/d; /^$/d; s/^ *(.*)$/\1/" + sedcmd="/^#.*/d; /^$/d; s/^ *(.*)$/\1/" else printf "Press any key to exit" read -r _ From 49936d1ca23c56ad6bbf1e43d701f5f3efd575b5 Mon Sep 17 00:00:00 2001 From: KlzXS Date: Sun, 8 Nov 2020 12:20:36 +0000 Subject: [PATCH 7/7] Clear up which files are removed --- plugins/dups | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/plugins/dups b/plugins/dups index a3b1c5d6..1cba8a8a 100755 --- a/plugins/dups +++ b/plugins/dups @@ -22,10 +22,9 @@ tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") printf "\ ## This is an overview of all duplicate files found. -## After editiing this file you will be prompted to remove some of them. -## You can choose between removing all the commented out files, all the uncommented ones or none at all. -## Lines with double comments (##) are always ignored. 
-## If you choose to remove, you will be given a choice between removing with force or interactively for each file.\n +## Comment out the files you wish to remove. You will be given an option to cancel. +## Lines with double comments (##) are ignored. +## If you choose to remove, you will be given a choice between removing files with force or interactively.\n " > "$tmpfile" # shellcheck disable=SC2016 @@ -43,13 +42,11 @@ p' | sed -E 's/^.{32} (.*) d[0-9]*$/\1/' >> "$tmpfile" "$EDITOR" "$tmpfile" -printf "Remove commented files? (yes/no/abort) [default=a]: " +printf "Remove commented files? (yes/no) [default=n]: " read -r commented if [ "$commented" = "y" ]; then sedcmd="/^(##|[^#]).*/d; /^$/d; s/^# *(.*)$/\1/" -elif [ "$commented" = "n" ]; then - sedcmd="/^#.*/d; /^$/d; s/^ *(.*)$/\1/" else printf "Press any key to exit" read -r _