-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathremove-duplicates.sh
More file actions
executable file
·124 lines (112 loc) · 2.63 KB
/
remove-duplicates.sh
File metadata and controls
executable file
·124 lines (112 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env bash
##################################################################################################################
# Find assumed duplicates and remove them if a file with the same prefix exists in its folder with the same sha1 #
##################################################################################################################
# Print the one-line synopsis of the script to stdout.
# The script name shown is taken from BASH_SOURCE[0].
usage()
{
    printf 'usage: %s [-y] [-h] target\n' "${BASH_SOURCE[0]}"
}
# Print the full help text to stdout: a blank line, the synopsis produced by
# usage, then one paragraph per argument/option, each followed by a blank line.
help()
{
    printf '\n'
    usage
    # One argument per output line; empty strings produce the blank separators.
    printf '%s\n' \
        '' \
        'target' \
        ' target path to inspect for removing duplicates' \
        '' \
        '-y' \
        ' delete without confirmation' \
        '' \
        '-h' \
        ' display this help' \
        ''
}
# --- Command line handling -------------------------------------------------
# forceDelete: 0 = ask before each removal (default), 1 = remove silently (-y).
forceDelete=0
while getopts :yh opt
do
    case "$opt" in
        y) forceDelete=1 ;;
        h) help; exit 0 ;;
        \?) printf 'Invalid option: -%s\n' "$OPTARG"; exit 1 ;;
    esac
done
# Drop the parsed options so that $1 is the target path.
shift "$((OPTIND - 1))"

# A target is mandatory.
if [[ -z $1 ]]
then
    usage
    exit 1
fi

# The target must be an existing directory.
target=$1
if ! [[ -d "$target" ]]
then
    printf "Target directory (%s) doesn't exist\n" "$target"
    exit 1
fi
# Candidate names look like "<prefix>_N.<ext>" or "<prefix> (N).<ext>" where N
# is a single digit. Capture groups: 1 = prefix, 2 = suffix, 3 = extension.
regex='(.*)(_[0-9]| \([0-9]\))(\.[a-zA-Z0-9]+)'

# The extended-regex flag of find and the name of the SHA-1 tool differ
# between macOS and other systems.
case "$(uname)" in
    Darwin) # Mac OSX
        shasum="shasum"
        findCmd=$(find -E "$target" -type f -regex "$regex")
        ;;
    *)
        shasum="sha1sum"
        findCmd=$(find "$target" -type f -regextype posix-extended -regex "$regex")
        ;;
esac
# Counters reported at the end of the script.
removed=0   # duplicates actually deleted
kept=0      # candidates that turned out not to be duplicates
found=0     # confirmed duplicates (deleted or not)

# Print the SHA-1 hex digest of the file $1, computed with the tool named in
# the global $shasum. The file is fed on stdin so that unusual file names
# (leading dash, backslashes) cannot alter the tool's behaviour or output.
# Returns non-zero, printing nothing, when the digest cannot be computed.
sha1_of()
{
    local digest
    digest=$("$shasum" < "$1") || return 1
    digest=${digest%% *}            # keep the hash, drop the trailing "  -"
    [[ -n "$digest" ]] || return 1
    printf '%s\n' "$digest"
}

# Examine one candidate path ($1).
# Globals read:    regex, forceDelete, shasum
# Globals written: found, removed, kept
# A candidate is removed only when the file named by stripping its "_N" /
# " (N)" suffix exists and both files have the same, successfully computed,
# SHA-1 digest. Anything else is reported as "not a duplicate" and kept.
process_candidate()
{
    local file=$1
    local assumedOriginal shaOriginal shaDuplicate input

    # Paths that do not carry a duplicate suffix are ignored silently.
    [[ "$file" =~ ${regex}$ ]] || return 0
    assumedOriginal="${BASH_REMATCH[1]}${BASH_REMATCH[3]}"

    # A failed hash must never count as a match: two empty digests would
    # otherwise compare equal and cause the file to be deleted.
    if [[ ! -f "$assumedOriginal" ]] \
        || ! shaOriginal=$(sha1_of "$assumedOriginal") \
        || ! shaDuplicate=$(sha1_of "$file") \
        || [[ "$shaOriginal" != "$shaDuplicate" ]]
    then
        kept=$((kept + 1))
        printf '%s\n' "/!\ $file not a duplicate"
        return 0
    fi

    found=$((found + 1))
    if [[ "$forceDelete" == 0 ]]
    then
        echo "$found. $file (original $assumedOriginal): remove ? (Y/n) "
        # The answer comes from the terminal because stdin carries the file
        # list. If the terminal cannot be read, treat it as "no" instead of
        # reusing a previous answer.
        read -r input </dev/tty || input=""
        # Only an exact "Y" confirms; anything else (including Enter) keeps
        # the file.
        if [[ "$input" != "Y" ]]
        then
            echo "File not removed"
            return 0
        fi
    fi

    # Count the file as removed only if rm really succeeded.
    if ! rm -- "$file"
    then
        printf 'Failed to remove %s\n' "$file" >&2
        return 0
    fi
    removed=$((removed + 1))
    if [[ "$forceDelete" == 1 ]]
    then
        echo "$removed. $file (original $assumedOriginal) removed"
    else
        echo "$file (original $assumedOriginal) removed"
    fi
}

# Read candidate paths from stdin, one per line, and process each of them.
# IFS= and -r keep leading/trailing blanks and backslashes in the paths.
process_candidates()
{
    local candidate
    while IFS= read -r candidate || [[ -n "$candidate" ]]
    do
        [[ -n "$candidate" ]] || continue
        process_candidate "$candidate"
    done
}

# Feed the list through a here-string rather than a pipe so the loop runs in
# the current shell and the counters above keep their values afterwards.
process_candidates <<< "$findCmd"
# Final report: number of files deleted and number of candidates that were
# not duplicates, then a successful exit.
printf '%s duplicate(s) removed\n' "$removed"
printf '%s false duplicate(s) found\n' "$kept"
exit 0