-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrmdups.pl
More file actions
executable file
·118 lines (88 loc) · 1.94 KB
/
rmdups.pl
File metadata and controls
executable file
·118 lines (88 loc) · 1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl
use strict;
use Digest::MD5;
# Compute the MD5 digest of a file's contents.
#
# Parameters:
#   $file - path of the file to read.
# Returns:
#   The digest as a hexadecimal string.
# Dies:
#   If the file cannot be opened for reading.
sub compute_hash {
    my $file = shift;

    # Three-argument open with a lexical handle: the path is used literally
    # (two-argument open trims whitespace and interprets mode characters in
    # the name), and no package-global handle is shared between calls.
    open my $fh, '<', $file
        or die "cannot open '$file' for reading: $!";
    binmode $fh;    # digest the raw bytes, without any layer translation

    my $hash = Digest::MD5->new->addfile($fh)->hexdigest;
    close $fh;
    return $hash;
}
# Register a file under the digest of its contents.
#
# Parameters:
#   $hashes - hash ref mapping digest => array ref of paths seen so far.
#   $dups   - hash ref mapping digest => array ref of paths, holding only
#             digests that have been seen more than once.
#   $file   - path of the file to register.
#
# The first file with a given digest starts a new list in $hashes. Any later
# file with the same digest is appended to that list, and the very same list
# (shared reference) is published in $dups.
sub add_file_hash {
    my ($hashes, $dups, $file) = @_;
    my $digest = compute_hash($file);
    my $bucket = $hashes->{$digest};

    if (!defined $bucket) {
        $hashes->{$digest} = [$file];
    } else {
        push @$bucket, $file;
        $dups->{$digest} = $bucket;
    }
}
# Scan a single directory (not its subdirectories) for files with identical
# contents.
#
# Files are first grouped by size. A file is only hashed (via add_file_hash)
# once a second file of the same size turns up, so files with a unique size
# are never read.
#
# Parameters:
#   $dir     - path of the directory to scan.
#   $subdirs - array ref; the path of every subdirectory found is appended.
#   $dups    - hash ref passed through to add_file_hash, which records the
#              groups of files sharing a digest in it.
#
# Entries whose name starts with "." are skipped. Entries that are neither
# directories nor regular files (FIFOs, sockets, devices, dangling symlinks)
# are skipped as well, because reading them can block or fail.
# If the directory cannot be opened a warning is issued and nothing is added.
sub find_dups {
    my ($dir, $subdirs, $dups) = @_;
    my %sizes  = ();    # size   => array ref of paths with that size
    my %hashes = ();    # digest => array ref of paths, filled by add_file_hash

    # Lexical directory handle: nothing global is shared with other callers.
    my $dh;
    if (!opendir $dh, $dir) {
        warn "cannot open directory '$dir': $!\n";
        return;
    }

    # Explicit defined() so an entry named "0" cannot end the loop early.
    while (defined(my $entry = readdir $dh)) {
        next if $entry =~ /^\./;
        my $path = "$dir/$entry";

        if (-d $path) {
            push @$subdirs, $path;
            next;
        }
        next unless -f $path;

        # -s yields a false value for empty files; normalise it to 0.
        my $size      = (-s $path) || 0;
        my $same_size = $sizes{$size};
        if (!defined $same_size) {
            $sizes{$size} = [$path];
            next;
        }

        # The first file of this size was not hashed when it was seen;
        # hash it now that there is something to compare it against.
        add_file_hash(\%hashes, $dups, $same_size->[0]) if @$same_size == 1;
        add_file_hash(\%hashes, $dups, $path);
        push @$same_size, $path;
    }
    closedir $dh;
    return;
}
# Interactively choose which file of each duplicate group to keep, then
# delete the others.
#
# Parameters:
#   $_[0] - hash ref mapping digest => array ref of paths with that digest.
# Returns:
#   True when every group was presented, false when standard input reached
#   end-of-file before that. In the latter case nothing is deleted.
#
# For each group the paths are listed with 1-based numbers and one line is
# read from STDIN. Only a whole number between 1 and the group size selects
# a file to keep; anything else (0, empty input, "1.5", "2abc", ...) skips
# the group and leaves all of its files in place. Deletion happens once,
# after all groups have been answered.
sub rm_dups {
    my %dups     = %{$_[0]};
    my $eof      = 0;
    my @unlinked = ();

    # Sorted so the groups are presented in a repeatable order.
    foreach my $hash (sort keys %dups) {
        my @files = @{$dups{$hash}};
        for my $i (1 .. @files) {
            print $i, " ", $files[$i - 1], "\n";
        }
        print "which to keep? [0 = skip] ";
        if (eof STDIN) {
            print "\n";
            $eof = 1;
            last;
        }

        my $c = <STDIN>;
        $c = '' unless defined $c;
        $c =~ s/^\s+|\s+$//g;

        # This answer decides which files get deleted, so accept digits only
        # instead of relying on Perl's lenient string-to-number conversion.
        next unless $c =~ /\A\d+\z/;

        if (0 < $c && $c <= @files) {
            splice @files, $c - 1, 1;
            push @unlinked, @files;
        }
    }

    if (!$eof && @unlinked > 0) {
        print "removing files:\n";
        print " $_\n" foreach @unlinked;

        # Remove one at a time so each failure can be reported by name.
        foreach my $path (@unlinked) {
            unlink $path or warn "could not remove '$path': $!\n";
        }
    }
    return !$eof;
}
my $recursive = 0;     # TODO: Add support for recursively checking directories
my @dirs      = ("."); # TODO: Read directory list passed via ARGV

# Work through the directory queue: look for duplicates in each directory,
# let the user prune them, and stop as soon as rm_dups reports that standard
# input is exhausted. Subdirectories are queued only when $recursive is set.
while (@dirs) {
    my $dir = shift @dirs;
    my %found;
    my @children;

    print "searching directory: $dir\n";
    find_dups($dir, \@children, \%found);
    rm_dups(\%found) or last;

    next unless $recursive;
    push @dirs, @children;
}