forked from simonsj/fdupes-jody
-
Notifications
You must be signed in to change notification settings - Fork 0
/
act_dedupefiles.c
145 lines (126 loc) · 4.68 KB
/
act_dedupefiles.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/* Deduplication of files with OS-specific copy-on-write mechanisms
* This file is part of jdupes; see jdupes.c for license information */
#include "jdupes.h"
#ifdef ENABLE_DEDUPE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#ifdef __linux__
/* Use built-in static dedupe header if requested */
#ifdef STATIC_DEDUPE_H
#include "linux-dedupe-static.h"
#else
#include <linux/fs.h>
#endif /* STATIC_DEDUPE_H */
/* If the Linux headers are too old, automatically use the static one */
#ifndef FILE_DEDUPE_RANGE_SAME
#warning Automatically enabled STATIC_DEDUPE_H due to insufficient header support
#include "linux-dedupe-static.h"
#endif /* FILE_DEDUPE_RANGE_SAME */
#include <sys/ioctl.h>
#define JDUPES_DEDUPE_SUPPORTED 1
#endif /* __linux__ */
#ifdef __APPLE__
#ifdef NO_HARDLINKS
#error Hard link support is required for dedupe on macOS but NO_HARDLINKS was set
#endif
#include "act_linkfiles.h"
#define JDUPES_DEDUPE_SUPPORTED 1
#endif
#ifndef JDUPES_DEDUPE_SUPPORTED
#error Dedupe is only supported on Linux and macOS
#endif
#include "act_dedupefiles.h"
/* Largest byte count submitted in a single FIDEDUPERANGE request (16 MiB);
 * dedupefiles() splits larger files into chunks of at most this size. */
#define KERNEL_DEDUP_MAX_SIZE 16777216
/**
 * Deduplicate every duplicate set found in the file list.
 *
 * Linux: for each list entry flagged FF_HAS_DUPES (the flag is cleared as the
 * entry is visited), the first file of the set that can be opened becomes the
 * source. Every file after it in the set is submitted to the FIDEDUPERANGE
 * ioctl against that source, at most KERNEL_DEDUP_MAX_SIZE bytes per request.
 * Per-file results are printed to stdout and error details to stderr.
 *
 * macOS: the work is delegated to linkfiles().
 *
 * files: head of the file list; nullptr() is invoked when it is NULL.
 */
extern void dedupefiles(file_t * restrict files)
{
#ifdef __linux__
  struct file_dedupe_range *fdr;
  struct file_dedupe_range_info *fdri;
  file_t *curfile, *srcfile, *dupefile;
  int src_fd;
  uint64_t total_files = 0;

  LOUD(fprintf(stderr, "\ndedupefiles: %p\n", (void *)files);)
  if (!files) nullptr("dedupefiles()");

  /* One request header followed by exactly one destination record */
  fdr = calloc(1, sizeof *fdr + sizeof fdr->info[0]);
  if (fdr == NULL) {
    fprintf(stderr, "error: out of memory in dedupefiles()\n");
    exit(EXIT_FAILURE);
  }
  fdr->dest_count = 1;
  fdri = &fdr->info[0];

  for (curfile = files; curfile; curfile = curfile->next) {
    /* Skip all files that have no duplicates */
    if (!ISFLAG(curfile->flags, FF_HAS_DUPES)) continue;
    CLEARFLAG(curfile->flags, FF_HAS_DUPES);

    /* Pick the source: the set head, or the next member that can be opened.
     * The walk stops before the last member because a source with nothing
     * after it has nothing to dedupe against. */
    srcfile = curfile;
    src_fd = open(srcfile->d_name, O_RDWR);
    while (src_fd == -1 && srcfile->duplicates && srcfile->duplicates->duplicates) {
      fprintf(stderr, "dedupe: open failed (skipping): %s\n", srcfile->d_name);
      srcfile = srcfile->duplicates;
      src_fd = open(srcfile->d_name, O_RDWR);
    }
    if (src_fd == -1) continue;
    printf(" [SRC] %s\n", srcfile->d_name);

    /* Run dedupe for each member after the source. Members before it already
     * failed to open above, and the source must never be its own destination. */
    for (dupefile = srcfile->duplicates; dupefile; dupefile = dupefile->duplicates) {
      off_t remain;
      int status;
      int saved_errno = 0;

      /* Don't pass hard links of the source to dedupe (GitHub issue #25) */
      if (dupefile->device == srcfile->device && dupefile->inode == srcfile->inode) {
        printf(" -==-> %s\n", dupefile->d_name);
        continue;
      }

      /* Open destination file, skipping any that fail */
      fdri->dest_fd = open(dupefile->d_name, O_RDWR);
      if (fdri->dest_fd == -1) {
        fprintf(stderr, "dedupe: open failed (skipping): %s\n", dupefile->d_name);
        continue;
      }

      /* Dedupe src <--> dest, 16 MiB or less at a time */
      remain = dupefile->size;
      fdri->status = FILE_DEDUPE_RANGE_SAME;
      while (remain > 0) {
        fdr->src_offset = (uint64_t)(dupefile->size - remain);
        fdri->dest_offset = fdr->src_offset;
        fdr->src_length = (uint64_t)(remain <= KERNEL_DEDUP_MAX_SIZE ? remain : KERNEL_DEDUP_MAX_SIZE);
        if (ioctl(src_fd, FIDEDUPERANGE, fdr) == -1) {
          /* Capture errno now; later stdio calls may overwrite it */
          saved_errno = errno;
          break;
        }
        /* Stop on the first chunk that is not confirmed identical so that a
         * later chunk cannot overwrite a "differs" or error status */
        if (fdri->status != FILE_DEDUPE_RANGE_SAME) break;
        remain -= (off_t)fdr->src_length;
      }

      /* Handle any errors */
      status = fdri->status;
      if (status != FILE_DEDUPE_RANGE_SAME || saved_errno != 0) {
        printf(" -XX-> %s\n", dupefile->d_name);
        fprintf(stderr, "error: ");
        if (status == FILE_DEDUPE_RANGE_DIFFERS)
          fprintf(stderr, "not identical (files modified between scan and dedupe?)\n");
        else if (status != 0)
          fprintf(stderr, "%s (%d)\n", strerror(-status), status);
        else
          fprintf(stderr, "%s (%d)\n", strerror(saved_errno), saved_errno);
      } else {
        /* Dedupe OK; report to the user and add to file count */
        printf(" ====> %s\n", dupefile->d_name);
        total_files++;
      }
      close((int)fdri->dest_fd);
    }
    printf("\n");
    close(src_fd);
    /* Count the source file itself */
    total_files++;
  }

  if (!ISFLAG(flags, F_HIDEPROGRESS))
    fprintf(stderr, "Deduplication done (%" PRIu64 " files processed)\n", total_files);
  free(fdr);
#endif /* __linux__ */

/* On macOS, clonefile() is basically a "hard link" function, so linkfiles will do the work. */
#ifdef __APPLE__
  linkfiles(files, 2, 0);
#endif /* __APPLE__ */
}
#endif /* ENABLE_DEDUPE */