acst.c
1/**
2 * Copyright (C) 2022 Chris Noxz
3 * Author(s): Chris Noxz <chris@noxz.tech>
4 *
5 * This program is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation, either version 3 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#include <errno.h> // for errno, ENODATA
20#include <fcntl.h> // for open, O_NOFOLLOW, O_RDONLY
21#include <limits.h> // for PATH_MAX
22#include <stdarg.h> // for va_end, va_list, va_start
23#include <stdbool.h> // for true, false, bool
24#include <stdio.h> // for fprintf, stdout, snprintf, sprintf
25#include <stdlib.h> // for free, EXIT_FAILURE, exit, malloc
26#include <string.h> // for strlen, strcmp, memset, strerror
27#include <time.h> // for timespec
28#include <unistd.h> // for close, read
29#include <bits/getopt_core.h> // for getopt, optind
30#include <sys/stat.h> // for stat, fstat, lstat, S_ISREG
31#include <sys/xattr.h> // for fgetxattr, fremovexattr, fsetxattr
32
33#include "sha256.h" // for SHA256_final, SHA256_init, SHA256_update
34#include "acst.h"
35
36
37static void
38bin2hex(unsigned char *bin, size_t len, char *hex)
39{
40 unsigned int i, // index counter of binary data
41 j; // index counter of hex data
42 for (i = j = 0; i < len; i++) {
43 hex[j++] = hextab[bin[i] >> 4];
44 hex[j++] = hextab[bin[i] & 0x0F];
45 }
46 hex[j] = 0;
47}
48
49
50static int
51cscmp(const unsigned char *cs1, const unsigned char *cs2)
52{
53 const unsigned char \
54 *l, // pointer for "left" checksum
55 *r; // pointer for "right" checksum
56 int n; // countdown from checksum length
57
58 /* iterate through checksums until mismatch or end of checksum */
59 for (l = cs1, r = cs2, n = SHA256_BYTES; n && *l == *r; n--, l++, r++);
60 return n ? *l - *r : 0; /* return difference between checksums */
61}
62
63
64static void
65error(enum Error er, const char *fmt, ...)
66{
67 va_list ap; // argument list holder
68
69 va_start(ap, fmt);
70 vfprintf(stderr, fmt, ap);
71 va_end(ap);
72
73 switch (er) {
74 case ER_NOT_REGULAR: cnt.errNotRegular++; break;
75 case ER_OPENING: cnt.errOpening++; break;
76 case ER_XATTR_OPERATION: cnt.errWritingXattr++; break;
77 case ER_GENERIC: cnt.errGeneric++; break;
78 case ER_FATAL: exit(EXIT_FAILURE);
79 }
80 cnt.errs++;
81}
82
83
84static void
85dup_free(dn_t **head_ref)
86{
87 /* free up memory allocated by nodes, recursively */
88 if ((*head_ref)->next)
89 dup_free(&((*head_ref)->next));
90 free(*head_ref);
91}
92
93
94static void
95dup_front_back_split(dn_t *src, dn_t **front_ref, dn_t **back_ref)
96{
97 dn_t *f = src->next, // "fast" pointer
98 *s = src; // "slow" pointer
99
100 /* advance the "fast" pointer in double the speed as "slow", so that "slow"
101 * will reference the node before the midpoint of the linked list */
102 while (f != NULL) {
103 f = f->next;
104 if (f != NULL)
105 s = s->next, f = f->next;
106 }
107
108 /* the front will reference the source, while the back will reference the
109 * "slow" pointer or the node before the midpoint of the linked list */
110 *front_ref = src;
111 *back_ref = s->next;
112
113 /* split the list by breaking the linked list at midpoint */
114 s->next = NULL;
115}
116
117
118static void
119dup_merge_sort(dn_t **head_ref)
120{
121 dn_t *h = *head_ref, // pointer to reference of head
122 *a = NULL, // pointer to list a
123 *b = NULL; // pointer to list b
124
125 /* define base case, start with length 0 or 1 before merge */
126 if (h == NULL || h->next == NULL)
127 return;
128
129 /* split head into two lists before sorting, until length 0 or 1 */
130 dup_front_back_split(h, &a, &b);
131
132 /* repeat merge sort for each list, recursively */
133 dup_merge_sort(&a);
134 dup_merge_sort(&b);
135
136 /* finally merge list the two lists sorted together into head */
137 *head_ref = dup_sorted_merge(a, b);
138}
139
140
141static int
142dup_print(dn_t **head_ref)
143{
144 dn_t *cur, // holder for current node
145 *pre; // holder for previous node
146 char hex[CSSZ + 1]; // holder for checksum (as hex)
147 int i, j = 0; // counters for duplicates
148
149 /* perform a merge sort of linked list before printing to enable detection
150 * of subsequent duplicates */
151 dup_merge_sort(head_ref);
152
153 /* iterate through linked list while remembering previous node as reference
154 * for occurrence of duplicates -- if previous and current are identical */
155 for (cur = *head_ref, pre = NULL; cur; pre = cur, cur = cur->next) {
156 /* check if current and previous match, if so count as duplicates */
157 i = (pre && cscmp(cur->cs, pre->cs) == 0) ? i + 1 : 0;
158 if (i == 0) /* ignore non-duplicates */
159 continue;
160 if (i == 1 && ++j) { /* include checksum before first filename */
161 bin2hex(pre->cs, SHA256_BYTES, hex);
162 printf(STR_DPH_FRMT, hex, pre->fn);
163 }
164 printf(STR_DPN_FRMT, cur->fn);
165 }
166
167 /* free up memory when done */
168 dup_free(head_ref);
169
170 fprintf(stderr, STR_DPS_FRMT, j);
171
172 return EXIT_SUCCESS;
173}
174
175
176static void
177dup_push(dn_t **head_ref, const char *fn, int fd, xa_t *xa)
178{
179 dn_t *new = NULL; // holder for new duplicate node
180
181 /* read checksum from file */
182 if (!xa_read(fd, xa) || xa->hex[0] == 0)
183 return;
184
185 /* allocate memory for new duplicate node */
186 if (!(new = (dn_t*)malloc(sizeof(dn_t))))
187 error(ER_FATAL, STR_ERR_OOM);
188
189 /* store checksum and filename together */
190 memcpy(new->cs, xa->cs, SHA256_BYTES);
191 memcpy(new->fn, fn, MIN(PATH_MAX, strlen(fn)) + 1);
192
193 /* add duplicate node to linked list */
194 new->next = *head_ref;
195 *head_ref = new;
196}
197
198
199static dn_t*
200dup_sorted_merge(dn_t *a, dn_t *b)
201{
202 dn_t *r = NULL; // result holder
203
204 /* define base case, start with length 0 or 1 before merge */
205 if (a == NULL || b == NULL)
206 return a == NULL ? b : a;
207
208 /* sort by checksum in ascending order (... >= 0 for descending) */
209 if (cscmp(a->cs, b->cs) <= 0)
210 (r = a)->next = dup_sorted_merge(a->next, b);
211 else
212 (r = b)->next = dup_sorted_merge(a, b->next);
213
214 /* return sorted merge of a and b, until whole list is sorted */
215 return r;
216}
217
218
219static enum FileState
220file_state(int fd, xa_t *xa_s, xa_t *xa_a)
221{
222 if (arg.remove)
223 return xa_write(fd, NULL) ? FS_REMOVED : FS_REMOVED_ERROR;
224
225 if (!xa_compute(fd, xa_a))
226 return FS_ERROR;
227 else if (xa_a->hex[0] == 0)
228 return FS_DISRUPTED;
229
230 if (!xa_read(fd, xa_s))
231 return FS_MALFORMED;
232
233 if (xa_s->hex[0] == 0)
234 return FS_NEW;
235
236 /* compare stored and actual timestamp */
237 xa_a->tcmp = (
238 xa_s->s != xa_a->s ? (int)(xa_s->s - xa_a->s)
239 : xa_s->ns != xa_a->ns ? (int)(xa_s->ns - xa_a->ns)
240 : 0
241 );
242
243 return (
244 cscmp(xa_s->cs, xa_a->cs) == 0 // if hash is correct
245 ? xa_a->tcmp == 0 ? FS_OK // ...and ts are matching
246 : FS_TOUCHED // ...or if not
247 : xa_a->tcmp < 0 ? FS_OUTDATED // else if ts is new
248 : xa_a->tcmp > 0 ? FS_BACKDATED // else if ts is old
249 : FS_CORRUPT // else
250 );
251}
252
253
254static void
255process_argument(const char *fn)
256{
257 struct stat st; // stat holder
258
259 if (lstat(fn, &st) != 0)
260 error(ER_OPENING, STR_ERR_OPNF, fn);
261 else if (S_ISREG(st.st_mode))
262 process_file(fn);
263 /* make sure to only process regular files */
264 else
265 error(ER_NOT_REGULAR, STR_ERR_REGF, fn);
266}
267
268
269static void
270process_file(const char *fn)
271{
272 int fd; // file holder
273 xa_t xa_s, // stored attribute
274 xa_a; // actual attribute
275 enum FileState st; // file state
276
277 if (!(fd = open(fn, O_RDONLY | O_NOFOLLOW)))
278 return error(ER_OPENING, STR_ERR_OPNF, fn);
279
280 if (arg.duplicates)
281 dup_push(&gl.dup_head, fn, fd, &xa_s);
282 else if ((st = file_state(fd, &xa_s, &xa_a)) == FS_ERROR)
283 error(ER_GENERIC, STR_ERR_HASH, fn);
284 else if (st == FS_REMOVED_ERROR)
285 error(ER_XATTR_OPERATION, STR_ERR_XARM, fn, strerror(errno));
286 else {
287 report(fn, st);
288 if (!arg.quiet && !(st == FS_OK || st == FS_REMOVED || st == FS_DISRUPTED || st == FS_MALFORMED))
289 fprintf(stdout, STR_CMP_FRMT
290 , strlen(xa_s.hex) > 0 ? xa_s.hex : zSHA256
291 , xa_s.s
292 , xa_s.ns
293 , strlen(xa_a.hex) > 0 ? xa_a.hex : zSHA256
294 , xa_a.s
295 , xa_a.ns
296 );
297 if (!(st == FS_OK || st == FS_REMOVED || st == FS_DISRUPTED || st == FS_CORRUPT || st == FS_BACKDATED || st == FS_MALFORMED) && !xa_write(fd, &xa_a))
298 error(ER_XATTR_OPERATION, STR_ERR_XAWR, fn);
299 }
300
301 close(fd);
302}
303
304
305static void
306report(const char *fn, enum FileState st)
307{
308 switch (st) {
309 case FS_OK:
310 case FS_REMOVED: cnt.ok++; break;
311 case FS_DISRUPTED: cnt.disrupted++; break;
312 case FS_CORRUPT: cnt.corrupt++; break;
313 case FS_TOUCHED: cnt.touched++; break;
314 case FS_NEW: cnt.new++; break;
315 case FS_OUTDATED: cnt.outdated++; break;
316 case FS_BACKDATED: cnt.backdated++; break;
317 case FS_MALFORMED: cnt.malformed++; break;
318 /* don't report errors here (this is only to avoid compiler warnings) */
319 case FS_REMOVED_ERROR:
320 case FS_ERROR: return;
321 }
322 cnt.total++;
323
324 if (st == FS_CORRUPT || st == FS_BACKDATED || st == FS_MALFORMED) {
325 fprintf(stderr, STR_ERR_ABNO, fn);
326 fprintf(stdout, STR_OUT_STAT
327 , st == FS_CORRUPT ? STR_OUT_CORR
328 : st == FS_BACKDATED ? STR_OUT_BACK
329 : STR_OUT_MALF, fn);
330 }
331
332 if (arg.quiet < 2 && (st == FS_DISRUPTED || st == FS_TOUCHED || st == FS_NEW || st == FS_OUTDATED))
333 fprintf(stdout, STR_OUT_STAT
334 , st == FS_DISRUPTED ? STR_OUT_DISR
335 : st == FS_TOUCHED ? STR_OUT_TOUC
336 : st == FS_NEW ? STR_OUT_NEW
337 : STR_OUT_OUTD, fn);
338
339 if (arg.quiet < 1 && (st == FS_OK || st == FS_REMOVED))
340 fprintf(stdout, STR_OUT_STAT
341 , st == FS_OK ? STR_OUT_OK
342 : STR_OUT_REMO, fn);
343}
344
345
346static bool
347xa_compute(int fd, xa_t *xa)
348{
349 struct stat st; // file stat holder
350
351 if ((fstat(fd, &st) != 0))
352 return false;
353
354 /* retrieve date modified and compute hash of file */
355 xa->s = st.st_mtim.tv_sec;
356 xa->ns = st.st_mtim.tv_nsec;
357 xa_hash(fd, xa->cs);
358 bin2hex(xa->cs, SHA256_BYTES, xa->hex);
359
360 /* get file stats after hash computation as the file could have been
361 * changed or deleted during the computation */
362 if ((fstat(fd, &st) != 0))
363 return false;
364
365 /* check if the file was modified while the hash was computed, and of so
366 * indicate with an empty hash */
367 if (!((long unsigned int)st.st_mtim.tv_sec == xa->s && (long unsigned int)st.st_mtim.tv_nsec == xa->ns))
368 xa->hex[0] = 0;
369
370 return true;
371}
372
373
374static void
375xa_hash(int fd, unsigned char *cs)
376{
377 struct SHA256_ctx \
378 ctx; // SHA256 context
379 unsigned char buf[BUFSZ]; // file content read buffer
380 size_t len; // holder for read buffer length
381
382 SHA256_init(&ctx);
383 while ((len = read(fd, buf, BUFSZ)))
384 SHA256_update(buf, len, &ctx);
385
386 SHA256_final(&ctx, cs);
387}
388
389
390static bool
391xa_read(int fd, xa_t *xa)
392{
393 char *c; // char pointer used in loop (hex)
394 unsigned char *p; // char pointer used in loop (bin)
395 int i = 0; // index counter to track odd/even
396
397 /* initialize metadata holders to zero, and create a blank xa */
398 memset(xa, 0, sizeof(*xa));
399 memset(ts, 0, sizeof(*ts));
400
401 /* read checksum, on error indicate as new if missing else fail */
402 if (fgetxattr(fd, XATTR_CS, xa->hex, sizeof(xa->hex)) < 0)
403 return errno == ENODATA ? true : false;
404 else if (strlen(xa->hex) != CSSZ)
405 return false;
406
407 /* make sure digits are hex (else fail) and in lower case (else adjust),
408 * and also convert hex digits into binary data for checksum */
409 for (c = xa->hex, p = xa->cs; *c; c++) {
410 if (*c >= 0x41 && *c <= 0x46)
411 *c += 0x20;
412 else if ((*c < 0x30 || *c > 0x39) && (*c < 0x61 || *c > 0x66))
413 return false;
414 if (i++ % 2 == 0)
415 *p = (*c >= 0x30 && *c <= 0x39 ? *c - 0x30
416 :*c >= 0x61 && *c <= 0x66 ? *c - 0x57 : 0) * 16;
417 else
418 (*p++) += (*c >= 0x30 && *c <= 0x39 ? *c - 0x30
419 :*c >= 0x61 && *c <= 0x66 ? *c - 0x57 : 0);
420 }
421
422 /* read timestamp, on error (ENODATA included) fail */
423 if (fgetxattr(fd, XATTR_TS, ts, sizeof(ts) - 1) < 0)
424 return false;
425 else if (sscanf(ts, FRMT_TS, &xa->s, &xa->ns) < 1)
426 return false;
427
428 return true;
429}
430
431
432static bool
433xa_write(int fd, const xa_t *xa)
434{
435 if (arg.dryrun)
436 return true;
437
438 /* remove extended attributes from file if xa is null */
439 if (xa == NULL)
440 return (
441 fremovexattr(fd, XATTR_TS) |
442 fremovexattr(fd, XATTR_CS)
443 ) == 0;
444
445 /* prepare and write extended attributes to file */
446 snprintf(ts, sizeof(ts), FRMT_TS, xa->s, xa->ns);
447 return (
448 fsetxattr(fd, XATTR_TS, ts, strlen(ts), 0) |
449 fsetxattr(fd, XATTR_CS, &xa->hex, CSSZ, 0)
450 ) == 0;
451}
452
453
454int
455main(int argc, char *argv[])
456{
457 char line[PATH_MAX + 1]; // line holder for reading stdin
458 int opt, // argument option holder
459 i, // index counter for arguments
460 c; // holder for getc(stdin)
461
462 /* declare global variables */
463 gl.dup_head = NULL;
464 gl.prg = argv[0];
465
466 while ((opt = getopt(argc, argv, "dhmnqvx")) != -1)
467 switch (opt) {
468 case 'd': arg.duplicates = true; break;
469 case 'h': USAGE(EXIT_SUCCESS); break;
470 case 'm': arg.summarize = true; break;
471 case 'n': arg.dryrun = true; break;
472 case 'q': arg.quiet++; break;
473 case 'v': VER(); break;
474 case 'x': arg.remove = true; break;
475 default: USAGE(EXIT_FAILURE);
476 }
477
478 if ((i = optind) >= argc)
479 USAGE(EXIT_FAILURE);
480
481 /* read stdin if the only FILE is -, while the last argument wasn't -- */
482 if (argc - i == 1 && argv[i][0] == 0x2d && argv[i - 1][1] != argv[i][0])
483 for (i = 0; (c = getc(stdin)) != EOF; i++) {
484 if (i > PATH_MAX)
485 error(ER_FATAL, STR_ERR_PMAX);
486 if ((line[i] = (c != 0x0a) ? c : 0) != 0)
487 continue;
488 process_argument(line);
489 i = -1;
490 }
491 /* ...else, process each argument as a FILE */
492 else
493 for (; i < argc; i++)
494 process_argument(argv[i]);
495
496 if (arg.duplicates && gl.dup_head)
497 return dup_print(&gl.dup_head);
498
499 if (arg.summarize)
500 SUMMERIZE();
501
502 if ((cnt.corrupt + cnt.malformed + cnt.backdated) > 0)
503 return 5;
504 if (cnt.errs == 0 && (cnt.ok + cnt.outdated + cnt.touched + cnt.new) == cnt.total)
505 return EXIT_SUCCESS;
506 if (cnt.errOpening == cnt.errs)
507 return 2;
508 if (cnt.errNotRegular == cnt.errs)
509 return 3;
510 if (cnt.errWritingXattr == cnt.errs)
511 return 4;
512 return 6;
513}