acst

Tracks changes and corruption in files using xattr-based checksums.
git clone https://noxz.tech/git/acst.git
acst

acst.c
1/**
2 * Copyright (C) 2022 Chris Noxz
3 * Author(s): Chris Noxz <chris@noxz.tech>
4 *
5 * This program is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation, either version 3 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#include <errno.h>             // for errno, ENODATA
20#include <fcntl.h>             // for open, O_NOFOLLOW, O_RDONLY
21#include <limits.h>            // for PATH_MAX
22#include <stdarg.h>            // for va_end, va_list, va_start
23#include <stdbool.h>           // for true, false, bool
24#include <stdio.h>             // for fprintf, stdout, snprintf, sprintf
25#include <stdlib.h>            // for free, EXIT_FAILURE, exit, malloc
26#include <string.h>            // for strlen, strcmp, memset, strerror
27#include <time.h>              // for timespec
28#include <unistd.h>            // for close, read
29#include <bits/getopt_core.h>  // for getopt, optind
30#include <sys/stat.h>          // for stat, fstat, lstat, S_ISREG
31#include <sys/xattr.h>         // for fgetxattr, fremovexattr, fsetxattr
32
33#include "sha256.h"            // for SHA256_final, SHA256_init, SHA256_update
34#include "acst.h"
35
36
37static void
38bin2hex(unsigned char *bin, size_t len, char *hex)
39{
40	unsigned int    i,                      // index counter of binary data
41	                j;                      // index counter of hex data
42	for (i = j = 0; i < len; i++) {
43		hex[j++] = hextab[bin[i] >> 4];
44		hex[j++] = hextab[bin[i] & 0x0F];
45	}
46	hex[j] = 0;
47}
48
49
50static int
51cscmp(const unsigned char *cs1, const unsigned char *cs2)
52{
53	const unsigned char \
54	               *l,                      // pointer for "left" checksum
55	               *r;                      // pointer for "right" checksum
56	int             n;                      // countdown from checksum length
57
58	/* iterate through checksums until mismatch or end of checksum */
59	for (l = cs1, r = cs2, n = SHA256_BYTES; n && *l == *r; n--, l++, r++);
60	return n ? *l - *r : 0; /* return difference between checksums */
61}
62
63
64static void
65error(enum Error er, const char *fmt, ...)
66{
67	va_list         ap;                     // argument list holder
68
69	va_start(ap, fmt);
70	vfprintf(stderr, fmt, ap);
71	va_end(ap);
72
73	switch (er) {
74	case ER_NOT_REGULAR:                    cnt.errNotRegular++; break;
75	case ER_OPENING:                        cnt.errOpening++; break;
76	case ER_XATTR_OPERATION:                cnt.errWritingXattr++; break;
77	case ER_GENERIC:                        cnt.errGeneric++; break;
78	case ER_FATAL:                          exit(EXIT_FAILURE);
79	}
80	cnt.errs++;
81}
82
83
84static void
85dup_free(dn_t **head_ref)
86{
87	/* free up memory allocated by nodes, recursively */
88	if ((*head_ref)->next)
89		dup_free(&((*head_ref)->next));
90	free(*head_ref);
91}
92
93
94static void
95dup_front_back_split(dn_t *src, dn_t **front_ref, dn_t **back_ref)
96{
97	dn_t           *f = src->next,          // "fast" pointer
98	               *s = src;                // "slow" pointer
99
100	/* advance the "fast" pointer in double the speed as "slow", so that "slow"
101	 * will reference the node before the midpoint of the linked list */
102	while (f != NULL) {
103		f = f->next;
104		if (f != NULL)
105			s = s->next, f = f->next;
106	}
107
108	/* the front will reference the source, while the back will reference the
109	 * "slow" pointer or the node before the midpoint of the linked list */
110	*front_ref = src;
111	*back_ref = s->next;
112
113	/* split the list by breaking the linked list at midpoint */
114	s->next = NULL;
115}
116
117
118static void
119dup_merge_sort(dn_t **head_ref)
120{
121	dn_t           *h = *head_ref,          // pointer to reference of head
122	               *a = NULL,               // pointer to list a
123	               *b = NULL;               // pointer to list b
124
125	/* define base case, start with length 0 or 1 before merge */
126	if (h == NULL || h->next == NULL)
127		return;
128
129	/* split head into two lists before sorting, until length 0 or 1 */
130	dup_front_back_split(h, &a, &b);
131
132	/* repeat merge sort for each list, recursively */
133	dup_merge_sort(&a);
134	dup_merge_sort(&b);
135
136	/* finally merge list the two lists sorted together into head */
137	*head_ref = dup_sorted_merge(a, b);
138}
139
140
141static int
142dup_print(dn_t **head_ref)
143{
144	dn_t           *cur,                    // holder for current node
145	               *pre;                    // holder for previous node
146	char            hex[CSSZ + 1];          // holder for checksum (as hex)
147	int             i, j = 0;               // counters for duplicates
148
149	/* perform a merge sort of linked list before printing to enable detection
150	 * of subsequent duplicates */
151	dup_merge_sort(head_ref);
152
153	/* iterate through linked list while remembering previous node as reference
154	 * for occurrence of duplicates -- if previous and current are identical */
155	for (cur = *head_ref, pre = NULL; cur; pre = cur, cur = cur->next) {
156		/* check if current and previous match, if so count as duplicates */
157		i = (pre && cscmp(cur->cs, pre->cs) == 0) ? i + 1 : 0;
158		if (i == 0)             /* ignore non-duplicates */
159			continue;
160		if (i == 1 && ++j) {    /* include checksum before first filename */
161			bin2hex(pre->cs, SHA256_BYTES, hex);
162			printf(STR_DPH_FRMT, hex, pre->fn);
163		}
164		printf(STR_DPN_FRMT, cur->fn);
165	}
166
167	/* free up memory when done */
168	dup_free(head_ref);
169
170	fprintf(stderr, STR_DPS_FRMT, j);
171
172	return EXIT_SUCCESS;
173}
174
175
176static void
177dup_push(dn_t **head_ref, const char *fn, int fd, xa_t *xa)
178{
179	dn_t           *new = NULL;             // holder for new duplicate node
180
181	/* read checksum from file */
182	if (!xa_read(fd, xa) || xa->hex[0] == 0)
183		return;
184
185	/* allocate memory for new duplicate node */
186	if (!(new = (dn_t*)malloc(sizeof(dn_t))))
187		error(ER_FATAL, STR_ERR_OOM);
188
189	/* store checksum and filename together */
190	memcpy(new->cs, xa->cs, SHA256_BYTES);
191	memcpy(new->fn, fn, MIN(PATH_MAX, strlen(fn)) + 1);
192
193	/* add duplicate node to linked list */
194	new->next = *head_ref;
195	*head_ref = new;
196}
197
198
199static dn_t*
200dup_sorted_merge(dn_t *a, dn_t *b)
201{
202	dn_t           *r = NULL;               // result holder
203
204	/* define base case, start with length 0 or 1 before merge */
205	if (a == NULL || b == NULL)
206		return a == NULL ? b : a;
207
208	/* sort by checksum in ascending order (... >= 0 for descending) */
209	if (cscmp(a->cs, b->cs) <= 0)
210		(r = a)->next = dup_sorted_merge(a->next, b);
211	else
212		(r = b)->next = dup_sorted_merge(a, b->next);
213
214	/* return sorted merge of a and b, until whole list is sorted */
215	return r;
216}
217
218
219static enum FileState
220file_state(int fd, xa_t *xa_s, xa_t *xa_a)
221{
222	if (arg.remove)
223		return xa_write(fd, NULL) ? FS_REMOVED : FS_REMOVED_ERROR;
224
225	if (!xa_compute(fd, xa_a))
226		return FS_ERROR;
227	else if (xa_a->hex[0] == 0)
228		return FS_DISRUPTED;
229
230	if (!xa_read(fd, xa_s))
231		return FS_MALFORMED;
232
233	if (xa_s->hex[0] == 0)
234		return FS_NEW;
235
236	/* compare stored and actual timestamp */
237	xa_a->tcmp = (
238	      xa_s->s != xa_a->s ? (int)(xa_s->s - xa_a->s)
239	    : xa_s->ns != xa_a->ns ? (int)(xa_s->ns - xa_a->ns)
240	    : 0
241	);
242
243	return (
244	      cscmp(xa_s->cs, xa_a->cs) == 0    // if hash is correct
245	    ? xa_a->tcmp == 0 ? FS_OK           // ...and ts are matching
246	    : FS_TOUCHED                        // ...or if not
247	    : xa_a->tcmp < 0 ? FS_OUTDATED      // else if ts is new
248	    : xa_a->tcmp > 0 ? FS_BACKDATED     // else if ts is old
249	    : FS_CORRUPT                        // else
250	);
251}
252
253
254static void
255process_argument(const char *fn)
256{
257	struct stat     st;                     // stat holder
258
259	if (lstat(fn, &st) != 0)
260		error(ER_OPENING, STR_ERR_OPNF, fn);
261	else if (S_ISREG(st.st_mode))
262		process_file(fn);
263	/* make sure to only process regular files */
264	else
265		error(ER_NOT_REGULAR, STR_ERR_REGF, fn);
266}
267
268
269static void
270process_file(const char *fn)
271{
272	int             fd;                     // file holder
273	xa_t            xa_s,                   // stored attribute
274	                xa_a;                   // actual attribute
275	enum FileState  st;                     // file state
276
277	if (!(fd = open(fn, O_RDONLY | O_NOFOLLOW)))
278		return error(ER_OPENING, STR_ERR_OPNF, fn);
279
280	if (arg.duplicates)
281		dup_push(&gl.dup_head, fn, fd, &xa_s);
282	else if ((st = file_state(fd, &xa_s, &xa_a)) == FS_ERROR)
283		error(ER_GENERIC, STR_ERR_HASH, fn);
284	else if (st == FS_REMOVED_ERROR)
285		error(ER_XATTR_OPERATION, STR_ERR_XARM, fn, strerror(errno));
286	else {
287		report(fn, st);
288		if (!arg.quiet && !(st == FS_OK || st == FS_REMOVED || st == FS_DISRUPTED || st == FS_MALFORMED))
289			fprintf(stdout, STR_CMP_FRMT
290			    , strlen(xa_s.hex) > 0 ? xa_s.hex : zSHA256
291			    , xa_s.s
292			    , xa_s.ns
293			    , strlen(xa_a.hex) > 0 ? xa_a.hex : zSHA256
294			    , xa_a.s
295			    , xa_a.ns
296			);
297		if (!(st == FS_OK || st == FS_REMOVED || st == FS_DISRUPTED || st == FS_CORRUPT || st == FS_BACKDATED || st == FS_MALFORMED) && !xa_write(fd, &xa_a))
298			error(ER_XATTR_OPERATION, STR_ERR_XAWR, fn);
299	}
300
301	close(fd);
302}
303
304
305static void
306report(const char *fn, enum FileState st)
307{
308	switch (st) {
309	case FS_OK:
310	case FS_REMOVED:                        cnt.ok++; break;
311	case FS_DISRUPTED:                      cnt.disrupted++; break;
312	case FS_CORRUPT:                        cnt.corrupt++; break;
313	case FS_TOUCHED:                        cnt.touched++; break;
314	case FS_NEW:                            cnt.new++; break;
315	case FS_OUTDATED:                       cnt.outdated++; break;
316	case FS_BACKDATED:                      cnt.backdated++; break;
317	case FS_MALFORMED:                      cnt.malformed++; break;
318	/* don't report errors here (this is only to avoid compiler warnings) */
319	case FS_REMOVED_ERROR:
320	case FS_ERROR:                          return;
321	}
322	cnt.total++;
323
324	if (st == FS_CORRUPT || st == FS_BACKDATED || st == FS_MALFORMED) {
325		fprintf(stderr, STR_ERR_ABNO, fn);
326		fprintf(stdout, STR_OUT_STAT
327		    , st == FS_CORRUPT ? STR_OUT_CORR
328		    : st == FS_BACKDATED ? STR_OUT_BACK
329		    : STR_OUT_MALF, fn);
330	}
331
332	if (arg.quiet < 2 && (st == FS_DISRUPTED || st == FS_TOUCHED || st == FS_NEW || st == FS_OUTDATED))
333		fprintf(stdout, STR_OUT_STAT
334		    , st == FS_DISRUPTED ? STR_OUT_DISR
335		    : st == FS_TOUCHED ? STR_OUT_TOUC
336		    : st == FS_NEW ? STR_OUT_NEW
337		    : STR_OUT_OUTD, fn);
338
339	if (arg.quiet < 1 && (st == FS_OK || st == FS_REMOVED))
340		fprintf(stdout, STR_OUT_STAT
341		    , st == FS_OK ? STR_OUT_OK
342		    : STR_OUT_REMO, fn);
343}
344
345
346static bool
347xa_compute(int fd, xa_t *xa)
348{
349	struct stat     st;                     // file stat holder
350
351	if ((fstat(fd, &st) != 0))
352		return false;
353
354	/* retrieve date modified and compute hash of file */
355	xa->s = st.st_mtim.tv_sec;
356	xa->ns = st.st_mtim.tv_nsec;
357	xa_hash(fd, xa->cs);
358	bin2hex(xa->cs, SHA256_BYTES, xa->hex);
359
360	/* get file stats after hash computation as the file could have been
361	 * changed or deleted during the computation */
362	if ((fstat(fd, &st) != 0))
363		return false;
364
365	/* check if the file was modified while the hash was computed, and of so
366	 * indicate with an empty hash */
367	if (!((long unsigned int)st.st_mtim.tv_sec == xa->s && (long unsigned int)st.st_mtim.tv_nsec == xa->ns))
368		xa->hex[0] = 0;
369
370	return true;
371}
372
373
374static void
375xa_hash(int fd, unsigned char *cs)
376{
377	struct SHA256_ctx \
378	                ctx;                    // SHA256 context
379	unsigned char   buf[BUFSZ];             // file content read buffer
380	size_t          len;                    // holder for read buffer length
381
382	SHA256_init(&ctx);
383	while ((len = read(fd, buf, BUFSZ)))
384		SHA256_update(buf, len, &ctx);
385
386	SHA256_final(&ctx, cs);
387}
388
389
390static bool
391xa_read(int fd, xa_t *xa)
392{
393	char           *c;                      // char pointer used in loop (hex)
394	unsigned char  *p;                      // char pointer used in loop (bin)
395	int             i = 0;                  // index counter to track odd/even
396
397	/* initialize metadata holders to zero, and create a blank xa */
398	memset(xa, 0, sizeof(*xa));
399	memset(ts, 0, sizeof(*ts));
400
401	/* read checksum, on error indicate as new if missing else fail */
402	if (fgetxattr(fd, XATTR_CS, xa->hex, sizeof(xa->hex)) < 0)
403		return errno == ENODATA ? true : false;
404	else if (strlen(xa->hex) != CSSZ)
405		return false;
406
407	/* make sure digits are hex (else fail) and in lower case (else adjust),
408	 * and also convert hex digits into binary data for checksum */
409	for (c = xa->hex, p = xa->cs; *c; c++) {
410		if (*c >= 0x41 && *c <= 0x46)
411			*c += 0x20;
412		else if ((*c < 0x30 || *c > 0x39) && (*c < 0x61 || *c > 0x66))
413			return false;
414		if (i++ % 2 == 0)
415			*p =  (*c >= 0x30 && *c <= 0x39 ? *c - 0x30
416			      :*c >= 0x61 && *c <= 0x66 ? *c - 0x57 : 0) * 16;
417		else
418			(*p++) += (*c >= 0x30 && *c <= 0x39 ? *c - 0x30
419			          :*c >= 0x61 && *c <= 0x66 ? *c - 0x57 : 0);
420	}
421
422	/* read timestamp, on error (ENODATA included) fail */
423	if (fgetxattr(fd, XATTR_TS, ts, sizeof(ts) - 1) < 0)
424		return false;
425	else if (sscanf(ts, FRMT_TS, &xa->s, &xa->ns) < 1)
426		return false;
427
428	return true;
429}
430
431
432static bool
433xa_write(int fd, const xa_t *xa)
434{
435	if (arg.dryrun)
436		return true;
437
438	/* remove extended attributes from file if xa is null */
439	if (xa == NULL)
440		return (
441		    fremovexattr(fd, XATTR_TS) |
442		    fremovexattr(fd, XATTR_CS)
443		) == 0;
444
445	/* prepare and write extended attributes to file */
446	snprintf(ts, sizeof(ts), FRMT_TS, xa->s, xa->ns);
447	return (
448	    fsetxattr(fd, XATTR_TS, ts, strlen(ts), 0) |
449	    fsetxattr(fd, XATTR_CS, &xa->hex, CSSZ, 0)
450	) == 0;
451}
452
453
454int
455main(int argc, char *argv[])
456{
457	char            line[PATH_MAX + 1];     // line holder for reading stdin
458	int             opt,                    // argument option holder
459	                i,                      // index counter for arguments
460	                c;                      // holder for getc(stdin)
461
462	/* declare global variables */
463	gl.dup_head     = NULL;
464	gl.prg          = argv[0];
465
466	while ((opt = getopt(argc, argv, "dhmnqvx")) != -1)
467		switch (opt) {
468		case 'd':                           arg.duplicates = true; break;
469		case 'h':                           USAGE(EXIT_SUCCESS); break;
470		case 'm':                           arg.summarize = true; break;
471		case 'n':                           arg.dryrun = true; break;
472		case 'q':                           arg.quiet++; break;
473		case 'v':                           VER(); break;
474		case 'x':                           arg.remove = true; break;
475		default:                            USAGE(EXIT_FAILURE);
476		}
477
478	if ((i = optind) >= argc)
479		USAGE(EXIT_FAILURE);
480
481	/* read stdin if the only FILE is -, while the last argument wasn't -- */
482	if (argc - i == 1 && argv[i][0] == 0x2d && argv[i - 1][1] != argv[i][0])
483		for (i = 0; (c = getc(stdin)) != EOF; i++) {
484			if (i > PATH_MAX)
485				error(ER_FATAL, STR_ERR_PMAX);
486			if ((line[i] = (c != 0x0a) ? c : 0) != 0)
487				continue;
488			process_argument(line);
489			i = -1;
490		}
491	/* ...else, process each argument as a FILE */
492	else
493		for (; i < argc; i++)
494			process_argument(argv[i]);
495
496	if (arg.duplicates && gl.dup_head)
497		return dup_print(&gl.dup_head);
498
499	if (arg.summarize)
500		SUMMERIZE();
501
502	if ((cnt.corrupt + cnt.malformed + cnt.backdated) > 0)
503		return 5;
504	if (cnt.errs == 0 && (cnt.ok + cnt.outdated + cnt.touched + cnt.new) == cnt.total)
505		return EXIT_SUCCESS;
506	if (cnt.errOpening == cnt.errs)
507		return 2;
508	if (cnt.errNotRegular == cnt.errs)
509		return 3;
510	if (cnt.errWritingXattr == cnt.errs)
511		return 4;
512	return 6;
513}