acst

Tracks changes and corruption in files using xattr-based checksums.
git clone https://noxz.tech/git/acst.git
acst

acst.h
/**
 * Copyright (C) 2022 Chris Noxz
 * Author(s): Chris Noxz <chris@noxz.tech>
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <https://www.gnu.org/licenses/>.
 */
18
19#ifndef ACST_H
20#define ACST_H
21
22#include <limits.h>         // for PATH_MAX
23#include <stdbool.h>        // for bool
24#include <stdio.h>          // for printf
25#include <stdlib.h>         // for exit, EXIT_SUCCESS, size_t
26
/*
 * Sizing constants, xattr names and the timestamp format used by the program.
 * FRMT_TS is kept as a macro because it is pasted into other string literals
 * at preprocessing time.
 */
#define BUFSZ               8192    /* buffer size to use in hash computation */
#define SHA256_BYTES        32      /* byte size of SHA256 hash */
#define CSSZ                64      /* size of SHA256 hash stored as hex (2 * SHA256_BYTES) */
#define TSSZ                32      /* allocation size for timestamp holder */
#define XATTR_CS            "user.acst.cs"  /* name of checksum xattr */
#define XATTR_TS            "user.acst.ts"  /* name of timestamp xattr */
#define FRMT_TS             "%010llu.%09lu" /* format of timestamp xattr: 10 zero-padded digits '.' 9 zero-padded digits */
34
/*
 * String values used in the program; could be changed for language
 * compliance. The *_FRMT and STR_ERR_* values are printf-style format
 * strings, the STR_OUT_* state labels are plain strings meant to be passed
 * as the first "%s" of STR_OUT_STAT.
 */
#define STR_CMP_FRMT        " stored: %s " FRMT_TS "\n"                     \
                            " actual: %s " FRMT_TS "\n"
#define STR_DPN_FRMT        " %s\n"
#define STR_DPH_FRMT        "<dup> %s\n" STR_DPN_FRMT
#define STR_DPS_FRMT        "Found %d files with duplicates.\n"
#define STR_ERR_OOM         "Error: out of memory\n"
#define STR_ERR_PMAX        "Error: PATH_MAX reached before end\n"
#define STR_ERR_ABNO        "Error: abnormal changes detected \"%s\"\n"
#define STR_ERR_OPNF        "Error: could not open file \"%s\"\n"
#define STR_ERR_REGF        "Error: not a regular file \"%s\"\n"
#define STR_ERR_HASH        "Error: could not compute hash \"%s\"\n"
#define STR_ERR_XARM        "Error: could not remove extended attributes from file \"%s\": %s\n"
/* "\%" is not a valid C escape sequence, so the conversion is written as a plain "%s" */
#define STR_ERR_XAWR        "Error: could not write extended attributes to file \"%s\"\n"
#define STR_OUT_STAT        "<%s> %s\n"
#define STR_OUT_NEW         "new"
#define STR_OUT_OK          "ok"
#define STR_OUT_TOUC        "hash ok"
#define STR_OUT_OUTD        "outdated"
#define STR_OUT_BACK        "backdated"
#define STR_OUT_CORR        "corrupt"
#define STR_OUT_MALF        "malformed"
#define STR_OUT_DISR        "disrupted"
#define STR_OUT_REMO        "xattr removed"
59
/*
 * Yields the smaller of X and Y (Y when they compare equal).
 * Function-like macro: the selected argument is evaluated twice, so do not
 * pass expressions with side effects.
 */
#define MIN(X, Y)           (((X) < (Y)) ? (X) : (Y))
/**
 * Prints the usage synopsis to stdout and terminates the program.
 *
 * The one-line synopsis (using gl.prg as program name) is always printed; the
 * full help text is only printed when code equals EXIT_SUCCESS. The program
 * then exits with code as its exit status.
 *
 * The macro expands to a plain brace block and reads the global gl at the
 * point of use. code is evaluated twice, so pass a side-effect free value.
 *
 * @param code              The exit status to terminate with
 */
#define USAGE(code)                                                         \
        {                                                                   \
          printf("Usage: %s [OPTION]... <FILE>...\n", gl.prg);              \
          if ((code) == EXIT_SUCCESS)                                       \
            printf(                                                         \
    "\n"                                                                    \
    "Tracks changes and corruption in files using xattr-based checksums.\n" \
    "\n"                                                                    \
    "Positional arguments:\n"                                               \
    "  FILE                  file(s) to track\n"                            \
    "\n"                                                                    \
    "Optional arguments:\n"                                                 \
    "  -h                    show this help message and exit\n"             \
    "  -m                    summarize information at end of execution\n"   \
    "  -n                    don't create or update any file attributes\n"  \
    "  -q                    quiet mode, lowers verbosity\n"                \
    "  -V                    output version information and exit\n"         \
    "  -x                    remove extended attributes from file(s)\n"     \
    "\n"                                                                    \
    "Full documentation <https://noxz.tech/software/acst>\n"                \
            );                                                              \
          exit(code);                                                       \
        }
/**
 * Prints "<program name> <version>" to stdout and exits with EXIT_SUCCESS.
 *
 * Reads the global gl.prg at the point of use. VERSION must expand to a
 * string literal; it is not defined in this header.
 * NOTE(review): presumably VERSION is supplied by the build -- confirm.
 */
#define VER()                                                               \
        {                                                                   \
          printf("%s " VERSION "\n", gl.prg);                               \
          exit(EXIT_SUCCESS);                                               \
        }
/**
 * Prints a summary of the global counters (cnt) to stdout.
 *
 * When arg.remove is set, the short form is printed: files processed, xattrs
 * removed (taken from cnt.ok) and the error counters. Otherwise the long form
 * is printed, which lists every file state counter followed by the error
 * counters.
 *
 * The macro expands to a plain brace block and reads the globals arg and cnt
 * at the point of use. Each format string has exactly one "%d" per counter
 * passed (7 in the short form, 14 in the long form); keep them in sync when
 * adding counters.
 */
#define SUMMERIZE()                                                         \
        {                                                                   \
          if (arg.remove)                                                   \
            printf(         "\n"                                            \
                            "Total files processed      : %d\n"             \
                            "  Xattr removed            : %d\n"             \
                            "Total errors               : %d\n"             \
                            "  Opening files            : %d\n"             \
                            "  Non-regular files        : %d\n"             \
                            "  Xattr operations         : %d\n"             \
                            "  Generic                  : %d\n"             \
              , cnt.total, cnt.ok                                           \
              , cnt.errs, cnt.errOpening, cnt.errNotRegular                 \
              , cnt.errWritingXattr, cnt.errGeneric);                       \
          else                                                              \
            printf(         "\n"                                            \
                            "Total files processed      : %d\n"             \
                            "  New                      : %d\n"             \
                            "  Ok                       : %d\n"             \
                            "  Hash ok                  : %d\n"             \
                            "  Outdated                 : %d\n"             \
                            "  Backdated                : %d\n"             \
                            "  Corrupt                  : %d\n"             \
                            "  Malformed                : %d\n"             \
                            "  Disrupted                : %d\n"             \
                            "Total errors               : %d\n"             \
                            "  Opening files            : %d\n"             \
                            "  Non-regular files        : %d\n"             \
                            "  Xattr operations         : %d\n"             \
                            "  Generic                  : %d\n"             \
              , cnt.total, cnt.new, cnt.ok, cnt.touched                     \
              , cnt.outdated, cnt.backdated, cnt.corrupt                    \
              , cnt.malformed, cnt.disrupted                                \
              , cnt.errs, cnt.errOpening, cnt.errNotRegular                 \
              , cnt.errWritingXattr, cnt.errGeneric);                       \
        }
125
126/* helper variables used by various functions */
127static char ts[TSSZ];                               /* timestamp holder */
128static const char hextab[]  = "0123456789abcdef";   /* hex digit table */
129static const char zSHA256[] = "0000000000000000"    /* zeroed out SHA256 */
130                              "0000000000000000"    /* why 4 lines?... */
131                              "0000000000000000"
132                              "0000000000000000";   /* ...it looks better! */
133
/* kinds of errors that can be reported through error() */
enum Error {
	ER_NOT_REGULAR,         /* not a regular file error */
	ER_OPENING,             /* error opening file */
	ER_XATTR_OPERATION,     /* error when performing xattr operations */
	ER_GENERIC,             /* generic error */
	ER_FATAL                /* fatal error, should result in program ending */
};
141
/* states a processed file can end up in */
enum FileState {
	FS_OK,                  /* checksum and mtime both match */
	FS_DISRUPTED,           /* file was changed during hash computation */
	FS_CORRUPT,             /* checksum differs while mtime matches */
	FS_MALFORMED,           /* xattrs cannot be read in their intended format */
	FS_TOUCHED,             /* checksum matches but mtime doesn't */
	FS_NEW,                 /* file has no prior checksum or mtime stored */
	FS_OUTDATED,            /* checksum differs and mtime is newer */
	FS_BACKDATED,           /* checksum differs and mtime is older */
	FS_REMOVED,             /* xattrs have been removed from file */
	FS_REMOVED_ERROR,       /* removal of xattrs failed */
	FS_ERROR                /* general error */
};
155
156typedef struct DuplicateNode {
157	unsigned char           cs[SHA256_BYTES];   /* checksum */
158	char                    fn[PATH_MAX + 1];   /* file name */
159	struct DuplicateNode   *next;               /* reference pointer to next dup */
160} dn_t;
161
162/* extended attribute/metadata structure */
163typedef struct ExtendedAttribute {
164	unsigned long long      s;                  /* seconds */
165	unsigned long           ns;                 /* nanoseconds */
166	char                    hex[CSSZ + 1];      /* checksum (hex) */
167	unsigned char           cs[SHA256_BYTES];   /* checksum (binary) */
168	int                     tcmp;               /* timespan comparison holder */
169} xa_t;
170
/* arguments collected from command line; zero-initialized as a file-scope object */
struct Arguments {
	bool                    dryrun;             /* make a dry run */
	int                     quiet;              /* level of quietness */
	bool                    remove;             /* remove xattrs */
	bool                    summarize;          /* show summary at end of program */
	bool                    duplicates;         /* use xattrs to find duplicates */
} arg;
179
/* counters; zero-initialized as a file-scope object, printed by SUMMERIZE() */
struct Counters {
	/* error counters */
	int                     errs;               /* all errors */
	int                     errNotRegular;      /* not a regular file errors */
	int                     errOpening;         /* errors opening file */
	int                     errWritingXattr;    /* errors when performing xattr operations */
	int                     errGeneric;         /* generic errors */

	/* file state counters */
	int                     total;              /* all non error counters */
	int                     ok;                 /* checksum and mtime both match */
	int                     disrupted;          /* file was changed during hash computation */
	int                     corrupt;            /* checksum differs while mtime matches */
	int                     malformed;          /* xattrs cannot be read in their intended format */
	int                     outdated;           /* checksum differs and mtime is newer */
	int                     backdated;          /* checksum differs and mtime is older */
	int                     touched;            /* checksum matches but mtime doesn't */
	int                     new;                /* file has no prior checksum or mtime stored */
} cnt;
198
199/* global variables */
200struct Global {
201	const char             *prg;                /* program name */
202	dn_t                   *dup_head;           /* head of Duplicate nodes */
203} gl;
204
/**
 * Converts binary data into hex format.
 *
 * @param bin               The binary data
 * @param len               The size of the data
 * @param hex               The result of the converted data
 */
static void bin2hex(unsigned char *bin, size_t len, char *hex);
213
/**
 * Compares two checksums (binary SHA256) to see if they are equal or not.
 *
 * @param cs1               The first checksum
 * @param cs2               The second checksum
 *
 * @returns                 Value < 0 to indicate that cs1 is less than cs2
 * @returns                 Value > 0 to indicate that cs1 is greater than cs2
 * @returns                 Value = 0 to indicate that cs1 is equal to cs2
 */
static int cscmp(const unsigned char *cs1, const unsigned char *cs2);
225
226/**
227 * Recursively deallocates the memory previously allocated by a linked
228 * duplicate list.
229 *
230 * @param head_ref          Reference to first (initial call) or current node
231 */
232static void dup_free(dn_t **head_ref);
233
234/**
235 * Divides the nodes in the given linked list into front and back halves
236 * referenced by the two reference pointers. If the length is odd, the extra
237 * node ends by design up in the front list. As list length is unknown a double
238 * step pointer is used together with a single step pointer resulting in
239 * the single step pointer referencing the midpoint of the list as the double
240 * step pointer reaches the end of the list.
241 *
242 * @param src               Pointer to the source list to be split
243 * @param front_ref         Reference pointer to the resulting front list
244 * @param back_ref          Retrieves pointer to the resulting back list
245 */
246static void dup_front_back_split(dn_t *src, dn_t **front_ref, dn_t **back_ref);
247
248/**
249 * Recursively performs the merge sort by first splitting lists, starting from
250 * the head of the list, into length of 1 or 0, then by comparing nodes in
251 * lists merges them together into one list once again.
252 *
253 * see: https://en.wikipedia.org/wiki/Merge_sort
254 *
255 * @param head_ref          Reference to head of each list
256 */
257static void dup_merge_sort(dn_t **head_ref);
258
259/**
260 * Print a linked list of duplicate nodes by first sorting it to then iterate
261 * and comparing the current node with the previous to identify duplicates of
262 * file content.
263 *
264 * @param head_ref          Reference to head of list to print
265 *
266 * @returns                 Returns EXIT_SUCCESS when done
267 */
268static int dup_print(dn_t **head_ref);
269
270/**
271 * Inserts a new node at the beginning of the linked duplicate list with
272 * attached filename and checksum from extended file attribute.
273 *
274 * @param head_ref          Reference to head of linked list
275 * @param fn                Name of the file
276 * @param fd                File descriptor of the file
277 * @param xa                Extended attribute of file
278 */
279static void dup_push(dn_t **head_ref, const char *fn, int fd, xa_t *xa);
280
281/**
282 * Recursively performs the sorting part of the merge sort, where checksums of
283 * nodes are compared from two lists being merged together based on comparison.
284 *
285 * see: https://en.wikipedia.org/wiki/Merge_sort
286 *
287 * @param a                 One of the lists being compared
288 * @param b                 The other list being compared to the first one
289 *
290 * @returns                 The resulting list from the sorted merge
291 */
292static dn_t* dup_sorted_merge(dn_t *a, dn_t *b);
293
/**
 * Retrieves and reports error types and messages. If the error type is of
 * fatal nature, the program will exit with EXIT_FAILURE.
 *
 * @param er                The type of error (see enum Error above)
 * @param fmt               The output format of the error message
 * @param ...               Various arguments to output in the message
 */
static void error(enum Error er, const char *fmt, ...);
303
304/**
305 * Final stage of processing a file which returns the state of the file being
306 * processed meanwhile metadata holders are being populated.
307 *
308 * @param fd                File descriptor of the file
309 * @param xa_s              Metadata holder of (stored) extended attributes
310 * @param xa_a              Metadata holder of (actual) extended attributes
311 *
312 * @returns                 The file state of the file being processed
313 */
314static enum FileState file_state(int fd, xa_t *xa_s, xa_t *xa_a);
315
/**
 * Processes arguments (file names) from command line.
 *
 * @param fn                Name of the file
 */
static void process_argument(const char *fn);
323
/**
 * Processes files and returns either error messages or other output if stated
 * to do so (see arg.quiet) based on their file state (see enum FileState
 * above).
 *
 * @param fn                Name of the file
 */
static void process_file(const char *fn);
332
/**
 * Reports on files and their file state.
 *
 * @param fn                Name of the file
 * @param st                The state of the file
 */
static void report(const char *fn, enum FileState st);
340
341/**
342 * Compute and retrieve metadata for the checksum based on file content
343 * together with the date modified.
344 *
345 * @param fd                File descriptor of the file
346 * @param xa                Metadata holder of extended attributes
347 *
348 * @returns                 True if no error occurred
349 * @returns                 False if an error occurred
350 */
351static bool xa_compute(int fd, xa_t *xa);
352
/**
 * Computes the hash checksum of a file based on the SHA256 hashing algorithm.
 *
 * @param fd                File descriptor of the file
 * @param cs                Pointer for the checksum
 */
static void xa_hash(int fd, unsigned char *cs);
360
361/**
362 * Reads and retrieves extended attributes as metadata from a file.
363 *
364 * @param fd                File descriptor of the file
365 * @param xa                Metadata holder of extended attributes
366 *
367 * @returns                 True if no error occurred
368 * @returns                 False if an error occurred (malformed data)
369 */
370static bool xa_read(int fd, xa_t *xa);
371
372/**
373 *  Writes, or removes if stated (see arg.remove), extended attributes as
374 *  metadata to a file.
375 *
376 * @param fd               File descriptor of the file
377 * @param xa                Metadata holder of extended attributes
378 *
379 * @returns                 True if no error occurred
380 * @returns                 False if an error occurred (write error)
381 */
382static bool xa_write(int fd, const xa_t *xa);
383
384#endif /* !ACST_H */