acst.h
1/**
2 * Copyright (C) 2022 Chris Noxz
3 * Author(s): Chris Noxz <chris@noxz.tech>
4 *
5 * This program is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation, either version 3 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#ifndef ACST_H
20#define ACST_H
21
22#include <limits.h> // for PATH_MAX
23#include <stdbool.h> // for bool
24#include <stdio.h> // for printf
25#include <stdlib.h> // for exit, EXIT_SUCCESS, size_t
26
/* sizes */
#define BUFSZ           8192    /* buffer size to use in hash computation */
#define SHA256_BYTES    32      /* size of a SHA256 hash in bytes */
#define CSSZ            64      /* size of a SHA256 hash stored as hex */
#define TSSZ            32      /* allocation size for the timespan holder */

/* extended attribute names and timestamp layout */
#define XATTR_CS        "user.acst.cs"      /* name of the checksum xattr */
#define XATTR_TS        "user.acst.ts"      /* name of the timestamp xattr */
#define FRMT_TS         "%010llu.%09lu"     /* format of the timestamp xattr */
34
/* string values used in program, could be changed for language compliance */

/* comparison output: each line takes a string followed by the FRMT_TS values */
#define STR_CMP_FRMT    " stored: %s " FRMT_TS "\n" \
                        " actual: %s " FRMT_TS "\n"

/* duplicate listing */
#define STR_DPN_FRMT    " %s\n"
#define STR_DPH_FRMT    "<dup> %s\n" STR_DPN_FRMT
#define STR_DPS_FRMT    "Found %d files with duplicates.\n"

/* error messages */
#define STR_ERR_OOM     "Error: out of memory\n"
#define STR_ERR_PMAX    "Error: PATH_MAX reached before end\n"
#define STR_ERR_ABNO    "Error: abnormal changes detected \"%s\"\n"
#define STR_ERR_OPNF    "Error: could not open file \"%s\"\n"
#define STR_ERR_REGF    "Error: not a regular file \"%s\"\n"
#define STR_ERR_HASH    "Error: could not compute hash \"%s\"\n"
#define STR_ERR_XARM    "Error: could not remove extended attributes from file \"%s\": %s\n"
/* a plain '%' needs no backslash; "\%" is not a valid C escape sequence */
#define STR_ERR_XAWR    "Error: could not write extended attributes to file \"%s\"\n"

/* status output: "<state> filename" followed by the state names */
#define STR_OUT_STAT    "<%s> %s\n"
#define STR_OUT_NEW     "new"
#define STR_OUT_OK      "ok"
#define STR_OUT_TOUC    "hash ok"
#define STR_OUT_OUTD    "outdated"
#define STR_OUT_BACK    "backdated"
#define STR_OUT_CORR    "corrupt"
#define STR_OUT_MALF    "malformed"
#define STR_OUT_DISR    "disrupted"
#define STR_OUT_REMO    "xattr removed"
59
/* yields the smaller of two values; the selected argument is evaluated twice */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/**
 * Prints the usage line and terminates the program with the given exit code.
 * The full help text is only printed when code equals EXIT_SUCCESS.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * single statement and can be used as the unbraced body of an if/else. The
 * argument is evaluated exactly once.
 *
 * NOTE(review): struct Arguments carries a "duplicates" flag, but no option
 * for it is listed in the help text below -- confirm whether one is missing.
 *
 * @param code  The exit code handed to exit()
 */
#define USAGE(code) \
    do { \
        const int usage_code_ = (code); \
        printf("Usage: %s [OPTION]... <FILE>...\n", gl.prg); \
        if (usage_code_ == EXIT_SUCCESS) { \
            printf( \
                "\n" \
                "Tracks changes and corruption in files using xattr-based checksums.\n" \
                "\n" \
                "Positional arguments:\n" \
                " FILE file(s) to track\n" \
                "\n" \
                "Optional arguments:\n" \
                " -h show this help message and exit\n" \
                " -m summarize information at end of execution\n" \
                " -n don't create or update any file attributes\n" \
                " -q quiet mode, lowers verbosity\n" \
                " -V output version information and exit\n" \
                " -x remove extended attributes from file(s)\n" \
                "\n" \
                "Full documentation <https://noxz.tech/software/acst>\n" \
            ); \
        } \
        exit(usage_code_); \
    } while (0)
/**
 * Prints the program name followed by VERSION and exits with EXIT_SUCCESS.
 *
 * VERSION is not defined in this header; since it is pasted between string
 * literals it has to expand to a string literal (presumably supplied by the
 * build -- confirm).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * single statement and can be used as the unbraced body of an if/else.
 */
#define VER() \
    do { \
        printf("%s " VERSION "\n", gl.prg); \
        exit(EXIT_SUCCESS); \
    } while (0)
/**
 * Prints the summary of all counters (see struct Counters).
 *
 * When arg.remove is set only the removal related counters are shown, where
 * "Xattr removed" is taken from cnt.ok. Otherwise every file state counter is
 * listed. In both cases "Xattr operations" is taken from cnt.errWritingXattr.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * single statement and can be used as the unbraced body of an if/else.
 */
#define SUMMERIZE() \
    do { \
        if (arg.remove) { \
            printf( "\n" \
                "Total files processed : %d\n" \
                " Xattr removed : %d\n" \
                "Total errors : %d\n" \
                " Opening files : %d\n" \
                " Non-regular files : %d\n" \
                " Xattr operations : %d\n" \
                " Generic : %d\n" \
                , cnt.total, cnt.ok \
                , cnt.errs, cnt.errOpening, cnt.errNotRegular \
                , cnt.errWritingXattr, cnt.errGeneric); \
        } else { \
            printf( "\n" \
                "Total files processed : %d\n" \
                " New : %d\n" \
                " Ok : %d\n" \
                " Hash ok : %d\n" \
                " Outdated : %d\n" \
                " Backdated : %d\n" \
                " Corrupt : %d\n" \
                " Malformed : %d\n" \
                " Disrupted : %d\n" \
                "Total errors : %d\n" \
                " Opening files : %d\n" \
                " Non-regular files : %d\n" \
                " Xattr operations : %d\n" \
                " Generic : %d\n" \
                , cnt.total, cnt.new, cnt.ok, cnt.touched \
                , cnt.outdated, cnt.backdated, cnt.corrupt \
                , cnt.malformed, cnt.disrupted \
                , cnt.errs, cnt.errOpening, cnt.errNotRegular \
                , cnt.errWritingXattr, cnt.errGeneric); \
        } \
    } while (0)
125
126/* helper variables used by various functions */
127static char ts[TSSZ]; /* timestamp holder */
128static const char hextab[] = "0123456789abcdef"; /* hex digit table */
129static const char zSHA256[] = "0000000000000000" /* zeroed out SHA256 */
130 "0000000000000000" /* why 4 lines?... */
131 "0000000000000000"
132 "0000000000000000"; /* ...it looks better! */
133
/* error categories passed to error() */
enum Error {
    ER_NOT_REGULAR      = 0,    /* not a regular file error */
    ER_OPENING          = 1,    /* error opening file */
    ER_XATTR_OPERATION  = 2,    /* error when performing xattr operations */
    ER_GENERIC          = 3,    /* generic error */
    ER_FATAL            = 4     /* fatal error, should result in program ending */
};
141
/* states a processed file can end up in */
enum FileState {
    FS_OK               = 0,    /* checksum and mtime both match */
    FS_DISRUPTED        = 1,    /* file was changed during hash computation */
    FS_CORRUPT          = 2,    /* checksum differs while mtime matches */
    FS_MALFORMED        = 3,    /* xattrs cannot be read in their intended format */
    FS_TOUCHED          = 4,    /* checksum matches but mtime doesn't */
    FS_NEW              = 5,    /* file has no prior checksum or mtime stored */
    FS_OUTDATED         = 6,    /* checksum differs and mtime is newer */
    FS_BACKDATED        = 7,    /* checksum differs and mtime is older */
    FS_REMOVED          = 8,    /* xattrs have been removed from file */
    FS_REMOVED_ERROR    = 9,    /* removal of xattrs failed */
    FS_ERROR            = 10    /* general error */
};
155
156typedef struct DuplicateNode {
157 unsigned char cs[SHA256_BYTES]; /* checksum */
158 char fn[PATH_MAX + 1]; /* file name */
159 struct DuplicateNode *next; /* reference pointer to next dup */
160} dn_t;
161
162/* extended attribute/metadata structure */
163typedef struct ExtendedAttribute {
164 unsigned long long s; /* seconds */
165 unsigned long ns; /* nanoseconds */
166 char hex[CSSZ + 1]; /* checksum (hex) */
167 unsigned char cs[SHA256_BYTES]; /* checksum (binary) */
168 int tcmp; /* timespan comparison holder */
169} xa_t;
170
/* arguments collected from command line */
struct Arguments {
    bool dryrun;        /* make a dry run */
    int quiet;          /* level of quietness */
    bool remove;        /* remove xattrs */
    bool summarize;     /* show summary at end of program */
    bool duplicates;    /* use xattrs to find duplicates */
};

/* the one instance holding the collected arguments */
struct Arguments arg;
179
/* counters */
struct Counters {
    /* error counters */
    int errs;               /* all errors */
    int errNotRegular;      /* not a regular file errors */
    int errOpening;         /* errors opening file */
    int errWritingXattr;    /* errors when performing xattr operations */
    int errGeneric;         /* generic errors */

    /* file state counters */
    int total;              /* all non error counters */
    int ok;                 /* checksum and mtime both match */
    int disrupted;          /* file was changed during hash computation */
    int corrupt;            /* checksum differs while mtime matches */
    int malformed;          /* xattrs cannot be read in their intended format */
    int outdated;           /* checksum differs and mtime is newer */
    int backdated;          /* checksum differs and mtime is older */
    int touched;            /* checksum matches but mtime doesn't */
    int new;                /* file has no prior checksum or mtime stored */
};

/* the one instance holding all counters */
struct Counters cnt;
198
199/* global variables */
200struct Global {
201 const char *prg; /* program name */
202 dn_t *dup_head; /* head of Duplicate nodes */
203} gl;
204
/**
 * Converts binary data into hex format.
 *
 * NOTE(review): the required size of hex is not stated here; presumably two
 * characters per input byte plus a terminator -- confirm against the
 * definition.
 *
 * @param bin   The binary data
 * @param len   The size of the binary data
 * @param hex   The result of the converted data
 */
static void bin2hex(unsigned char *bin, size_t len, char *hex);
213
/**
 * Compares two checksums (binary SHA256).
 *
 * NOTE(review): no length is passed; presumably both buffers hold
 * SHA256_BYTES bytes -- confirm against the definition.
 *
 * @param cs1   The first checksum
 * @param cs2   The second checksum
 *
 * @returns     Value < 0 to indicate that cs1 is less than cs2
 * @returns     Value > 0 to indicate that cs1 is greater than cs2
 * @returns     Value = 0 to indicate that cs1 is equal to cs2
 */
static int cscmp(const unsigned char *cs1, const unsigned char *cs2);
225
/**
 * Recursively deallocates the memory previously allocated for a linked
 * duplicate list.
 *
 * @param head_ref  Reference to the first node (initial call) or to the
 *                  current node (recursive calls)
 */
static void dup_free(dn_t **head_ref);
233
/**
 * Divides the nodes in the given linked list into front and back halves
 * referenced by the two reference pointers. If the length is odd, the extra
 * node ends up, by design, in the front list. As the list length is unknown,
 * a double step pointer is used together with a single step pointer: when the
 * double step pointer reaches the end of the list, the single step pointer
 * references the midpoint of the list.
 *
 * @param src       Pointer to the source list to be split
 * @param front_ref Reference pointer to the resulting front list
 * @param back_ref  Reference pointer to the resulting back list
 */
static void dup_front_back_split(dn_t *src, dn_t **front_ref, dn_t **back_ref);
247
/**
 * Recursively performs the merge sort: starting from the head of the list,
 * lists are first split until they have a length of 1 or 0, and are then
 * merged back together into one list by comparing their nodes.
 *
 * see: https://en.wikipedia.org/wiki/Merge_sort
 *
 * @param head_ref  Reference to head of each list
 */
static void dup_merge_sort(dn_t **head_ref);
258
/**
 * Prints a linked list of duplicate nodes by first sorting it and then
 * iterating over it, comparing the current node with the previous one to
 * identify duplicates of file content.
 *
 * @param head_ref  Reference to head of list to print
 *
 * @returns         Returns EXIT_SUCCESS when done
 */
static int dup_print(dn_t **head_ref);
269
/**
 * Inserts a new node at the beginning of the linked duplicate list, with the
 * filename and the checksum from the extended file attribute attached.
 *
 * @param head_ref  Reference to head of linked list
 * @param fn        Name of the file
 * @param fd        File descriptor of the file
 * @param xa        Extended attribute of file
 */
static void dup_push(dn_t **head_ref, const char *fn, int fd, xa_t *xa);
280
/**
 * Recursively performs the merging part of the merge sort: the checksums of
 * the nodes of two lists are compared, and the lists are merged together
 * based on that comparison.
 *
 * see: https://en.wikipedia.org/wiki/Merge_sort
 *
 * @param a     One of the lists being compared
 * @param b     The other list being compared to the first one
 *
 * @returns     The resulting list from the sorted merge
 */
static dn_t* dup_sorted_merge(dn_t *a, dn_t *b);
293
/**
 * Retrieves and reports error types and messages. If the error type is of
 * fatal nature (ER_FATAL), the program will exit with EXIT_FAILURE.
 *
 * @param er    The type of error (see enum Error above)
 * @param fmt   The output format of the error message
 * @param ...   Various arguments to output in the message
 */
static void error(enum Error er, const char *fmt, ...);
303
/**
 * Final stage of processing a file: returns the state of the file being
 * processed while the metadata holders are being populated.
 *
 * @param fd    File descriptor of the file
 * @param xa_s  Metadata holder of (stored) extended attributes
 * @param xa_a  Metadata holder of (actual) extended attributes
 *
 * @returns     The file state of the file being processed
 */
static enum FileState file_state(int fd, xa_t *xa_s, xa_t *xa_a);
315
/**
 * Processes an argument (file name) given on the command line.
 *
 * @param fn    Name of the file
 */
static void process_argument(const char *fn);
323
/**
 * Processes a file and, based on its file state (see enum FileState above),
 * emits either an error message or, if stated to do so (see arg.quiet), other
 * output.
 *
 * @param fn    Name of the file
 */
static void process_file(const char *fn);
332
/**
 * Reports on a file and its file state.
 *
 * @param fn    Name of the file
 * @param st    The state of the file (see enum FileState above)
 */
static void report(const char *fn, enum FileState st);
340
/**
 * Computes and retrieves the metadata of a file: the checksum based on the
 * file content together with the date modified.
 *
 * @param fd    File descriptor of the file
 * @param xa    Metadata holder of extended attributes
 *
 * @returns     True if no error occurred
 * @returns     False if an error occurred
 */
static bool xa_compute(int fd, xa_t *xa);
352
/**
 * Computes the hash checksum of a file based on the SHA256 hashing algorithm.
 *
 * NOTE(review): cs presumably receives the binary checksum and needs room for
 * SHA256_BYTES bytes -- confirm against the definition.
 *
 * @param fd    File descriptor of the file
 * @param cs    Pointer for the checksum
 */
static void xa_hash(int fd, unsigned char *cs);
360
/**
 * Reads and retrieves the extended attributes of a file as metadata.
 *
 * @param fd    File descriptor of the file
 * @param xa    Metadata holder of extended attributes
 *
 * @returns     True if no error occurred
 * @returns     False if an error occurred (malformed data)
 */
static bool xa_read(int fd, xa_t *xa);
371
/**
 * Writes extended attributes as metadata to a file, or removes them from the
 * file if stated to do so (see arg.remove).
 *
 * @param fd    File descriptor of the file
 * @param xa    Metadata holder of extended attributes
 *
 * @returns     True if no error occurred
 * @returns     False if an error occurred (write error)
 */
static bool xa_write(int fd, const xa_t *xa);
383
384#endif /* !ACST_H */