wikid

[discontinued] A quick and simple CLI program for downloading and rendering Wikipedia pages in the terminal.
git clone https://noxz.tech/git/wikid.git
Log | Files | README | LICENSE

wikid.c
/*
 * Downloads and renders wikipedia pages in your terminal, with some formatting
 * Copyright (C) 2018 z0noxz, <chris@noxz.tech>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <curl/curl.h>

#include "util.h"
#include "term.h"
#include "config.h"

/* macros */

/* number of elements in a real array (not a pointer) */
#define LENGTH(x)       (sizeof(x) / sizeof((x)[0]))

/* width, in columns, of `x` indentation levels */
#define PADDING(x)      ((x) * _PADDING)

/*
 * index of the element `x` points at within the array starting at `y`;
 * pointer subtraction already counts in elements, so no further division by
 * the element size is needed. The result is unsigned, as it was before.
 */
#define INDEX_OF(x, y)  ((size_t)((x) - (y)))

/* function declarations */

/* builds the request URL for a subject; the caller frees the result */
char *get_wiki_url(char *subject);

/* fetches the URL into a temporary file; returns CURLE_OK (0) on success */
int download_wiki(char *url);

/* writes one output row, indented by `padding` levels */
void print_line(int padding, const char *format, char *line);

/* formats and prints one line of the extract */
void handle_line(char *input);

/* renders the downloaded extract; returns non-zero on success */
int print_wiki(char *subject);

39/* variables */
40const char  *usage              = "usage: wikid [-hlrsSt] <subject>";
41const char  *webpage            = services[0].template;
42unsigned    total_line_count    = 0;
43unsigned    section_line_count  = 0;
44bool        blank_line          = false;
45bool        pre_heading         = true;
46char        global_options      = '\0';
47unsigned    terminal_width;
48int         temp_file_descriptor;
49
50/* function implementations */
51char *get_wiki_url(char *subject)
52{
53    char *url = (char*)malloc(2048 * sizeof(char));
54
55    strcpy(url, webpage);
56    strcat(url, subject);
57
58    memmove(url + LANG_POS, language, 2);
59    string_replace(url, ' ', '_');
60
61    return url;
62}
63
64int download_wiki(char *url)
65{
66    int         state = 0;
67    char        error[CURL_ERROR_SIZE];
68    FILE        *fptr;
69    CURL        *curl;
70    CURLcode    status;
71
72    temp_file_descriptor = mkstemp(temp_file);
73    if (temp_file_descriptor == -1) {
74        fprintf(stderr, "%s\n", "Can't open tmp file");
75        return CURLE_FAILED_INIT;
76    }
77    fptr = fdopen(dup(temp_file_descriptor), "w");
78
79    curl_global_init(CURL_GLOBAL_ALL);
80    curl = curl_easy_init();
81
82    if (curl) {
83        if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error) != CURLE_OK) {
84            fprintf(stderr, "%s\n", "Failed to set an error buffer for curl");
85            state = -1;
86        } else if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK) {
87            fprintf(stderr, "%s\n", "Failed to set the URL for curl");
88            state = -1;
89        } else if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK) {
90            fprintf(stderr, "%s\n", "Failed to set write function for curl");
91            state = -1;
92        } else if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, fptr) != CURLE_OK) {
93            fprintf(stderr, "%s\n", "Failed to set output for curl");
94            state = -1;
95        } else if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK) {
96            fprintf(stderr, "%s\n", "Failed to set follow location for curl");
97            state = -1;
98        } else if ((status = curl_easy_perform(curl)) && status != CURLE_OK) {
99            fprintf(stderr, "%s\n", curl_easy_strerror(status));
100            state = status;
101        }
102
103        curl_easy_cleanup(curl);
104        curl_global_cleanup();
105    }
106
107    if (fptr)
108        fclose(fptr);
109
110    return state;
111}
112
113void print_line(int padding, const char *format, char *line)
114{
115    if (padding > 0)
116        fprintf(stdout, "%*c", PADDING(padding), ' ');
117
118    if (!((global_options >> 1) & 1U))
119        fprintf(stdout, format, line);
120    else
121        fprintf(stdout, "%s\n", line);
122}
123
124void handle_line(char *input)
125{
126    char    *temp_line  = (char*)malloc(strlen(input) + 1 * (sizeof(char)));
127    char    *ptr        = temp_line;
128    char    *line       = NULL;
129    char    *next       = NULL;
130    char    heading     = 0x0;
131    int     b           = 0;    /* beginning    */
132    int     e           = 0;    /* end          */
133    int     limit       = terminal_width - PADDING(3);
134
135    strcpy(temp_line, input);
136    string_trim(temp_line);
137
138    /* make sure first section line contains text */
139    if (*temp_line || (section_line_count > 0 && !blank_line)) {
140
141        /* check and handle headings, (h2-h4) */
142        if (*ptr++ == '=' && *ptr++ == '=' && *ptr) {
143            if (*ptr++ == '=' && *ptr-- == '=') {
144                string_remove(temp_line, "====");
145                heading = 0x4;
146            } else if (*ptr == '=') {
147                string_remove(temp_line, "===");
148                heading = 0x3;
149            } else {
150                ptr--;
151                /* capitalize 'lowest' heading */
152                while (*ptr) {
153                    if (*ptr > 0x60 && *ptr < 0x7b)
154                        *ptr -= 0x20;
155                    if ((unsigned char)*(ptr - 1) == 0xc3
156                    && (unsigned char)*ptr >= 0xa0
157                    && (unsigned char)*ptr <= 0xbe)
158                        *ptr -= 0x20;
159                    ptr++;
160                }
161                string_remove(temp_line, "==");
162                heading = 0x2;
163            }
164
165            pre_heading = false;
166            section_line_count = -1;
167        }
168
169        /* prevent lines from breaking in the middle of words */
170        ptr = temp_line;
171        while (*ptr) {
172
173            if (*ptr == ' ')
174                e = INDEX_OF(ptr, temp_line);
175
176            if (INDEX_OF(ptr, temp_line) - b >= limit) {
177                if (e - b > 0)
178                    temp_line[e] = '\n';
179                b = e + 1;
180            }
181
182            ptr++;
183        }
184
185        string_trim(temp_line);
186        line = temp_line;
187
188        /* format and output lines */
189        while (line) {
190            next = strchr(line, '\n');
191            if (next)
192                *next = '\0';
193
194            if (pre_heading) {
195                print_line(2, "\033[3m%s\033[0m\n", line);
196            } else if (heading) {
197                switch (heading) {
198                case 0x2:
199                    print_line(0, "\033[1m%s\033[0m\n", line);
200                    break;
201                case 0x3:
202                    print_line(1, "\033[1m%s\033[0m\n", line);
203                    break;
204                default:
205                    print_line(2, "\033[1m\033[3m%s\033[0m\n", line);
206                    break;
207                }
208            } else {
209                print_line(2, "%s\n", line);
210            }
211
212            if (next)
213                *next = '\n';
214            line = next ? next + 1 : NULL;
215        }
216
217        blank_line = *temp_line == '\0';
218        total_line_count++;
219        section_line_count++;
220    }
221
222    free(temp_line);
223}
224
225int print_wiki(char *subject)
226{
227    const char  beginning[] = "\"extract\":\"";
228    bool        failure     = false;
229    FILE        *fptr       = NULL;
230    char        *content    = NULL;
231    char        *line       = NULL;
232    char        *next       = NULL;
233    char        *ptr        = NULL;
234    long        size;
235
236    fptr = fdopen(dup(temp_file_descriptor), "rb");
237    if (fptr == NULL)
238        failure = true;
239
240    if (!failure) {
241        fseek(fptr, 0, SEEK_END);
242        size = ftell(fptr);
243        rewind(fptr);
244        content = malloc(size + 1 * (sizeof(char)));
245        fread(content, sizeof(char), size, fptr);
246        fclose(fptr);
247    }
248
249    if (content == NULL)
250        failure = true;
251
252    if (!failure) {
253        ptr = strstr(content, beginning);
254        if (ptr != NULL && *ptr)
255            memmove(content,
256                ptr + strlen(beginning),
257                strlen(ptr) - strlen(beginning)
258            );
259        else
260            failure = true;
261    }
262
263    if (!failure) {
264        ptr = strchr(content, '}');
265        if (ptr != NULL && *ptr--)
266            *ptr = '\0';
267        else
268            failure = true;
269    }
270
271    if (!failure) {
272        dictionary_replace(content, cleanup_mappings, LENGTH(cleanup_mappings));
273
274        if ((global_options >> 0) & 1U) {
275            fprintf(stdout, "%s\n", content);
276        } else {
277            string_remove_redundent_spaces(content);
278            unicode_decode(content);
279
280            if (!((global_options >> 1) & 1U))
281                fprintf(stdout, "\033[4m%s:\033[0m\n\n", subject);
282
283            line = content;
284            while (line) {
285                next = strchr(line, '\n');
286                if (next)
287                    *next = '\0';
288
289                handle_line(line);
290
291                if (next)
292                    *next = '\n';
293                line = next ? next + 1 : NULL;
294            }
295        }
296    }
297
298    if (content != NULL)
299        free(content);
300
301    return !failure;
302}
303
304int main(int argc, char *argv[])
305{
306    int     state       = 0;
307    char    *input      = NULL;
308    char    *url        = NULL;
309    int     buffer_size = 80;
310    char    buffer[buffer_size];
311    int     input_size;
312    int     h;
313    char    c;
314
315    /* validate input from STDIN */
316    if (!isatty(fileno(stdin)) && fgets(buffer, buffer_size, stdin) != NULL) {
317        input_size  = strlen(buffer);
318        input       = (char*)malloc(input_size);
319        input[0]    = '\0';
320        strcat(input, buffer);
321
322    /* validate input from CLI */
323    } else if (argc >= 2) {
324        input_size  = strlen(argv[argc - 1]);
325        input       = (char*)malloc(input_size);
326        input[0]    = '\0';
327        strcat(input, argv[argc - 1]);
328
329    /* no valid input, so show usage */
330    } else {
331        fprintf(stderr, "%s\n", usage);
332        state = 1;
333    }
334
335    if (state == 0) {
336        while ((c = getopt(argc, argv, "hl:rs:St")) != -1) {
337            switch (c) {
338            case 'l':
339                memmove(language, optarg, 2);
340                break;
341            case 'r':
342                global_options |= 1U << 0;
343                break;
344            case 's':
345                h = atoi(optarg);
346                if (h < 0 || h >= LENGTH(services))
347                    h = 0;
348                webpage = services[h].template;
349                break;
350            case 'S':
351                for (h = 0; h < LENGTH(services); h++)
352                    fprintf(stderr, "%d: %s\n", h, services[h].name);
353                return 0;
354            case 't':
355                global_options |= 1U << 1;
356                break;
357            case 'h':
358                fprintf(stderr, "%s\n", usage);
359                fprintf(stderr, "Finds a wiki subject and prints it.\n");
360                fprintf(stderr, " -h          Print this help text and exit\n");
361                fprintf(stderr, " -l CODE     Language code in ISO 639-1 format\n");
362                fprintf(stderr, " -r          Print wiki in raw format\n");
363                fprintf(stderr, " -s ID       Specify service ID\n");
364                fprintf(stderr, " -S          List services and IDs and exit\n");
365                fprintf(stderr, " -t          Print wiki in text only format\n");
366
367                free(input);
368                return 0;
369            default:
370                fprintf(stderr, "%s\n", usage);
371                state = 1;
372            }
373        }
374    }
375
376    /* initialization is done, so proceed with the rest of the program... */
377    terminal_width = get_terminal_width();
378
379    if (state == 0)
380        url = get_wiki_url(input);
381
382    if (state == 0 && download_wiki(url) != CURLE_OK) {
383        fprintf(stderr, "%s\n", "Failed to download the wiki");
384        state = 1;
385    }
386
387    if (state == 0 && !print_wiki(input)) {
388        fprintf(stderr, "%s\n", "Failed to print the wiki");
389        state = 1;
390    }
391
392    if (temp_file_descriptor != -1) {
393        close(temp_file_descriptor);
394        unlink(temp_file);
395    }
396
397    if (input != NULL)
398        free(input);
399
400    if (url != NULL)
401        free(url);
402
403    return state;
404}