wikid.c
1/*
2 * Downloads and renders wikipedia pages in your terminal, with some formatting
3 * Copyright (C) 2018 z0noxz, <chris@noxz.tech>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#include <stdlib.h>
19#include <string.h>
20#include <unistd.h>
21#include <curl/curl.h>
22
23#include "util.h"
24#include "term.h"
25#include "config.h"
26
/* macros */

/* Number of elements in x; x must be a real array, not a pointer. */
#define LENGTH(x)       (sizeof(x) / sizeof((x)[0]))

/* Width in columns of x indentation levels (_PADDING columns per level). */
#define PADDING(x)      ((x) * _PADDING)

/*
 * Element index of pointer x inside the array that starts at y.
 * Pointer subtraction already counts elements, so the difference must not be
 * divided by the element size again. The result stays unsigned (size_t), as
 * the previous expression was, so comparisons mixing it with int values keep
 * their existing semantics.
 */
#define INDEX_OF(x, y)  ((size_t)((x) - (y)))
31
/* function declarations */

/* Builds the request URL for subject in a malloc'd buffer; caller frees it. */
char *get_wiki_url(char *subject);

/*
 * Downloads url into a temporary file created from temp_file.
 * Returns 0 on success and a non-zero value (-1 or a curl status) otherwise.
 */
int download_wiki(char *url);

/*
 * Prints line to stdout after `padding` indentation levels, using format
 * unless the text-only option bit is set (then plain "%s\n" is used).
 */
void print_line(int padding, const char *format, char *line);

/* Trims, classifies (heading or body), word-wraps and prints one input line. */
void handle_line(char *input);

/*
 * Reads the downloaded file back, extracts the "extract" value and prints it
 * under a header naming subject. Returns non-zero on success, 0 on failure.
 */
int print_wiki(char *subject);
38
39/* variables */
40const char *usage = "usage: wikid [-hlrsSt] <subject>";
41const char *webpage = services[0].template;
42unsigned total_line_count = 0;
43unsigned section_line_count = 0;
44bool blank_line = false;
45bool pre_heading = true;
46char global_options = '\0';
47unsigned terminal_width;
48int temp_file_descriptor;
49
50/* function implementations */
51char *get_wiki_url(char *subject)
52{
53 char *url = (char*)malloc(2048 * sizeof(char));
54
55 strcpy(url, webpage);
56 strcat(url, subject);
57
58 memmove(url + LANG_POS, language, 2);
59 string_replace(url, ' ', '_');
60
61 return url;
62}
63
64int download_wiki(char *url)
65{
66 int state = 0;
67 char error[CURL_ERROR_SIZE];
68 FILE *fptr;
69 CURL *curl;
70 CURLcode status;
71
72 temp_file_descriptor = mkstemp(temp_file);
73 if (temp_file_descriptor == -1) {
74 fprintf(stderr, "%s\n", "Can't open tmp file");
75 return CURLE_FAILED_INIT;
76 }
77 fptr = fdopen(dup(temp_file_descriptor), "w");
78
79 curl_global_init(CURL_GLOBAL_ALL);
80 curl = curl_easy_init();
81
82 if (curl) {
83 if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error) != CURLE_OK) {
84 fprintf(stderr, "%s\n", "Failed to set an error buffer for curl");
85 state = -1;
86 } else if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK) {
87 fprintf(stderr, "%s\n", "Failed to set the URL for curl");
88 state = -1;
89 } else if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK) {
90 fprintf(stderr, "%s\n", "Failed to set write function for curl");
91 state = -1;
92 } else if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, fptr) != CURLE_OK) {
93 fprintf(stderr, "%s\n", "Failed to set output for curl");
94 state = -1;
95 } else if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK) {
96 fprintf(stderr, "%s\n", "Failed to set follow location for curl");
97 state = -1;
98 } else if ((status = curl_easy_perform(curl)) && status != CURLE_OK) {
99 fprintf(stderr, "%s\n", curl_easy_strerror(status));
100 state = status;
101 }
102
103 curl_easy_cleanup(curl);
104 curl_global_cleanup();
105 }
106
107 if (fptr)
108 fclose(fptr);
109
110 return state;
111}
112
113void print_line(int padding, const char *format, char *line)
114{
115 if (padding > 0)
116 fprintf(stdout, "%*c", PADDING(padding), ' ');
117
118 if (!((global_options >> 1) & 1U))
119 fprintf(stdout, format, line);
120 else
121 fprintf(stdout, "%s\n", line);
122}
123
124void handle_line(char *input)
125{
126 char *temp_line = (char*)malloc(strlen(input) + 1 * (sizeof(char)));
127 char *ptr = temp_line;
128 char *line = NULL;
129 char *next = NULL;
130 char heading = 0x0;
131 int b = 0; /* beginning */
132 int e = 0; /* end */
133 int limit = terminal_width - PADDING(3);
134
135 strcpy(temp_line, input);
136 string_trim(temp_line);
137
138 /* make sure first section line contains text */
139 if (*temp_line || (section_line_count > 0 && !blank_line)) {
140
141 /* check and handle headings, (h2-h4) */
142 if (*ptr++ == '=' && *ptr++ == '=' && *ptr) {
143 if (*ptr++ == '=' && *ptr-- == '=') {
144 string_remove(temp_line, "====");
145 heading = 0x4;
146 } else if (*ptr == '=') {
147 string_remove(temp_line, "===");
148 heading = 0x3;
149 } else {
150 ptr--;
151 /* capitalize 'lowest' heading */
152 while (*ptr) {
153 if (*ptr > 0x60 && *ptr < 0x7b)
154 *ptr -= 0x20;
155 if ((unsigned char)*(ptr - 1) == 0xc3
156 && (unsigned char)*ptr >= 0xa0
157 && (unsigned char)*ptr <= 0xbe)
158 *ptr -= 0x20;
159 ptr++;
160 }
161 string_remove(temp_line, "==");
162 heading = 0x2;
163 }
164
165 pre_heading = false;
166 section_line_count = -1;
167 }
168
169 /* prevent lines from breaking in the middle of words */
170 ptr = temp_line;
171 while (*ptr) {
172
173 if (*ptr == ' ')
174 e = INDEX_OF(ptr, temp_line);
175
176 if (INDEX_OF(ptr, temp_line) - b >= limit) {
177 if (e - b > 0)
178 temp_line[e] = '\n';
179 b = e + 1;
180 }
181
182 ptr++;
183 }
184
185 string_trim(temp_line);
186 line = temp_line;
187
188 /* format and output lines */
189 while (line) {
190 next = strchr(line, '\n');
191 if (next)
192 *next = '\0';
193
194 if (pre_heading) {
195 print_line(2, "\033[3m%s\033[0m\n", line);
196 } else if (heading) {
197 switch (heading) {
198 case 0x2:
199 print_line(0, "\033[1m%s\033[0m\n", line);
200 break;
201 case 0x3:
202 print_line(1, "\033[1m%s\033[0m\n", line);
203 break;
204 default:
205 print_line(2, "\033[1m\033[3m%s\033[0m\n", line);
206 break;
207 }
208 } else {
209 print_line(2, "%s\n", line);
210 }
211
212 if (next)
213 *next = '\n';
214 line = next ? next + 1 : NULL;
215 }
216
217 blank_line = *temp_line == '\0';
218 total_line_count++;
219 section_line_count++;
220 }
221
222 free(temp_line);
223}
224
225int print_wiki(char *subject)
226{
227 const char beginning[] = "\"extract\":\"";
228 bool failure = false;
229 FILE *fptr = NULL;
230 char *content = NULL;
231 char *line = NULL;
232 char *next = NULL;
233 char *ptr = NULL;
234 long size;
235
236 fptr = fdopen(dup(temp_file_descriptor), "rb");
237 if (fptr == NULL)
238 failure = true;
239
240 if (!failure) {
241 fseek(fptr, 0, SEEK_END);
242 size = ftell(fptr);
243 rewind(fptr);
244 content = malloc(size + 1 * (sizeof(char)));
245 fread(content, sizeof(char), size, fptr);
246 fclose(fptr);
247 }
248
249 if (content == NULL)
250 failure = true;
251
252 if (!failure) {
253 ptr = strstr(content, beginning);
254 if (ptr != NULL && *ptr)
255 memmove(content,
256 ptr + strlen(beginning),
257 strlen(ptr) - strlen(beginning)
258 );
259 else
260 failure = true;
261 }
262
263 if (!failure) {
264 ptr = strchr(content, '}');
265 if (ptr != NULL && *ptr--)
266 *ptr = '\0';
267 else
268 failure = true;
269 }
270
271 if (!failure) {
272 dictionary_replace(content, cleanup_mappings, LENGTH(cleanup_mappings));
273
274 if ((global_options >> 0) & 1U) {
275 fprintf(stdout, "%s\n", content);
276 } else {
277 string_remove_redundent_spaces(content);
278 unicode_decode(content);
279
280 if (!((global_options >> 1) & 1U))
281 fprintf(stdout, "\033[4m%s:\033[0m\n\n", subject);
282
283 line = content;
284 while (line) {
285 next = strchr(line, '\n');
286 if (next)
287 *next = '\0';
288
289 handle_line(line);
290
291 if (next)
292 *next = '\n';
293 line = next ? next + 1 : NULL;
294 }
295 }
296 }
297
298 if (content != NULL)
299 free(content);
300
301 return !failure;
302}
303
304int main(int argc, char *argv[])
305{
306 int state = 0;
307 char *input = NULL;
308 char *url = NULL;
309 int buffer_size = 80;
310 char buffer[buffer_size];
311 int input_size;
312 int h;
313 char c;
314
315 /* validate input from STDIN */
316 if (!isatty(fileno(stdin)) && fgets(buffer, buffer_size, stdin) != NULL) {
317 input_size = strlen(buffer);
318 input = (char*)malloc(input_size);
319 input[0] = '\0';
320 strcat(input, buffer);
321
322 /* validate input from CLI */
323 } else if (argc >= 2) {
324 input_size = strlen(argv[argc - 1]);
325 input = (char*)malloc(input_size);
326 input[0] = '\0';
327 strcat(input, argv[argc - 1]);
328
329 /* no valid input, so show usage */
330 } else {
331 fprintf(stderr, "%s\n", usage);
332 state = 1;
333 }
334
335 if (state == 0) {
336 while ((c = getopt(argc, argv, "hl:rs:St")) != -1) {
337 switch (c) {
338 case 'l':
339 memmove(language, optarg, 2);
340 break;
341 case 'r':
342 global_options |= 1U << 0;
343 break;
344 case 's':
345 h = atoi(optarg);
346 if (h < 0 || h >= LENGTH(services))
347 h = 0;
348 webpage = services[h].template;
349 break;
350 case 'S':
351 for (h = 0; h < LENGTH(services); h++)
352 fprintf(stderr, "%d: %s\n", h, services[h].name);
353 return 0;
354 case 't':
355 global_options |= 1U << 1;
356 break;
357 case 'h':
358 fprintf(stderr, "%s\n", usage);
359 fprintf(stderr, "Finds a wiki subject and prints it.\n");
360 fprintf(stderr, " -h Print this help text and exit\n");
361 fprintf(stderr, " -l CODE Language code in ISO 639-1 format\n");
362 fprintf(stderr, " -r Print wiki in raw format\n");
363 fprintf(stderr, " -s ID Specify service ID\n");
364 fprintf(stderr, " -S List services and IDs and exit\n");
365 fprintf(stderr, " -t Print wiki in text only format\n");
366
367 free(input);
368 return 0;
369 default:
370 fprintf(stderr, "%s\n", usage);
371 state = 1;
372 }
373 }
374 }
375
376 /* initialization is done, so proceed with the rest of the program... */
377 terminal_width = get_terminal_width();
378
379 if (state == 0)
380 url = get_wiki_url(input);
381
382 if (state == 0 && download_wiki(url) != CURLE_OK) {
383 fprintf(stderr, "%s\n", "Failed to download the wiki");
384 state = 1;
385 }
386
387 if (state == 0 && !print_wiki(input)) {
388 fprintf(stderr, "%s\n", "Failed to print the wiki");
389 state = 1;
390 }
391
392 if (temp_file_descriptor != -1) {
393 close(temp_file_descriptor);
394 unlink(temp_file);
395 }
396
397 if (input != NULL)
398 free(input);
399
400 if (url != NULL)
401 free(url);
402
403 return state;
404}