commit: 3b2c4179078be7a4051a666153fb60f2f106435b
parent: 88b6f190b56e8264b59c1b7a0ef440e15f3a2165
author: z0noxz <chris@noxz.tech>
date: Sun, 26 Aug 2018 14:08:30 +0200
Initial commit
9 files changed, 772 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,45 @@
+.POSIX:
+
+# wikid build rules.
+# VERSION, PREFIX, CFLAGS, LDFLAGS and CC are defined in config.mk.
+include config.mk
+
+SRC = util.c term.c wikid.c
+OBJ = ${SRC:.c=.o}
+
+all: options wikid
+
+# print the effective build settings
+options:
+	@echo wikid build options:
+	@echo "CFLAGS = ${CFLAGS}"
+	@echo "LDFLAGS = ${LDFLAGS}"
+	@echo "CC = ${CC}"
+
+.c.o:
+	@echo CC $<
+	@${CC} ${CFLAGS} -c $<
+
+wikid.o: util.h term.h config.h
+
+${OBJ}: config.h config.mk
+
+# config.h is generated from the defaults on first build
+config.h:
+	@echo creating $@ from config.def.h
+	@cp config.def.h $@
+
+wikid: ${OBJ}
+	@echo CC -o $@
+	@${CC} -g -o $@ ${OBJ} ${LDFLAGS}
+
+clean:
+	@echo cleaning
+	@rm -f wikid ${OBJ}
+	@rm -f config.h
+
+# Install below PREFIX (from config.mk); DESTDIR is empty unless a packager
+# sets it to stage the installation into another root.
+install: wikid
+	mkdir -p ${DESTDIR}${PREFIX}/bin
+	cp -f wikid ${DESTDIR}${PREFIX}/bin
+	chmod 755 ${DESTDIR}${PREFIX}/bin/wikid
+
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/wikid
+
+.PHONY: all options clean install uninstall
diff --git a/README.md b/README.md
@@ -1,2 +1,28 @@
-# wikid
-wikid is a quick and simple CLI-program for downloading and rendering wikipedia pages in your terminal. And it's not **wikid** as in *wiki daemon*, but as in *wicked, Strikingly good, and effective*.
+wikid
+=====
+wikid is a quick and simple CLI program for downloading and rendering Wikipedia
+pages in your terminal. It's not **wikid** as in *wiki daemon*, but as in
+*wicked: strikingly good and effective*.
+
+Requirements
+------------
+wikid uses the **curl** library to download the wiki pages, and **ncurses** for
+retrieving the terminal width for line output.
+
+Installation
+------------
+Edit config.mk to match your local setup (wikid is installed into the
+/usr/local namespace by default), then simply enter the following command to
+install (if necessary as root):
+
+ make clean install
+
+Example usage of wikid
+----------------------
+Read about Wikipedia, using `less`:
+
+ wikid wikipedia | less
+
+Read about Wikipedia in German, using `less`:
+
+ wikid -lde wikipedia | less
diff --git a/config.def.h b/config.def.h
@@ -0,0 +1,24 @@
+/* offset of the two-character "xx" language placeholder inside webpage */
+#define LANG_POS 8
+/* number of spaces per indentation level; scaled by PADDING() in wikid.c */
+#define _PADDING 3
+
+/* default language code; the first two characters of -l's argument replace it */
+char language[] = "en";
+/* mkstemp() template; rewritten in place with the real temporary file name */
+char temp_file[] = "/tmp/__wikidtmp.XXXXXX";
+/* API query URL: "xx" is overwritten with language, the subject is appended */
+const char webpage[] = "https://xx.wikipedia.org/w/api.php"
+ "?format=json"
+ "&action=query"
+ "&prop=extracts"
+ "&exlimit=1"
+ "&explaintext"
+ "&redirects"
+ "&titles=";
+
+/* cleanup mappings for wikipedia responses
+ * NOTE: the replacement cannot be greater in size than the entity itself. If
+ * it is, it'll just be ignored.
+ */
+const Dictionary cleanup_mappings[] = {
+ /* entity replacement */
+ { "\\n", "\n\n" },
+ { "\\t", " " },
+ { "\\\"", "\"" },
+};
diff --git a/config.mk b/config.mk
@@ -0,0 +1,25 @@
+# wikid version
+VERSION = 0.1.1
+
+# paths
+# NOTE(review): MANPREFIX is not referenced by the Makefile in this commit.
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+
+# flags
+# CPPFLAGS: feature-test macros plus the VERSION string exposed to the C code
+CPPFLAGS = -D_DEFAULT_SOURCE\
+ -D_BSD_SOURCE\
+ -D_POSIX_C_SOURCE=2\
+ -DVERSION=\"${VERSION}\"
+
+# CFLAGS includes CPPFLAGS because the Makefile's .c.o rule passes only CFLAGS
+CFLAGS = -std=c99\
+ -Wall\
+ -Wno-deprecated-declarations\
+ -Os\
+ -pedantic\
+ ${CPPFLAGS}
+
+# libraries linked into wikid: curl (download) and ncurses (terminal lookup)
+LDFLAGS = -lcurl\
+ -lncurses
+
+# compiler and linker
+CC = cc
diff --git a/term.c b/term.c
@@ -0,0 +1,23 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdlib.h>
+#include <term.h>
+
+/*
+ * Look up the column count of the terminal named by $TERM.
+ *
+ * Returns the numeric "co" capability of that terminal entry, or 0 when TERM
+ * is unset, the entry cannot be loaded, or the capability is missing.
+ */
+unsigned get_terminal_width(void)
+{
+	char entry[1024];
+	const char *name = getenv("TERM");
+	int columns;
+
+	if (name == NULL || tgetent(entry, name) <= 0)
+		return 0;
+
+	columns = tgetnum("co");
+
+	return columns == -1 ? 0 : columns;
+}
diff --git a/term.h b/term.h
@@ -0,0 +1 @@
+/* Column count of the terminal named by $TERM, or 0 if it cannot be found. */
+unsigned get_terminal_width(void);
diff --git a/util.c b/util.c
@@ -0,0 +1,217 @@
+/* See LICENSE file for copyright and license details. */
+#include <string.h>
+#include "util.h"
+
+/* Overwrite every occurrence of the character find in str with replace. */
+void string_replace(char *str, const char find, const char replace)
+{
+	char *cursor;
+
+	for (cursor = str; *cursor != '\0'; cursor++) {
+		if (*cursor == find)
+			*cursor = replace;
+	}
+}
+
+/*
+ * Remove every occurrence of the substring rem from str, in place.
+ *
+ * Scanning resumes at the position of the removed text, so a match formed by
+ * the characters that close the gap is removed as well. An empty rem leaves
+ * str untouched.
+ */
+void string_remove(char *str, const char *rem)
+{
+	size_t length = strlen(rem);
+	char *match = str;
+
+	/* an empty pattern would match everywhere and never advance */
+	if (length == 0)
+		return;
+
+	/*
+	 * strstr finds matches that begin inside a failed partial match
+	 * (e.g. "ab" in "aab"), which a single-pass character matcher skips.
+	 */
+	while ((match = strstr(match, rem)) != NULL) {
+		/* close the gap; the + 1 carries the terminating NUL along */
+		memmove(match, match + length, strlen(match + length) + 1);
+	}
+}
+
+/*
+ * Strip leading and trailing spaces and newlines from str, in place.
+ * A string consisting only of such characters becomes the empty string.
+ */
+void string_trim(char *str)
+{
+	char *start = str;
+	char *end;
+
+	while (*start == ' ' || *start == '\n')
+		start++;
+
+	/* nothing but whitespace (or nothing at all) */
+	if (*start == '\0') {
+		str[0] = '\0';
+		return;
+	}
+
+	/*
+	 * *start is not whitespace, so the backwards scan stops at start at the
+	 * latest and cannot leave the buffer.
+	 */
+	end = start + strlen(start) - 1;
+	while (*end == ' ' || *end == '\n')
+		end--;
+
+	/* cut the tail by writing the terminator, never reading beyond it */
+	end[1] = '\0';
+
+	/* shift the kept text, including its terminator, to the front */
+	memmove(str, start, (size_t)(end - start) + 2);
+}
+
+/*
+ * Drop redundant spaces from str, in place.
+ *
+ * A space is dropped when it is followed by another space, followed by a
+ * newline, or directly preceded by a newline in the input.
+ */
+void string_remove_redundent_spaces(char *str)
+{
+	const char *src = str;
+	char *dest = str;
+
+	while (*src != '\0') {
+		/*
+		 * src > str guards the look-behind so the first character never
+		 * reads str[-1]. dest never overtakes src, so src[-1] still
+		 * holds the previous input character.
+		 */
+		if (*src == ' '
+		&& (src[1] == ' '
+		|| src[1] == '\n'
+		|| (src > str && src[-1] == '\n'))) {
+			/*
+			 * Re-testing the loop condition after each skip keeps the
+			 * scan from stepping over the terminator when the text
+			 * ends in "\n ".
+			 */
+			src++;
+			continue;
+		}
+		*dest++ = *src++;
+	}
+	*dest = '\0';
+}
+
+/*
+ * Replace, in place, every occurrence of each dictionary entity in str with
+ * its replacement.
+ *
+ * str   NUL-terminated buffer to edit
+ * dict  array of entity/replacement pairs, applied in order
+ * size  number of entries in dict
+ *
+ * The string can only shrink: entries whose replacement is longer than the
+ * entity are skipped, as are entries with an empty entity. Text produced by a
+ * replacement is not rescanned for the same entity.
+ */
+void dictionary_replace(char *str, const Dictionary *dict, int size)
+{
+	const Dictionary *entry;
+	const Dictionary *end = dict + size;
+	size_t ent_len;
+	size_t rep_len;
+	char *ptr;
+
+	for (entry = dict; entry < end; entry++) {
+		ent_len = strlen(entry->entity);
+		rep_len = strlen(entry->replacement);
+
+		if (ent_len == 0 || ent_len < rep_len)
+			continue;
+
+		/*
+		 * strstr also finds matches that start inside a failed partial
+		 * match and back-to-back single-character entities.
+		 */
+		ptr = str;
+		while ((ptr = strstr(ptr, entry->entity)) != NULL) {
+			memcpy(ptr, entry->replacement, rep_len);
+			/* pull the tail (with its terminator) up behind it */
+			memmove(
+				ptr + rep_len,
+				ptr + ent_len,
+				strlen(ptr + ent_len) + 1
+			);
+			ptr += rep_len;
+		}
+	}
+}
+
+/*
+ * Integer power: x raised to y by repeated multiplication.
+ * Returns 1 for y == 0; for y below 1 the result is x itself.
+ */
+int tiny_pow(int x, int y)
+{
+	int result;
+
+	if (y == 0)
+		return 1;
+
+	for (result = x; y > 1; y--)
+		result *= x;
+
+	return result;
+}
+
+/*
+ * Value (0-15) of the hexadecimal digit c, upper or lower case, or -1 when c
+ * is not a hex digit. Assumes an ASCII execution character set.
+ */
+int hex_value(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+
+	if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+
+	return -1;
+}
+
+/*
+ * Convert a code point written as hexadecimal digits into UTF-8, in place.
+ *
+ * On entry ucs holds the NUL-terminated hex digits; on return it holds the
+ * NUL-terminated UTF-8 encoding (1-4 bytes). Values above 0x1fffff are
+ * replaced by '?'.
+ */
+void ucs_to_utf8(char *ucs)
+{
+	unsigned code = 0;
+	const char *digit;
+	int length;
+
+	/* most significant digit first: shift in one digit at a time */
+	for (digit = ucs; *digit != '\0'; digit++)
+		code = code * 16u + (unsigned)hex_value(*digit);
+
+	if (code <= 0x7f) {
+		ucs[0] = code;
+		length = 1;
+	} else if (code <= 0x7ff) {
+		ucs[0] = 0xc0 | (code >> 6);
+		ucs[1] = 0x80 | (code & 0x3f);
+		length = 2;
+	} else if (code <= 0xffff) {
+		ucs[0] = 0xe0 | (code >> 12);
+		ucs[1] = 0x80 | ((code >> 6) & 0x3f);
+		ucs[2] = 0x80 | (code & 0x3f);
+		length = 3;
+	} else if (code <= 0x1fffff) {
+		ucs[0] = 0xf0 | (code >> 18);
+		ucs[1] = 0x80 | ((code >> 12) & 0x3f);
+		ucs[2] = 0x80 | ((code >> 6) & 0x3f);
+		ucs[3] = 0x80 | (code & 0x3f);
+		length = 4;
+	} else {
+		/* out of range: emit '?' */
+		ucs[0] = 0x3f;
+		length = 1;
+	}
+
+	ucs[length] = '\0';
+}
+
+/*
+ * Parse exactly four hexadecimal digits starting at s.
+ * Returns their value, or -1 if any of them is not a hex digit. hex_value()
+ * rejects NUL, so the scan never reads past the end of the string.
+ */
+static int parse_hex4(const char *s)
+{
+	int value = 0;
+	int digit;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		digit = hex_value(s[i]);
+		if (digit == -1)
+			return -1;
+		value = value * 16 + digit;
+	}
+
+	return value;
+}
+
+/*
+ * Replace every JSON "\uXXXX" escape in str with its UTF-8 encoding, in
+ * place. The string can only get shorter.
+ *
+ * - An escape at the very end of the string is decoded too.
+ * - A high surrogate directly followed by a low surrogate escape is decoded
+ *   as one code point above U+FFFF.
+ * - Any other backslash sequence is copied verbatim as a two-character unit,
+ *   so the second backslash of "\\" never starts a new escape.
+ */
+void unicode_decode(char *str)
+{
+	static const char digits[] = "0123456789abcdef";
+	char buff[7];
+	const char *src = str;
+	char *dest = str;
+	long code;
+	int low;
+	int consumed;
+	int i;
+
+	while (*src) {
+		if (*src != '\\') {
+			*dest++ = *src++;
+			continue;
+		}
+
+		code = src[1] == 'u' ? parse_hex4(src + 2) : -1;
+		if (code == -1) {
+			/* not a complete \uXXXX escape: keep it as it is */
+			*dest++ = *src++;
+			if (*src)
+				*dest++ = *src++;
+			continue;
+		}
+
+		consumed = 6;
+
+		/* src[2..5] are hex digits, so src[6] is at worst the NUL */
+		if (code >= 0xd800 && code <= 0xdbff
+		&& src[6] == '\\' && src[7] == 'u') {
+			low = parse_hex4(src + 8);
+			if (low >= 0xdc00 && low <= 0xdfff) {
+				code = 0x10000
+					+ ((code - 0xd800) << 10)
+					+ (low - 0xdc00);
+				consumed = 12;
+			}
+		}
+
+		/* hand the code point to ucs_to_utf8() as six hex digits */
+		for (i = 5; i >= 0; i--) {
+			buff[i] = digits[code & 0xf];
+			code >>= 4;
+		}
+		buff[6] = '\0';
+		ucs_to_utf8(buff);
+
+		/* at most 4 bytes replace at least 6, so dest stays behind src */
+		for (i = 0; buff[i] != '\0'; i++)
+			*dest++ = buff[i];
+
+		src += consumed;
+	}
+
+	*dest = '\0';
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,19 @@
+typedef enum {
+ false,
+ true
+} bool;
+
+typedef struct {
+ const char *entity;
+ const char *replacement;
+} Dictionary;
+
+void string_replace(char *str, char find, char replace);
+void string_remove(char *str, const char *rem);
+void string_trim(char *str);
+void string_remove_redundent_spaces(char *str);
+void dictionary_replace(char *str, const Dictionary *dict, int size);
+int m_pow(int x, int y);
+int hex_value(char c);
+void ucs_to_utf8(char *ucs);
+void unicode_decode(char *str);
diff --git a/wikid.c b/wikid.c
@@ -0,0 +1,390 @@
+/*
+ * Downloads and renders wikipedia pages in your terminal, with some formatting
+ * Copyright (C) 2018 z0noxz, <chris@noxz.tech>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <curl/curl.h>
+
+#include "util.h"
+#include "term.h"
+#include "config.h"
+
+/* macros */
+/* number of elements of a real array (not of a pointer) */
+#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
+/* width in columns of x indentation levels */
+#define PADDING(x) ((x) * _PADDING)
+/* index of element pointer x within array y; the result has type size_t */
+#define INDEX_OF(x, y) ((int)((x) - (y)) / sizeof((y)[0]))
+
+/* function declarations */
+/* build the API URL for subject; the caller frees the result */
+char *get_wiki_url(char *subject);
+/* download url into the temporary file; returns 0 on success */
+int download_wiki(char *url);
+/* print one output line, indented by padding levels */
+void print_line(int padding, const char *format, char *line);
+/* classify, wrap and print one line of the extract */
+void handle_line(char *line);
+/* render the downloaded extract; returns non-zero on success */
+int print_wiki(char *subject);
+
+/* variables */
+/* synopsis; lists every option main() passes to getopt ("hl:rt") */
+const char *usage = "usage: wikid [-hrt] [-l CODE] <subject>";
+/* lines handled so far, over the whole page and within the current section */
+unsigned total_line_count = 0;
+unsigned section_line_count = 0;
+/* whether the previously handled line was empty */
+bool blank_line = false;
+/* true until the first heading has been seen */
+bool pre_heading = true;
+/* option bits: bit 0 = raw output (-r), bit 1 = text only (-t) */
+char global_options = '\0';
+unsigned terminal_width;
+/* descriptor of the download file; -1 while no temporary file exists */
+int temp_file_descriptor = -1;
+
+/* function implementations */
+/*
+ * Build the API request URL for subject.
+ *
+ * The subject is appended to webpage, the "xx" placeholder at LANG_POS is
+ * overwritten with the first two characters of language, and spaces are
+ * turned into underscores.
+ *
+ * Returns a heap-allocated string the caller must free, or NULL when memory
+ * is exhausted.
+ */
+char *get_wiki_url(char *subject)
+{
+	/* sized to fit, so a long subject cannot overflow the buffer */
+	size_t length = strlen(webpage) + strlen(subject) + 1;
+	char *url = malloc(length);
+
+	if (url == NULL)
+		return NULL;
+
+	strcpy(url, webpage);
+	strcat(url, subject);
+
+	memmove(url + LANG_POS, language, 2);
+	string_replace(url, ' ', '_');
+
+	return url;
+}
+
+/*
+ * Download url into a fresh temporary file.
+ *
+ * The file is created from the temp_file template and its descriptor is
+ * stored in temp_file_descriptor, where print_wiki() and main() pick it up.
+ *
+ * Returns 0 (CURLE_OK) on success. On failure returns a non-zero value: -1
+ * when a curl option could not be set, otherwise a CURLcode.
+ */
+int download_wiki(char *url)
+{
+	int state = 0;
+	int fd;
+	char error[CURL_ERROR_SIZE];
+	FILE *fptr;
+	CURL *curl;
+	CURLcode status;
+
+	temp_file_descriptor = mkstemp(temp_file);
+	if (temp_file_descriptor == -1) {
+		fprintf(stderr, "%s\n", "Can't open tmp file");
+		return CURLE_FAILED_INIT;
+	}
+
+	/*
+	 * Write through a duplicate so closing the stream leaves
+	 * temp_file_descriptor open. With a NULL stream curl would write the
+	 * response to stdout, so a failure here is fatal.
+	 */
+	fd = dup(temp_file_descriptor);
+	fptr = fd != -1 ? fdopen(fd, "w") : NULL;
+	if (fptr == NULL) {
+		if (fd != -1)
+			close(fd);
+		fprintf(stderr, "%s\n", "Can't open tmp file");
+		return CURLE_FAILED_INIT;
+	}
+
+	curl_global_init(CURL_GLOBAL_ALL);
+	curl = curl_easy_init();
+
+	if (curl) {
+		if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set an error buffer for curl");
+			state = -1;
+		} else if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set the URL for curl");
+			state = -1;
+		} else if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set write function for curl");
+			state = -1;
+		} else if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, fptr) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set output for curl");
+			state = -1;
+		/* the option takes a long; an int is wrong in a varargs call */
+		} else if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set follow location for curl");
+			state = -1;
+		} else if ((status = curl_easy_perform(curl)) != CURLE_OK) {
+			fprintf(stderr, "%s\n", curl_easy_strerror(status));
+			state = status;
+		}
+
+		curl_easy_cleanup(curl);
+	} else {
+		fprintf(stderr, "%s\n", "Failed to initialize curl");
+		state = CURLE_FAILED_INIT;
+	}
+
+	curl_global_cleanup();
+
+	/* fclose flushes; a failure means the file is incomplete */
+	if (fclose(fptr) != 0 && state == 0) {
+		fprintf(stderr, "%s\n", "Failed to write the tmp file");
+		state = CURLE_WRITE_ERROR;
+	}
+
+	return state;
+}
+
+/*
+ * Print one line to stdout.
+ *
+ * padding  indentation levels to print in front of the line (none if <= 0)
+ * format   printf format with a single %s, used for styled output
+ * line     the text to print
+ *
+ * When bit 1 of global_options is set, format is ignored and the line is
+ * printed plain.
+ */
+void print_line(int padding, const char *format, char *line)
+{
+	const int text_only = (global_options >> 1) & 1U;
+
+	if (padding > 0)
+		fprintf(stdout, "%*c", PADDING(padding), ' ');
+
+	if (text_only)
+		fprintf(stdout, "%s\n", line);
+	else
+		fprintf(stdout, format, line);
+}
+
+/*
+ * Classify, wrap and print one line of the extract.
+ *
+ * input is copied and left unmodified. Lines starting with "==", "===" or
+ * "====" are printed as headings (the "==" level in upper case). Text before
+ * the first heading is printed in italics, everything else as plain
+ * paragraphs. Empty lines are dropped at the start of a section and after
+ * another empty line.
+ *
+ * Reads terminal_width; updates pre_heading, blank_line, total_line_count
+ * and section_line_count.
+ */
+void handle_line(char *input)
+{
+	char *temp_line = malloc(strlen(input) + 1);
+	char *ptr;
+	char *line = NULL;
+	char *next = NULL;
+	char heading = 0x0;
+	int b = 0; /* beginning */
+	int e = 0; /* end */
+	int limit = terminal_width - PADDING(3);
+
+	if (temp_line == NULL) {
+		fprintf(stderr, "%s\n", "Out of memory");
+		return;
+	}
+
+	strcpy(temp_line, input);
+	string_trim(temp_line);
+
+	/* make sure first section line contains text */
+	if (*temp_line || (section_line_count > 0 && !blank_line)) {
+
+		/*
+		 * check and handle headings, (h2-h4); the level is decided by
+		 * the leading run of '=' only, so "==x=" stays an h2
+		 */
+		if (temp_line[0] == '=' && temp_line[1] == '=' && temp_line[2]) {
+			if (temp_line[2] == '=' && temp_line[3] == '=') {
+				string_remove(temp_line, "====");
+				heading = 0x4;
+			} else if (temp_line[2] == '=') {
+				string_remove(temp_line, "===");
+				heading = 0x3;
+			} else {
+				/* capitalize 'lowest' heading */
+				for (ptr = temp_line + 2; *ptr; ptr++) {
+					/* ASCII a-z */
+					if (*ptr > 0x60 && *ptr < 0x7b)
+						*ptr -= 0x20;
+					/*
+					 * U+00E0..U+00FE (UTF-8 C3 A0..BE); U+00F7
+					 * (C3 B7, the division sign) has no upper case
+					 */
+					if ((unsigned char)*(ptr - 1) == 0xc3
+					&& (unsigned char)*ptr >= 0xa0
+					&& (unsigned char)*ptr <= 0xbe
+					&& (unsigned char)*ptr != 0xb7)
+						*ptr -= 0x20;
+				}
+				string_remove(temp_line, "==");
+				heading = 0x2;
+			}
+
+			pre_heading = false;
+			/* wraps around; the increment below makes it 0 */
+			section_line_count = -1;
+		}
+
+		/* prevent lines from breaking in the middle of words */
+		ptr = temp_line;
+		while (*ptr) {
+
+			if (*ptr == ' ')
+				e = INDEX_OF(ptr, temp_line);
+
+			/*
+			 * INDEX_OF yields a size_t, so this comparison is
+			 * unsigned: a negative limit (terminal width unknown)
+			 * turns wrapping off
+			 */
+			if (INDEX_OF(ptr, temp_line) - b >= limit) {
+				if (e - b > 0)
+					temp_line[e] = '\n';
+				b = e + 1;
+			}
+
+			ptr++;
+		}
+
+		string_trim(temp_line);
+		line = temp_line;
+
+		/* format and output lines */
+		while (line) {
+			next = strchr(line, '\n');
+			if (next)
+				*next = '\0';
+
+			if (pre_heading) {
+				print_line(2, "\033[3m%s\033[0m\n", line);
+			} else if (heading) {
+				switch (heading) {
+				case 0x2:
+					print_line(0, "\033[1m%s\033[0m\n", line);
+					break;
+				case 0x3:
+					print_line(1, "\033[1m%s\033[0m\n", line);
+					break;
+				default:
+					print_line(2, "\033[1m\033[3m%s\033[0m\n", line);
+					break;
+				}
+			} else {
+				print_line(2, "%s\n", line);
+			}
+
+			if (next)
+				*next = '\n';
+			line = next ? next + 1 : NULL;
+		}
+
+		blank_line = *temp_line == '\0';
+		total_line_count++;
+		section_line_count++;
+	}
+
+	free(temp_line);
+}
+
+/*
+ * Render the downloaded API response.
+ *
+ * Reads the whole temporary file behind temp_file_descriptor, cuts out the
+ * value of the JSON "extract" string, applies cleanup_mappings and prints the
+ * result: verbatim when bit 0 of global_options is set, otherwise line by
+ * line through handle_line() under a title line showing subject.
+ *
+ * Returns non-zero on success and 0 when the file cannot be read or contains
+ * no complete extract.
+ */
+int print_wiki(char *subject)
+{
+	const char beginning[] = "\"extract\":\"";
+	bool failure = false;
+	FILE *fptr = NULL;
+	char *content = NULL;
+	char *line = NULL;
+	char *next = NULL;
+	char *start = NULL;
+	char *end = NULL;
+	size_t length;
+	long size = 0;
+	int fd;
+
+	/* read through a duplicate so temp_file_descriptor stays open */
+	fd = dup(temp_file_descriptor);
+	if (fd != -1)
+		fptr = fdopen(fd, "rb");
+	if (fptr == NULL) {
+		if (fd != -1)
+			close(fd);
+		failure = true;
+	}
+
+	if (!failure) {
+		if (fseek(fptr, 0, SEEK_END) != 0 || (size = ftell(fptr)) < 0) {
+			failure = true;
+		} else {
+			rewind(fptr);
+			content = malloc((size_t)size + 1);
+			if (content == NULL) {
+				failure = true;
+			} else {
+				/* terminate what was read; strstr needs a string */
+				length = fread(content, sizeof(char), (size_t)size, fptr);
+				content[length] = '\0';
+			}
+		}
+		fclose(fptr);
+	}
+
+	if (!failure) {
+		start = strstr(content, beginning);
+		if (start != NULL)
+			start += strlen(beginning);
+		else
+			failure = true;
+	}
+
+	if (!failure) {
+		/*
+		 * The extract ends at the first quote that is not escaped.
+		 * Searching for '}' would cut articles whose text contains one.
+		 */
+		end = start;
+		while (*end != '\0' && *end != '"') {
+			if (*end == '\\' && end[1] != '\0')
+				end++;
+			end++;
+		}
+
+		if (*end == '"') {
+			length = (size_t)(end - start);
+			memmove(content, start, length);
+			content[length] = '\0';
+		} else {
+			failure = true;
+		}
+	}
+
+	if (!failure) {
+		dictionary_replace(content, cleanup_mappings, LENGTH(cleanup_mappings));
+
+		if ((global_options >> 0) & 1U) {
+			fprintf(stdout, "%s\n", content);
+		} else {
+			string_remove_redundent_spaces(content);
+			unicode_decode(content);
+
+			if (!((global_options >> 1) & 1U))
+				fprintf(stdout, "\033[4m%s:\033[0m\n\n", subject);
+
+			line = content;
+			while (line) {
+				next = strchr(line, '\n');
+				if (next)
+					*next = '\0';
+
+				handle_line(line);
+
+				if (next)
+					*next = '\n';
+				line = next ? next + 1 : NULL;
+			}
+		}
+	}
+
+	free(content);
+
+	return !failure;
+}
+
+/*
+ * Entry point: parse the options, pick the subject, download the page and
+ * print it.
+ *
+ * The subject is the first line of stdin when stdin is not a terminal and
+ * yields a non-empty line; otherwise it is the last command line operand.
+ *
+ * Returns 0 on success (and after -h), 1 on any error.
+ */
+int main(int argc, char *argv[])
+{
+	int state = 0;
+	char *input = NULL;
+	char *url = NULL;
+	const char *source = NULL;
+	char buffer[80];
+	int c; /* int, not char: getopt returns -1 at the end */
+
+	/* no temporary file yet; keeps the cleanup below away from fd 0 */
+	temp_file_descriptor = -1;
+
+	/* options first, so that an option is never taken for the subject */
+	while ((c = getopt(argc, argv, "hl:rt")) != -1) {
+		switch (c) {
+		case 'l':
+			/* a shorter code would copy its NUL into the URL */
+			if (strlen(optarg) < 2) {
+				fprintf(stderr, "%s\n", usage);
+				state = 1;
+			} else {
+				memmove(language, optarg, 2);
+			}
+			break;
+		case 'r':
+			global_options |= 1U << 0;
+			break;
+		case 't':
+			global_options |= 1U << 1;
+			break;
+		case 'h':
+			fprintf(stderr, "%s\n", usage);
+			fprintf(stderr, "Finds a wiki subject and prints it.\n");
+			fprintf(stderr, " -h Print this help text and exit\n");
+			fprintf(stderr, " -l CODE Language code in ISO 639-1 format\n");
+			fprintf(stderr, " -r Print wiki in raw format\n");
+			fprintf(stderr, " -t Print wiki in text only format\n");
+
+			return 0;
+		default:
+			fprintf(stderr, "%s\n", usage);
+			state = 1;
+			break;
+		}
+	}
+
+	if (state == 0) {
+		/* validate input from STDIN */
+		if (!isatty(fileno(stdin))
+		&& fgets(buffer, sizeof(buffer), stdin) != NULL) {
+			/* fgets keeps the line break; it must not reach the URL */
+			buffer[strcspn(buffer, "\r\n")] = '\0';
+			if (buffer[0] != '\0')
+				source = buffer;
+		}
+
+		/* validate input from CLI */
+		if (source == NULL && optind < argc)
+			source = argv[argc - 1];
+
+		/* no valid input, so show usage */
+		if (source == NULL) {
+			fprintf(stderr, "%s\n", usage);
+			state = 1;
+		}
+	}
+
+	if (state == 0) {
+		/* + 1 for the terminating NUL */
+		input = malloc(strlen(source) + 1);
+		if (input == NULL) {
+			fprintf(stderr, "%s\n", "Out of memory");
+			state = 1;
+		} else {
+			strcpy(input, source);
+		}
+	}
+
+	/* initialization is done, so proceed with the rest of the program... */
+	terminal_width = get_terminal_width();
+
+	if (state == 0) {
+		url = get_wiki_url(input);
+		if (url == NULL) {
+			fprintf(stderr, "%s\n", "Out of memory");
+			state = 1;
+		}
+	}
+
+	if (state == 0 && download_wiki(url) != CURLE_OK) {
+		fprintf(stderr, "%s\n", "Failed to download the wiki");
+		state = 1;
+	}
+
+	if (state == 0 && !print_wiki(input)) {
+		fprintf(stderr, "%s\n", "Failed to print the wiki");
+		state = 1;
+	}
+
+	if (temp_file_descriptor != -1) {
+		close(temp_file_descriptor);
+		unlink(temp_file);
+	}
+
+	free(input);
+	free(url);
+
+	return state;
+}