wikid

[discontinued] A quick and simple CLI-program for downloading and rendering wikipedia pages in the terminal.
git clone https://noxz.tech/git/wikid.git
Log | Files | README | LICENSE

commit: 3b2c4179078be7a4051a666153fb60f2f106435b
parent: 88b6f190b56e8264b59c1b7a0ef440e15f3a2165
author: z0noxz <chris@noxz.tech>
date:   Sun, 26 Aug 2018 14:08:30 +0200
Initial commit
AMakefile45+++
MREADME.md30+-
Aconfig.def.h24++
Aconfig.mk25++
Aterm.c23++
Aterm.h1+
Autil.c217+++++++++++
Autil.h19+
Awikid.c390++++++++++++++++++++
9 files changed, 772 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,45 @@
+# Build, install and clean rules for wikid.
+# All tunables (compiler, flags, PREFIX) come from config.mk.
+.POSIX:
+
+include config.mk
+
+SRC = util.c term.c wikid.c
+OBJ = ${SRC:.c=.o}
+
+all: options wikid
+
+# print the effective build settings before compiling
+options:
+	@echo wikid build options:
+	@echo "CFLAGS   = ${CFLAGS}"
+	@echo "LDFLAGS  = ${LDFLAGS}"
+	@echo "CC       = ${CC}"
+
+.c.o:
+	@echo CC $<
+	@${CC} ${CFLAGS} -c $<
+
+wikid.o: util.h term.h config.h
+
+${OBJ}: config.h config.mk
+
+# config.h is generated from the defaults on first build
+config.h:
+	@echo creating $@ from config.def.h
+	@cp config.def.h $@
+
+wikid: ${OBJ}
+	@echo CC -o $@
+	@${CC} -g -o $@ ${OBJ} ${LDFLAGS}
+
+clean:
+	@echo cleaning
+	@rm -f wikid ${OBJ}
+	@rm -f config.h
+
+# honour PREFIX from config.mk (default /usr/local) and an optional
+# DESTDIR staging root instead of a hard-coded /usr/local/bin
+install: wikid
+	mkdir -p ${DESTDIR}${PREFIX}/bin
+	cp -f wikid ${DESTDIR}${PREFIX}/bin
+	chmod 755 ${DESTDIR}${PREFIX}/bin/wikid
+
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/wikid
+
+.PHONY: all options clean install uninstall
diff --git a/README.md b/README.md
@@ -1,2 +1,28 @@
-# wikid
-wikid is a quick and simple CLI-program for downloading and rendering wikipedia pages in your terminal. And it's not **wikid** as in *wiki daemon*, but as in *wicked, Strikingly good, and effective*.
+wikid
+=====
+wikid is a quick and simple CLI-program for downloading and rendering wikipedia
+pages in your terminal. And it's not **wikid** as in *wiki daemon*, but as in
+*wicked, strikingly good, and effective*.
+
+Requirements
+------------
+wikid uses the **curl** library to download the wiki pages, and **ncurses** for
+retrieving the terminal width for line output.
+
+Installation
+------------
+Edit config.mk to match your local setup (wikid is installed into the
+/usr/local namespace by default), then simply enter the following command to
+install (if necessary as root):
+
+    make clean install
+
+Example usage of wikid
+----------------------
+Read about Wikipedia, using `less`
+
+    wikid wikipedia | less
+
+Read about Wikipedia in German, using `less`
+
+    wikid -lde wikipedia | less
diff --git a/config.def.h b/config.def.h
@@ -0,0 +1,24 @@
+/* offset of the "xx" language placeholder inside `webpage` */
+#define LANG_POS    8
+/* columns per indentation level; multiplied by the PADDING() macro */
+#define _PADDING    3
+
+/* language code; its first two bytes overwrite the "xx" placeholder */
+char language[]         = "en";
+/* mkstemp() template for the file the API response is downloaded to */
+char temp_file[]        = "/tmp/__wikidtmp.XXXXXX";
+/* API query URL; the requested subject is appended after "&titles=" */
+const char webpage[]    = "https://xx.wikipedia.org/w/api.php"
+                        "?format=json"
+                        "&action=query"
+                        "&prop=extracts"
+                        "&exlimit=1"
+                        "&explaintext"
+                        "&redirects"
+                        "&titles=";
+
+/* cleanup mappings for wikipedia responses
+ * NOTE: the replacement cannot be longer than the entity itself, because the
+ * substitution is done in place. If it is longer, the mapping is skipped.
+ */
+const Dictionary cleanup_mappings[] = {
+    /* entity       replacement */
+    { "\\n",        "\n\n" },
+    { "\\t",        "  " },
+    { "\\\"",       "\"" },
+};
diff --git a/config.mk b/config.mk
@@ -0,0 +1,25 @@
+# wikid version
+VERSION = 0.1.1
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+
+# flags
+# preprocessor definitions; VERSION is passed to the sources as a string
+# literal, and the whole set is appended to CFLAGS below
+CPPFLAGS = -D_DEFAULT_SOURCE\
+           -D_BSD_SOURCE\
+           -D_POSIX_C_SOURCE=2\
+           -DVERSION=\"${VERSION}\"
+
+CFLAGS   = -std=c99\
+           -Wall\
+           -Wno-deprecated-declarations\
+           -Os\
+           -pedantic\
+           ${CPPFLAGS}
+
+# libraries to link; the Makefile places ${LDFLAGS} after the object files
+LDFLAGS  = -lcurl\
+           -lncurses
+
+# compiler and linker
+CC = cc
diff --git a/term.c b/term.c
@@ -0,0 +1,23 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdlib.h>
+#include <term.h>
+
+/* Look up the column count of the terminal named by $TERM.
+ * Returns 0 when $TERM is unset, when tgetent() does not succeed, or when
+ * the "co" capability is reported as -1.
+ */
+unsigned get_terminal_width(void)
+{
+    char        entry[1024];
+    const char  *name = getenv("TERM");
+    int         width;
+
+    if (name == NULL || tgetent(entry, name) <= 0)
+        return 0;
+
+    width = tgetnum("co");
+
+    return width == -1 ? 0 : width;
+}
diff --git a/term.h b/term.h
@@ -0,0 +1 @@
+/* Returns the terminal width in columns, or 0 when it cannot be determined. */
+unsigned get_terminal_width(void);
diff --git a/util.c b/util.c
@@ -0,0 +1,217 @@
+/* See LICENSE file for copyright and license details. */
+#include <string.h>
+#include "util.h"
+
+/* Replace every occurrence of the character `find` in `str` with `replace`,
+ * modifying the string in place.
+ */
+void string_replace(char *str, const char find, const char replace)
+{
+    char *cursor;
+
+    for (cursor = str; *cursor != '\0'; cursor++) {
+        if (*cursor == find)
+            *cursor = replace;
+    }
+}
+
+/* Remove every occurrence of the substring `rem` from `str`, in place.
+ *
+ * Scanning resumes at the position where a match was cut out, so text that
+ * only becomes a match by joining the bytes before and after a removal is
+ * not removed again. An empty `rem` leaves `str` untouched.
+ *
+ * strstr() is used so that a failed partial match is retried from the very
+ * next character (e.g. removing "ab" from "aab" yields "a").
+ */
+void string_remove(char *str, const char *rem)
+{
+    size_t  length  = strlen(rem);
+    char    *pt     = str;
+
+    if (length == 0)
+        return;
+
+    while ((pt = strstr(pt, rem)) != NULL)
+        memmove(pt, pt + length, 1 + strlen(pt + length));
+}
+
+/* Strip leading and trailing spaces and newlines from `str`, in place.
+ * A string consisting only of such characters becomes the empty string.
+ */
+void string_trim(char *str)
+{
+    char    *begin  = str;
+    char    *end;
+
+    while (*begin == ' ' || *begin == '\n')
+        begin++;
+
+    if (*begin == '\0') {
+        str[0] = '\0';
+        return;
+    }
+
+    /* *begin is not whitespace, so this loop cannot run past it */
+    end = begin + strlen(begin) - 1;
+    while (*end == ' ' || *end == '\n')
+        end--;
+
+    /* terminate right after the last kept character; nothing past the
+     * original terminator is ever read */
+    end[1] = '\0';
+
+    memmove(str, begin, (size_t)(end - begin) + 2);
+}
+
+/* Collapse redundant spaces in `str`, in place. A space is dropped when it
+ * is followed by another space, followed by a newline, or directly preceded
+ * by a newline.
+ */
+void string_remove_redundent_spaces(char *str)
+{
+    const char  *start  = str;
+    char        *dest   = str;
+
+    while (*str != '\0') {
+        /* only look at the previous character when there is one */
+        while (*str == ' '
+            && (*(str + 1) == ' '
+            || *(str + 1) == '\n'
+            || (str > start && *(str - 1) == '\n')))
+            str++;
+
+        /* the skip above may have consumed the rest of the string */
+        if (*str == '\0')
+            break;
+
+        *dest++ = *str++;
+    }
+    *dest = '\0';
+}
+
+/* Replace, in place, every occurrence in `str` of each entry's `entity`
+ * with that entry's `replacement`. `dict` points to `size` entries, which
+ * are applied one after another over the whole string. Entries whose
+ * replacement is longer than the entity are skipped, since the string is
+ * never grown.
+ */
+void dictionary_replace(char *str, const Dictionary *dict, int size)
+{
+    const Dictionary    *entry  = dict;
+    const Dictionary    *end    = dict + size;
+    const char          *ent;
+    const char          *rep;
+    char                *ptr;
+    int                 ent_len;
+    int                 rep_len;
+
+    while (entry < end) {
+        ent     = entry->entity;
+        rep     = entry->replacement;
+        ent_len = strlen(ent);
+        rep_len = strlen(rep);
+
+        /* the replacement would not fit in the space of the entity */
+        if (ent_len < rep_len) {
+            entry++;
+            continue;
+        }
+
+        ptr = str;
+        while (*ptr) {
+            /* NOTE: ent is advanced even when the comparison fails; the
+             * else branches below reset it in that case */
+            if (*ptr == *ent++) {
+                if (*ent == '\0') {
+                    /* whole entity matched: step back to its first byte */
+                    ptr -= (ent_len - 1);
+
+                    while (*rep)
+                        *ptr++ = *rep++;
+
+                    /* rewind rep for the next match, then close the gap
+                     * left by the shorter replacement (terminator included) */
+                    rep -= rep_len;
+                    memmove(
+                        ptr,
+                        ptr + ent_len - rep_len,
+                        1 + strlen(ptr + ent_len - rep_len)
+                    );
+                    /* ent still points at the entity's terminator here; the
+                     * next comparison fails and resets it */
+                    continue;
+                }
+            } else if (*ptr == entry->entity[0]) {
+                /* mismatch, but this byte starts a new candidate match */
+                ent = entry->entity + 1;
+            } else {
+                ent = entry->entity;
+            }
+            ptr++;
+        }
+        entry++;
+    }
+}
+
+/* Integer power: returns x raised to y for y >= 1 and 1 for y == 0.
+ * A negative y is not treated specially and yields x.
+ */
+int tiny_pow(int x, int y)
+{
+    int result;
+    int i;
+
+    if (y == 0)
+        return 1;
+
+    result = x;
+    for (i = 1; i < y; i++)
+        result *= x;
+
+    return result;
+}
+
+/* Numeric value (0-15) of the ASCII hexadecimal digit `c`, accepting both
+ * upper and lower case; -1 when `c` is not a hexadecimal digit.
+ */
+int hex_value(char c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+
+    return -1;
+}
+
+/* Convert the hexadecimal code point spelled out in `ucs` (e.g. "00e9")
+ * into its UTF-8 byte sequence, overwriting `ucs` with the NUL-terminated
+ * result. Up to four bytes plus the terminator are written, so the buffer
+ * must have room for them. Values above 0x1fffff become a single '?'.
+ */
+void ucs_to_utf8(char *ucs)
+{
+    unsigned    code    = 0;
+    unsigned    i;
+    int         digits  = strlen(ucs);
+    char        *out    = ucs;
+
+    /* weigh each digit by its position, most significant first */
+    for (i = 0; ucs[i] != '\0'; i++)
+        code += hex_value(ucs[i]) * tiny_pow(16, digits - 1 - i);
+
+    if (code > 0x1fffff) {
+        *out++ = 0x3f;  /* '?' */
+    } else if (code > 0xffff) {
+        *out++ = 0xf0 | (code >> 18);
+        *out++ = 0x80 | ((code >> 12) & 0x3f);
+        *out++ = 0x80 | ((code >> 6) & 0x3f);
+        *out++ = 0x80 | ((code >> 0) & 0x3f);
+    } else if (code > 0x7ff) {
+        *out++ = 0xe0 | (code >> 12);
+        *out++ = 0x80 | ((code >> 6) & 0x3f);
+        *out++ = 0x80 | ((code >> 0) & 0x3f);
+    } else if (code > 0x7f) {
+        *out++ = 0xc0 | (code >> 6);
+        *out++ = 0x80 | ((code >> 0) & 0x3f);
+    } else {
+        *out++ = code;
+    }
+
+    *out = '\0';
+}
+
+/* Replace, in place, every "\uXXXX" escape (backslash, 'u', four hex
+ * digits) in `str` with the UTF-8 encoding of that code point.
+ *
+ * `c` counts how many characters of an escape have been recognised so far
+ * (0-6). The conversion is checked at the top of the loop, before the
+ * end-of-string test, so an escape that ends exactly at the end of the
+ * string is converted too. Each escape is converted on its own; surrogate
+ * pairs are not combined.
+ */
+void unicode_decode(char *str)
+{
+    char        buff[5];
+    char        *pt         = str;
+    char        *bpt        = buff;
+    const char  indic[2]    = { '\\', 'u' };
+    int         c           = 0;
+    int         j           = 0;
+
+    for (;;) {
+        if (c == 6) {
+            /* pt is just past the escape: convert the collected digits
+             * and write the result over the start of the escape */
+            ucs_to_utf8(buff);
+            bpt = buff;
+            pt -= c;
+            j = 0;
+
+            while (*bpt) {
+                *pt++ = *bpt++;
+                j++;
+            }
+
+            /* close the gap left by the shorter UTF-8 sequence */
+            memmove(pt, pt + c - j, 1 + strlen(pt + c - j));
+
+            bpt = buff;
+            c = 0;
+            continue;
+        }
+
+        if (!*pt)
+            break;
+
+        if (c < 2 && *pt == indic[c]) {
+            c++;
+        } else if (c >= 2 && c < 6 && hex_value(*pt) != -1) {
+            *bpt++ = *pt;
+            *bpt = '\0';
+            c++;
+        } else {
+            /* not (or no longer) an escape: start over */
+            bpt = buff;
+            c = 0;
+        }
+        pt++;
+    }
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,19 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+/* minimal boolean type */
+typedef enum {
+    false,
+    true
+} bool;
+
+/* one search/replace pair used by dictionary_replace() */
+typedef struct {
+    const char *entity;
+    const char *replacement;
+} Dictionary;
+
+/* in-place string helpers */
+void string_replace(char *str, char find, char replace);
+void string_remove(char *str, const char *rem);
+void string_trim(char *str);
+void string_remove_redundent_spaces(char *str);
+void dictionary_replace(char *str, const Dictionary *dict, int size);
+
+/* "\uXXXX" escape decoding helpers */
+int tiny_pow(int x, int y);
+int hex_value(char c);
+void ucs_to_utf8(char *ucs);
+void unicode_decode(char *str);
+
+#endif /* UTIL_H */
diff --git a/wikid.c b/wikid.c
@@ -0,0 +1,390 @@
+/*
+ * Downloads and renders wikipedia pages in your terminal, with some formatting
+ * Copyright (C) 2018 z0noxz, <chris@noxz.tech>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <curl/curl.h>
+
+#include "util.h"
+#include "term.h"
+#include "config.h"
+
+/* macros */
+/* number of elements in an array */
+#define LENGTH(x)       (sizeof(x) / sizeof(x[0]))
+/* indentation width, in columns, for indentation level x */
+#define PADDING(x)      (x * _PADDING)
+/* index of pointer x within array y
+ * NOTE: dividing by sizeof makes the result an unsigned size_t, so
+ * expressions built on it are compared as unsigned values */
+#define INDEX_OF(x, y)  ((int)(x - y) / sizeof(y[0]))
+
+/* function declarations */
+char *get_wiki_url(char *subject);
+int download_wiki(char *url);
+void print_line(int padding, const char *format, char *line);
+void handle_line(char *line);
+int print_wiki(char *subject);
+
+/* variables */
+const char  *usage              = "usage: wikid [-hrt] [-l CODE] <subject>";
+unsigned    total_line_count    = 0;
+unsigned    section_line_count  = 0;
+bool        blank_line          = false;
+bool        pre_heading         = true;
+/* option bits: bit 0 = raw output (-r), bit 1 = text only (-t) */
+char        global_options      = '\0';
+unsigned    terminal_width;
+/* -1 until download_wiki() has opened the temporary file, so that cleanup
+ * never touches a descriptor that was not opened here */
+int         temp_file_descriptor = -1;
+
+/* function implementations */
+/* Build the API request URL for `subject`.
+ *
+ * The result is `webpage` followed by `subject`, with the first two bytes
+ * of `language` written over the placeholder at LANG_POS and every space
+ * replaced by an underscore. The buffer is sized to fit the subject, so
+ * long subjects cannot overflow it.
+ *
+ * Returns a malloc()ed string the caller must free(), or NULL when the
+ * allocation fails.
+ */
+char *get_wiki_url(char *subject)
+{
+    size_t  base_len    = strlen(webpage);
+    size_t  subj_len    = strlen(subject);
+    char    *url        = malloc(base_len + subj_len + 1);
+
+    if (url == NULL)
+        return NULL;
+
+    memcpy(url, webpage, base_len);
+    memcpy(url + base_len, subject, subj_len + 1);
+
+    memmove(url + LANG_POS, language, 2);
+    string_replace(url, ' ', '_');
+
+    return url;
+}
+
+/* Download `url` into a fresh temporary file created from the `temp_file`
+ * template. The file's descriptor is stored in `temp_file_descriptor` and
+ * left open for print_wiki(); writing goes through a duplicate descriptor
+ * that is closed again before returning.
+ *
+ * Returns 0 (CURLE_OK) on success. On failure a message is printed to
+ * stderr and a non-zero value is returned: CURLE_FAILED_INIT when the
+ * temporary file cannot be opened, -1 when curl cannot be set up, or the
+ * CURLcode reported by the transfer.
+ */
+int download_wiki(char *url)
+{
+    int         state = 0;
+    int         fd;
+    char        error[CURL_ERROR_SIZE];
+    FILE        *fptr;
+    CURL        *curl;
+    CURLcode    status;
+
+    temp_file_descriptor = mkstemp(temp_file);
+    if (temp_file_descriptor == -1) {
+        fprintf(stderr, "%s\n", "Can't open tmp file");
+        return CURLE_FAILED_INIT;
+    }
+
+    /* never hand curl a NULL stream */
+    fd = dup(temp_file_descriptor);
+    fptr = fd != -1 ? fdopen(fd, "w") : NULL;
+    if (fptr == NULL) {
+        if (fd != -1)
+            close(fd);
+        fprintf(stderr, "%s\n", "Can't open tmp file");
+        return CURLE_FAILED_INIT;
+    }
+
+    curl_global_init(CURL_GLOBAL_ALL);
+    curl = curl_easy_init();
+
+    if (curl) {
+        if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error) != CURLE_OK) {
+            fprintf(stderr, "%s\n", "Failed to set an error buffer for curl");
+            state = -1;
+        } else if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK) {
+            fprintf(stderr, "%s\n", "Failed to set the URL for curl");
+            state = -1;
+        } else if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK) {
+            fprintf(stderr, "%s\n", "Failed to set write function for curl");
+            state = -1;
+        } else if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, fptr) != CURLE_OK) {
+            fprintf(stderr, "%s\n", "Failed to set output for curl");
+            state = -1;
+        /* this option takes a long; a plain int is wrong for a variadic call */
+        } else if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK) {
+            fprintf(stderr, "%s\n", "Failed to set follow location for curl");
+            state = -1;
+        } else if ((status = curl_easy_perform(curl)) != CURLE_OK) {
+            fprintf(stderr, "%s\n", curl_easy_strerror(status));
+            state = status;
+        }
+
+        curl_easy_cleanup(curl);
+    } else {
+        /* without a handle nothing was downloaded; do not report success */
+        fprintf(stderr, "%s\n", "Failed to initialize curl");
+        state = -1;
+    }
+
+    curl_global_cleanup();
+    fclose(fptr);
+
+    return state;
+}
+
+/* Write one output line to stdout, preceded by `padding` indentation levels
+ * of spaces when padding is positive. With the text-only option bit set the
+ * line is printed plainly; otherwise it is printed through `format`, which
+ * must contain a single %s conversion.
+ */
+void print_line(int padding, const char *format, char *line)
+{
+    const int text_only = (global_options >> 1) & 1U;
+
+    if (padding > 0)
+        fprintf(stdout, "%*c", PADDING(padding), ' ');
+
+    if (text_only)
+        fprintf(stdout, "%s\n", line);
+    else
+        fprintf(stdout, format, line);
+}
+
+/* Format and print one line of the extract.
+ *
+ * Works on a trimmed private copy of `input`: detects "==" style headings,
+ * wraps the text at spaces so that it fits the terminal width, and prints
+ * the resulting pieces through print_line() with styling that depends on
+ * whether a heading has been seen yet and on the heading level. Updates the
+ * global line counters, `blank_line` and `pre_heading`.
+ */
+void handle_line(char *input)
+{
+    char    *temp_line  = (char*)malloc(strlen(input) + 1 * (sizeof(char)));
+    char    *ptr        = temp_line;
+    char    *line       = NULL;
+    char    *next       = NULL;
+    char    heading     = 0x0;
+    int     b           = 0;    /* beginning    */
+    int     e           = 0;    /* end          */
+    int     limit       = terminal_width - PADDING(3);
+
+    strcpy(temp_line, input);
+    string_trim(temp_line);
+
+    /* make sure first section line contains text */
+    /* (an empty line is only emitted inside a section, and never directly
+     * after another empty line) */
+    if (*temp_line || (section_line_count > 0 && !blank_line)) {
+
+        /* check and handle headings, (h2-h4) */
+        /* the line must start with "==" followed by at least one more
+         * character; the conditions below then peek at the 3rd and 4th
+         * characters and rely on the exact ++/-- side effects */
+        if (*ptr++ == '=' && *ptr++ == '=' && *ptr) {
+            if (*ptr++ == '=' && *ptr-- == '=') {
+                string_remove(temp_line, "====");
+                heading = 0x4;
+            } else if (*ptr == '=') {
+                string_remove(temp_line, "===");
+                heading = 0x3;
+            } else {
+                ptr--;
+                /* capitalize 'lowest' heading */
+                while (*ptr) {
+                    /* ASCII a-z */
+                    if (*ptr > 0x60 && *ptr < 0x7b)
+                        *ptr -= 0x20;
+                    /* bytes 0xa0-0xbe that follow a 0xc3 byte; presumably
+                     * the UTF-8 lowercase Latin-1 letters */
+                    if ((unsigned char)*(ptr - 1) == 0xc3
+                    && (unsigned char)*ptr >= 0xa0
+                    && (unsigned char)*ptr <= 0xbe)
+                        *ptr -= 0x20;
+                    ptr++;
+                }
+                string_remove(temp_line, "==");
+                heading = 0x2;
+            }
+
+            pre_heading = false;
+            /* wraps around (unsigned); the increment at the end of this
+             * function brings it back to 0 for the heading line itself */
+            section_line_count = -1;
+        }
+
+        /* prevent lines from breaking in the middle of words */
+        /* e tracks the most recent space, b the start of the current output
+         * line; once the line reaches `limit` the last space becomes a
+         * newline */
+        ptr = temp_line;
+        while (*ptr) {
+
+            if (*ptr == ' ')
+                e = INDEX_OF(ptr, temp_line);
+
+            if (INDEX_OF(ptr, temp_line) - b >= limit) {
+                if (e - b > 0)
+                    temp_line[e] = '\n';
+                b = e + 1;
+            }
+
+            ptr++;
+        }
+
+        string_trim(temp_line);
+        line = temp_line;
+
+        /* format and output lines */
+        /* each '\n' is temporarily replaced by a terminator so that one
+         * wrapped piece can be printed at a time */
+        while (line) {
+            next = strchr(line, '\n');
+            if (next)
+                *next = '\0';
+
+            if (pre_heading) {
+                print_line(2, "\033[3m%s\033[0m\n", line);
+            } else if (heading) {
+                switch (heading) {
+                case 0x2:
+                    print_line(0, "\033[1m%s\033[0m\n", line);
+                    break;
+                case 0x3:
+                    print_line(1, "\033[1m%s\033[0m\n", line);
+                    break;
+                default:
+                    print_line(2, "\033[1m\033[3m%s\033[0m\n", line);
+                    break;
+                }
+            } else {
+                print_line(2, "%s\n", line);
+            }
+
+            if (next)
+                *next = '\n';
+            line = next ? next + 1 : NULL;
+        }
+
+        blank_line = *temp_line == '\0';
+        total_line_count++;
+        section_line_count++;
+    }
+
+    free(temp_line);
+}
+
+/* Read the downloaded response back from the temporary file, cut out the
+ * value of its "extract" member and print it.
+ *
+ * The extract is taken to run from just after `"extract":"` up to the
+ * character before the first '}' that follows. With the raw option bit set
+ * the text is printed after the cleanup mappings only; otherwise it is also
+ * cleaned of redundant spaces, unicode-decoded and passed line by line to
+ * handle_line(), preceded by `subject` as a title unless text-only output
+ * was requested.
+ *
+ * Returns non-zero on success and 0 when the file cannot be read or no
+ * extract is found.
+ */
+int print_wiki(char *subject)
+{
+    const char  beginning[] = "\"extract\":\"";
+    bool        failure     = false;
+    FILE        *fptr       = NULL;
+    char        *content    = NULL;
+    char        *line       = NULL;
+    char        *next       = NULL;
+    char        *ptr        = NULL;
+    long        size        = 0;
+    size_t      count       = 0;
+    int         fd;
+
+    /* read through a duplicate so the shared descriptor stays open */
+    fd = dup(temp_file_descriptor);
+    if (fd != -1) {
+        fptr = fdopen(fd, "rb");
+        if (fptr == NULL)
+            close(fd);
+    }
+    if (fptr == NULL)
+        failure = true;
+
+    if (!failure) {
+        if (fseek(fptr, 0, SEEK_END) != 0 || (size = ftell(fptr)) < 0) {
+            failure = true;
+        } else {
+            rewind(fptr);
+            content = malloc((size_t)size + 1);
+            if (content != NULL) {
+                /* terminate what was actually read; the string functions
+                 * below depend on it */
+                count = fread(content, sizeof(char), (size_t)size, fptr);
+                content[count] = '\0';
+            }
+        }
+        fclose(fptr);
+    }
+
+    if (content == NULL)
+        failure = true;
+
+    if (!failure) {
+        ptr = strstr(content, beginning);
+        if (ptr != NULL) {
+            /* move the extract, terminator included, to the front */
+            ptr += strlen(beginning);
+            memmove(content, ptr, 1 + strlen(ptr));
+        } else {
+            failure = true;
+        }
+    }
+
+    if (!failure) {
+        /* cut at the character before the first '}' (the closing quote) */
+        ptr = strchr(content, '}');
+        if (ptr != NULL && ptr > content)
+            *(ptr - 1) = '\0';
+        else
+            failure = true;
+    }
+
+    if (!failure) {
+        dictionary_replace(content, cleanup_mappings, LENGTH(cleanup_mappings));
+
+        if ((global_options >> 0) & 1U) {
+            fprintf(stdout, "%s\n", content);
+        } else {
+            string_remove_redundent_spaces(content);
+            unicode_decode(content);
+
+            if (!((global_options >> 1) & 1U))
+                fprintf(stdout, "\033[4m%s:\033[0m\n\n", subject);
+
+            line = content;
+            while (line) {
+                next = strchr(line, '\n');
+                if (next)
+                    *next = '\0';
+
+                handle_line(line);
+
+                if (next)
+                    *next = '\n';
+                line = next ? next + 1 : NULL;
+            }
+        }
+    }
+
+    if (content != NULL)
+        free(content);
+
+    return !failure;
+}
+
+/* Entry point. The subject is read from stdin when stdin is not a terminal
+ * and yields a line, otherwise it is the last command line argument. After
+ * option parsing the page is downloaded and printed.
+ *
+ * Returns 0 on success (and after -h), 1 on any error.
+ */
+int main(int argc, char *argv[])
+{
+    int     state       = 0;
+    bool    downloaded  = false;
+    char    *input      = NULL;
+    char    *url        = NULL;
+    char    buffer[80];
+    size_t  input_size;
+    int     c;              /* int: getopt() signals the end with -1 */
+
+    /* validate input from STDIN */
+    if (!isatty(fileno(stdin)) && fgets(buffer, sizeof(buffer), stdin) != NULL) {
+        /* drop the line terminator fgets() keeps */
+        buffer[strcspn(buffer, "\r\n")] = '\0';
+        input_size  = strlen(buffer);
+        input       = malloc(input_size + 1);
+        if (input != NULL)
+            memcpy(input, buffer, input_size + 1);
+
+    /* validate input from CLI */
+    } else if (argc >= 2) {
+        input_size  = strlen(argv[argc - 1]);
+        input       = malloc(input_size + 1);
+        if (input != NULL)
+            memcpy(input, argv[argc - 1], input_size + 1);
+
+    /* no valid input, so show usage */
+    } else {
+        fprintf(stderr, "%s\n", usage);
+        state = 1;
+    }
+
+    if (state == 0 && input == NULL) {
+        fprintf(stderr, "%s\n", "Out of memory");
+        state = 1;
+    }
+
+    if (state == 0) {
+        while ((c = getopt(argc, argv, "hl:rt")) != -1) {
+            switch (c) {
+            case 'l':
+                /* exactly two bytes are copied, so require that many */
+                if (strlen(optarg) < 2) {
+                    fprintf(stderr, "%s\n", usage);
+                    state = 1;
+                } else {
+                    memmove(language, optarg, 2);
+                }
+                break;
+            case 'r':
+                global_options |= 1U << 0;
+                break;
+            case 't':
+                global_options |= 1U << 1;
+                break;
+            case 'h':
+                fprintf(stderr, "%s\n", usage);
+                fprintf(stderr, "Finds a wiki subject and prints it.\n");
+                fprintf(stderr, " -h          Print this help text and exit\n");
+                fprintf(stderr, " -l CODE     Language code in ISO 639-1 format\n");
+                fprintf(stderr, " -r          Print wiki in raw format\n");
+                fprintf(stderr, " -t          Print wiki in text only format\n");
+
+                free(input);
+                return 0;
+            default:
+                fprintf(stderr, "%s\n", usage);
+                state = 1;
+            }
+        }
+    }
+
+    /* initialization is done, so proceed with the rest of the program... */
+    terminal_width = get_terminal_width();
+
+    if (state == 0) {
+        url = get_wiki_url(input);
+        if (url == NULL) {
+            fprintf(stderr, "%s\n", "Out of memory");
+            state = 1;
+        }
+    }
+
+    if (state == 0) {
+        /* from here on the temporary file may exist and needs cleanup */
+        downloaded = true;
+        if (download_wiki(url) != CURLE_OK) {
+            fprintf(stderr, "%s\n", "Failed to download the wiki");
+            state = 1;
+        }
+    }
+
+    if (state == 0 && !print_wiki(input)) {
+        fprintf(stderr, "%s\n", "Failed to print the wiki");
+        state = 1;
+    }
+
+    /* only clean up a temporary file this run actually tried to create */
+    if (downloaded && temp_file_descriptor != -1) {
+        close(temp_file_descriptor);
+        unlink(temp_file);
+    }
+
+    if (input != NULL)
+        free(input);
+
+    if (url != NULL)
+        free(url);
+
+    return state;
+}