aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/chars.c
diff options
context:
space:
mode:
author: Kristaps Dzonsons <kristaps@bsd.lv> 2009-09-17 07:41:28 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2009-09-17 07:41:28 +0000
commit: 3f648213cd789e9c307f834875f09da5e1d86ab1 (patch)
tree: aa52ea85d2cdaabca340865f79a922ab1a8c21e9 /chars.c
parent: 6f8fe1982a3b763cad6bc6f48f3dc1c2f19281d9 (diff)
download: mandoc-3f648213cd789e9c307f834875f09da5e1d86ab1.tar.gz
mandoc-3f648213cd789e9c307f834875f09da5e1d86ab1.tar.zst
mandoc-3f648213cd789e9c307f834875f09da5e1d86ab1.zip
ascii_xxx -> chars_xxx (intended to hold more than just ascii encoding).
More html work.
Diffstat (limited to 'chars.c')
-rw-r--r--chars.c201
1 files changed, 201 insertions, 0 deletions
diff --git a/chars.c b/chars.c
new file mode 100644
index 00000000..6c2298f0
--- /dev/null
+++ b/chars.c
@@ -0,0 +1,201 @@
+/* $Id: chars.c,v 1.1 2009/09/17 07:41:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <err.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "chars.h"
+
+#define ASCII_PRINT_HI 126 /* highest first-byte value find() accepts as a lookup key */
+#define ASCII_PRINT_LO 32 /* lowest first-byte value find() accepts; also the bucket index bias */
+
+struct ln { /* one table entry; filled positionally by the CHAR/STRING/BOTH macros, so field order matters */
+ struct ln *next; /* next entry in the same hash chain, or NULL at the tail */
+ const char *code; /* key string compared against the caller's input in match() */
+ const char *out; /* replacement text handed back by find() on a hit */
+ size_t codesz; /* byte count of code; match() requires it to equal the input length */
+ size_t outsz; /* byte count reported to the caller through *rsz */
+ int type; /* bitmask of the CHARS_* flags below, tested in match() */
+#define CHARS_CHAR (1 << 0) /* entry is visible to chars_a2ascii() */
+#define CHARS_STRING (1 << 1) /* entry is visible to chars_a2res() */
+#define CHARS_BOTH (0x03) /* both of the above bits set */
+};
+
+/*
+ * Entry constructors expanded by chars.in.  Arguments are, in order,
+ * (code, codesz, out, outsz); they are shuffled into the field order
+ * of struct ln and tagged with the lookup kinds the entry answers to.
+ * Each expansion ends in a comma so entries can simply be listed.
+ */
+#define	CHAR(w, x, y, z) \
+	{ NULL, (w), (y), (x), (z), CHARS_CHAR },
+#define	STRING(w, x, y, z) \
+	{ NULL, (w), (y), (x), (z), CHARS_STRING },
+#define	BOTH(w, x, y, z) \
+	{ NULL, (w), (y), (x), (z), CHARS_BOTH },
+
+/*
+ * The table itself.  It is sized by its initialiser rather than by a
+ * hand-maintained count: a fixed bound larger than the number of
+ * entries in chars.in would leave zero-filled slots at the end, which
+ * chars_init() rejects.  The `next' links are written at run time, so
+ * the array cannot be const.
+ */
+static struct ln lines[] = {
+#include "chars.in"
+};
+
+/* Number of entries in lines[], as an int to suit signed loop indices. */
+#define	LINES_MAX ((int)(sizeof(lines) / sizeof(lines[0])))
+
+struct tbl { /* lookup handle; chars_init() returns it and the public entry points take it as void * */
+ struct ln **htab; /* chain heads, indexed by first key byte minus ASCII_PRINT_LO */
+};
+
+static inline int match(const struct ln *,
+ const char *, size_t, int);
+static const char *find(struct tbl *, const char *,
+ size_t, size_t *, int);
+
+
+/*
+ * Release a table obtained from chars_init(): first the bucket array,
+ * then the handle itself.  The entries the buckets point at are
+ * static and are not freed.  As with free(3), passing NULL is
+ * accepted and does nothing.
+ */
+void
+chars_free(void *arg)
+{
+	struct tbl	*tab;
+
+	if (NULL == (tab = arg))
+		return;
+
+	free(tab->htab);
+	free(tab);
+}
+
+
+/*
+ * Build the lookup table over the static lines[] array and return it
+ * as an opaque handle for chars_a2ascii(), chars_a2res() and
+ * chars_free().  Exits through err(3) if memory cannot be obtained.
+ * The `type' argument is currently not consulted.
+ *
+ * The table is a very basic chaining hash: the bucket is the first
+ * byte of an entry's code, biased by ASCII_PRINT_LO.  Entries within
+ * a bucket start out in table order and are re-ordered in place by
+ * lookups.
+ *
+ * The chain links are stored in lines[] itself, so every `next' is
+ * overwritten here; this makes it safe to initialise again after a
+ * previous table has been freed.  For the same reason, two tables
+ * must not be in use at the same time.
+ */
+/* ARGSUSED */
+void *
+chars_init(enum chars type)
+{
+	struct tbl	 *tab;
+	struct ln	**htab;
+	int		  i, hash;
+
+	if (NULL == (tab = malloc(sizeof(*tab))))
+		err(1, "malloc");
+
+	htab = calloc(ASCII_PRINT_HI - ASCII_PRINT_LO + 1, sizeof(*htab));
+
+	if (NULL == htab)
+		err(1, "malloc");
+
+	/*
+	 * Walk the table backwards and push each entry onto the front
+	 * of its bucket.  This yields chains in ascending table order
+	 * without rescanning a chain for its tail on every insert.
+	 */
+
+	for (i = LINES_MAX - 1; i >= 0; i--) {
+		assert(lines[i].codesz > 0);
+		assert(lines[i].code);
+		assert(lines[i].out);
+
+		/* The bucket index must land inside htab. */
+		assert(lines[i].code[0] >= ASCII_PRINT_LO);
+		assert(lines[i].code[0] <= ASCII_PRINT_HI);
+
+		hash = (int)lines[i].code[0] - ASCII_PRINT_LO;
+
+		lines[i].next = htab[hash];
+		htab[hash] = &lines[i];
+	}
+
+	tab->htab = htab;
+	return(tab);
+}
+
+
+/*
+ * Look up the `sz'-byte sequence at `p' among the entries flagged
+ * CHARS_CHAR.  Returns the replacement text and stores its length in
+ * `*rsz', or returns NULL if there is no such entry.  `arg' is the
+ * handle returned by chars_init().
+ */
+const char *
+chars_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz)
+{
+	struct tbl	*tab = arg;
+
+	return(find(tab, p, sz, rsz, CHARS_CHAR));
+}
+
+
+/*
+ * Look up the `sz'-byte sequence at `p' among the entries flagged
+ * CHARS_STRING.  Returns the replacement text and stores its length
+ * in `*rsz', or returns NULL if there is no such entry.  `arg' is the
+ * handle returned by chars_init().
+ */
+const char *
+chars_a2res(void *arg, const char *p, size_t sz, size_t *rsz)
+{
+	struct tbl	*tab = arg;
+
+	return(find(tab, p, sz, rsz, CHARS_STRING));
+}
+
+
+/*
+ * Look up the `sz'-byte key `p' among the entries whose type mask
+ * intersects `type'.  On a hit, store the replacement length in
+ * `*rsz' and return the replacement text; return NULL, leaving
+ * `*rsz' untouched, when nothing matches or when the first key byte
+ * lies outside ASCII_PRINT_LO..ASCII_PRINT_HI.
+ *
+ * Buckets are selected by the first key byte, as laid out by
+ * chars_init().  A hit that is not already at the head of its chain
+ * is moved there, so repeated lookups of the same key get cheaper.
+ */
+static const char *
+find(struct tbl *tab, const char *p, size_t sz, size_t *rsz, int type)
+{
+	struct ln	**slot;
+	struct ln	 *cur, *before;
+
+	assert(p);
+	assert(sz > 0);
+
+	if (p[0] < ASCII_PRINT_LO || p[0] > ASCII_PRINT_HI)
+		return(NULL);
+
+	slot = &tab->htab[(int)p[0] - ASCII_PRINT_LO];
+
+	/* Walk the chain, remembering the predecessor of `cur'. */
+
+	before = NULL;
+	cur = *slot;
+	while (cur && ! match(cur, p, sz, type)) {
+		before = cur;
+		cur = cur->next;
+	}
+
+	if (NULL == cur)
+		return(NULL);
+
+	/* Unlink the hit and splice it in at the head of the chain. */
+
+	if (before) {
+		before->next = cur->next;
+		cur->next = *slot;
+		*slot = cur;
+	}
+
+	*rsz = cur->outsz;
+	return(cur->out);
+}
+
+
+/*
+ * Return 1 if entry `ln' answers to lookup kind `type' and its code
+ * is exactly the `sz' bytes at `p'; return 0 otherwise.  The cheap
+ * mask and length tests run before the string comparison.
+ */
+static inline int
+match(const struct ln *ln, const char *p, size_t sz, int type)
+{
+
+	return((ln->type & type) && ln->codesz == sz &&
+	    0 == strncmp(ln->code, p, sz));
+}