summaryrefslogtreecommitdiffstatshomepage
path: root/ascii.c
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2009-03-16 22:19:19 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2009-03-16 22:19:19 +0000
commit08508ed7e6368876201309aff6964a039f011045 (patch)
treedf76daf7d386d0316d975ca1e126b77eb8efba16 /ascii.c
parentc13583a4f480a20774e466fbef54590c6dd6bb48 (diff)
downloadmandoc-08508ed7e6368876201309aff6964a039f011045.tar.gz
mandoc-08508ed7e6368876201309aff6964a039f011045.tar.zst
mandoc-08508ed7e6368876201309aff6964a039f011045.zip
Fixed mdoc_phrase escape handling.
Added MDOC_IGNDELIM (Pf, soon Li, etc.). macro_constant_delimited ignargs -> argv.c parsing. Renamed macro functions to correspond to ontologies. `Fo' and `St' made callable (compat documented). strings.sh deprecated (directly using CPP). Abstracted ASCII translation into ascii.{c,in}. ASCII table uses a self-reordering chained hashtable. Removed old regressions.
Diffstat (limited to 'ascii.c')
-rw-r--r--ascii.c169
1 files changed, 169 insertions, 0 deletions
diff --git a/ascii.c b/ascii.c
new file mode 100644
index 00000000..80523e5f
--- /dev/null
+++ b/ascii.c
@@ -0,0 +1,169 @@
+/* $Id: ascii.c,v 1.1 2009/03/16 22:19:19 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the
+ * above copyright notice and this permission notice appear in all
+ * copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <err.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "term.h"
+
+#define ASCII_PRINT_HI 126
+#define ASCII_PRINT_LO 32
+
+/*
+ * Lookup and hashing routines for constructing the ASCII symbol table,
+ * which should contain a significant portion of mdoc(7)'s special
+ * symbols.
+ */
+
+/*
+ * One entry of the static symbol table: an input code and the output
+ * text it maps to, each stored with an explicit byte length.
+ */
+struct line {
+ /* Input sequence; its first byte picks the hash bucket. */
+ const char *code;
+ /* Text handed back by a2ascii() when the code matches. */
+ const char *out;
+ /* 32- and 64-bit alignment safe. */
+ /* Length that a lookup key must equal before code is compared. */
+ size_t codesz;
+ /* Length reported through a2ascii()'s rsz argument. */
+ size_t outsz;
+};
+
+/*
+ * Node of a singly linked hash chain.  ascii2htab() allocates one node
+ * per table entry; a2ascii() re-links nodes when it re-orders a chain.
+ */
+struct linep {
+ /* The table entry this node stands for. */
+ const struct line *line;
+ /* Next node in the same bucket, or NULL at the end of the chain. */
+ struct linep *next;
+};
+
+/*
+ * LINE(w, x, y, z) expands to one struct line initialiser in which
+ * w is the code, x its length, y the output text and z its length.
+ * The fields are named explicitly so the expansion does not depend on
+ * the declaration order of struct line.  ascii.in is presumably a list
+ * of LINE() invocations; it is not visible from this file.
+ */
+#define LINE(w, x, y, z) \
+	{ .code = (w), .out = (y), .codesz = (x), .outsz = (z) },
+static const struct line lines[] = {
+#include "ascii.in"
+};
+
+
+/* Nonzero when the entry's code is exactly the sz-byte key at p. */
+static inline int match(const struct line *line,
+	const char *p, size_t sz);
+
+
+/*
+ * Build the symbol lookup table from the static lines[] array.
+ *
+ * The table is a very basic chaining hashtable with one bucket per
+ * character in ASCII_PRINT_LO..ASCII_PRINT_HI; the hash of an entry
+ * is simply the integral value of the first character of its code.
+ * Entries sharing a bucket are chained in the order they are
+ * processed (a2ascii() re-orders chains in-line during lookup).
+ *
+ * Returns the table as an opaque pointer to be passed to a2ascii().
+ * Exits through err(3) when memory cannot be allocated.  No routine
+ * visible in this file releases the table or its chain nodes.
+ */
+void *
+ascii2htab(void)
+{
+	struct linep	**htab, *nodes, *tail;
+	int		  i, len, hash;
+
+	assert(0 == sizeof(lines) % sizeof(struct line));
+	len = sizeof(lines) / sizeof(struct line);
+
+	/* One chain node per table entry, allocated as a single array. */
+
+	if (NULL == (nodes = calloc((size_t)len, sizeof(*nodes))))
+		err(1, "malloc");
+
+	/*
+	 * The buckets are kept as struct linep pointers throughout, so
+	 * every slot is written and read through the same type; only
+	 * the return value is converted to void *.
+	 */
+
+	htab = calloc(ASCII_PRINT_HI - ASCII_PRINT_LO + 1, sizeof(*htab));
+
+	if (NULL == htab)
+		err(1, "malloc");
+
+	for (i = 0; i < len; i++) {
+		assert(lines[i].codesz > 0);
+		assert(lines[i].code);
+		assert(lines[i].out);
+
+		nodes[i].line = &lines[i];
+
+		hash = (int)lines[i].code[0] - ASCII_PRINT_LO;
+
+		/* A code starting outside the printable range has no bucket. */
+		assert(hash >= 0 && hash <= ASCII_PRINT_HI - ASCII_PRINT_LO);
+
+		if (NULL == (tail = htab[hash])) {
+			htab[hash] = &nodes[i];
+			continue;
+		}
+
+		for ( ; tail->next; tail = tail->next)
+			/* Scan ahead. */ ;
+
+		tail->next = &nodes[i];
+	}
+
+	return((void *)htab);
+}
+
+
+/*
+ * Look up the sz-byte symbol code at p in the table built by
+ * ascii2htab() (passed as htabp).
+ *
+ * On a hit, stores the length of the output text in *rsz and returns
+ * the text; the matching node is also moved to the head of its hash
+ * chain to optimise for repeat hits.  On a miss, returns NULL and
+ * leaves *rsz untouched.
+ *
+ * p must be non-NULL and sz non-zero (both asserted).  The first byte
+ * of p must lie in ASCII_PRINT_LO..ASCII_PRINT_HI: this is asserted,
+ * and when assertions are compiled out such a key is a miss.
+ */
+const char *
+a2ascii(void *htabp, const char *p, size_t sz, size_t *rsz)
+{
+	struct linep	**htab, *pp, *prev;
+	int		  hash;
+
+	/* Buckets hold struct linep pointers; use that type for all access. */
+	htab = (struct linep **)htabp;
+
+	assert(p);
+	assert(sz > 0);
+	assert(p[0] >= ASCII_PRINT_LO && p[0] <= ASCII_PRINT_HI);
+
+	/*
+	 * The assertion above disappears under NDEBUG, so check the
+	 * range explicitly: a byte outside it must never be used as a
+	 * bucket index.
+	 */
+
+	if (p[0] < ASCII_PRINT_LO || p[0] > ASCII_PRINT_HI)
+		return(NULL);
+
+	hash = (int)p[0] - ASCII_PRINT_LO;
+
+	/* Walk the chain, remembering the predecessor for re-linking. */
+
+	for (prev = NULL, pp = htab[hash]; pp; prev = pp, pp = pp->next) {
+		if ( ! match(pp->line, p, sz))
+			continue;
+
+		/* Re-order the hash chain (no-op if already at the head). */
+
+		if (prev) {
+			prev->next = pp->next;
+			pp->next = htab[hash];
+			htab[hash] = pp;
+		}
+
+		*rsz = pp->line->outsz;
+		return(pp->line->out);
+	}
+
+	return(NULL);
+}
+
+
+/*
+ * Return 1 when the sz-byte key at p equals the entry's code, else 0.
+ * The lengths are compared first, so strncmp() only runs for keys of
+ * exactly the entry's length.
+ */
+static inline int
+match(const struct line *line, const char *p, size_t sz)
+{
+
+	return(sz == line->codesz && 0 == strncmp(line->code, p, sz));
+}