1 files changed, 689 insertions, 0 deletions
diff --git a/apropos.c b/apropos.c
new file mode 100644
index 00000000..409ef8e2
--- /dev/null
+++ b/apropos.c
@@ -0,0 +1,689 @@
+/*	$Id: apropos.c,v 1.1 2011/10/06 23:00:54 kristaps Exp $ */
+/*
+* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef __linux__
+# include <db_185.h>
+#else
+# include <db.h>
+#endif
+
+#include "mandoc.h"
+
+#define	MAXRESULTS	 100
+
+#define TYPE_NAME	0x01
+#define TYPE_FUNCTION	0x02
+#define TYPE_UTILITY	0x04
+#define TYPE_INCLUDES	0x08
+#define TYPE_VARIABLE	0x10
+#define TYPE_STANDARD	0x20
+#define TYPE_AUTHOR	0x40
+#define TYPE_CONFIG	0x80
+#define TYPE_DESC	0x100
+#define TYPE_XREF	0x200
+#define TYPE_PATH	0x400
+#define	TYPE_ENV	0x800
+#define	TYPE_ERR	0x1000
+
+enum	match {
+	MATCH_SUBSTR = 0,
+	MATCH_REGEX,
+	MATCH_EXACT
+};
+
+enum	sort {
+	SORT_TITLE = 0,
+	SORT_CAT,
+	SORT__MAX
+};
+
+struct	opts {
+	enum sort	 sort; /* output sorting */
+	const char	*arch; /* restrict to architecture */
+	const char	*cat; /* restrict to category */
+	int		 types; /* only types in bitmask */
+	int		 insens; /* case-insensitive match */
+	enum match	 match; /* match type */
+};
+
+struct	type {
+	int		 mask;
+	const char	*name;
+};
+
+struct	rec {
+	char		*file;
+	char		*cat;
+	char		*title;
+	char		*arch;
+	char		*desc;
+	recno_t		 rec;
+};
+
+struct	res {
+	char		*arch; /* architecture */
+	char		*desc; /* free-form description */
+	char		*keyword; /* matched keyword */
+	int	 	 types; /* bitmask of field selectors */
+	char		*cat; /* manual section */
+	char		*title; /* manual section */
+	char		*uri; /* formatted uri of file */
+	recno_t		 rec; /* unique id of underlying manual */
+};
+
+struct	state {
+	DB		 *db; /* database */
+	DB		 *idx; /* index */
+	const char	 *dbf; /* database name */
+	const char	 *idxf; /* index name */
+	void		(*err)(const char *);
+	void		(*errx)(const char *, ...);
+};
+
+static	const char * const sorts[SORT__MAX] = {
+	"cat", /* SORT_CAT */
+	"title", /* SORT_TITLE */
+};
+
+static	const struct type types[] = {
+	{ TYPE_NAME, "name" },
+	{ TYPE_FUNCTION, "func" },
+	{ TYPE_UTILITY, "utility" },
+	{ TYPE_INCLUDES, "incl" },
+	{ TYPE_VARIABLE, "var" },
+	{ TYPE_STANDARD, "stand" },
+	{ TYPE_AUTHOR, "auth" },
+	{ TYPE_CONFIG, "conf" },
+	{ TYPE_DESC, "desc" },
+	{ TYPE_XREF, "xref" },
+	{ TYPE_PATH, "path" },
+	{ TYPE_ENV, "env" },
+	{ TYPE_ERR, "err" },
+	{ INT_MAX, "all" },
+	{ 0, NULL }
+};
+
+static	void	 buf_alloc(char **, size_t *, size_t);
+static	void	 buf_dup(struct mchars *, char **, const char *);
+static	void	 buf_redup(struct mchars *, char **, 
+			size_t *, const char *);
+static	void	 error(const char *, ...);
+static	int	 sort_cat(const void *, const void *);
+static	int	 sort_title(const void *, const void *);
+static	void	 state_destroy(struct state *);
+static	int	 state_getrecord(struct state *, recno_t, struct rec *);
+static	int	 state_init(struct state *, 
+			const char *, const char *,
+			void (*err)(const char *),
+			void (*errx)(const char *, ...));
+static	void	 state_output(const struct res *, int);
+static	void	 state_search(struct state *, 
+			const struct opts *, char *);
+
+static	void	 usage(void);
+
+static	const char	 *progname;
+
+int
+main(int argc, char *argv[])
+{
+	int		 ch, i;
+	const char	*dbf, *idxf;
+	struct state	 state;
+	char		*q, *v;
+	struct opts	 opts;
+	extern int	 optind;
+	extern char	*optarg;
+
+	memset(&opts, 0, sizeof(struct opts));
+
+	dbf = "mandoc.db";
+	idxf = "mandoc.index";
+	q = NULL;
+
+	progname = strrchr(argv[0], '/');
+	if (progname == NULL)
+		progname = argv[0];
+	else
+		++progname;
+
+	opts.match = MATCH_SUBSTR;
+
+	while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:"))) 
+		switch (ch) {
+		case ('a'):
+			opts.arch = optarg;
+			break;
+		case ('c'):
+			opts.cat = optarg;
+			break;
+		case ('e'):
+			opts.match = MATCH_EXACT;
+			break;
+		case ('I'):
+			opts.insens = 1;
+			break;
+		case ('r'):
+			opts.match = MATCH_REGEX;
+			break;
+		case ('s'):
+			for (i = 0; i < SORT__MAX; i++) {
+				if (strcmp(optarg, sorts[i])) 
+					continue;
+				opts.sort = (enum sort)i;
+				break;
+			}
+
+			if (i < SORT__MAX)
+				break;
+
+			error("%s: Bad sort\n", optarg);
+			return(EXIT_FAILURE);
+		case ('t'):
+			while (NULL != (v = strsep(&optarg, ","))) {
+				if ('\0' == *v)
+					continue;
+				for (i = 0; types[i].mask; i++) {
+					if (strcmp(types[i].name, v))
+						continue;
+					break;
+				}
+				if (0 == types[i].mask)
+					break;
+				opts.types |= types[i].mask;
+			}
+			if (NULL == v)
+				break;
+			
+			error("%s: Bad type\n", v);
+			return(EXIT_FAILURE);
+		default:
+			usage();
+			return(EXIT_FAILURE);
+		}
+
+	argc -= optind;
+	argv += optind;
+
+	if (0 == argc || '\0' == **argv) {
+		usage();
+		return(EXIT_FAILURE);
+	} else
+		q = *argv;
+
+	if (0 == opts.types)
+		opts.types = TYPE_NAME | TYPE_DESC;
+
+	if ( ! state_init(&state, dbf, idxf, perror, error)) {
+		state_destroy(&state);
+		return(EXIT_FAILURE);
+	}
+
+	state_search(&state, &opts, q);
+	state_destroy(&state);
+
+	return(EXIT_SUCCESS);
+}
+
+static void
+state_search(struct state *p, const struct opts *opts, char *q)
+{
+	int		 i, len, ch, rflags, dflag;
+	struct mchars	*mc;
+	char		*buf;
+	size_t		 bufsz;
+	recno_t		 rec;
+	uint32_t	 fl;
+	DBT		 key, val;
+	struct res	 res[MAXRESULTS];
+	regex_t		 reg;
+	regex_t		*regp;
+	char		 filebuf[10];
+	struct rec	 record;
+
+	len = 0;
+	buf = NULL;
+	bufsz = 0;
+	ch = 0;
+	regp = NULL;
+
+	switch (opts->match) {
+	case (MATCH_REGEX):
+		rflags = REG_EXTENDED | REG_NOSUB | 
+			(opts->insens ? REG_ICASE : 0);
+
+		if (0 != regcomp(&reg, q, rflags)) {
+			error("%s: Bad pattern\n", q);
+			return;
+		}
+
+		regp = &reg;
+		dflag = R_FIRST;
+		break;
+	case (MATCH_EXACT):
+		key.data = q;
+		key.size = strlen(q) + 1;
+		dflag = R_CURSOR;
+		break;
+	default:
+		dflag = R_FIRST;
+		break;
+	}
+
+	if (NULL == (mc = mchars_alloc())) {
+		perror(NULL);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Iterate over the entire keyword database.
+	 * For each record, we must first translate the key into UTF-8.
+	 * Following that, make sure it's acceptable.
+	 * Lastly, add it to the available records.
+	 */
+
+	while (len < MAXRESULTS) {
+		if ((ch = (*p->db->seq)(p->db, &key, &val, dflag)))
+			break;
+
+		dflag = R_NEXT;
+
+		/* 
+		 * Keys must be sized as such: the keyword must be
+		 * non-empty (nil terminator plus one character) and the
+		 * value must be 8 (recno_t---uint32_t---index reference
+		 * and a uint32_t flag field).
+		 */
+
+		if (key.size < 2 || 8 != val.size) {
+			error("%s: Corrupt database\n", p->dbf);
+			exit(EXIT_FAILURE);
+		}
+
+		buf_redup(mc, &buf, &bufsz, (char *)key.data);
+
+		fl = *(uint32_t *)val.data;
+
+		if ( ! (fl & opts->types))
+			continue;
+
+		switch (opts->match) {
+		case (MATCH_REGEX):
+			if (regexec(regp, buf, 0, NULL, 0))
+				continue;
+			break;
+		case (MATCH_EXACT):
+			if (opts->insens && strcasecmp(buf, q))
+				goto send;
+			if ( ! opts->insens && strcmp(buf, q))
+				goto send;
+			break;
+		default:
+			if (opts->insens && NULL == strcasestr(buf, q))
+				continue;
+			if ( ! opts->insens && NULL == strstr(buf, q))
+				continue;
+			break;
+		}
+
+		/*
+		 * Now look up the file itself in our index.  The file's
+		 * indexed by its recno for fast lookups.
+		 */
+
+		memcpy(&rec, val.data + 4, sizeof(recno_t));
+
+		if ( ! state_getrecord(p, rec, &record))
+			exit(EXIT_FAILURE);
+
+		/* If we're in a different section, skip... */
+
+		if (opts->cat && strcasecmp(opts->cat, record.cat))
+			continue;
+		if (opts->arch && strcasecmp(opts->arch, record.arch))
+			continue;
+
+		/* FIXME: this needs to be changed.  Ugh.  Linear. */
+
+		for (i = 0; i < len; i++)
+			if (res[i].rec == record.rec)
+				break;
+
+		if (i < len)
+			continue;
+
+		/*
+		 * Now we have our filename, keywords, types, and all
+		 * other necessary information.  
+		 * Process it and add it to our list of results.
+		 */
+
+		filebuf[9] = '\0';
+		snprintf(filebuf, 10, "%u", record.rec);
+		assert('\0' == filebuf[9]);
+
+		res[len].rec = record.rec;
+		res[len].types = fl;
+
+		buf_dup(mc, &res[len].keyword, buf);
+		buf_dup(mc, &res[len].uri, filebuf);
+		buf_dup(mc, &res[len].cat, record.cat);
+		buf_dup(mc, &res[len].arch, record.arch);
+		buf_dup(mc, &res[len].title, record.title);
+		buf_dup(mc, &res[len].desc, record.desc);
+		len++;
+	}
+
+send:
+	if (ch < 0) {
+		perror(p->dbf);
+		exit(EXIT_FAILURE);
+	} 
+
+	switch (opts->sort) {
+	case (SORT_CAT):
+		qsort(res, len, sizeof(struct res), sort_cat);
+		break;
+	default:
+		qsort(res, len, sizeof(struct res), sort_title);
+		break;
+	}
+
+	state_output(res, len);
+
+	for (len-- ; len >= 0; len--) {
+		free(res[len].keyword);
+		free(res[len].title);
+		free(res[len].cat);
+		free(res[len].arch);
+		free(res[len].desc);
+		free(res[len].uri);
+	}
+
+	free(buf);
+	mchars_free(mc);
+
+	if (regp)
+		regfree(regp);
+}
+
+/*
+ * Track allocated buffer size for buf_redup().
+ */
+static inline void
+buf_alloc(char **buf, size_t *bufsz, size_t sz)
+{
+
+	if (sz < *bufsz) 
+		return;
+
+	*bufsz = sz + 1024;
+	if (NULL == (*buf = realloc(*buf, *bufsz))) {
+		perror(NULL);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Like buf_redup() but throwing away the buffer size.
+ */
+static void
+buf_dup(struct mchars *mc, char **buf, const char *val)
+{
+	size_t		 bufsz;
+
+	bufsz = 0;
+	*buf = NULL;
+	buf_redup(mc, buf, &bufsz, val);
+}
+
+/*
+ * Normalise strings from the index and database.
+ * These strings are escaped as defined by mandoc_char(7) along with
+ * other goop in mandoc.h (e.g., soft hyphens).
+ */
+static void
+buf_redup(struct mchars *mc, char **buf, 
+		size_t *bufsz, const char *val)
+{
+	size_t		 sz;
+	const char	*seq, *cpp;
+	int		 len, pos;
+	enum mandoc_esc	 esc;
+	const char	 rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
+
+	/* Pre-allocate by the length of the input */
+
+	buf_alloc(buf, bufsz, strlen(val) + 1);
+
+	pos = 0;
+
+	while ('\0' != *val) {
+		/*
+		 * Halt on the first escape sequence.
+		 * This also halts on the end of string, in which case
+		 * we just copy, fallthrough, and exit the loop.
+		 */
+		if ((sz = strcspn(val, rsv)) > 0) {
+			memcpy(&(*buf)[pos], val, sz);
+			pos += (int)sz;
+			val += (int)sz;
+		}
+
+		if (ASCII_HYPH == *val) {
+			(*buf)[pos++] = '-';
+			val++;
+			continue;
+		} else if (ASCII_NBRSP == *val) {
+			(*buf)[pos++] = ' ';
+			val++;
+			continue;
+		} else if ('\\' != *val)
+			break;
+
+		/* Read past the slash. */
+
+		val++;
+
+		/*
+		 * Parse the escape sequence and see if it's a
+		 * predefined character or special character.
+		 */
+
+		esc = mandoc_escape(&val, &seq, &len);
+		if (ESCAPE_ERROR == esc)
+			break;
+
+		cpp = ESCAPE_SPECIAL == esc ? 
+			mchars_spec2str(mc, seq, len, &sz) : NULL;
+
+		if (NULL == cpp)
+			continue;
+
+		/* Copy the rendered glyph into the stream. */
+
+		buf_alloc(buf, bufsz, sz);
+
+		memcpy(&(*buf)[pos], cpp, sz);
+		pos += (int)sz;
+	}
+
+	(*buf)[pos] = '\0';
+}
+
+static void
+error(const char *fmt, ...)
+{
+	va_list		 ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+
+static void
+state_output(const struct res *res, int sz)
+{
+	int		 i;
+
+	for (i = 0; i < sz; i++)
+		printf("%s(%s%s%s) - %s\n", res[i].title, 
+				res[i].cat, 
+				*res[i].arch ? "/" : "",
+				*res[i].arch ? res[i].arch : "",
+				res[i].desc);
+}
+
+static void
+usage(void)
+{
+
+	fprintf(stderr, "usage: %s "
+			"[-eIr] "
+			"[-a arch] "
+			"[-c cat] "
+			"[-s sort] "
+			"[-t type[,...]] "
+			"key\n", progname);
+}
+
+static int
+state_init(struct state *p, 
+		const char *dbf, const char *idxf,
+		void (*err)(const char *),
+		void (*errx)(const char *, ...))
+{
+	BTREEINFO	 info;
+
+	memset(p, 0, sizeof(struct state));
+	memset(&info, 0, sizeof(BTREEINFO));
+
+	info.flags = R_DUP;
+
+	p->dbf = dbf;
+	p->idxf = idxf;
+	p->err = err;
+
+	p->db = dbopen(p->dbf, O_RDONLY, 0, DB_BTREE, &info);
+	if (NULL == p->db) {
+		(*err)(p->dbf);
+		return(0);
+	}
+
+	p->idx = dbopen(p->idxf, O_RDONLY, 0, DB_RECNO, NULL);
+	if (NULL == p->idx) {
+		(*err)(p->idxf);
+		return(0);
+	}
+
+	return(1);
+}
+
+static void
+state_destroy(struct state *p)
+{
+
+	if (p->db)
+		(*p->db->close)(p->db);
+	if (p->idx)
+		(*p->idx->close)(p->idx);
+}
+
+static int
+state_getrecord(struct state *p, recno_t rec, struct rec *rp)
+{
+	DBT		key, val;
+	size_t		sz;
+	int		rc;
+
+	key.data = &rec;
+	key.size = sizeof(recno_t);
+
+	rc = (*p->idx->get)(p->idx, &key, &val, 0);
+	if (rc < 0) {
+		(*p->err)(p->idxf);
+		return(0);
+	} else if (rc > 0) {
+		(*p->errx)("%s: Corrupt index\n", p->idxf);
+		return(0);
+	}
+
+	rp->file = (char *)val.data;
+	if ((sz = strlen(rp->file) + 1) >= val.size) {
+		(*p->errx)("%s: Corrupt index\n", p->idxf);
+		return(0);
+	}
+
+	rp->cat = (char *)val.data + (int)sz;
+	if ((sz += strlen(rp->cat) + 1) >= val.size) {
+		(*p->errx)("%s: Corrupt index\n", p->idxf);
+		return(0);
+	}
+
+	rp->title = (char *)val.data + (int)sz;
+	if ((sz += strlen(rp->title) + 1) >= val.size) {
+		(*p->errx)("%s: Corrupt index\n", p->idxf);
+		return(0);
+	}
+
+	rp->arch = (char *)val.data + (int)sz;
+	if ((sz += strlen(rp->arch) + 1) >= val.size) {
+		(*p->errx)("%s: Corrupt index\n", p->idxf);
+		return(0);
+	}
+
+	rp->desc = (char *)val.data + (int)sz;
+	rp->rec = rec;
+	return(1);
+}
+
+static int
+sort_title(const void *p1, const void *p2)
+{
+
+	return(strcmp(((const struct res *)p1)->title,
+		      ((const struct res *)p2)->title));
+}
+
+static int
+sort_cat(const void *p1, const void *p2)
+{
+	int		 rc;
+
+	rc = strcmp(((const struct res *)p1)->cat,
+			((const struct res *)p2)->cat);
+
+	return(0 == rc ? sort_title(p1, p2) : rc);
+}