-/* $Id: mandoc-db.c,v 1.1 2011/04/02 15:40:40 kristaps Exp $ */
+/* $Id: mandoc-db.c,v 1.18 2011/05/04 08:21:17 kristaps Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <sys/param.h>
#include <assert.h>
#include "mandoc.h"
#define MANDOC_DB "mandoc.db"
-#define MANDOC_BUFSZ 10
+#define MANDOC_IDX "mandoc.index"
+#define MANDOC_BUFSZ BUFSIZ
+#define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR
+/*
+ * Kind of keyword held by a keyword-database entry.  The per-macro
+ * handlers copy one of these (as a uint32_t) into the first four bytes
+ * of the value buffer before the key is written.
+ */
enum type {
MANDOC_NONE = 0,
MANDOC_FUNCTION,
MANDOC_UTILITY,
MANDOC_INCLUDES,
- MANDOC_VARIABLE
+ MANDOC_VARIABLE,
+ MANDOC_STANDARD,
+ MANDOC_AUTHOR,
+ MANDOC_CONFIG
};
+#define MAN_ARGS DB *db, \
+ const char *dbn, \
+ DBT *key, size_t *ksz, \
+ DBT *val, \
+ DBT *rval, size_t *rsz, \
+ const struct man_node *n
#define MDOC_ARGS DB *db, \
const char *dbn, \
DBT *key, size_t *ksz, \
DBT *val, \
+ DBT *rval, size_t *rsz, \
const struct mdoc_node *n
static void dbt_append(DBT *, size_t *, const char *);
static void dbt_appendb(DBT *, size_t *,
const void *, size_t);
static void dbt_init(DBT *, size_t *);
-static void version(void);
+static void dbt_put(DB *, const char *, DBT *, DBT *);
static void usage(void);
-static void pmdoc(DB *, const char *,
- DBT *, size_t *,
- DBT *, size_t *,
- const char *, struct mdoc *);
+static void pman(DB *, const char *, DBT *, size_t *,
+ DBT *, DBT *, size_t *, struct man *);
+static int pman_node(MAN_ARGS);
+static void pmdoc(DB *, const char *, DBT *, size_t *,
+ DBT *, DBT *, size_t *, struct mdoc *);
static void pmdoc_node(MDOC_ARGS);
+static void pmdoc_An(MDOC_ARGS);
+static void pmdoc_Cd(MDOC_ARGS);
static void pmdoc_Fd(MDOC_ARGS);
static void pmdoc_In(MDOC_ARGS);
static void pmdoc_Fn(MDOC_ARGS);
static void pmdoc_Fo(MDOC_ARGS);
+static void pmdoc_Nd(MDOC_ARGS);
static void pmdoc_Nm(MDOC_ARGS);
+static void pmdoc_St(MDOC_ARGS);
static void pmdoc_Vt(MDOC_ARGS);
typedef void (*pmdoc_nf)(MDOC_ARGS);
NULL, /* El */
NULL, /* It */
NULL, /* Ad */
- NULL, /* An */
+ pmdoc_An, /* An */
NULL, /* Ar */
- NULL, /* Cd */
+ pmdoc_Cd, /* Cd */
NULL, /* Cm */
NULL, /* Dv */
NULL, /* Er */
NULL, /* Ic */
pmdoc_In, /* In */
NULL, /* Li */
- NULL, /* Nd */
+ pmdoc_Nd, /* Nd */
pmdoc_Nm, /* Nm */
NULL, /* Op */
NULL, /* Ot */
NULL, /* Pa */
NULL, /* Rv */
- NULL, /* St */
+ pmdoc_St, /* St */
pmdoc_Vt, /* Va */
pmdoc_Vt, /* Vt */
NULL, /* Xr */
int
main(int argc, char *argv[])
{
- struct mparse *mp;
- struct mdoc *mdoc;
- struct man *man;
- const char *f, *fn;
- size_t sz;
- char fbuf[MAXPATHLEN];
- int c;
- DB *db;
- DBT key, val;
- size_t ksz, vsz;
- BTREEINFO info;
+ struct mparse *mp; /* parse sequence */
+ struct mdoc *mdoc; /* resulting mdoc */
+ struct man *man; /* resulting man */
+ char *fn; /* current file being parsed */
+ const char *msec, /* manual section */
+ *mtitle, /* manual title */
+ *dir; /* result dir (default: cwd) */
+ char ibuf[MAXPATHLEN], /* index fname */
+ ibbuf[MAXPATHLEN], /* index backup fname */
+ fbuf[MAXPATHLEN], /* btree fname */
+ fbbuf[MAXPATHLEN]; /* btree backup fname */
+ int ch;
+ DB *idx, /* index database */
+ *db; /* keyword database */
+ DBT rkey, rval, /* recno entries */
+ key, val; /* persistent keyword entries */
+ size_t sv,
+ ksz, rsz; /* entry buffer size */
+ char vbuf[8]; /* stringified record number */
+ BTREEINFO info; /* btree configuration */
+ recno_t rec; /* current record number */
extern int optind;
extern char *optarg;
- f = MANDOC_DB;
-
progname = strrchr(argv[0], '/');
if (progname == NULL)
progname = argv[0];
else
++progname;
- while (-1 != (c = getopt(argc, argv, "f:V")))
- switch (c) {
- case ('f'):
- f = optarg;
+ dir = "";
+
+ while (-1 != (ch = getopt(argc, argv, "d:")))
+ switch (ch) {
+ case ('d'):
+ dir = optarg;
break;
- case ('V'):
- version();
- return((int)MANDOCLEVEL_OK);
default:
usage();
return((int)MANDOCLEVEL_BADARG);
argv += optind;
/*
- * Set up a temporary file-name into which we're going to write
- * all of our data. This is securely renamed to the real
- * file-name after we've written all of our data.
+ * Set up temporary file-names into which we're going to write
+ * all of our data (both for the index and database). These
+ * will be securely renamed to the real file-names after we've
+ * written all of our data.
*/
- if (0 == (sz = strlen(f)) || sz + 5 >= MAXPATHLEN) {
- fprintf(stderr, "%s: Bad filename\n", progname);
+ ibuf[0] = ibuf[MAXPATHLEN - 2] =
+ ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
+ fbuf[0] = fbuf[MAXPATHLEN - 2] =
+ fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
+
+ strlcat(fbuf, dir, MAXPATHLEN);
+ strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
+
+ strlcat(fbbuf, fbuf, MAXPATHLEN);
+ strlcat(fbbuf, "~", MAXPATHLEN);
+
+ strlcat(ibuf, dir, MAXPATHLEN);
+ strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
+
+ strlcat(ibbuf, ibuf, MAXPATHLEN);
+ strlcat(ibbuf, "~", MAXPATHLEN);
+
+ if ('\0' != fbuf[MAXPATHLEN - 2] ||
+ '\0' != fbbuf[MAXPATHLEN - 2] ||
+ '\0' != ibuf[MAXPATHLEN - 2] ||
+ '\0' != ibbuf[MAXPATHLEN - 2]) {
+ fprintf(stderr, "%s: Path too long\n", progname);
exit((int)MANDOCLEVEL_SYSERR);
}
- memcpy(fbuf, f, sz);
- memcpy(fbuf + (int)sz, ".bak", 4);
- fbuf[(int)sz + 4] = '\0';
-
/*
- * Open a BTREE database that allows duplicates. If the
- * database already exists (it's a backup anyway), then blow it
- * away with O_TRUNC.
+ * For the keyword database, open a BTREE database that allows
+ * duplicates. For the index database, use a standard RECNO
+ * database type.
*/
memset(&info, 0, sizeof(BTREEINFO));
info.flags = R_DUP;
-
- db = dbopen(fbuf, O_CREAT|O_TRUNC|O_RDWR,
- 0644, DB_BTREE, &info);
+ db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
if (NULL == db) {
- perror(f);
+ perror(fbbuf);
exit((int)MANDOCLEVEL_SYSERR);
}
- /* Use the auto-parser and don't report any errors. */
+ idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
- mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+ /*
+ * Test the index handle that was just opened (not the keyword
+ * handle, which is already known to be valid here).  On failure,
+ * release the keyword database before bailing out.
+ */
+ if (NULL == idx) {
+ perror(ibbuf);
+ (*db->close)(db);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
/*
* Try parsing the manuals given on the command line. If we
* totally fail, then just keep on going. Take resulting trees
* and push them down into the database code.
+ * Use the auto-parser and don't report any errors.
*/
+ mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+
memset(&key, 0, sizeof(DBT));
memset(&val, 0, sizeof(DBT));
- ksz = vsz = 0;
+ memset(&rkey, 0, sizeof(DBT));
+ memset(&rval, 0, sizeof(DBT));
+
+ val.size = sizeof(vbuf);
+ val.data = vbuf;
+ rkey.size = sizeof(recno_t);
+
+ rec = 1;
+ ksz = rsz = 0;
while (NULL != (fn = *argv++)) {
- printf("Trying: %s\n", fn);
mparse_reset(mp);
- if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL)
+
+ /* Parse and get (non-empty) AST. */
+
+ if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
+ fprintf(stderr, "%s: Parse failure\n", fn);
continue;
+ }
mparse_result(mp, &mdoc, &man);
+ if (NULL == mdoc && NULL == man)
+ continue;
+
+ /* Manual section: can be empty string. */
+
+ msec = NULL != mdoc ?
+ mdoc_meta(mdoc)->msec :
+ man_meta(man)->msec;
+ mtitle = NULL != mdoc ?
+ mdoc_meta(mdoc)->title :
+ man_meta(man)->title;
+
+ assert(msec);
+ assert(mtitle);
+
+ /*
+ * The index record value consists of a nil-terminated
+ * filename, a nil-terminated manual section, and a
+ * nil-terminated description. Since the description
+ * may not be set, we set a sentinel to see if we're
+ * going to write a nil byte in its place.
+ */
+
+ dbt_init(&rval, &rsz);
+ dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1);
+ dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1);
+ dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1);
+ sv = rval.size;
+
+ /* Fix the record number in the btree value. */
+
+ memset(val.data, 0, sizeof(uint32_t));
+ memcpy(val.data + 4, &rec, sizeof(uint32_t));
+
if (mdoc)
- pmdoc(db, fbuf, &key, &ksz,
- &val, &vsz, fn, mdoc);
+ pmdoc(db, fbbuf, &key, &ksz,
+ &val, &rval, &rsz, mdoc);
+ else
+ pman(db, fbbuf, &key, &ksz,
+ &val, &rval, &rsz, man);
+
+ /*
+ * Apply this to the index. If we haven't had a
+ * description set, put an empty one in now.
+ */
+
+ if (rval.size == sv)
+ dbt_appendb(&rval, &rsz, "", 1);
+
+ rkey.data = &rec;
+ dbt_put(idx, ibbuf, &rkey, &rval);
+
+ printf("Indexed: %s\n", fn);
+ rec++;
}
(*db->close)(db);
+ (*idx->close)(idx);
+
mparse_free(mp);
free(key.data);
- free(val.data);
+ free(rval.data);
/* Atomically replace the file with our temporary one. */
- if (-1 == rename(fbuf, f))
- perror(f);
+ if (-1 == rename(fbbuf, fbuf))
+ perror(fbuf);
+ /* Report the index file name, not the btree one, on failure. */
+ if (-1 == rename(ibbuf, ibuf))
+ perror(ibuf);
return((int)MANDOCLEVEL_OK);
}
while (key->size + sz >= *ksz) {
*ksz = key->size + sz + MANDOC_BUFSZ;
- *ksz = *ksz + (4 - (*ksz % 4));
key->data = mandoc_realloc(key->data, *ksz);
}
+#if 0
+ dstp = key->data + (int)key->size;
+
+ while (NULL != (endp = memchr(cp, '\\', sz))) {
+ ssz = endp - cp;
+ memcpy(dstp, cp, ssz);
+
+ dstp += ssz;
+ key->size += ssz;
+ sz -= ssz;
+
+ cp = endp++;
+ /* FIXME: expects nil-terminated string! */
+ esc = mandoc_escape((const char **)&endp, NULL, NULL);
+
+ switch (esc) {
+ case (ESCAPE_ERROR):
+ /* Nil-terminate this point. */
+ memcpy(dstp, "", 1);
+ key->size++;
+ return;
+ case (ESCAPE_PREDEF):
+ /* FALLTHROUGH */
+ case (ESCAPE_SPECIAL):
+ break;
+ default:
+ sz -= endp - cp;
+ cp = endp;
+ continue;
+ }
+
+ ssz = endp - cp;
+ memcpy(dstp, cp, ssz);
+
+ dstp += ssz;
+ key->size += ssz;
+ sz -= ssz;
+
+ cp = endp;
+ }
+#endif
+
memcpy(key->data + (int)key->size, cp, sz);
key->size += sz;
}
{
size_t sz;
- assert(key->data);
- assert(key->size <= *ksz);
-
if (0 == (sz = strlen(cp)))
return;
- /* Overshoot by MANDOC_BUFSZ (and nil terminator). */
-
- while (key->size + sz + 1 >= *ksz) {
- *ksz = key->size + sz + 1 + MANDOC_BUFSZ;
- *ksz = *ksz + (4 - (*ksz % 4));
- key->data = mandoc_realloc(key->data, *ksz);
- }
-
- /* Space-separate appended tokens. */
+ assert(key->data);
if (key->size)
((char *)key->data)[(int)key->size - 1] = ' ';
- memcpy(key->data + (int)key->size, cp, sz + 1);
- key->size += sz + 1;
+ dbt_appendb(key, ksz, cp, sz + 1);
+}
+
+/*
+ * Handler for `An': within the AUTHORS section, join every text child
+ * into the key (dbt_append space-separates the pieces) and mark the
+ * value as an author entry.  Outside AUTHORS the key is left untouched.
+ */
+/* ARGSUSED */
+static void
+pmdoc_An(MDOC_ARGS)
+{
+ const struct mdoc_node *cur;
+ uint32_t tag;
+
+ if (SEC_AUTHORS != n->sec)
+ return;
+
+ for (cur = n->child; NULL != cur; cur = cur->next) {
+ if (MDOC_TEXT != cur->type)
+ continue;
+ dbt_append(key, ksz, cur->string);
+ }
+
+ /* The entry kind occupies the first four bytes of the value. */
+ tag = MANDOC_AUTHOR;
+ memcpy(val->data, &tag, 4);
}
/* ARGSUSED */
uint32_t fl;
const char *start, *end;
size_t sz;
- char nil;
if (SEC_SYNOPSIS != n->sec)
return;
if (NULL == (n = n->child) || MDOC_TEXT != n->type)
return;
+
+ /*
+ * Only consider those `Fd' macro fields that begin with an
+ * "inclusion" token (versus, e.g., #define).
+ */
if (strcmp("#include", n->string))
return;
+
if (NULL == (n = n->next) || MDOC_TEXT != n->type)
return;
+ /*
+ * Strip away the enclosing angle brackets and make sure we're
+ * not zero-length.
+ */
+
start = n->string;
- if ('<' == *start)
+ if ('<' == *start || '"' == *start)
start++;
if (0 == (sz = strlen(start)))
return;
end = &start[(int)sz - 1];
- if ('>' == *end)
+ if ('>' == *end || '"' == *end)
end--;
- nil = '\0';
dbt_appendb(key, ksz, start, end - start + 1);
- dbt_appendb(key, ksz, &nil, 1);
+ dbt_appendb(key, ksz, "", 1);
fl = MANDOC_INCLUDES;
memcpy(val->data, &fl, 4);
}
+/*
+ * Handler for `Cd': within the SYNOPSIS section, join every text child
+ * into the key (dbt_append space-separates the pieces) and mark the
+ * value as a configuration entry.
+ */
+/* ARGSUSED */
+static void
+pmdoc_Cd(MDOC_ARGS)
+{
+ const struct mdoc_node *cur;
+ uint32_t tag;
+
+ if (SEC_SYNOPSIS != n->sec)
+ return;
+
+ for (cur = n->child; NULL != cur; cur = cur->next) {
+ if (MDOC_TEXT != cur->type)
+ continue;
+ dbt_append(key, ksz, cur->string);
+ }
+
+ /* The entry kind occupies the first four bytes of the value. */
+ tag = MANDOC_CONFIG;
+ memcpy(val->data, &tag, 4);
+}
+
/* ARGSUSED */
static void
pmdoc_In(MDOC_ARGS)
if (NULL == cp)
cp = n->child->string;
- /* Ignore pointers. */
+ /* Strip away pointer symbol. */
while ('*' == *cp)
cp++;
memcpy(val->data, &fl, 4);
}
+/*
+ * Handler for `St': within the STANDARDS section, use the first child
+ * (which must be text) as the key and mark the value as a standards
+ * entry.  Anything else leaves the key empty.
+ */
+/* ARGSUSED */
+static void
+pmdoc_St(MDOC_ARGS)
+{
+ const struct mdoc_node *arg;
+ uint32_t tag;
+
+ if (SEC_STANDARDS != n->sec)
+ return;
+
+ arg = n->child;
+ if (NULL == arg)
+ return;
+ if (MDOC_TEXT != arg->type)
+ return;
+
+ dbt_append(key, ksz, arg->string);
+
+ /* The entry kind occupies the first four bytes of the value. */
+ tag = MANDOC_STANDARD;
+ memcpy(val->data, &tag, 4);
+}
+
+/*
+ * Handler shared by the `Va' and `Vt' table slots: within the SYNOPSIS
+ * section, take the last child's text, drop one trailing ';', and store
+ * the nil-terminated result as a variable key.  A `Vt' node is only
+ * considered when it is a BODY node.
+ */
/* ARGSUSED */
static void
pmdoc_Vt(MDOC_ARGS)
{
uint32_t fl;
- const char *start, *end;
+ const char *start;
size_t sz;
- char nil;
if (SEC_SYNOPSIS != n->sec)
return;
if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
return;
- if (NULL == n->child || MDOC_TEXT != n->child->type)
+ if (NULL == n->last || MDOC_TEXT != n->last->type)
return;
/*
- * Strip away leading '*' and trailing ';'.
+ * Strip away leading pointer symbol '*' and trailing ';'.
*/
start = n->last->string;
if (0 == (sz = strlen(start)))
return;
- end = &start[sz - 1];
- while (end > start && ';' == *end)
- end--;
+ /*
+ * NOTE(review): only a single trailing ';' is removed here; no
+ * code stripping a leading '*' is visible in this function.
+ */
+ if (';' == start[sz - 1])
+ sz--;
- if (end == start)
+ if (0 == sz)
return;
- nil = '\0';
- dbt_appendb(key, ksz, start, end - start + 1);
- dbt_appendb(key, ksz, &nil, 1);
+ dbt_appendb(key, ksz, start, sz);
+ dbt_appendb(key, ksz, "", 1);
+
fl = MANDOC_VARIABLE;
memcpy(val->data, &fl, 4);
}
memcpy(val->data, &fl, 4);
}
+
+/*
+ * Handler for `Nd': build the manual description in the index record
+ * value.  The first text child starts a new nil-terminated field; each
+ * later text child is joined onto it through dbt_append.  The keyword
+ * key is never set, so nothing is written to the keyword database.
+ */
+/* ARGSUSED */
+static void
+pmdoc_Nd(MDOC_ARGS)
+{
+ const struct mdoc_node *cur;
+ int seen;
+
+ seen = 0;
+ for (cur = n->child; NULL != cur; cur = cur->next) {
+ if (MDOC_TEXT != cur->type)
+ continue;
+ if (seen) {
+ dbt_append(rval, rsz, cur->string);
+ continue;
+ }
+ dbt_appendb(rval, rsz, cur->string, strlen(cur->string) + 1);
+ seen = 1;
+ }
+}
+
/* ARGSUSED */
static void
pmdoc_Nm(MDOC_ARGS)
memcpy(val->data, &fl, 4);
}
+/*
+ * Write a key/value pair into the given database.  An empty key means
+ * "nothing to store" and is skipped silently.  A failed put prints the
+ * database name with perror() and terminates the program.
+ */
+static void
+dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+{
+
+ if (0 == key->size)
+ return;
+
+ assert(key->data);
+ assert(val->size);
+ assert(val->data);
+
+ if (0 != (*db->put)(db, key, val, 0)) {
+ perror(dbn);
+ exit((int)MANDOCLEVEL_SYSERR);
+ /* NOTREACHED */
+ }
+}
+
/*
* Call out to per-macro handlers after clearing the persistent database
* key. If the macro sets the database key, flush it to the database.
break;
dbt_init(key, ksz);
- (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
- if (0 == key->size)
- break;
- if (0 == (*db->put)(db, key, val, 0))
- break;
-
- perror(dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- /* NOTREACHED */
+ (*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n);
+ dbt_put(db, dbn, key, val);
+ break;
default:
break;
}
- pmdoc_node(db, dbn, key, ksz, val, n->child);
- pmdoc_node(db, dbn, key, ksz, val, n->next);
+ pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child);
+ pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next);
}
-static void
-pmdoc(DB *db, const char *dbn,
- DBT *key, size_t *ksz,
- DBT *val, size_t *valsz,
- const char *path, struct mdoc *m)
+/*
+ * Recursively walk a -man tree (children first, then siblings) looking
+ * for the BODY of an `SH' whose head is the single text word "NAME"
+ * and whose first body child is text.  From that text, each name that
+ * is followed by a ',' or ' ' is written to the keyword database, and
+ * the remainder (after an optional "\-" or "-") is appended to the
+ * index record value as the description.
+ *
+ * Returns 1 to stop the walk, which happens only when the NAME text
+ * contains no ' ' or ',' delimiter at all; returns 0 otherwise.
+ */
+static int
+pman_node(MAN_ARGS)
{
- uint32_t flag;
+ const struct man_node *head, *body;
+ const char *start, *sv;
+ size_t sz;
+ uint32_t fl;
- flag = MANDOC_NONE;
+ if (NULL == n)
+ return(0);
- /*
- * Database values are a 4-byte bit-field followed by the path
- * of the manual. Allocate all the space we'll need now; we
- * change the bit-field depending on the key type.
+ /*
+ * We're only searching for one thing: the first text child in
+ * the BODY of a NAME section. Since we don't keep track of
+ * sections in -man, run some hoops to find out whether we're in
+ * the correct section or not.
*/
- dbt_init(val, valsz);
- dbt_appendb(val, valsz, &flag, 4);
- dbt_append(val, valsz, path);
+ if (MAN_BODY == n->type && MAN_SH == n->tok) {
+ body = n;
+ assert(body->parent);
+ if (NULL != (head = body->parent->head) &&
+ 1 == head->nchild &&
+ NULL != (head = (head->child)) &&
+ MAN_TEXT == head->type &&
+ 0 == strcmp(head->string, "NAME") &&
+ NULL != (body = body->child) &&
+ MAN_TEXT == body->type) {
+
+ /*
+ * enum type has no MANDOC_NAME member, so use
+ * MANDOC_UTILITY for names from the NAME section.
+ * NOTE(review): confirm this is the same kind the
+ * -mdoc `Nm' handler stores.
+ */
+ fl = MANDOC_UTILITY;
+ memcpy(val->data, &fl, 4);
+
+ assert(body->string);
+ start = sv = body->string;
+
+ /*
+ * Go through a special heuristic dance here.
+ * This is why -man manuals are great!
+ * Conventionally, one or more manual names are
+ * comma-specified prior to a whitespace, then a
+ * dash, then a description. Try to puzzle out
+ * the name parts here.
+ */
+
+ for ( ;; ) {
+ sz = strcspn(start, " ,");
+ if ('\0' == start[(int)sz])
+ break;
+
+ dbt_init(key, ksz);
+ dbt_appendb(key, ksz, start, sz);
+ dbt_appendb(key, ksz, "", 1);
+
+ dbt_put(db, dbn, key, val);
+
+ if (' ' == start[(int)sz]) {
+ start += (int)sz + 1;
+ break;
+ }
+
+ assert(',' == start[(int)sz]);
+ start += (int)sz + 1;
+ while (' ' == *start)
+ start++;
+ }
+
+ /*
+ * No delimiter was found at all: the key is set to
+ * the whole string but is not written here, and the
+ * walk is stopped.
+ */
+ if (sv == start) {
+ dbt_init(key, ksz);
+ dbt_append(key, ksz, start);
+ return(1);
+ }
+
+ while (' ' == *start)
+ start++;
+
+ if ('\\' == *start && '-' == *(start + 1))
+ start += 2;
+ else if ('-' == *start)
+ start++;
+
+ while (' ' == *start)
+ start++;
+
+ dbt_appendb(rval, rsz, start, strlen(start) + 1);
+ }
+ }
+
+ if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child))
+ return(1);
+ if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next))
+ return(1);
- pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
+ return(0);
}
+/*
+ * Index a parsed -man document: hand the node returned by man_node()
+ * to the recursive walker.  The walker's return value is ignored.
+ */
static void
-version(void)
+pman(DB *db, const char *dbn, DBT *key, size_t *ksz,
+ DBT *val, DBT *rval, size_t *rsz, struct man *m)
+{
+
+ pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m));
+}
+
+
+/*
+ * Index a parsed -mdoc document: hand the node returned by mdoc_node()
+ * to the recursive walker.
+ */
+static void
+pmdoc(DB *db, const char *dbn, DBT *key, size_t *ksz,
+ DBT *val, DBT *rval, size_t *rsz, struct mdoc *m)
{
- printf("%s %s\n", progname, VERSION);
+ pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m));
}
static void
{
fprintf(stderr, "usage: %s "
- "[-V] "
- "[-f path] "
+ "[-d path] "
"[file...]\n",
progname);
}