-/* $Id: mandoc-db.c,v 1.3 2011/04/03 14:18:29 kristaps Exp $ */
+/* $Id: mandoc-db.c,v 1.18 2011/05/04 08:21:17 kristaps Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
MANDOC_FUNCTION,
MANDOC_UTILITY,
MANDOC_INCLUDES,
- MANDOC_VARIABLE
+ MANDOC_VARIABLE,
+ MANDOC_STANDARD,
+ MANDOC_AUTHOR,
+ MANDOC_CONFIG
};
+/*
+ * Argument list shared by the -man node walker (pman_node); identical
+ * to MDOC_ARGS apart from the node being a struct man_node.
+ */
+#define MAN_ARGS DB *db, \
+ const char *dbn, \
+ DBT *key, size_t *ksz, \
+ DBT *val, \
+ DBT *rval, size_t *rsz, \
+ const struct man_node *n
#define MDOC_ARGS DB *db, \
const char *dbn, \
DBT *key, size_t *ksz, \
DBT *val, \
+ DBT *rval, size_t *rsz, \
const struct mdoc_node *n
static void dbt_append(DBT *, size_t *, const char *);
static void dbt_appendb(DBT *, size_t *,
const void *, size_t);
static void dbt_init(DBT *, size_t *);
-static void version(void);
+static void dbt_put(DB *, const char *, DBT *, DBT *);
static void usage(void);
-static void pmdoc(DB *, const char *,
- DBT *, size_t *, DBT *,
- const char *, struct mdoc *);
+static void pman(DB *, const char *, DBT *, size_t *,
+ DBT *, DBT *, size_t *, struct man *);
+static int pman_node(MAN_ARGS);
+static void pmdoc(DB *, const char *, DBT *, size_t *,
+ DBT *, DBT *, size_t *, struct mdoc *);
static void pmdoc_node(MDOC_ARGS);
+static void pmdoc_An(MDOC_ARGS);
+static void pmdoc_Cd(MDOC_ARGS);
static void pmdoc_Fd(MDOC_ARGS);
static void pmdoc_In(MDOC_ARGS);
static void pmdoc_Fn(MDOC_ARGS);
static void pmdoc_Fo(MDOC_ARGS);
+static void pmdoc_Nd(MDOC_ARGS);
static void pmdoc_Nm(MDOC_ARGS);
+static void pmdoc_St(MDOC_ARGS);
static void pmdoc_Vt(MDOC_ARGS);
typedef void (*pmdoc_nf)(MDOC_ARGS);
NULL, /* El */
NULL, /* It */
NULL, /* Ad */
- NULL, /* An */
+ pmdoc_An, /* An */
NULL, /* Ar */
- NULL, /* Cd */
+ pmdoc_Cd, /* Cd */
NULL, /* Cm */
NULL, /* Dv */
NULL, /* Er */
NULL, /* Ic */
pmdoc_In, /* In */
NULL, /* Li */
- NULL, /* Nd */
+ pmdoc_Nd, /* Nd */
pmdoc_Nm, /* Nm */
NULL, /* Op */
NULL, /* Ot */
NULL, /* Pa */
NULL, /* Rv */
- NULL, /* St */
+ pmdoc_St, /* St */
pmdoc_Vt, /* Va */
pmdoc_Vt, /* Vt */
NULL, /* Xr */
{
struct mparse *mp; /* parse sequence */
struct mdoc *mdoc; /* resulting mdoc */
- char *fn;
- const char *dir; /* result dir (default: cwd) */
+ struct man *man; /* resulting man */
+ char *fn; /* current file being parsed */
+ const char *msec, /* manual section */
+ *mtitle, /* manual title */
+ *dir; /* result dir (default: cwd) */
char ibuf[MAXPATHLEN], /* index fname */
ibbuf[MAXPATHLEN], /* index backup fname */
fbuf[MAXPATHLEN], /* btree fname */
fbbuf[MAXPATHLEN]; /* btree backup fname */
- int c;
- DB *index, /* index database */
+ int ch;
+ DB *idx, /* index database */
*db; /* keyword database */
DBT rkey, rval, /* recno entries */
key, val; /* persistent keyword entries */
- size_t ksz; /* entry buffer size */
- char vbuf[8];
+ size_t sv,
+ ksz, rsz; /* entry buffer size */
+ char vbuf[8]; /* stringified record number */
BTREEINFO info; /* btree configuration */
- recno_t rec;
+ recno_t rec; /* current record number */
extern int optind;
extern char *optarg;
dir = "";
- while (-1 != (c = getopt(argc, argv, "d:V")))
- switch (c) {
+ while (-1 != (ch = getopt(argc, argv, "d:")))
+ switch (ch) {
case ('d'):
dir = optarg;
break;
- case ('V'):
- version();
- return((int)MANDOCLEVEL_OK);
default:
usage();
return((int)MANDOCLEVEL_BADARG);
exit((int)MANDOCLEVEL_SYSERR);
}
- index = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
+ idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
if (NULL == db) {
perror(ibbuf);
rkey.size = sizeof(recno_t);
rec = 1;
- ksz = 0;
+ ksz = rsz = 0;
while (NULL != (fn = *argv++)) {
mparse_reset(mp);
- if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL)
- continue;
+ /* Parse and get (non-empty) AST. */
- mparse_result(mp, &mdoc, NULL);
- if (NULL == mdoc)
+ if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
+ fprintf(stderr, "%s: Parse failure\n", fn);
+ continue;
+ }
+ mparse_result(mp, &mdoc, &man);
+ if (NULL == mdoc && NULL == man)
continue;
- rkey.data = &rec;
- rval.data = fn;
- rval.size = strlen(fn) + 1;
+ /* Manual section: can be empty string. */
- if (-1 == (*index->put)(index, &rkey, &rval, 0)) {
- perror(ibbuf);
- break;
- }
+ msec = NULL != mdoc ?
+ mdoc_meta(mdoc)->msec :
+ man_meta(man)->msec;
+ mtitle = NULL != mdoc ?
+ mdoc_meta(mdoc)->title :
+ man_meta(man)->title;
+
+ assert(msec);
+ assert(mtitle);
+
+ /*
+ * The index record value consists of a nil-terminated
+ * filename, a nil-terminated manual section, a
+ * nil-terminated manual title, and a nil-terminated
+ * description. Since the description may not be set, we
+ * set a sentinel to see if we're going to write a nil
+ * byte in its place.
+ */
+
+ dbt_init(&rval, &rsz);
+ dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1);
+ dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1);
+ dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1);
+ sv = rval.size;
+
+ /* Fix the record number in the btree value. */
memset(val.data, 0, sizeof(uint32_t));
memcpy(val.data + 4, &rec, sizeof(uint32_t));
- pmdoc(db, fbbuf, &key, &ksz, &val, fn, mdoc);
+ if (mdoc)
+ pmdoc(db, fbbuf, &key, &ksz,
+ &val, &rval, &rsz, mdoc);
+ else
+ pman(db, fbbuf, &key, &ksz,
+ &val, &rval, &rsz, man);
+
+ /*
+ * Apply this to the index. If we haven't had a
+ * description set, put an empty one in now.
+ */
+
+ if (rval.size == sv)
+ dbt_appendb(&rval, &rsz, "", 1);
+
+ rkey.data = &rec;
+ dbt_put(idx, ibbuf, &rkey, &rval);
+
+ printf("Indexed: %s\n", fn);
rec++;
}
(*db->close)(db);
- (*index->close)(index);
+ (*idx->close)(idx);
mparse_free(mp);
free(key.data);
+ free(rval.data);
/* Atomically replace the file with our temporary one. */
key->data = mandoc_realloc(key->data, *ksz);
}
+#if 0
+ dstp = key->data + (int)key->size;
+
+ while (NULL != (endp = memchr(cp, '\\', sz))) {
+ ssz = endp - cp;
+ memcpy(dstp, cp, ssz);
+
+ dstp += ssz;
+ key->size += ssz;
+ sz -= ssz;
+
+ cp = endp++;
+ /* FIXME: expects nil-terminated string! */
+ esc = mandoc_escape((const char **)&endp, NULL, NULL);
+
+ switch (esc) {
+ case (ESCAPE_ERROR):
+ /* Nil-terminate this point. */
+ memcpy(dstp, "", 1);
+ key->size++;
+ return;
+ case (ESCAPE_PREDEF):
+ /* FALLTHROUGH */
+ case (ESCAPE_SPECIAL):
+ break;
+ default:
+ sz -= endp - cp;
+ cp = endp;
+ continue;
+ }
+
+ ssz = endp - cp;
+ memcpy(dstp, cp, ssz);
+
+ dstp += ssz;
+ key->size += ssz;
+ sz -= ssz;
+
+ cp = endp;
+ }
+#endif
+
memcpy(key->data + (int)key->size, cp, sz);
key->size += sz;
}
dbt_appendb(key, ksz, cp, sz + 1);
}
+/*
+ * Handler for the `An' macro.  Within the AUTHORS section, pass every
+ * text child to dbt_append() to build the key, then write the
+ * MANDOC_AUTHOR type into the first four bytes of the value.
+ * Does nothing for any other section.
+ */
+/* ARGSUSED */
+static void
+pmdoc_An(MDOC_ARGS)
+{
+	const struct mdoc_node	*kid;
+	uint32_t		 type;
+
+	if (SEC_AUTHORS == n->sec) {
+		kid = n->child;
+		while (NULL != kid) {
+			if (MDOC_TEXT == kid->type)
+				dbt_append(key, ksz, kid->string);
+			kid = kid->next;
+		}
+
+		type = MANDOC_AUTHOR;
+		memcpy(val->data, &type, 4);
+	}
+}
+
/* ARGSUSED */
static void
pmdoc_Fd(MDOC_ARGS)
uint32_t fl;
const char *start, *end;
size_t sz;
- char nil;
if (SEC_SYNOPSIS != n->sec)
return;
if (NULL == (n = n->child) || MDOC_TEXT != n->type)
return;
+
+ /*
+ * Only consider those `Fd' macro fields that begin with an
+ * "inclusion" token (versus, e.g., #define).
+ */
if (strcmp("#include", n->string))
return;
+
if (NULL == (n = n->next) || MDOC_TEXT != n->type)
return;
+ /*
+ * Strip away the enclosing angle brackets and make sure we're
+ * not zero-length.
+ */
+
start = n->string;
- if ('<' == *start)
+ if ('<' == *start || '"' == *start)
start++;
if (0 == (sz = strlen(start)))
return;
end = &start[(int)sz - 1];
- if ('>' == *end)
+ if ('>' == *end || '"' == *end)
end--;
- nil = '\0';
dbt_appendb(key, ksz, start, end - start + 1);
- dbt_appendb(key, ksz, &nil, 1);
+ dbt_appendb(key, ksz, "", 1);
fl = MANDOC_INCLUDES;
memcpy(val->data, &fl, 4);
}
+/*
+ * Handler for the `Cd' macro.  Only acts inside the SYNOPSIS section:
+ * each text child is handed to dbt_append() to form the key, and the
+ * value's leading four bytes receive the MANDOC_CONFIG type.
+ */
+/* ARGSUSED */
+static void
+pmdoc_Cd(MDOC_ARGS)
+{
+	const struct mdoc_node	*cur;
+	uint32_t		 type;
+
+	if (SEC_SYNOPSIS != n->sec)
+		return;
+
+	for (cur = n->child; NULL != cur; cur = cur->next) {
+		if (MDOC_TEXT != cur->type)
+			continue;
+		dbt_append(key, ksz, cur->string);
+	}
+
+	type = MANDOC_CONFIG;
+	memcpy(val->data, &type, 4);
+}
+
/* ARGSUSED */
static void
pmdoc_In(MDOC_ARGS)
if (NULL == cp)
cp = n->child->string;
- /* Ignore pointers. */
+ /* Strip away pointer symbol. */
while ('*' == *cp)
cp++;
memcpy(val->data, &fl, 4);
}
+/*
+ * Handler for the `St' macro.  When the node sits in the STANDARDS
+ * section and its first child is text, that text is appended to the
+ * key and the value is tagged with the MANDOC_STANDARD type.
+ */
+/* ARGSUSED */
+static void
+pmdoc_St(MDOC_ARGS)
+{
+	const struct mdoc_node	*arg;
+	uint32_t		 type;
+
+	if (SEC_STANDARDS != n->sec)
+		return;
+
+	arg = n->child;
+	if (NULL == arg || MDOC_TEXT != arg->type)
+		return;
+
+	dbt_append(key, ksz, arg->string);
+
+	type = MANDOC_STANDARD;
+	memcpy(val->data, &type, 4);
+}
+
/* ARGSUSED */
static void
pmdoc_Vt(MDOC_ARGS)
{
uint32_t fl;
- const char *start, *end;
+ const char *start;
size_t sz;
- char nil;
if (SEC_SYNOPSIS != n->sec)
return;
if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
return;
- if (NULL == n->child || MDOC_TEXT != n->child->type)
+ if (NULL == n->last || MDOC_TEXT != n->last->type)
return;
/*
- * Strip away leading '*' and trailing ';'.
+ * Strip away leading pointer symbol '*' and trailing ';'.
*/
start = n->last->string;
if (0 == (sz = strlen(start)))
return;
- end = &start[sz - 1];
- while (end > start && ';' == *end)
- end--;
+ if (';' == start[sz - 1])
+ sz--;
- if (end == start)
+ if (0 == sz)
return;
- nil = '\0';
- dbt_appendb(key, ksz, start, end - start + 1);
- dbt_appendb(key, ksz, &nil, 1);
+ dbt_appendb(key, ksz, start, sz);
+ dbt_appendb(key, ksz, "", 1);
+
fl = MANDOC_VARIABLE;
memcpy(val->data, &fl, 4);
}
memcpy(val->data, &fl, 4);
}
+
+/*
+ * Handler for the `Nd' macro.  Unlike the keyword handlers, this one
+ * writes to the index record value (rval) rather than the key: the
+ * first text child is appended together with its terminating nil via
+ * dbt_appendb(), and every later text child goes through dbt_append().
+ * Non-text children are skipped.
+ */
+/* ARGSUSED */
+static void
+pmdoc_Nd(MDOC_ARGS)
+{
+	const struct mdoc_node	*kid;
+	int			 seen;
+
+	seen = 0;
+	for (kid = n->child; NULL != kid; kid = kid->next) {
+		if (MDOC_TEXT != kid->type)
+			continue;
+		if (seen) {
+			dbt_append(rval, rsz, kid->string);
+			continue;
+		}
+		dbt_appendb(rval, rsz, kid->string, strlen(kid->string) + 1);
+		seen = 1;
+	}
+}
+
/* ARGSUSED */
static void
pmdoc_Nm(MDOC_ARGS)
memcpy(val->data, &fl, 4);
}
+/*
+ * Store a key/value pair in the given database.  A zero-length key is
+ * silently ignored.  If the database's put routine reports anything
+ * other than success, print the error against the database name (dbn)
+ * and exit with MANDOCLEVEL_SYSERR.
+ */
+static void
+dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+{
+
+	if (key->size > 0) {
+		assert(key->data);
+		assert(val->size);
+		assert(val->data);
+
+		if (0 != (*db->put)(db, key, val, 0)) {
+			perror(dbn);
+			exit((int)MANDOCLEVEL_SYSERR);
+			/* NOTREACHED */
+		}
+	}
+}
+
/*
* Call out to per-macro handlers after clearing the persistent database
* key. If the macro sets the database key, flush it to the database.
break;
dbt_init(key, ksz);
- (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
- if (0 == key->size)
- break;
- if (0 == (*db->put)(db, key, val, 0))
- break;
-
- perror(dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- /* NOTREACHED */
+ (*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n);
+ dbt_put(db, dbn, key, val);
+ break;
default:
break;
}
- pmdoc_node(db, dbn, key, ksz, val, n->child);
- pmdoc_node(db, dbn, key, ksz, val, n->next);
+ pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child);
+ pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next);
+}
+
+/*
+ * Recursively walk a -man parse tree looking for the first text child
+ * in the body of the NAME section.  Every name found ahead of the
+ * description is stored in the keyword database (db) as a MANDOC_NAME
+ * entry, and the description that follows is appended, nil-terminated,
+ * to the index record value (rval).
+ *
+ * Returns 1 when the NAME text was a single token without any space or
+ * comma separator (that token is stored as the name and the walk
+ * stops), or when a child or sibling returned 1; returns 0 otherwise.
+ */
+static int
+pman_node(MAN_ARGS)
+{
+	const struct man_node *head, *body;
+	const char	*start, *sv;
+	size_t		 sz;
+	uint32_t	 fl;
+
+	if (NULL == n)
+		return(0);
+
+	/*
+	 * We're only searching for one thing: the first text child in
+	 * the BODY of a NAME section.  Since we don't keep track of
+	 * sections in -man, run some hoops to find out whether we're in
+	 * the correct section or not.
+	 */
+
+	if (MAN_BODY == n->type && MAN_SH == n->tok) {
+		body = n;
+		assert(body->parent);
+		if (NULL != (head = body->parent->head) &&
+				1 == head->nchild &&
+				NULL != (head = (head->child)) &&
+				MAN_TEXT == head->type &&
+				0 == strcmp(head->string, "NAME") &&
+				NULL != (body = body->child) &&
+				MAN_TEXT == body->type) {
+
+			/* Everything stored from here on is a manual name. */
+			fl = MANDOC_NAME;
+			memcpy(val->data, &fl, 4);
+
+			assert(body->string);
+			start = sv = body->string;
+
+			/*
+			 * Go through a special heuristic dance here.
+			 * This is why -man manuals are great!
+			 * Conventionally, one or more manual names are
+			 * comma-specified prior to a whitespace, then a
+			 * dash, then a description.  Try to puzzle out
+			 * the name parts here.
+			 */
+
+			for ( ;; ) {
+				sz = strcspn(start, " ,");
+				if ('\0' == start[(int)sz])
+					break;
+
+				dbt_init(key, ksz);
+				dbt_appendb(key, ksz, start, sz);
+				dbt_appendb(key, ksz, "", 1);
+
+				dbt_put(db, dbn, key, val);
+
+				/* A space ends the list of names. */
+				if (' ' == start[(int)sz]) {
+					start += (int)sz + 1;
+					break;
+				}
+
+				assert(',' == start[(int)sz]);
+				start += (int)sz + 1;
+				while (' ' == *start)
+					start++;
+			}
+
+			/*
+			 * No separator at all: the whole text is taken
+			 * as the one and only name.  Store it before
+			 * bailing out; otherwise the key built here is
+			 * never written, since the caller does not use
+			 * it.
+			 */
+
+			if (sv == start) {
+				dbt_init(key, ksz);
+				dbt_append(key, ksz, start);
+				dbt_put(db, dbn, key, val);
+				return(1);
+			}
+
+			/* Skip the dash (either "\-" or "-") and padding. */
+
+			while (' ' == *start)
+				start++;
+
+			if ('\\' == *start && '-' == *(start + 1))
+				start += 2;
+			else if ('-' == *start)
+				start++;
+
+			while (' ' == *start)
+				start++;
+
+			/* Whatever remains is the description. */
+			dbt_appendb(rval, rsz, start, strlen(start) + 1);
+		}
+	}
+
+	if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child))
+		return(1);
+	if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next))
+		return(1);
+
+	return(0);
}
static void
-pmdoc(DB *db, const char *dbn,
- DBT *key, size_t *ksz, DBT *val,
- const char *path, struct mdoc *m)
+pman(DB *db, const char *dbn, DBT *key, size_t *ksz,
+ DBT *val, DBT *rval, size_t *rsz, struct man *m)
{
- pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
+ pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m));
}
+
static void
-version(void)
+pmdoc(DB *db, const char *dbn, DBT *key, size_t *ksz,
+ DBT *val, DBT *rval, size_t *rsz, struct mdoc *m)
{
- printf("%s %s\n", progname, VERSION);
+ pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m));
}
static void
{
fprintf(stderr, "usage: %s "
- "[-V] "
"[-d path] "
"[file...]\n",
progname);