X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/213c173babbe2d47d3bcf16a00cd23efcf2b6135..ccfcc5247a1dcd27d5b8759547f16bdb451120e3:/mandoc-db.c diff --git a/mandoc-db.c b/mandoc-db.c index f2f5af83..85d5036e 100644 --- a/mandoc-db.c +++ b/mandoc-db.c @@ -1,4 +1,4 @@ -/* $Id: mandoc-db.c,v 1.1 2011/04/02 15:40:40 kristaps Exp $ */ +/* $Id: mandoc-db.c,v 1.18 2011/05/04 08:21:17 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -14,6 +14,10 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include #include @@ -34,7 +38,9 @@ #include "mandoc.h" #define MANDOC_DB "mandoc.db" -#define MANDOC_BUFSZ 10 +#define MANDOC_IDX "mandoc.index" +#define MANDOC_BUFSZ BUFSIZ +#define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR enum type { MANDOC_NONE = 0, @@ -42,31 +48,46 @@ enum type { MANDOC_FUNCTION, MANDOC_UTILITY, MANDOC_INCLUDES, - MANDOC_VARIABLE + MANDOC_VARIABLE, + MANDOC_STANDARD, + MANDOC_AUTHOR, + MANDOC_CONFIG }; +#define MAN_ARGS DB *db, \ + const char *dbn, \ + DBT *key, size_t *ksz, \ + DBT *val, \ + DBT *rval, size_t *rsz, \ + const struct man_node *n #define MDOC_ARGS DB *db, \ const char *dbn, \ DBT *key, size_t *ksz, \ DBT *val, \ + DBT *rval, size_t *rsz, \ const struct mdoc_node *n static void dbt_append(DBT *, size_t *, const char *); static void dbt_appendb(DBT *, size_t *, const void *, size_t); static void dbt_init(DBT *, size_t *); -static void version(void); +static void dbt_put(DB *, const char *, DBT *, DBT *); static void usage(void); -static void pmdoc(DB *, const char *, - DBT *, size_t *, - DBT *, size_t *, - const char *, struct mdoc *); +static void pman(DB *, const char *, DBT *, size_t *, + DBT *, DBT *, size_t *, struct man *); +static int pman_node(MAN_ARGS); +static void pmdoc(DB *, const char *, DBT *, size_t *, + DBT *, DBT *, size_t *, struct mdoc *); static void pmdoc_node(MDOC_ARGS); +static void pmdoc_An(MDOC_ARGS); +static void pmdoc_Cd(MDOC_ARGS); static void pmdoc_Fd(MDOC_ARGS); static void pmdoc_In(MDOC_ARGS); static void pmdoc_Fn(MDOC_ARGS); static void pmdoc_Fo(MDOC_ARGS); +static void pmdoc_Nd(MDOC_ARGS); static void pmdoc_Nm(MDOC_ARGS); +static void pmdoc_St(MDOC_ARGS); static void pmdoc_Vt(MDOC_ARGS); typedef void (*pmdoc_nf)(MDOC_ARGS); @@ -89,9 +110,9 @@ static const pmdoc_nf mdocs[MDOC_MAX] = { NULL, /* El */ NULL, /* It */ NULL, /* Ad */ - NULL, /* An */ + pmdoc_An, /* An */ NULL, /* Ar */ - NULL, /* Cd */ + pmdoc_Cd, /* Cd */ NULL, /* Cm */ NULL, /* Dv */ NULL, /* Er */ @@ -105,13 +126,13 @@ static const pmdoc_nf mdocs[MDOC_MAX] = { NULL, /* Ic */ pmdoc_In, /* In */ NULL, /* Li */ - NULL, /* Nd */ + pmdoc_Nd, /* Nd */ pmdoc_Nm, /* Nm */ NULL, /* Op */ NULL, /* Ot */ NULL, /* Pa */ NULL, /* Rv */ - NULL, /* St */ + pmdoc_St, /* St */ pmdoc_Vt, /* Va */ pmdoc_Vt, /* Vt */ NULL, /* Xr */ @@ -201,36 +222,43 @@ static const pmdoc_nf mdocs[MDOC_MAX] = { int main(int argc, char *argv[]) { - struct mparse *mp; - struct mdoc *mdoc; - struct man *man; - const char *f, *fn; - size_t sz; - char fbuf[MAXPATHLEN]; - int c; - DB *db; - DBT key, val; - size_t ksz, vsz; - BTREEINFO info; + struct mparse *mp; /* parse sequence */ + struct mdoc *mdoc; /* resulting mdoc */ + struct man *man; /* resulting man */ + char *fn; /* current file being parsed */ + const char *msec, /* manual section */ + *mtitle, /* manual title */ + *dir; /* result dir (default: cwd) */ + char ibuf[MAXPATHLEN], /* index fname */ + ibbuf[MAXPATHLEN], /* index backup fname */ + fbuf[MAXPATHLEN], /* btree fname */ + fbbuf[MAXPATHLEN]; /* btree backup fname */ + int ch; + DB *idx, /* index database */ + *db; /* keyword database */ + DBT rkey, rval, /* recno entries */ + key, val; /* persistent keyword entries */ + size_t sv, + ksz, rsz; /* entry buffer size */ + char vbuf[8]; /* stringified record number */ + BTREEINFO info; /* btree configuration */ + recno_t rec; /* current record number */ extern int optind; extern char *optarg; - f = MANDOC_DB; - progname = strrchr(argv[0], '/'); if (progname == NULL) progname = argv[0]; else ++progname; - while (-1 != (c = getopt(argc, argv, "f:V"))) - switch (c) { - case ('f'): - f = optarg; + dir = ""; + + while (-1 != (ch = getopt(argc, argv, "d:"))) + switch (ch) { + case ('d'): + dir = optarg; break; - case ('V'): - version(); - return((int)MANDOCLEVEL_OK); default: usage(); return((int)MANDOCLEVEL_BADARG); @@ -240,72 +268,161 @@ main(int argc, char *argv[]) argv += optind; /* - * Set up a temporary file-name into which we're going to write - * all of our data. This is securely renamed to the real - * file-name after we've written all of our data. + * Set up temporary file-names into which we're going to write + * all of our data (both for the index and database). These + * will be securely renamed to the real file-names after we've + * written all of our data. */ - if (0 == (sz = strlen(f)) || sz + 5 >= MAXPATHLEN) { - fprintf(stderr, "%s: Bad filename\n", progname); + ibuf[0] = ibuf[MAXPATHLEN - 2] = + ibbuf[0] = ibbuf[MAXPATHLEN - 2] = + fbuf[0] = fbuf[MAXPATHLEN - 2] = + fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0'; + + strlcat(fbuf, dir, MAXPATHLEN); + strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + + strlcat(fbbuf, fbuf, MAXPATHLEN); + strlcat(fbbuf, "~", MAXPATHLEN); + + strlcat(ibuf, dir, MAXPATHLEN); + strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + + strlcat(ibbuf, ibuf, MAXPATHLEN); + strlcat(ibbuf, "~", MAXPATHLEN); + + if ('\0' != fbuf[MAXPATHLEN - 2] || + '\0' != fbbuf[MAXPATHLEN - 2] || + '\0' != ibuf[MAXPATHLEN - 2] || + '\0' != ibbuf[MAXPATHLEN - 2]) { + fprintf(stderr, "%s: Path too long\n", progname); exit((int)MANDOCLEVEL_SYSERR); } - memcpy(fbuf, f, sz); - memcpy(fbuf + (int)sz, ".bak", 4); - fbuf[(int)sz + 4] = '\0'; - /* - * Open a BTREE database that allows duplicates. If the - * database already exists (it's a backup anyway), then blow it - * away with O_TRUNC. + * For the keyword database, open a BTREE database that allows + * duplicates. For the index database, use a standard RECNO + * database type. */ memset(&info, 0, sizeof(BTREEINFO)); info.flags = R_DUP; - - db = dbopen(fbuf, O_CREAT|O_TRUNC|O_RDWR, - 0644, DB_BTREE, &info); + db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info); if (NULL == db) { - perror(f); + perror(fbbuf); exit((int)MANDOCLEVEL_SYSERR); } - /* Use the auto-parser and don't report any errors. */ + idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL); - mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); + if (NULL == db) { + perror(ibbuf); + (*db->close)(db); + exit((int)MANDOCLEVEL_SYSERR); + } /* * Try parsing the manuals given on the command line. If we * totally fail, then just keep on going. Take resulting trees * and push them down into the database code. + * Use the auto-parser and don't report any errors. */ + mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); + memset(&key, 0, sizeof(DBT)); memset(&val, 0, sizeof(DBT)); - ksz = vsz = 0; + memset(&rkey, 0, sizeof(DBT)); + memset(&rval, 0, sizeof(DBT)); + + val.size = sizeof(vbuf); + val.data = vbuf; + rkey.size = sizeof(recno_t); + + rec = 1; + ksz = rsz = 0; while (NULL != (fn = *argv++)) { - printf("Trying: %s\n", fn); mparse_reset(mp); - if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) + + /* Parse and get (non-empty) AST. */ + + if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { + fprintf(stderr, "%s: Parse failure\n", fn); continue; + } mparse_result(mp, &mdoc, &man); + if (NULL == mdoc && NULL == man) + continue; + + /* Manual section: can be empty string. */ + + msec = NULL != mdoc ? + mdoc_meta(mdoc)->msec : + man_meta(man)->msec; + mtitle = NULL != mdoc ? + mdoc_meta(mdoc)->title : + man_meta(man)->title; + + assert(msec); + assert(mtitle); + + /* + * The index record value consists of a nil-terminated + * filename, a nil-terminated manual section, and a + * nil-terminated description. Since the description + * may not be set, we set a sentinel to see if we're + * going to write a nil byte in its place. + */ + + dbt_init(&rval, &rsz); + dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1); + dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1); + dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1); + sv = rval.size; + + /* Fix the record number in the btree value. */ + + memset(val.data, 0, sizeof(uint32_t)); + memcpy(val.data + 4, &rec, sizeof(uint32_t)); + if (mdoc) - pmdoc(db, fbuf, &key, &ksz, - &val, &vsz, fn, mdoc); + pmdoc(db, fbbuf, &key, &ksz, + &val, &rval, &rsz, mdoc); + else + pman(db, fbbuf, &key, &ksz, + &val, &rval, &rsz, man); + + /* + * Apply this to the index. If we haven't had a + * description set, put an empty one in now. + */ + + if (rval.size == sv) + dbt_appendb(&rval, &rsz, "", 1); + + rkey.data = &rec; + dbt_put(idx, ibbuf, &rkey, &rval); + + printf("Indexed: %s\n", fn); + rec++; } (*db->close)(db); + (*idx->close)(idx); + mparse_free(mp); free(key.data); - free(val.data); + free(rval.data); /* Atomically replace the file with our temporary one. */ - if (-1 == rename(fbuf, f)) - perror(f); + if (-1 == rename(fbbuf, fbuf)) + perror(fbuf); + if (-1 == rename(ibbuf, ibuf)) + perror(fbuf); return((int)MANDOCLEVEL_OK); } @@ -343,10 +460,51 @@ dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz) while (key->size + sz >= *ksz) { *ksz = key->size + sz + MANDOC_BUFSZ; - *ksz = *ksz + (4 - (*ksz % 4)); key->data = mandoc_realloc(key->data, *ksz); } +#if 0 + dstp = key->data + (int)key->size; + + while (NULL != (endp = memchr(cp, '\\', sz))) { + ssz = endp - cp; + memcpy(dstp, cp, ssz); + + dstp += ssz; + key->size += ssz; + sz -= ssz; + + cp = endp++; + /* FIXME: expects nil-terminated string! */ + esc = mandoc_escape((const char **)&endp, NULL, NULL); + + switch (esc) { + case (ESCAPE_ERROR): + /* Nil-terminate this point. */ + memcpy(dstp, "", 1); + key->size++; + return; + case (ESCAPE_PREDEF): + /* FALLTHROUGH */ + case (ESCAPE_SPECIAL): + break; + default: + sz -= endp - cp; + cp = endp; + continue; + } + + ssz = endp - cp; + memcpy(dstp, cp, ssz); + + dstp += ssz; + key->size += ssz; + sz -= ssz; + + cp = endp; + } +#endif + memcpy(key->data + (int)key->size, cp, sz); key->size += sz; } @@ -361,27 +519,32 @@ dbt_append(DBT *key, size_t *ksz, const char *cp) { size_t sz; - assert(key->data); - assert(key->size <= *ksz); - if (0 == (sz = strlen(cp))) return; - /* Overshoot by MANDOC_BUFSZ (and nil terminator). */ - - while (key->size + sz + 1 >= *ksz) { - *ksz = key->size + sz + 1 + MANDOC_BUFSZ; - *ksz = *ksz + (4 - (*ksz % 4)); - key->data = mandoc_realloc(key->data, *ksz); - } - - /* Space-separate appended tokens. */ + assert(key->data); if (key->size) ((char *)key->data)[(int)key->size - 1] = ' '; - memcpy(key->data + (int)key->size, cp, sz + 1); - key->size += sz + 1; + dbt_appendb(key, ksz, cp, sz + 1); +} + +/* ARGSUSED */ +static void +pmdoc_An(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_AUTHORS != n->sec) + return; + + for (n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + dbt_append(key, ksz, n->string); + + fl = MANDOC_AUTHOR; + memcpy(val->data, &fl, 4); } /* ARGSUSED */ @@ -391,36 +554,62 @@ pmdoc_Fd(MDOC_ARGS) uint32_t fl; const char *start, *end; size_t sz; - char nil; if (SEC_SYNOPSIS != n->sec) return; if (NULL == (n = n->child) || MDOC_TEXT != n->type) return; + + /* + * Only consider those `Fd' macro fields that begin with an + * "inclusion" token (versus, e.g., #define). + */ if (strcmp("#include", n->string)) return; + if (NULL == (n = n->next) || MDOC_TEXT != n->type) return; + /* + * Strip away the enclosing angle brackets and make sure we're + * not zero-length. + */ + start = n->string; - if ('<' == *start) + if ('<' == *start || '"' == *start) start++; if (0 == (sz = strlen(start))) return; end = &start[(int)sz - 1]; - if ('>' == *end) + if ('>' == *end || '"' == *end) end--; - nil = '\0'; dbt_appendb(key, ksz, start, end - start + 1); - dbt_appendb(key, ksz, &nil, 1); + dbt_appendb(key, ksz, "", 1); fl = MANDOC_INCLUDES; memcpy(val->data, &fl, 4); } +/* ARGSUSED */ +static void +pmdoc_Cd(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_SYNOPSIS != n->sec) + return; + + for (n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + dbt_append(key, ksz, n->string); + + fl = MANDOC_CONFIG; + memcpy(val->data, &fl, 4); +} + /* ARGSUSED */ static void pmdoc_In(MDOC_ARGS) @@ -455,7 +644,7 @@ pmdoc_Fn(MDOC_ARGS) if (NULL == cp) cp = n->child->string; - /* Ignore pointers. */ + /* Strip away pointer symbol. */ while ('*' == *cp) cp++; @@ -465,24 +654,39 @@ pmdoc_Fn(MDOC_ARGS) memcpy(val->data, &fl, 4); } +/* ARGSUSED */ +static void +pmdoc_St(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_STANDARDS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + dbt_append(key, ksz, n->child->string); + fl = MANDOC_STANDARD; + memcpy(val->data, &fl, 4); +} + /* ARGSUSED */ static void pmdoc_Vt(MDOC_ARGS) { uint32_t fl; - const char *start, *end; + const char *start; size_t sz; - char nil; if (SEC_SYNOPSIS != n->sec) return; if (MDOC_Vt == n->tok && MDOC_BODY != n->type) return; - if (NULL == n->child || MDOC_TEXT != n->child->type) + if (NULL == n->last || MDOC_TEXT != n->last->type) return; /* - * Strip away leading '*' and trailing ';'. + * Strip away leading pointer symbol '*' and trailing ';'. */ start = n->last->string; @@ -493,16 +697,15 @@ pmdoc_Vt(MDOC_ARGS) if (0 == (sz = strlen(start))) return; - end = &start[sz - 1]; - while (end > start && ';' == *end) - end--; + if (';' == start[sz - 1]) + sz--; - if (end == start) + if (0 == sz) return; - nil = '\0'; - dbt_appendb(key, ksz, start, end - start + 1); - dbt_appendb(key, ksz, &nil, 1); + dbt_appendb(key, ksz, start, sz); + dbt_appendb(key, ksz, "", 1); + fl = MANDOC_VARIABLE; memcpy(val->data, &fl, 4); } @@ -523,6 +726,24 @@ pmdoc_Fo(MDOC_ARGS) memcpy(val->data, &fl, 4); } + +/* ARGSUSED */ +static void +pmdoc_Nd(MDOC_ARGS) +{ + int first; + + for (first = 1, n = n->child; n; n = n->next) { + if (MDOC_TEXT != n->type) + continue; + if (first) + dbt_appendb(rval, rsz, n->string, strlen(n->string) + 1); + else + dbt_append(rval, rsz, n->string); + first = 0; + } +} + /* ARGSUSED */ static void pmdoc_Nm(MDOC_ARGS) @@ -551,6 +772,25 @@ pmdoc_Nm(MDOC_ARGS) memcpy(val->data, &fl, 4); } +static void +dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) +{ + + if (0 == key->size) + return; + + assert(key->data); + assert(val->size); + assert(val->data); + + if (0 == (*db->put)(db, key, val, 0)) + return; + + perror(dbn); + exit((int)MANDOCLEVEL_SYSERR); + /* NOTREACHED */ +} + /* * Call out to per-macro handlers after clearing the persistent database * key. If the macro sets the database key, flush it to the database. @@ -576,52 +816,128 @@ pmdoc_node(MDOC_ARGS) break; dbt_init(key, ksz); - (*mdocs[n->tok])(db, dbn, key, ksz, val, n); - if (0 == key->size) - break; - if (0 == (*db->put)(db, key, val, 0)) - break; - - perror(dbn); - exit((int)MANDOCLEVEL_SYSERR); - /* NOTREACHED */ + (*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n); + dbt_put(db, dbn, key, val); + break; default: break; } - pmdoc_node(db, dbn, key, ksz, val, n->child); - pmdoc_node(db, dbn, key, ksz, val, n->next); + pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child); + pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next); } -static void -pmdoc(DB *db, const char *dbn, - DBT *key, size_t *ksz, - DBT *val, size_t *valsz, - const char *path, struct mdoc *m) +static int +pman_node(MAN_ARGS) { - uint32_t flag; + const struct man_node *head, *body; + const char *start, *sv; + size_t sz; + uint32_t fl; - flag = MANDOC_NONE; + if (NULL == n) + return(0); - /* - * Database values are a 4-byte bit-field followed by the path - * of the manual. Allocate all the space we'll need now; we - * change the bit-field depending on the key type. + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. */ - dbt_init(val, valsz); - dbt_appendb(val, valsz, &flag, 4); - dbt_append(val, valsz, path); + if (MAN_BODY == n->type && MAN_SH == n->tok) { + body = n; + assert(body->parent); + if (NULL != (head = body->parent->head) && + 1 == head->nchild && + NULL != (head = (head->child)) && + MAN_TEXT == head->type && + 0 == strcmp(head->string, "NAME") && + NULL != (body = body->child) && + MAN_TEXT == body->type) { + + fl = MANDOC_NAME; + memcpy(val->data, &fl, 4); + + assert(body->string); + start = sv = body->string; + + /* + * Go through a special heuristic dance here. + * This is why -man manuals are great! + * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ + + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[(int)sz]) + break; + + dbt_init(key, ksz); + dbt_appendb(key, ksz, start, sz); + dbt_appendb(key, ksz, "", 1); + + dbt_put(db, dbn, key, val); + + if (' ' == start[(int)sz]) { + start += (int)sz + 1; + break; + } + + assert(',' == start[(int)sz]); + start += (int)sz + 1; + while (' ' == *start) + start++; + } + + if (sv == start) { + dbt_init(key, ksz); + dbt_append(key, ksz, start); + return(1); + } + + while (' ' == *start) + start++; + + if ('\\' == *start && '-' == *(start + 1)) + start += 2; + else if ('-' == *start) + start++; + + while (' ' == *start) + start++; + + dbt_appendb(rval, rsz, start, strlen(start) + 1); + } + } + + if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child)) + return(1); + if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next)) + return(1); - pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m)); + return(0); } static void -version(void) +pman(DB *db, const char *dbn, DBT *key, size_t *ksz, + DBT *val, DBT *rval, size_t *rsz, struct man *m) +{ + + pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m)); +} + + +static void +pmdoc(DB *db, const char *dbn, DBT *key, size_t *ksz, + DBT *val, DBT *rval, size_t *rsz, struct mdoc *m) { - printf("%s %s\n", progname, VERSION); + pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m)); } static void @@ -629,8 +945,7 @@ usage(void) { fprintf(stderr, "usage: %s " - "[-V] " - "[-f path] " + "[-d path] " "[file...]\n", progname); }