]> git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
Allow man.conf file to be assignable.
[mandoc.git] / mandocdb.c
1 /* $Id: mandocdb.c,v 1.10 2011/11/23 09:52:20 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #include <dirent.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #ifdef __linux__
33 # include <db_185.h>
34 #else
35 # include <db.h>
36 #endif
37
38 #include "man.h"
39 #include "mdoc.h"
40 #include "mandoc.h"
41 #include "mandocdb.h"
42 #include "manpath.h"
43
/*
 * MANDOC_BUFSZ: initial size of the growable buffers, and the extra room
 * added whenever one of them has to be enlarged.
 * MANDOC_SLOP: number of slots by which the array of reusable record
 * numbers is grown.
 */
#define	MANDOC_BUFSZ	  BUFSIZ
#define	MANDOC_SLOP	  1024
46
/*
 * Tiny list for files.  No need to bring in QUEUE.
 * The list is singly linked through `next'; every element additionally
 * remembers the head of its list in `first', so a caller holding the
 * most recently appended element can get back to the start.
 */

struct	of {
	char		 *fname; /* heap-allocated */
	struct of	 *next; /* NULL for last one */
	struct of	 *first; /* first in list */
};
54
/*
 * Buffer for storing growable data.
 * `cp' is heap-allocated and enlarged on demand by buf_appendb();
 * setting `len' back to zero reuses the storage for a new value.
 */

struct	buf {
	char		 *cp;
	size_t		  len; /* current length */
	size_t		  size; /* total buffer size */
};
62
/*
 * Operation we're going to perform.
 * OP_NEW is the default; main() selects OP_UPDATE for "-d dir" and
 * OP_DELETE for "-u dir".
 */

enum	op {
	OP_NEW = 0, /* new database */
	OP_UPDATE, /* delete/add entries in existing database */
	OP_DELETE /* delete entries from existing database */
};
70
/*
 * Common parameter lists of the per-node handlers:
 *   hash - in-memory table collecting the keywords of the current file
 *   buf  - scratch buffer holding the keyword being assembled
 *   dbuf - buffer holding the index record (including the description)
 *   n    - node being examined
 *   m    - (mdoc only) meta-data of the manual being parsed
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
80
/*
 * Forward declarations.  Everything in this file except main() has
 * internal linkage.
 */
static	void		  buf_appendmdoc(struct buf *,
				const struct mdoc_node *, int);
static	void		  buf_append(struct buf *, const char *);
static	void		  buf_appendb(struct buf *,
				const void *, size_t);
static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
static	void		  hash_put(DB *, const struct buf *, uint64_t);
static	void		  hash_reset(DB **);
static	void		  index_merge(const struct of *, struct mparse *,
				struct buf *, struct buf *,
				DB *, DB *, const char *,
				DB *, const char *, int,
				recno_t, const recno_t *, size_t);
static	void		  index_prune(const struct of *, DB *,
				const char *, DB *, const char *,
				int, recno_t *, recno_t **, size_t *);
static	void		  ofile_argbuild(char *[], int, int, struct of **);
static	int		  ofile_dirbuild(const char *, int, struct of **);
static	void		  ofile_free(struct of *);
static	int		  pman_node(MAN_ARGS);
static	void		  pmdoc_node(MDOC_ARGS);
static	void		  pmdoc_An(MDOC_ARGS);
static	void		  pmdoc_Cd(MDOC_ARGS);
static	void		  pmdoc_Er(MDOC_ARGS);
static	void		  pmdoc_Ev(MDOC_ARGS);
static	void		  pmdoc_Fd(MDOC_ARGS);
static	void		  pmdoc_In(MDOC_ARGS);
static	void		  pmdoc_Fn(MDOC_ARGS);
static	void		  pmdoc_Fo(MDOC_ARGS);
static	void		  pmdoc_Nd(MDOC_ARGS);
static	void		  pmdoc_Nm(MDOC_ARGS);
static	void		  pmdoc_Pa(MDOC_ARGS);
static	void		  pmdoc_St(MDOC_ARGS);
static	void		  pmdoc_Vt(MDOC_ARGS);
static	void		  pmdoc_Xr(MDOC_ARGS);
static	void		  usage(void);
117
/* Signature shared by all per-macro mdoc keyword handlers. */
typedef	void		(*pmdoc_nf)(MDOC_ARGS);

/*
 * Per-macro handler table, indexed by the node's macro token
 * (see pmdoc_node()).  A NULL entry means the macro contributes no
 * keywords.  The table is positional: the order of the entries must
 * follow the order of the macro tokens, so never insert or reorder
 * lines without doing the same to the token enumeration.
 * Note that both `Va' and `Vt' are handled by pmdoc_Vt().
 */
static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
	NULL, /* Ap */
	NULL, /* Dd */
	NULL, /* Dt */
	NULL, /* Os */
	NULL, /* Sh */
	NULL, /* Ss */
	NULL, /* Pp */
	NULL, /* D1 */
	NULL, /* Dl */
	NULL, /* Bd */
	NULL, /* Ed */
	NULL, /* Bl */
	NULL, /* El */
	NULL, /* It */
	NULL, /* Ad */
	pmdoc_An, /* An */
	NULL, /* Ar */
	pmdoc_Cd, /* Cd */
	NULL, /* Cm */
	NULL, /* Dv */
	pmdoc_Er, /* Er */
	pmdoc_Ev, /* Ev */
	NULL, /* Ex */
	NULL, /* Fa */
	pmdoc_Fd, /* Fd */
	NULL, /* Fl */
	pmdoc_Fn, /* Fn */
	NULL, /* Ft */
	NULL, /* Ic */
	pmdoc_In, /* In */
	NULL, /* Li */
	pmdoc_Nd, /* Nd */
	pmdoc_Nm, /* Nm */
	NULL, /* Op */
	NULL, /* Ot */
	pmdoc_Pa, /* Pa */
	NULL, /* Rv */
	pmdoc_St, /* St */
	pmdoc_Vt, /* Va */
	pmdoc_Vt, /* Vt */
	pmdoc_Xr, /* Xr */
	NULL, /* %A */
	NULL, /* %B */
	NULL, /* %D */
	NULL, /* %I */
	NULL, /* %J */
	NULL, /* %N */
	NULL, /* %O */
	NULL, /* %P */
	NULL, /* %R */
	NULL, /* %T */
	NULL, /* %V */
	NULL, /* Ac */
	NULL, /* Ao */
	NULL, /* Aq */
	NULL, /* At */
	NULL, /* Bc */
	NULL, /* Bf */
	NULL, /* Bo */
	NULL, /* Bq */
	NULL, /* Bsx */
	NULL, /* Bx */
	NULL, /* Db */
	NULL, /* Dc */
	NULL, /* Do */
	NULL, /* Dq */
	NULL, /* Ec */
	NULL, /* Ef */
	NULL, /* Em */
	NULL, /* Eo */
	NULL, /* Fx */
	NULL, /* Ms */
	NULL, /* No */
	NULL, /* Ns */
	NULL, /* Nx */
	NULL, /* Ox */
	NULL, /* Pc */
	NULL, /* Pf */
	NULL, /* Po */
	NULL, /* Pq */
	NULL, /* Qc */
	NULL, /* Ql */
	NULL, /* Qo */
	NULL, /* Qq */
	NULL, /* Re */
	NULL, /* Rs */
	NULL, /* Sc */
	NULL, /* So */
	NULL, /* Sq */
	NULL, /* Sm */
	NULL, /* Sx */
	NULL, /* Sy */
	NULL, /* Tn */
	NULL, /* Ux */
	NULL, /* Xc */
	NULL, /* Xo */
	pmdoc_Fo, /* Fo */
	NULL, /* Fc */
	NULL, /* Oo */
	NULL, /* Oc */
	NULL, /* Bk */
	NULL, /* Ek */
	NULL, /* Bt */
	NULL, /* Hf */
	NULL, /* Fr */
	NULL, /* Ud */
	NULL, /* Lb */
	NULL, /* Lp */
	NULL, /* Lk */
	NULL, /* Mt */
	NULL, /* Brq */
	NULL, /* Bro */
	NULL, /* Brc */
	NULL, /* %C */
	NULL, /* Es */
	NULL, /* En */
	NULL, /* Dx */
	NULL, /* %Q */
	NULL, /* br */
	NULL, /* sp */
	NULL, /* %U */
	NULL, /* Ta */
};
244
/* Basename of argv[0]; set at the top of main() and printed by usage(). */
static	const char	 *progname;
246
247 int
248 main(int argc, char *argv[])
249 {
250 struct mparse *mp; /* parse sequence */
251 struct manpaths dirs;
252 enum op op; /* current operation */
253 const char *dir;
254 char ibuf[MAXPATHLEN], /* index fname */
255 fbuf[MAXPATHLEN]; /* btree fname */
256 int verb, /* output verbosity */
257 ch, i, flags;
258 DB *idx, /* index database */
259 *db, /* keyword database */
260 *hash; /* temporary keyword hashtable */
261 BTREEINFO info; /* btree configuration */
262 recno_t maxrec; /* supremum of all records */
263 recno_t *recs; /* buffer of empty records */
264 size_t sz1, sz2,
265 recsz, /* buffer size of recs */
266 reccur; /* valid number of recs */
267 struct buf buf, /* keyword buffer */
268 dbuf; /* description buffer */
269 struct of *of; /* list of files for processing */
270 extern int optind;
271 extern char *optarg;
272
273 progname = strrchr(argv[0], '/');
274 if (progname == NULL)
275 progname = argv[0];
276 else
277 ++progname;
278
279 memset(&dirs, 0, sizeof(struct manpaths));
280
281 verb = 0;
282 of = NULL;
283 db = idx = NULL;
284 mp = NULL;
285 hash = NULL;
286 recs = NULL;
287 recsz = reccur = 0;
288 maxrec = 0;
289 op = OP_NEW;
290 dir = NULL;
291
292 while (-1 != (ch = getopt(argc, argv, "d:u:v")))
293 switch (ch) {
294 case ('d'):
295 dir = optarg;
296 op = OP_UPDATE;
297 break;
298 case ('u'):
299 dir = optarg;
300 op = OP_DELETE;
301 break;
302 case ('v'):
303 verb++;
304 break;
305 default:
306 usage();
307 return((int)MANDOCLEVEL_BADARG);
308 }
309
310 argc -= optind;
311 argv += optind;
312
313 memset(&info, 0, sizeof(BTREEINFO));
314 info.flags = R_DUP;
315
316 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
317
318 memset(&buf, 0, sizeof(struct buf));
319 memset(&dbuf, 0, sizeof(struct buf));
320
321 buf.size = dbuf.size = MANDOC_BUFSZ;
322
323 buf.cp = mandoc_malloc(buf.size);
324 dbuf.cp = mandoc_malloc(dbuf.size);
325
326 flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
327
328 if (OP_UPDATE == op || OP_DELETE == op) {
329 ibuf[0] = fbuf[0] = '\0';
330
331 strlcat(fbuf, dir, MAXPATHLEN);
332 strlcat(fbuf, "/", MAXPATHLEN);
333 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
334
335 strlcat(ibuf, dir, MAXPATHLEN);
336 strlcat(ibuf, "/", MAXPATHLEN);
337 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
338
339 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
340 fprintf(stderr, "%s: Path too long\n", dir);
341 exit((int)MANDOCLEVEL_BADARG);
342 }
343
344 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
345 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
346
347 if (NULL == db) {
348 perror(fbuf);
349 exit((int)MANDOCLEVEL_SYSERR);
350 } else if (NULL == db) {
351 perror(ibuf);
352 exit((int)MANDOCLEVEL_SYSERR);
353 }
354
355 if (verb > 2) {
356 printf("%s: Opened\n", fbuf);
357 printf("%s: Opened\n", ibuf);
358 }
359
360 ofile_argbuild(argv, argc, verb, &of);
361 if (NULL == of)
362 goto out;
363
364 of = of->first;
365
366 index_prune(of, db, fbuf, idx, ibuf, verb,
367 &maxrec, &recs, &recsz);
368
369 if (OP_UPDATE == op)
370 index_merge(of, mp, &dbuf, &buf, hash,
371 db, fbuf, idx, ibuf, verb,
372 maxrec, recs, reccur);
373
374 goto out;
375 }
376
377 /*
378 * Configure the directories we're going to scan.
379 * If we have command-line arguments, use them.
380 * If not, we use man(1)'s method (see mandocdb.8).
381 */
382
383 if (argc > 0) {
384 dirs.paths = mandoc_malloc(argc * sizeof(char *));
385 dirs.sz = argc;
386 for (i = 0; i < argc; i++)
387 dirs.paths[i] = mandoc_strdup(argv[i]);
388 } else
389 manpath_parseconf(&dirs);
390
391 for (i = 0; i < dirs.sz; i++) {
392 ibuf[0] = fbuf[0] = '\0';
393
394 strlcat(fbuf, dirs.paths[i], MAXPATHLEN);
395 strlcat(fbuf, "/", MAXPATHLEN);
396 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
397
398 strlcat(ibuf, dirs.paths[i], MAXPATHLEN);
399 strlcat(ibuf, "/", MAXPATHLEN);
400 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
401
402 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
403 fprintf(stderr, "%s: Path too long\n",
404 dirs.paths[i]);
405 exit((int)MANDOCLEVEL_BADARG);
406 }
407
408 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
409 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
410
411 if (NULL == db) {
412 perror(fbuf);
413 exit((int)MANDOCLEVEL_SYSERR);
414 } else if (NULL == db) {
415 perror(ibuf);
416 exit((int)MANDOCLEVEL_SYSERR);
417 }
418
419 if (verb > 2) {
420 printf("%s: Truncated\n", fbuf);
421 printf("%s: Truncated\n", ibuf);
422 }
423
424 ofile_free(of);
425 of = NULL;
426
427 if ( ! ofile_dirbuild(dirs.paths[i], verb, &of))
428 exit((int)MANDOCLEVEL_SYSERR);
429
430 if (NULL == of)
431 continue;
432
433 of = of->first;
434
435 index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
436 idx, ibuf, verb, maxrec, recs, reccur);
437 }
438
439 out:
440 if (db)
441 (*db->close)(db);
442 if (idx)
443 (*idx->close)(idx);
444 if (hash)
445 (*hash->close)(hash);
446 if (mp)
447 mparse_free(mp);
448
449 manpath_free(&dirs);
450 ofile_free(of);
451 free(buf.cp);
452 free(dbuf.cp);
453 free(recs);
454
455 return(MANDOCLEVEL_OK);
456 }
457
458 void
459 index_merge(const struct of *of, struct mparse *mp,
460 struct buf *dbuf, struct buf *buf,
461 DB *hash, DB *db, const char *dbf,
462 DB *idx, const char *idxf, int verb,
463 recno_t maxrec, const recno_t *recs, size_t reccur)
464 {
465 recno_t rec;
466 int ch;
467 DBT key, val;
468 struct mdoc *mdoc;
469 struct man *man;
470 const char *fn, *msec, *mtitle, *arch;
471 size_t sv;
472 unsigned seq;
473 struct db_val vbuf;
474
475 for (rec = 0; of; of = of->next) {
476 fn = of->fname;
477 if (reccur > 0) {
478 --reccur;
479 rec = recs[(int)reccur];
480 } else if (maxrec > 0) {
481 rec = maxrec;
482 maxrec = 0;
483 } else
484 rec++;
485
486 mparse_reset(mp);
487 hash_reset(&hash);
488
489 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
490 fprintf(stderr, "%s: Parse failure\n", fn);
491 continue;
492 }
493
494 mparse_result(mp, &mdoc, &man);
495 if (NULL == mdoc && NULL == man)
496 continue;
497
498 msec = NULL != mdoc ?
499 mdoc_meta(mdoc)->msec : man_meta(man)->msec;
500 mtitle = NULL != mdoc ?
501 mdoc_meta(mdoc)->title : man_meta(man)->title;
502 arch = NULL != mdoc ?
503 mdoc_meta(mdoc)->arch : NULL;
504
505 if (NULL == arch)
506 arch = "";
507
508 /*
509 * The index record value consists of a nil-terminated
510 * filename, a nil-terminated manual section, and a
511 * nil-terminated description. Since the description
512 * may not be set, we set a sentinel to see if we're
513 * going to write a nil byte in its place.
514 */
515
516 dbuf->len = 0;
517 buf_appendb(dbuf, fn, strlen(fn) + 1);
518 buf_appendb(dbuf, msec, strlen(msec) + 1);
519 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
520 buf_appendb(dbuf, arch, strlen(arch) + 1);
521
522 sv = dbuf->len;
523
524 /* Fix the record number in the btree value. */
525
526 if (mdoc)
527 pmdoc_node(hash, buf, dbuf,
528 mdoc_node(mdoc), mdoc_meta(mdoc));
529 else
530 pman_node(hash, buf, dbuf, man_node(man));
531
532 /*
533 * Copy from the in-memory hashtable of pending keywords
534 * into the database.
535 */
536
537 vbuf.rec = rec;
538 seq = R_FIRST;
539 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
540 seq = R_NEXT;
541
542 vbuf.mask = *(uint64_t *)val.data;
543 val.size = sizeof(struct db_val);
544 val.data = &vbuf;
545
546 if (verb > 1)
547 printf("%s: Added keyword: %s\n",
548 fn, (char *)key.data);
549 dbt_put(db, dbf, &key, &val);
550 }
551 if (ch < 0) {
552 perror("hash");
553 exit((int)MANDOCLEVEL_SYSERR);
554 }
555
556 /*
557 * Apply to the index. If we haven't had a description
558 * set, put an empty one in now.
559 */
560
561 if (dbuf->len == sv)
562 buf_appendb(dbuf, "", 1);
563
564 key.data = &rec;
565 key.size = sizeof(recno_t);
566
567 val.data = dbuf->cp;
568 val.size = dbuf->len;
569
570 if (verb)
571 printf("%s: Added index\n", fn);
572 dbt_put(idx, idxf, &key, &val);
573 }
574 }
575
576 /*
577 * Scan through all entries in the index file `idx' and prune those
578 * entries in `ofile'.
579 * Pruning consists of removing from `db', then invalidating the entry
580 * in `idx' (zeroing its value size).
581 */
582 static void
583 index_prune(const struct of *ofile, DB *db, const char *dbf,
584 DB *idx, const char *idxf, int verb,
585 recno_t *maxrec, recno_t **recs, size_t *recsz)
586 {
587 const struct of *of;
588 const char *fn;
589 struct db_val *vbuf;
590 unsigned seq, sseq;
591 DBT key, val;
592 size_t reccur;
593 int ch;
594
595 reccur = 0;
596 seq = R_FIRST;
597 while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
598 seq = R_NEXT;
599 *maxrec = *(recno_t *)key.data;
600 if (0 == val.size) {
601 if (reccur >= *recsz) {
602 *recsz += MANDOC_SLOP;
603 *recs = mandoc_realloc(*recs,
604 *recsz * sizeof(recno_t));
605 }
606 (*recs)[(int)reccur] = *maxrec;
607 reccur++;
608 continue;
609 }
610
611 fn = (char *)val.data;
612 for (of = ofile; of; of = of->next)
613 if (0 == strcmp(fn, of->fname))
614 break;
615
616 if (NULL == of)
617 continue;
618
619 sseq = R_FIRST;
620 while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
621 sseq = R_NEXT;
622 assert(sizeof(struct db_val) == val.size);
623 vbuf = val.data;
624 if (*maxrec != vbuf->rec)
625 continue;
626 if (verb)
627 printf("%s: Deleted keyword: %s\n",
628 fn, (char *)key.data);
629 ch = (*db->del)(db, &key, R_CURSOR);
630 if (ch < 0)
631 break;
632 }
633 if (ch < 0) {
634 perror(dbf);
635 exit((int)MANDOCLEVEL_SYSERR);
636 }
637
638 if (verb)
639 printf("%s: Deleted index\n", fn);
640
641 val.size = 0;
642 ch = (*idx->put)(idx, &key, &val, R_CURSOR);
643 if (ch < 0) {
644 perror(idxf);
645 exit((int)MANDOCLEVEL_SYSERR);
646 }
647
648 if (reccur >= *recsz) {
649 *recsz += MANDOC_SLOP;
650 *recs = mandoc_realloc
651 (*recs, *recsz * sizeof(recno_t));
652 }
653
654 (*recs)[(int)reccur] = *maxrec;
655 reccur++;
656 }
657 (*maxrec)++;
658 }
659
660 /*
661 * Grow the buffer (if necessary) and copy in a binary string.
662 */
663 static void
664 buf_appendb(struct buf *buf, const void *cp, size_t sz)
665 {
666
667 /* Overshoot by MANDOC_BUFSZ. */
668
669 while (buf->len + sz >= buf->size) {
670 buf->size = buf->len + sz + MANDOC_BUFSZ;
671 buf->cp = mandoc_realloc(buf->cp, buf->size);
672 }
673
674 memcpy(buf->cp + (int)buf->len, cp, sz);
675 buf->len += sz;
676 }
677
678 /*
679 * Append a nil-terminated string to the buffer.
680 * This can be invoked multiple times.
681 * The buffer string will be nil-terminated.
682 * If invoked multiple times, a space is put between strings.
683 */
684 static void
685 buf_append(struct buf *buf, const char *cp)
686 {
687 size_t sz;
688
689 if (0 == (sz = strlen(cp)))
690 return;
691
692 if (buf->len)
693 buf->cp[(int)buf->len - 1] = ' ';
694
695 buf_appendb(buf, cp, sz + 1);
696 }
697
698 /*
699 * Recursively add all text from a given node.
700 * This is optimised for general mdoc nodes in this context, which do
701 * not consist of subexpressions and having a recursive call for n->next
702 * would be wasteful.
703 * The "f" variable should be 0 unless called from pmdoc_Nd for the
704 * description buffer, which does not start at the beginning of the
705 * buffer.
706 */
707 static void
708 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
709 {
710
711 for ( ; n; n = n->next) {
712 if (n->child)
713 buf_appendmdoc(buf, n->child, f);
714
715 if (MDOC_TEXT == n->type && f) {
716 f = 0;
717 buf_appendb(buf, n->string,
718 strlen(n->string) + 1);
719 } else if (MDOC_TEXT == n->type)
720 buf_append(buf, n->string);
721
722 }
723 }
724
725 /* ARGSUSED */
726 static void
727 pmdoc_An(MDOC_ARGS)
728 {
729
730 if (SEC_AUTHORS != n->sec)
731 return;
732
733 buf_appendmdoc(buf, n->child, 0);
734 hash_put(hash, buf, TYPE_An);
735 }
736
737 static void
738 hash_reset(DB **db)
739 {
740 DB *hash;
741
742 if (NULL != (hash = *db))
743 (*hash->close)(hash);
744
745 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
746 if (NULL == *db) {
747 perror("hash");
748 exit((int)MANDOCLEVEL_SYSERR);
749 }
750 }
751
752 /* ARGSUSED */
753 static void
754 pmdoc_Fd(MDOC_ARGS)
755 {
756 const char *start, *end;
757 size_t sz;
758
759 if (SEC_SYNOPSIS != n->sec)
760 return;
761 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
762 return;
763
764 /*
765 * Only consider those `Fd' macro fields that begin with an
766 * "inclusion" token (versus, e.g., #define).
767 */
768 if (strcmp("#include", n->string))
769 return;
770
771 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
772 return;
773
774 /*
775 * Strip away the enclosing angle brackets and make sure we're
776 * not zero-length.
777 */
778
779 start = n->string;
780 if ('<' == *start || '"' == *start)
781 start++;
782
783 if (0 == (sz = strlen(start)))
784 return;
785
786 end = &start[(int)sz - 1];
787 if ('>' == *end || '"' == *end)
788 end--;
789
790 assert(end >= start);
791
792 buf_appendb(buf, start, (size_t)(end - start + 1));
793 buf_appendb(buf, "", 1);
794
795 hash_put(hash, buf, TYPE_In);
796 }
797
798 /* ARGSUSED */
799 static void
800 pmdoc_Cd(MDOC_ARGS)
801 {
802
803 if (SEC_SYNOPSIS != n->sec)
804 return;
805
806 buf_appendmdoc(buf, n->child, 0);
807 hash_put(hash, buf, TYPE_Cd);
808 }
809
810 /* ARGSUSED */
811 static void
812 pmdoc_In(MDOC_ARGS)
813 {
814
815 if (SEC_SYNOPSIS != n->sec)
816 return;
817 if (NULL == n->child || MDOC_TEXT != n->child->type)
818 return;
819
820 buf_append(buf, n->child->string);
821 hash_put(hash, buf, TYPE_In);
822 }
823
824 /* ARGSUSED */
825 static void
826 pmdoc_Fn(MDOC_ARGS)
827 {
828 const char *cp;
829
830 if (SEC_SYNOPSIS != n->sec)
831 return;
832 if (NULL == n->child || MDOC_TEXT != n->child->type)
833 return;
834
835 /* .Fn "struct type *arg" "foo" */
836
837 cp = strrchr(n->child->string, ' ');
838 if (NULL == cp)
839 cp = n->child->string;
840
841 /* Strip away pointer symbol. */
842
843 while ('*' == *cp)
844 cp++;
845
846 buf_append(buf, cp);
847 hash_put(hash, buf, TYPE_Fn);
848 }
849
850 /* ARGSUSED */
851 static void
852 pmdoc_St(MDOC_ARGS)
853 {
854
855 if (SEC_STANDARDS != n->sec)
856 return;
857 if (NULL == n->child || MDOC_TEXT != n->child->type)
858 return;
859
860 buf_append(buf, n->child->string);
861 hash_put(hash, buf, TYPE_St);
862 }
863
864 /* ARGSUSED */
865 static void
866 pmdoc_Xr(MDOC_ARGS)
867 {
868
869 if (NULL == (n = n->child))
870 return;
871
872 buf_appendb(buf, n->string, strlen(n->string));
873
874 if (NULL != (n = n->next)) {
875 buf_appendb(buf, ".", 1);
876 buf_appendb(buf, n->string, strlen(n->string) + 1);
877 } else
878 buf_appendb(buf, ".", 2);
879
880 hash_put(hash, buf, TYPE_Xr);
881 }
882
883 /* ARGSUSED */
884 static void
885 pmdoc_Vt(MDOC_ARGS)
886 {
887 const char *start;
888 size_t sz;
889
890 if (SEC_SYNOPSIS != n->sec)
891 return;
892 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
893 return;
894 if (NULL == n->last || MDOC_TEXT != n->last->type)
895 return;
896
897 /*
898 * Strip away leading pointer symbol '*' and trailing ';'.
899 */
900
901 start = n->last->string;
902
903 while ('*' == *start)
904 start++;
905
906 if (0 == (sz = strlen(start)))
907 return;
908
909 if (';' == start[(int)sz - 1])
910 sz--;
911
912 if (0 == sz)
913 return;
914
915 buf_appendb(buf, start, sz);
916 buf_appendb(buf, "", 1);
917 hash_put(hash, buf, TYPE_Va);
918 }
919
920 /* ARGSUSED */
921 static void
922 pmdoc_Fo(MDOC_ARGS)
923 {
924
925 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
926 return;
927 if (NULL == n->child || MDOC_TEXT != n->child->type)
928 return;
929
930 buf_append(buf, n->child->string);
931 hash_put(hash, buf, TYPE_Fn);
932 }
933
934
935 /* ARGSUSED */
936 static void
937 pmdoc_Nd(MDOC_ARGS)
938 {
939
940 if (MDOC_BODY != n->type)
941 return;
942
943 buf_appendmdoc(dbuf, n->child, 1);
944 buf_appendmdoc(buf, n->child, 0);
945
946 hash_put(hash, buf, TYPE_Nd);
947 }
948
949 /* ARGSUSED */
950 static void
951 pmdoc_Er(MDOC_ARGS)
952 {
953
954 if (SEC_ERRORS != n->sec)
955 return;
956
957 buf_appendmdoc(buf, n->child, 0);
958 hash_put(hash, buf, TYPE_Er);
959 }
960
961 /* ARGSUSED */
962 static void
963 pmdoc_Ev(MDOC_ARGS)
964 {
965
966 if (SEC_ENVIRONMENT != n->sec)
967 return;
968
969 buf_appendmdoc(buf, n->child, 0);
970 hash_put(hash, buf, TYPE_Ev);
971 }
972
973 /* ARGSUSED */
974 static void
975 pmdoc_Pa(MDOC_ARGS)
976 {
977
978 if (SEC_FILES != n->sec)
979 return;
980
981 buf_appendmdoc(buf, n->child, 0);
982 hash_put(hash, buf, TYPE_Pa);
983 }
984
985 /* ARGSUSED */
986 static void
987 pmdoc_Nm(MDOC_ARGS)
988 {
989
990 if (SEC_NAME == n->sec) {
991 buf_appendmdoc(buf, n->child, 0);
992 hash_put(hash, buf, TYPE_Nm);
993 return;
994 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
995 return;
996
997 if (NULL == n->child)
998 buf_append(buf, m->name);
999
1000 buf_appendmdoc(buf, n->child, 0);
1001 hash_put(hash, buf, TYPE_Nm);
1002 }
1003
1004 static void
1005 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1006 {
1007 DBT key, val;
1008 int rc;
1009
1010 if (buf->len < 2)
1011 return;
1012
1013 key.data = buf->cp;
1014 key.size = buf->len;
1015
1016 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1017 perror("hash");
1018 exit((int)MANDOCLEVEL_SYSERR);
1019 } else if (0 == rc)
1020 mask |= *(uint64_t *)val.data;
1021
1022 val.data = &mask;
1023 val.size = sizeof(uint64_t);
1024
1025 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1026 perror("hash");
1027 exit((int)MANDOCLEVEL_SYSERR);
1028 }
1029 }
1030
1031 static void
1032 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1033 {
1034
1035 assert(key->size);
1036 assert(val->size);
1037
1038 if (0 == (*db->put)(db, key, val, 0))
1039 return;
1040
1041 perror(dbn);
1042 exit((int)MANDOCLEVEL_SYSERR);
1043 /* NOTREACHED */
1044 }
1045
1046 /*
1047 * Call out to per-macro handlers after clearing the persistent database
1048 * key. If the macro sets the database key, flush it to the database.
1049 */
1050 static void
1051 pmdoc_node(MDOC_ARGS)
1052 {
1053
1054 if (NULL == n)
1055 return;
1056
1057 switch (n->type) {
1058 case (MDOC_HEAD):
1059 /* FALLTHROUGH */
1060 case (MDOC_BODY):
1061 /* FALLTHROUGH */
1062 case (MDOC_TAIL):
1063 /* FALLTHROUGH */
1064 case (MDOC_BLOCK):
1065 /* FALLTHROUGH */
1066 case (MDOC_ELEM):
1067 if (NULL == mdocs[n->tok])
1068 break;
1069
1070 buf->len = 0;
1071 (*mdocs[n->tok])(hash, buf, dbuf, n, m);
1072 break;
1073 default:
1074 break;
1075 }
1076
1077 pmdoc_node(hash, buf, dbuf, n->child, m);
1078 pmdoc_node(hash, buf, dbuf, n->next, m);
1079 }
1080
/*
 * Search a man syntax tree for the NAME section and extract the manual
 * names (recorded as keywords) and the description (recorded as a
 * keyword and appended to the index record in `dbuf').
 *
 * Returns 1 if the NAME text contained no blank- or comma-terminated
 * name at all, which stops the traversal; returns 0 in every other
 * case, including after a NAME section has been processed.
 */
static int
pman_node(MAN_ARGS)
{
	const struct man_node *head, *body;
	const char	*start, *sv;
	size_t		 sz;

	if (NULL == n)
		return(0);

	/*
	 * We're only searching for one thing: the first text child in
	 * the BODY of a NAME section.  Since we don't keep track of
	 * sections in -man, run some hoops to find out whether we're in
	 * the correct section or not.
	 */

	if (MAN_BODY == n->type && MAN_SH == n->tok) {
		body = n;
		assert(body->parent);

		/*
		 * The section head must consist of the single word
		 * "NAME", and the body must start with a text node.
		 */

		if (NULL != (head = body->parent->head) &&
				1 == head->nchild &&
				NULL != (head = (head->child)) &&
				MAN_TEXT == head->type &&
				0 == strcmp(head->string, "NAME") &&
				NULL != (body = body->child) &&
				MAN_TEXT == body->type) {

			assert(body->string);
			start = sv = body->string;

			/*
			 * Go through a special heuristic dance here.
			 * This is why -man manuals are great!
			 * (I'm being sarcastic: my eyes are bleeding.)
			 * Conventionally, one or more manual names are
			 * comma-specified prior to a whitespace, then a
			 * dash, then a description.  Try to puzzle out
			 * the name parts here.
			 */

			for ( ;; ) {
				sz = strcspn(start, " ,");

				/* End of text: no (further) name. */

				if ('\0' == start[(int)sz])
					break;

				buf->len = 0;
				buf_appendb(buf, start, sz);
				buf_appendb(buf, "", 1);

				hash_put(hash, buf, TYPE_Nm);

				/* A blank ends the list of names. */

				if (' ' == start[(int)sz]) {
					start += (int)sz + 1;
					break;
				}

				/* A comma announces another name. */

				assert(',' == start[(int)sz]);
				start += (int)sz + 1;
				while (' ' == *start)
					start++;
			}

			buf->len = 0;

			/*
			 * Nothing was consumed, i.e. the text has neither
			 * blank nor comma.  The text is put into `buf' but
			 * not stored as a keyword here.
			 */

			if (sv == start) {
				buf_append(buf, start);
				return(1);
			}

			while (' ' == *start)
				start++;

			/* Skip the dash, in any of its usual spellings. */

			if (0 == strncmp(start, "-", 1))
				start += 1;
			else if (0 == strncmp(start, "\\-", 2))
				start += 2;
			else if (0 == strncmp(start, "\\(en", 4))
				start += 4;
			else if (0 == strncmp(start, "\\(em", 4))
				start += 4;

			while (' ' == *start)
				start++;

			/* Whatever remains is the description. */

			sz = strlen(start) + 1;
			buf_appendb(dbuf, start, sz);
			buf_appendb(buf, start, sz);

			hash_put(hash, buf, TYPE_Nd);
		}
	}

	/* Descend; a non-zero result from below ends the search. */

	for (n = n->child; n; n = n->next)
		if (pman_node(hash, buf, dbuf, n))
			return(1);

	return(0);
}
1180
1181 static void
1182 ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
1183 {
1184 int i;
1185 struct of *nof;
1186
1187 for (i = 0; i < argc; i++) {
1188 nof = mandoc_calloc(1, sizeof(struct of));
1189 nof->fname = strdup(argv[i]);
1190 if (verb > 2)
1191 printf("%s: Scheduling\n", argv[i]);
1192 if (NULL == *of) {
1193 *of = nof;
1194 (*of)->first = nof;
1195 } else {
1196 nof->first = (*of)->first;
1197 (*of)->next = nof;
1198 *of = nof;
1199 }
1200 }
1201 }
1202
1203 /*
1204 * Recursively build up a list of files to parse.
1205 * We use this instead of ftw() and so on because I don't want global
1206 * variables hanging around.
1207 * This ignores the mandoc.db and mandoc.index files, but assumes that
1208 * everything else is a manual.
1209 * Pass in a pointer to a NULL structure for the first invocation.
1210 */
1211 static int
1212 ofile_dirbuild(const char *dir, int verb, struct of **of)
1213 {
1214 char buf[MAXPATHLEN];
1215 size_t sz;
1216 DIR *d;
1217 const char *fn;
1218 struct of *nof;
1219 struct dirent *dp;
1220
1221 if (NULL == (d = opendir(dir))) {
1222 perror(dir);
1223 return(0);
1224 }
1225
1226 while (NULL != (dp = readdir(d))) {
1227 fn = dp->d_name;
1228 if (DT_DIR == dp->d_type) {
1229 if (0 == strcmp(".", fn))
1230 continue;
1231 if (0 == strcmp("..", fn))
1232 continue;
1233
1234 buf[0] = '\0';
1235 strlcat(buf, dir, MAXPATHLEN);
1236 strlcat(buf, "/", MAXPATHLEN);
1237 sz = strlcat(buf, fn, MAXPATHLEN);
1238
1239 if (sz < MAXPATHLEN) {
1240 if ( ! ofile_dirbuild(buf, verb, of))
1241 return(0);
1242 continue;
1243 } else if (sz < MAXPATHLEN)
1244 continue;
1245
1246 fprintf(stderr, "%s: Path too long\n", dir);
1247 return(0);
1248 }
1249 if (DT_REG != dp->d_type)
1250 continue;
1251
1252 if (0 == strcmp(MANDOC_DB, fn) ||
1253 0 == strcmp(MANDOC_IDX, fn))
1254 continue;
1255
1256 buf[0] = '\0';
1257 strlcat(buf, dir, MAXPATHLEN);
1258 strlcat(buf, "/", MAXPATHLEN);
1259 sz = strlcat(buf, fn, MAXPATHLEN);
1260 if (sz >= MAXPATHLEN) {
1261 fprintf(stderr, "%s: Path too long\n", dir);
1262 return(0);
1263 }
1264
1265 nof = mandoc_calloc(1, sizeof(struct of));
1266 nof->fname = mandoc_strdup(buf);
1267
1268 if (verb > 2)
1269 printf("%s: Scheduling\n", buf);
1270
1271 if (NULL == *of) {
1272 *of = nof;
1273 (*of)->first = nof;
1274 } else {
1275 nof->first = (*of)->first;
1276 (*of)->next = nof;
1277 *of = nof;
1278 }
1279 }
1280
1281 closedir(d);
1282 return(1);
1283 }
1284
1285 static void
1286 ofile_free(struct of *of)
1287 {
1288 struct of *nof;
1289
1290 while (of) {
1291 nof = of->next;
1292 free(of->fname);
1293 free(of);
1294 of = nof;
1295 }
1296 }
1297
1298 static void
1299 usage(void)
1300 {
1301
1302 fprintf(stderr, "usage: %s [-v] "
1303 "[-d dir [files...] |"
1304 " -u dir [files...] |"
1305 " dir...]\n", progname);
1306 }