]> git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
Inventing new keywords for mostly the same thing when a well-established
[mandoc.git] / mandocdb.c
1 /* $Id: mandocdb.c,v 1.8 2011/11/13 10:49:57 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #include <dirent.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #ifdef __linux__
33 # include <db_185.h>
34 #else
35 # include <db.h>
36 #endif
37
38 #include "man.h"
39 #include "mdoc.h"
40 #include "mandoc.h"
41 #include "mandocdb.h"
42
/* Initial size of, and growth overshoot for, a struct buf. */
#define	MANDOC_BUFSZ	  BUFSIZ
/* Number of slots by which the array of reusable records grows. */
#define	MANDOC_SLOP	  1024
45
/*
 * Singly-linked list node naming one file to process.
 * Hand-rolled so that QUEUE need not be pulled in.
 */
struct	of {
	char		 *fname;	/* file name, heap-allocated */
	struct of	 *next;		/* following node; NULL at the tail */
	struct of	 *first;	/* head of the list this node is on */
};
53
/*
 * Growable byte buffer.
 */
struct	buf {
	char		 *cp;	/* storage */
	size_t		  len;	/* bytes currently in use */
	size_t		  size;	/* bytes allocated */
};
61
/*
 * Which operation main() performs on the databases.
 */
enum	op {
	OP_NEW = 0,	/* create a new database */
	OP_UPDATE,	/* delete, then re-add, entries in an existing one */
	OP_DELETE	/* only delete entries from an existing one */
};
69
/*
 * Parameter lists shared by the -man and -mdoc node handlers:
 * the pending-keyword hash, the keyword buffer, the description
 * buffer and the node being visited (plus the meta data for -mdoc).
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
79
80 static void buf_appendmdoc(struct buf *,
81 const struct mdoc_node *, int);
82 static void buf_append(struct buf *, const char *);
83 static void buf_appendb(struct buf *,
84 const void *, size_t);
85 static void dbt_put(DB *, const char *, DBT *, DBT *);
86 static void hash_put(DB *, const struct buf *, int);
87 static void hash_reset(DB **);
88 static void index_merge(const struct of *, struct mparse *,
89 struct buf *, struct buf *,
90 DB *, DB *, const char *,
91 DB *, const char *, int,
92 recno_t, const recno_t *, size_t);
93 static void index_prune(const struct of *, DB *,
94 const char *, DB *, const char *,
95 int, recno_t *, recno_t **, size_t *);
96 static void ofile_argbuild(char *[], int, int, struct of **);
97 static int ofile_dirbuild(const char *, int, struct of **);
98 static void ofile_free(struct of *);
99 static int pman_node(MAN_ARGS);
100 static void pmdoc_node(MDOC_ARGS);
101 static void pmdoc_An(MDOC_ARGS);
102 static void pmdoc_Cd(MDOC_ARGS);
103 static void pmdoc_Er(MDOC_ARGS);
104 static void pmdoc_Ev(MDOC_ARGS);
105 static void pmdoc_Fd(MDOC_ARGS);
106 static void pmdoc_In(MDOC_ARGS);
107 static void pmdoc_Fn(MDOC_ARGS);
108 static void pmdoc_Fo(MDOC_ARGS);
109 static void pmdoc_Nd(MDOC_ARGS);
110 static void pmdoc_Nm(MDOC_ARGS);
111 static void pmdoc_Pa(MDOC_ARGS);
112 static void pmdoc_St(MDOC_ARGS);
113 static void pmdoc_Vt(MDOC_ARGS);
114 static void pmdoc_Xr(MDOC_ARGS);
115 static void usage(void);
116
117 typedef void (*pmdoc_nf)(MDOC_ARGS);
118
119 static const pmdoc_nf mdocs[MDOC_MAX] = {
120 NULL, /* Ap */
121 NULL, /* Dd */
122 NULL, /* Dt */
123 NULL, /* Os */
124 NULL, /* Sh */
125 NULL, /* Ss */
126 NULL, /* Pp */
127 NULL, /* D1 */
128 NULL, /* Dl */
129 NULL, /* Bd */
130 NULL, /* Ed */
131 NULL, /* Bl */
132 NULL, /* El */
133 NULL, /* It */
134 NULL, /* Ad */
135 pmdoc_An, /* An */
136 NULL, /* Ar */
137 pmdoc_Cd, /* Cd */
138 NULL, /* Cm */
139 NULL, /* Dv */
140 pmdoc_Er, /* Er */
141 pmdoc_Ev, /* Ev */
142 NULL, /* Ex */
143 NULL, /* Fa */
144 pmdoc_Fd, /* Fd */
145 NULL, /* Fl */
146 pmdoc_Fn, /* Fn */
147 NULL, /* Ft */
148 NULL, /* Ic */
149 pmdoc_In, /* In */
150 NULL, /* Li */
151 pmdoc_Nd, /* Nd */
152 pmdoc_Nm, /* Nm */
153 NULL, /* Op */
154 NULL, /* Ot */
155 pmdoc_Pa, /* Pa */
156 NULL, /* Rv */
157 pmdoc_St, /* St */
158 pmdoc_Vt, /* Va */
159 pmdoc_Vt, /* Vt */
160 pmdoc_Xr, /* Xr */
161 NULL, /* %A */
162 NULL, /* %B */
163 NULL, /* %D */
164 NULL, /* %I */
165 NULL, /* %J */
166 NULL, /* %N */
167 NULL, /* %O */
168 NULL, /* %P */
169 NULL, /* %R */
170 NULL, /* %T */
171 NULL, /* %V */
172 NULL, /* Ac */
173 NULL, /* Ao */
174 NULL, /* Aq */
175 NULL, /* At */
176 NULL, /* Bc */
177 NULL, /* Bf */
178 NULL, /* Bo */
179 NULL, /* Bq */
180 NULL, /* Bsx */
181 NULL, /* Bx */
182 NULL, /* Db */
183 NULL, /* Dc */
184 NULL, /* Do */
185 NULL, /* Dq */
186 NULL, /* Ec */
187 NULL, /* Ef */
188 NULL, /* Em */
189 NULL, /* Eo */
190 NULL, /* Fx */
191 NULL, /* Ms */
192 NULL, /* No */
193 NULL, /* Ns */
194 NULL, /* Nx */
195 NULL, /* Ox */
196 NULL, /* Pc */
197 NULL, /* Pf */
198 NULL, /* Po */
199 NULL, /* Pq */
200 NULL, /* Qc */
201 NULL, /* Ql */
202 NULL, /* Qo */
203 NULL, /* Qq */
204 NULL, /* Re */
205 NULL, /* Rs */
206 NULL, /* Sc */
207 NULL, /* So */
208 NULL, /* Sq */
209 NULL, /* Sm */
210 NULL, /* Sx */
211 NULL, /* Sy */
212 NULL, /* Tn */
213 NULL, /* Ux */
214 NULL, /* Xc */
215 NULL, /* Xo */
216 pmdoc_Fo, /* Fo */
217 NULL, /* Fc */
218 NULL, /* Oo */
219 NULL, /* Oc */
220 NULL, /* Bk */
221 NULL, /* Ek */
222 NULL, /* Bt */
223 NULL, /* Hf */
224 NULL, /* Fr */
225 NULL, /* Ud */
226 NULL, /* Lb */
227 NULL, /* Lp */
228 NULL, /* Lk */
229 NULL, /* Mt */
230 NULL, /* Brq */
231 NULL, /* Bro */
232 NULL, /* Brc */
233 NULL, /* %C */
234 NULL, /* Es */
235 NULL, /* En */
236 NULL, /* Dx */
237 NULL, /* %Q */
238 NULL, /* br */
239 NULL, /* sp */
240 NULL, /* %U */
241 NULL, /* Ta */
242 };
243
244 static const char *progname;
245
246 int
247 main(int argc, char *argv[])
248 {
249 struct mparse *mp; /* parse sequence */
250 enum op op; /* current operation */
251 const char *dir;
252 char ibuf[MAXPATHLEN], /* index fname */
253 fbuf[MAXPATHLEN]; /* btree fname */
254 int verb, /* output verbosity */
255 ch, i, flags;
256 DB *idx, /* index database */
257 *db, /* keyword database */
258 *hash; /* temporary keyword hashtable */
259 BTREEINFO info; /* btree configuration */
260 recno_t maxrec; /* supremum of all records */
261 recno_t *recs; /* buffer of empty records */
262 size_t sz1, sz2,
263 recsz, /* buffer size of recs */
264 reccur; /* valid number of recs */
265 struct buf buf, /* keyword buffer */
266 dbuf; /* description buffer */
267 struct of *of; /* list of files for processing */
268 extern int optind;
269 extern char *optarg;
270
271 progname = strrchr(argv[0], '/');
272 if (progname == NULL)
273 progname = argv[0];
274 else
275 ++progname;
276
277 verb = 0;
278 of = NULL;
279 db = idx = NULL;
280 mp = NULL;
281 hash = NULL;
282 recs = NULL;
283 recsz = reccur = 0;
284 maxrec = 0;
285 op = OP_NEW;
286 dir = NULL;
287
288 while (-1 != (ch = getopt(argc, argv, "d:u:v")))
289 switch (ch) {
290 case ('d'):
291 dir = optarg;
292 op = OP_UPDATE;
293 break;
294 case ('u'):
295 dir = optarg;
296 op = OP_DELETE;
297 break;
298 case ('v'):
299 verb++;
300 break;
301 default:
302 usage();
303 return((int)MANDOCLEVEL_BADARG);
304 }
305
306 argc -= optind;
307 argv += optind;
308
309 memset(&info, 0, sizeof(BTREEINFO));
310 info.flags = R_DUP;
311
312 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
313
314 memset(&buf, 0, sizeof(struct buf));
315 memset(&dbuf, 0, sizeof(struct buf));
316
317 buf.size = dbuf.size = MANDOC_BUFSZ;
318
319 buf.cp = mandoc_malloc(buf.size);
320 dbuf.cp = mandoc_malloc(dbuf.size);
321
322 flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
323
324 if (OP_UPDATE == op || OP_DELETE == op) {
325 ibuf[0] = fbuf[0] = '\0';
326
327 strlcat(fbuf, dir, MAXPATHLEN);
328 strlcat(fbuf, "/", MAXPATHLEN);
329 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
330
331 strlcat(ibuf, dir, MAXPATHLEN);
332 strlcat(ibuf, "/", MAXPATHLEN);
333 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
334
335 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
336 fprintf(stderr, "%s: Path too long\n", dir);
337 exit((int)MANDOCLEVEL_BADARG);
338 }
339
340 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
341 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
342
343 if (NULL == db) {
344 perror(fbuf);
345 exit((int)MANDOCLEVEL_SYSERR);
346 } else if (NULL == db) {
347 perror(ibuf);
348 exit((int)MANDOCLEVEL_SYSERR);
349 }
350
351 if (verb > 2) {
352 printf("%s: Opened\n", fbuf);
353 printf("%s: Opened\n", ibuf);
354 }
355
356 ofile_argbuild(argv, argc, verb, &of);
357 if (NULL == of)
358 goto out;
359
360 of = of->first;
361
362 index_prune(of, db, fbuf, idx, ibuf, verb,
363 &maxrec, &recs, &recsz);
364
365 if (OP_UPDATE == op)
366 index_merge(of, mp, &dbuf, &buf, hash,
367 db, fbuf, idx, ibuf, verb,
368 maxrec, recs, reccur);
369
370 goto out;
371 }
372
373 for (i = 0; i < argc; i++) {
374 ibuf[0] = fbuf[0] = '\0';
375
376 strlcat(fbuf, argv[i], MAXPATHLEN);
377 strlcat(fbuf, "/", MAXPATHLEN);
378 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
379
380 strlcat(ibuf, argv[i], MAXPATHLEN);
381 strlcat(ibuf, "/", MAXPATHLEN);
382 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
383
384 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
385 fprintf(stderr, "%s: Path too long\n", argv[i]);
386 exit((int)MANDOCLEVEL_BADARG);
387 }
388
389 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
390 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
391
392 if (NULL == db) {
393 perror(fbuf);
394 exit((int)MANDOCLEVEL_SYSERR);
395 } else if (NULL == db) {
396 perror(ibuf);
397 exit((int)MANDOCLEVEL_SYSERR);
398 }
399
400 if (verb > 2) {
401 printf("%s: Truncated\n", fbuf);
402 printf("%s: Truncated\n", ibuf);
403 }
404
405 ofile_free(of);
406 of = NULL;
407
408 if ( ! ofile_dirbuild(argv[i], verb, &of))
409 exit((int)MANDOCLEVEL_SYSERR);
410
411 if (NULL == of)
412 continue;
413
414 of = of->first;
415
416 index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
417 idx, ibuf, verb, maxrec, recs, reccur);
418 }
419
420 out:
421 if (db)
422 (*db->close)(db);
423 if (idx)
424 (*idx->close)(idx);
425 if (hash)
426 (*hash->close)(hash);
427 if (mp)
428 mparse_free(mp);
429
430 ofile_free(of);
431 free(buf.cp);
432 free(dbuf.cp);
433 free(recs);
434
435 return(MANDOCLEVEL_OK);
436 }
437
438 void
439 index_merge(const struct of *of, struct mparse *mp,
440 struct buf *dbuf, struct buf *buf,
441 DB *hash, DB *db, const char *dbf,
442 DB *idx, const char *idxf, int verb,
443 recno_t maxrec, const recno_t *recs, size_t reccur)
444 {
445 recno_t rec;
446 int ch;
447 DBT key, val;
448 struct mdoc *mdoc;
449 struct man *man;
450 const char *fn, *msec, *mtitle, *arch;
451 size_t sv;
452 unsigned seq;
453 char vbuf[8];
454
455 for (rec = 0; of; of = of->next) {
456 fn = of->fname;
457 if (reccur > 0) {
458 --reccur;
459 rec = recs[(int)reccur];
460 } else if (maxrec > 0) {
461 rec = maxrec;
462 maxrec = 0;
463 } else
464 rec++;
465
466 mparse_reset(mp);
467 hash_reset(&hash);
468
469 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
470 fprintf(stderr, "%s: Parse failure\n", fn);
471 continue;
472 }
473
474 mparse_result(mp, &mdoc, &man);
475 if (NULL == mdoc && NULL == man)
476 continue;
477
478 msec = NULL != mdoc ?
479 mdoc_meta(mdoc)->msec : man_meta(man)->msec;
480 mtitle = NULL != mdoc ?
481 mdoc_meta(mdoc)->title : man_meta(man)->title;
482 arch = NULL != mdoc ?
483 mdoc_meta(mdoc)->arch : NULL;
484
485 if (NULL == arch)
486 arch = "";
487
488 /*
489 * The index record value consists of a nil-terminated
490 * filename, a nil-terminated manual section, and a
491 * nil-terminated description. Since the description
492 * may not be set, we set a sentinel to see if we're
493 * going to write a nil byte in its place.
494 */
495
496 dbuf->len = 0;
497 buf_appendb(dbuf, fn, strlen(fn) + 1);
498 buf_appendb(dbuf, msec, strlen(msec) + 1);
499 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
500 buf_appendb(dbuf, arch, strlen(arch) + 1);
501
502 sv = dbuf->len;
503
504 /* Fix the record number in the btree value. */
505
506 if (mdoc)
507 pmdoc_node(hash, buf, dbuf,
508 mdoc_node(mdoc), mdoc_meta(mdoc));
509 else
510 pman_node(hash, buf, dbuf, man_node(man));
511
512 /*
513 * Copy from the in-memory hashtable of pending keywords
514 * into the database.
515 */
516
517 memset(vbuf, 0, sizeof(uint32_t));
518 memcpy(vbuf + 4, &rec, sizeof(uint32_t));
519
520 seq = R_FIRST;
521 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
522 seq = R_NEXT;
523
524 memcpy(vbuf, val.data, sizeof(uint32_t));
525 val.size = sizeof(vbuf);
526 val.data = vbuf;
527
528 if (verb > 1)
529 printf("%s: Added keyword: %s\n",
530 fn, (char *)key.data);
531 dbt_put(db, dbf, &key, &val);
532 }
533 if (ch < 0) {
534 perror("hash");
535 exit((int)MANDOCLEVEL_SYSERR);
536 }
537
538 /*
539 * Apply to the index. If we haven't had a description
540 * set, put an empty one in now.
541 */
542
543 if (dbuf->len == sv)
544 buf_appendb(dbuf, "", 1);
545
546 key.data = &rec;
547 key.size = sizeof(recno_t);
548
549 val.data = dbuf->cp;
550 val.size = dbuf->len;
551
552 if (verb)
553 printf("%s: Added index\n", fn);
554 dbt_put(idx, idxf, &key, &val);
555 }
556 }
557
558 /*
559 * Scan through all entries in the index file `idx' and prune those
560 * entries in `ofile'.
561 * Pruning consists of removing from `db', then invalidating the entry
562 * in `idx' (zeroing its value size).
563 */
564 static void
565 index_prune(const struct of *ofile, DB *db, const char *dbf,
566 DB *idx, const char *idxf, int verb,
567 recno_t *maxrec, recno_t **recs, size_t *recsz)
568 {
569 const struct of *of;
570 const char *fn;
571 unsigned seq, sseq;
572 DBT key, val;
573 size_t reccur;
574 int ch;
575
576 reccur = 0;
577 seq = R_FIRST;
578 while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
579 seq = R_NEXT;
580 *maxrec = *(recno_t *)key.data;
581 if (0 == val.size) {
582 if (reccur >= *recsz) {
583 *recsz += MANDOC_SLOP;
584 *recs = mandoc_realloc(*recs,
585 *recsz * sizeof(recno_t));
586 }
587 (*recs)[(int)reccur] = *maxrec;
588 reccur++;
589 continue;
590 }
591
592 fn = (char *)val.data;
593 for (of = ofile; of; of = of->next)
594 if (0 == strcmp(fn, of->fname))
595 break;
596
597 if (NULL == of)
598 continue;
599
600 sseq = R_FIRST;
601 while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
602 sseq = R_NEXT;
603 assert(8 == val.size);
604 if (*maxrec != *(recno_t *)(val.data + 4))
605 continue;
606 if (verb)
607 printf("%s: Deleted keyword: %s\n",
608 fn, (char *)key.data);
609 ch = (*db->del)(db, &key, R_CURSOR);
610 if (ch < 0)
611 break;
612 }
613 if (ch < 0) {
614 perror(dbf);
615 exit((int)MANDOCLEVEL_SYSERR);
616 }
617
618 if (verb)
619 printf("%s: Deleted index\n", fn);
620
621 val.size = 0;
622 ch = (*idx->put)(idx, &key, &val, R_CURSOR);
623 if (ch < 0) {
624 perror(idxf);
625 exit((int)MANDOCLEVEL_SYSERR);
626 }
627
628 if (reccur >= *recsz) {
629 *recsz += MANDOC_SLOP;
630 *recs = mandoc_realloc
631 (*recs, *recsz * sizeof(recno_t));
632 }
633
634 (*recs)[(int)reccur] = *maxrec;
635 reccur++;
636 }
637 (*maxrec)++;
638 }
639
640 /*
641 * Grow the buffer (if necessary) and copy in a binary string.
642 */
643 static void
644 buf_appendb(struct buf *buf, const void *cp, size_t sz)
645 {
646
647 /* Overshoot by MANDOC_BUFSZ. */
648
649 while (buf->len + sz >= buf->size) {
650 buf->size = buf->len + sz + MANDOC_BUFSZ;
651 buf->cp = mandoc_realloc(buf->cp, buf->size);
652 }
653
654 memcpy(buf->cp + (int)buf->len, cp, sz);
655 buf->len += sz;
656 }
657
658 /*
659 * Append a nil-terminated string to the buffer.
660 * This can be invoked multiple times.
661 * The buffer string will be nil-terminated.
662 * If invoked multiple times, a space is put between strings.
663 */
664 static void
665 buf_append(struct buf *buf, const char *cp)
666 {
667 size_t sz;
668
669 if (0 == (sz = strlen(cp)))
670 return;
671
672 if (buf->len)
673 buf->cp[(int)buf->len - 1] = ' ';
674
675 buf_appendb(buf, cp, sz + 1);
676 }
677
678 /*
679 * Recursively add all text from a given node.
680 * This is optimised for general mdoc nodes in this context, which do
681 * not consist of subexpressions and having a recursive call for n->next
682 * would be wasteful.
683 * The "f" variable should be 0 unless called from pmdoc_Nd for the
684 * description buffer, which does not start at the beginning of the
685 * buffer.
686 */
687 static void
688 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
689 {
690
691 for ( ; n; n = n->next) {
692 if (n->child)
693 buf_appendmdoc(buf, n->child, f);
694
695 if (MDOC_TEXT == n->type && f) {
696 f = 0;
697 buf_appendb(buf, n->string,
698 strlen(n->string) + 1);
699 } else if (MDOC_TEXT == n->type)
700 buf_append(buf, n->string);
701
702 }
703 }
704
705 /* ARGSUSED */
706 static void
707 pmdoc_An(MDOC_ARGS)
708 {
709
710 if (SEC_AUTHORS != n->sec)
711 return;
712
713 buf_appendmdoc(buf, n->child, 0);
714 hash_put(hash, buf, TYPE_An);
715 }
716
717 static void
718 hash_reset(DB **db)
719 {
720 DB *hash;
721
722 if (NULL != (hash = *db))
723 (*hash->close)(hash);
724
725 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
726 if (NULL == *db) {
727 perror("hash");
728 exit((int)MANDOCLEVEL_SYSERR);
729 }
730 }
731
732 /* ARGSUSED */
733 static void
734 pmdoc_Fd(MDOC_ARGS)
735 {
736 const char *start, *end;
737 size_t sz;
738
739 if (SEC_SYNOPSIS != n->sec)
740 return;
741 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
742 return;
743
744 /*
745 * Only consider those `Fd' macro fields that begin with an
746 * "inclusion" token (versus, e.g., #define).
747 */
748 if (strcmp("#include", n->string))
749 return;
750
751 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
752 return;
753
754 /*
755 * Strip away the enclosing angle brackets and make sure we're
756 * not zero-length.
757 */
758
759 start = n->string;
760 if ('<' == *start || '"' == *start)
761 start++;
762
763 if (0 == (sz = strlen(start)))
764 return;
765
766 end = &start[(int)sz - 1];
767 if ('>' == *end || '"' == *end)
768 end--;
769
770 assert(end >= start);
771
772 buf_appendb(buf, start, (size_t)(end - start + 1));
773 buf_appendb(buf, "", 1);
774
775 hash_put(hash, buf, TYPE_In);
776 }
777
778 /* ARGSUSED */
779 static void
780 pmdoc_Cd(MDOC_ARGS)
781 {
782
783 if (SEC_SYNOPSIS != n->sec)
784 return;
785
786 buf_appendmdoc(buf, n->child, 0);
787 hash_put(hash, buf, TYPE_Cd);
788 }
789
790 /* ARGSUSED */
791 static void
792 pmdoc_In(MDOC_ARGS)
793 {
794
795 if (SEC_SYNOPSIS != n->sec)
796 return;
797 if (NULL == n->child || MDOC_TEXT != n->child->type)
798 return;
799
800 buf_append(buf, n->child->string);
801 hash_put(hash, buf, TYPE_In);
802 }
803
804 /* ARGSUSED */
805 static void
806 pmdoc_Fn(MDOC_ARGS)
807 {
808 const char *cp;
809
810 if (SEC_SYNOPSIS != n->sec)
811 return;
812 if (NULL == n->child || MDOC_TEXT != n->child->type)
813 return;
814
815 /* .Fn "struct type *arg" "foo" */
816
817 cp = strrchr(n->child->string, ' ');
818 if (NULL == cp)
819 cp = n->child->string;
820
821 /* Strip away pointer symbol. */
822
823 while ('*' == *cp)
824 cp++;
825
826 buf_append(buf, cp);
827 hash_put(hash, buf, TYPE_Fn);
828 }
829
830 /* ARGSUSED */
831 static void
832 pmdoc_St(MDOC_ARGS)
833 {
834
835 if (SEC_STANDARDS != n->sec)
836 return;
837 if (NULL == n->child || MDOC_TEXT != n->child->type)
838 return;
839
840 buf_append(buf, n->child->string);
841 hash_put(hash, buf, TYPE_St);
842 }
843
844 /* ARGSUSED */
845 static void
846 pmdoc_Xr(MDOC_ARGS)
847 {
848
849 if (NULL == (n = n->child))
850 return;
851
852 buf_appendb(buf, n->string, strlen(n->string));
853
854 if (NULL != (n = n->next)) {
855 buf_appendb(buf, ".", 1);
856 buf_appendb(buf, n->string, strlen(n->string) + 1);
857 } else
858 buf_appendb(buf, ".", 2);
859
860 hash_put(hash, buf, TYPE_Xr);
861 }
862
863 /* ARGSUSED */
864 static void
865 pmdoc_Vt(MDOC_ARGS)
866 {
867 const char *start;
868 size_t sz;
869
870 if (SEC_SYNOPSIS != n->sec)
871 return;
872 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
873 return;
874 if (NULL == n->last || MDOC_TEXT != n->last->type)
875 return;
876
877 /*
878 * Strip away leading pointer symbol '*' and trailing ';'.
879 */
880
881 start = n->last->string;
882
883 while ('*' == *start)
884 start++;
885
886 if (0 == (sz = strlen(start)))
887 return;
888
889 if (';' == start[(int)sz - 1])
890 sz--;
891
892 if (0 == sz)
893 return;
894
895 buf_appendb(buf, start, sz);
896 buf_appendb(buf, "", 1);
897 hash_put(hash, buf, TYPE_Va);
898 }
899
900 /* ARGSUSED */
901 static void
902 pmdoc_Fo(MDOC_ARGS)
903 {
904
905 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
906 return;
907 if (NULL == n->child || MDOC_TEXT != n->child->type)
908 return;
909
910 buf_append(buf, n->child->string);
911 hash_put(hash, buf, TYPE_Fn);
912 }
913
914
915 /* ARGSUSED */
916 static void
917 pmdoc_Nd(MDOC_ARGS)
918 {
919
920 if (MDOC_BODY != n->type)
921 return;
922
923 buf_appendmdoc(dbuf, n->child, 1);
924 buf_appendmdoc(buf, n->child, 0);
925
926 hash_put(hash, buf, TYPE_Nd);
927 }
928
929 /* ARGSUSED */
930 static void
931 pmdoc_Er(MDOC_ARGS)
932 {
933
934 if (SEC_ERRORS != n->sec)
935 return;
936
937 buf_appendmdoc(buf, n->child, 0);
938 hash_put(hash, buf, TYPE_Er);
939 }
940
941 /* ARGSUSED */
942 static void
943 pmdoc_Ev(MDOC_ARGS)
944 {
945
946 if (SEC_ENVIRONMENT != n->sec)
947 return;
948
949 buf_appendmdoc(buf, n->child, 0);
950 hash_put(hash, buf, TYPE_Ev);
951 }
952
953 /* ARGSUSED */
954 static void
955 pmdoc_Pa(MDOC_ARGS)
956 {
957
958 if (SEC_FILES != n->sec)
959 return;
960
961 buf_appendmdoc(buf, n->child, 0);
962 hash_put(hash, buf, TYPE_Pa);
963 }
964
965 /* ARGSUSED */
966 static void
967 pmdoc_Nm(MDOC_ARGS)
968 {
969
970 if (SEC_NAME == n->sec) {
971 buf_appendmdoc(buf, n->child, 0);
972 hash_put(hash, buf, TYPE_Nm);
973 return;
974 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
975 return;
976
977 if (NULL == n->child)
978 buf_append(buf, m->name);
979
980 buf_appendmdoc(buf, n->child, 0);
981 hash_put(hash, buf, TYPE_Nm);
982 }
983
984 static void
985 hash_put(DB *db, const struct buf *buf, int mask)
986 {
987 DBT key, val;
988 int rc;
989
990 if (buf->len < 2)
991 return;
992
993 key.data = buf->cp;
994 key.size = buf->len;
995
996 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
997 perror("hash");
998 exit((int)MANDOCLEVEL_SYSERR);
999 } else if (0 == rc)
1000 mask |= *(int *)val.data;
1001
1002 val.data = &mask;
1003 val.size = sizeof(int);
1004
1005 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1006 perror("hash");
1007 exit((int)MANDOCLEVEL_SYSERR);
1008 }
1009 }
1010
1011 static void
1012 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1013 {
1014
1015 assert(key->size);
1016 assert(val->size);
1017
1018 if (0 == (*db->put)(db, key, val, 0))
1019 return;
1020
1021 perror(dbn);
1022 exit((int)MANDOCLEVEL_SYSERR);
1023 /* NOTREACHED */
1024 }
1025
1026 /*
1027 * Call out to per-macro handlers after clearing the persistent database
1028 * key. If the macro sets the database key, flush it to the database.
1029 */
1030 static void
1031 pmdoc_node(MDOC_ARGS)
1032 {
1033
1034 if (NULL == n)
1035 return;
1036
1037 switch (n->type) {
1038 case (MDOC_HEAD):
1039 /* FALLTHROUGH */
1040 case (MDOC_BODY):
1041 /* FALLTHROUGH */
1042 case (MDOC_TAIL):
1043 /* FALLTHROUGH */
1044 case (MDOC_BLOCK):
1045 /* FALLTHROUGH */
1046 case (MDOC_ELEM):
1047 if (NULL == mdocs[n->tok])
1048 break;
1049
1050 buf->len = 0;
1051 (*mdocs[n->tok])(hash, buf, dbuf, n, m);
1052 break;
1053 default:
1054 break;
1055 }
1056
1057 pmdoc_node(hash, buf, dbuf, n->child, m);
1058 pmdoc_node(hash, buf, dbuf, n->next, m);
1059 }
1060
1061 static int
1062 pman_node(MAN_ARGS)
1063 {
1064 const struct man_node *head, *body;
1065 const char *start, *sv;
1066 size_t sz;
1067
1068 if (NULL == n)
1069 return(0);
1070
1071 /*
1072 * We're only searching for one thing: the first text child in
1073 * the BODY of a NAME section. Since we don't keep track of
1074 * sections in -man, run some hoops to find out whether we're in
1075 * the correct section or not.
1076 */
1077
1078 if (MAN_BODY == n->type && MAN_SH == n->tok) {
1079 body = n;
1080 assert(body->parent);
1081 if (NULL != (head = body->parent->head) &&
1082 1 == head->nchild &&
1083 NULL != (head = (head->child)) &&
1084 MAN_TEXT == head->type &&
1085 0 == strcmp(head->string, "NAME") &&
1086 NULL != (body = body->child) &&
1087 MAN_TEXT == body->type) {
1088
1089 assert(body->string);
1090 start = sv = body->string;
1091
1092 /*
1093 * Go through a special heuristic dance here.
1094 * This is why -man manuals are great!
1095 * (I'm being sarcastic: my eyes are bleeding.)
1096 * Conventionally, one or more manual names are
1097 * comma-specified prior to a whitespace, then a
1098 * dash, then a description. Try to puzzle out
1099 * the name parts here.
1100 */
1101
1102 for ( ;; ) {
1103 sz = strcspn(start, " ,");
1104 if ('\0' == start[(int)sz])
1105 break;
1106
1107 buf->len = 0;
1108 buf_appendb(buf, start, sz);
1109 buf_appendb(buf, "", 1);
1110
1111 hash_put(hash, buf, TYPE_Nm);
1112
1113 if (' ' == start[(int)sz]) {
1114 start += (int)sz + 1;
1115 break;
1116 }
1117
1118 assert(',' == start[(int)sz]);
1119 start += (int)sz + 1;
1120 while (' ' == *start)
1121 start++;
1122 }
1123
1124 buf->len = 0;
1125
1126 if (sv == start) {
1127 buf_append(buf, start);
1128 return(1);
1129 }
1130
1131 while (' ' == *start)
1132 start++;
1133
1134 if (0 == strncmp(start, "-", 1))
1135 start += 1;
1136 else if (0 == strncmp(start, "\\-", 2))
1137 start += 2;
1138 else if (0 == strncmp(start, "\\(en", 4))
1139 start += 4;
1140 else if (0 == strncmp(start, "\\(em", 4))
1141 start += 4;
1142
1143 while (' ' == *start)
1144 start++;
1145
1146 sz = strlen(start) + 1;
1147 buf_appendb(dbuf, start, sz);
1148 buf_appendb(buf, start, sz);
1149
1150 hash_put(hash, buf, TYPE_Nd);
1151 }
1152 }
1153
1154 for (n = n->child; n; n = n->next)
1155 if (pman_node(hash, buf, dbuf, n))
1156 return(1);
1157
1158 return(0);
1159 }
1160
1161 static void
1162 ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
1163 {
1164 int i;
1165 struct of *nof;
1166
1167 for (i = 0; i < argc; i++) {
1168 nof = mandoc_calloc(1, sizeof(struct of));
1169 nof->fname = strdup(argv[i]);
1170 if (verb > 2)
1171 printf("%s: Scheduling\n", argv[i]);
1172 if (NULL == *of) {
1173 *of = nof;
1174 (*of)->first = nof;
1175 } else {
1176 nof->first = (*of)->first;
1177 (*of)->next = nof;
1178 *of = nof;
1179 }
1180 }
1181 }
1182
1183 /*
1184 * Recursively build up a list of files to parse.
1185 * We use this instead of ftw() and so on because I don't want global
1186 * variables hanging around.
1187 * This ignores the mandoc.db and mandoc.index files, but assumes that
1188 * everything else is a manual.
1189 * Pass in a pointer to a NULL structure for the first invocation.
1190 */
1191 static int
1192 ofile_dirbuild(const char *dir, int verb, struct of **of)
1193 {
1194 char buf[MAXPATHLEN];
1195 size_t sz;
1196 DIR *d;
1197 const char *fn;
1198 struct of *nof;
1199 struct dirent *dp;
1200
1201 if (NULL == (d = opendir(dir))) {
1202 perror(dir);
1203 return(0);
1204 }
1205
1206 while (NULL != (dp = readdir(d))) {
1207 fn = dp->d_name;
1208 if (DT_DIR == dp->d_type) {
1209 if (0 == strcmp(".", fn))
1210 continue;
1211 if (0 == strcmp("..", fn))
1212 continue;
1213
1214 buf[0] = '\0';
1215 strlcat(buf, dir, MAXPATHLEN);
1216 strlcat(buf, "/", MAXPATHLEN);
1217 sz = strlcat(buf, fn, MAXPATHLEN);
1218
1219 if (sz < MAXPATHLEN) {
1220 if ( ! ofile_dirbuild(buf, verb, of))
1221 return(0);
1222 continue;
1223 } else if (sz < MAXPATHLEN)
1224 continue;
1225
1226 fprintf(stderr, "%s: Path too long\n", dir);
1227 return(0);
1228 }
1229 if (DT_REG != dp->d_type)
1230 continue;
1231
1232 if (0 == strcmp(MANDOC_DB, fn) ||
1233 0 == strcmp(MANDOC_IDX, fn))
1234 continue;
1235
1236 buf[0] = '\0';
1237 strlcat(buf, dir, MAXPATHLEN);
1238 strlcat(buf, "/", MAXPATHLEN);
1239 sz = strlcat(buf, fn, MAXPATHLEN);
1240 if (sz >= MAXPATHLEN) {
1241 fprintf(stderr, "%s: Path too long\n", dir);
1242 return(0);
1243 }
1244
1245 nof = mandoc_calloc(1, sizeof(struct of));
1246 nof->fname = mandoc_strdup(buf);
1247
1248 if (verb > 2)
1249 printf("%s: Scheduling\n", buf);
1250
1251 if (NULL == *of) {
1252 *of = nof;
1253 (*of)->first = nof;
1254 } else {
1255 nof->first = (*of)->first;
1256 (*of)->next = nof;
1257 *of = nof;
1258 }
1259 }
1260
1261 closedir(d);
1262 return(1);
1263 }
1264
1265 static void
1266 ofile_free(struct of *of)
1267 {
1268 struct of *nof;
1269
1270 while (of) {
1271 nof = of->next;
1272 free(of->fname);
1273 free(of);
1274 of = nof;
1275 }
1276 }
1277
1278 static void
1279 usage(void)
1280 {
1281
1282 fprintf(stderr, "usage: %s [-v] "
1283 "[-d dir [files...] |"
1284 " -u dir [files...] |"
1285 " dir...]\n", progname);
1286 }