]> git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
First step of making mandocdb a true makewhatis/mandb replacement:
[mandoc.git] / mandocdb.c
1 /* $Id: mandocdb.c,v 1.4 2011/07/15 10:15:24 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #include <dirent.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #ifdef __linux__
33 # include <db_185.h>
34 #else
35 # include <db.h>
36 #endif
37
38 #include "man.h"
39 #include "mdoc.h"
40 #include "mandoc.h"
41
/*
 * Database file names (appended to each directory argument) and
 * sizing constants.
 */
#define	MANDOC_DB	 "mandoc.db"	/* keyword btree */
#define	MANDOC_IDX	 "mandoc.index"	/* recno index */
#define	MANDOC_BUFSZ	  BUFSIZ	/* initial size and growth slop of a struct buf */
/* Parenthesised so the expansion is safe inside any larger expression. */
#define	MANDOC_FLAGS	 (O_CREAT|O_TRUNC|O_RDWR)
#define	MANDOC_SLOP	  1024		/* growth step of the reusable-record array */
47
/*
 * Keyword type bits.  See mandocdb.8.
 * Several bits may be or'ed together for one keyword (see hash_put()).
 */

#define	TYPE_NAME	  0x0001	/* manual name (NAME section) */
#define	TYPE_FUNCTION	  0x0002	/* Fn, Fo in SYNOPSIS */
#define	TYPE_UTILITY	  0x0004	/* Nm in SYNOPSIS */
#define	TYPE_INCLUDES	  0x0008	/* In, Fd "#include" in SYNOPSIS */
#define	TYPE_VARIABLE	  0x0010	/* Va, Vt in SYNOPSIS */
#define	TYPE_STANDARD	  0x0020	/* St in STANDARDS */
#define	TYPE_AUTHOR	  0x0040	/* An in AUTHORS */
#define	TYPE_CONFIG	  0x0080	/* Cd in SYNOPSIS */
#define	TYPE_DESC	  0x0100	/* one-line description */
#define	TYPE_XREF	  0x0200	/* Xr cross reference */
#define	TYPE_PATH	  0x0400	/* Pa in FILES */
#define	TYPE_ENV	  0x0800	/* Ev in ENVIRONMENT */
#define	TYPE_ERR	  0x1000	/* Er in ERRORS */
63
/*
 * One entry in the singly-linked list of files to be indexed.
 * Entries are created by ofile_build() and released by ofile_free().
 */
struct	of {
	char		 *fname;	/* heap-allocated file name */
	struct of	 *next;		/* following entry or NULL */
	struct of	 *first;	/* first entry of the list */
};
69
/*
 * A growable byte buffer.
 * Data is appended with buf_appendb(); setting len to zero empties it
 * without releasing the storage.
 */

struct	buf {
	char		 *cp;	/* storage */
	size_t		  len;	/* bytes currently in use */
	size_t		  size;	/* bytes allocated */
};
77
/* The operation that main() performs on each directory's databases. */

enum	op {
	OP_NEW = 0,	/* create the databases from scratch */
	OP_UPDATE,	/* replace entries in existing databases */
	OP_DELETE	/* remove entries from existing databases */
};
85
/*
 * Parameter lists shared by the node handlers:
 * hash is the pending-keyword table, buf the keyword scratch buffer,
 * dbuf the index record being assembled, n the node being visited
 * and (mdoc only) m the document meta-data.
 */
#define	MAN_ARGS	  DB *hash, struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
95
96 static void buf_appendmdoc(struct buf *,
97 const struct mdoc_node *, int);
98 static void buf_append(struct buf *, const char *);
99 static void buf_appendb(struct buf *,
100 const void *, size_t);
101 static void dbt_put(DB *, const char *, DBT *, DBT *);
102 static void hash_put(DB *, const struct buf *, int);
103 static void hash_reset(DB **);
104 static void index_merge(const struct of *, struct mparse *,
105 struct buf *, struct buf *,
106 DB *, DB *, const char *,
107 DB *, const char *,
108 recno_t, const recno_t *, size_t);
109 static void index_prune(const struct of *, DB *,
110 const char *, DB *, const char *,
111 recno_t *, recno_t **, size_t *);
112 static int ofile_build(const char *, struct of **);
113 static void ofile_free(struct of *);
114 static int pman_node(MAN_ARGS);
115 static void pmdoc_node(MDOC_ARGS);
116 static void pmdoc_An(MDOC_ARGS);
117 static void pmdoc_Cd(MDOC_ARGS);
118 static void pmdoc_Er(MDOC_ARGS);
119 static void pmdoc_Ev(MDOC_ARGS);
120 static void pmdoc_Fd(MDOC_ARGS);
121 static void pmdoc_In(MDOC_ARGS);
122 static void pmdoc_Fn(MDOC_ARGS);
123 static void pmdoc_Fo(MDOC_ARGS);
124 static void pmdoc_Nd(MDOC_ARGS);
125 static void pmdoc_Nm(MDOC_ARGS);
126 static void pmdoc_Pa(MDOC_ARGS);
127 static void pmdoc_St(MDOC_ARGS);
128 static void pmdoc_Vt(MDOC_ARGS);
129 static void pmdoc_Xr(MDOC_ARGS);
130 static void usage(void);
131
132 typedef void (*pmdoc_nf)(MDOC_ARGS);
133
134 static const pmdoc_nf mdocs[MDOC_MAX] = {
135 NULL, /* Ap */
136 NULL, /* Dd */
137 NULL, /* Dt */
138 NULL, /* Os */
139 NULL, /* Sh */
140 NULL, /* Ss */
141 NULL, /* Pp */
142 NULL, /* D1 */
143 NULL, /* Dl */
144 NULL, /* Bd */
145 NULL, /* Ed */
146 NULL, /* Bl */
147 NULL, /* El */
148 NULL, /* It */
149 NULL, /* Ad */
150 pmdoc_An, /* An */
151 NULL, /* Ar */
152 pmdoc_Cd, /* Cd */
153 NULL, /* Cm */
154 NULL, /* Dv */
155 pmdoc_Er, /* Er */
156 pmdoc_Ev, /* Ev */
157 NULL, /* Ex */
158 NULL, /* Fa */
159 pmdoc_Fd, /* Fd */
160 NULL, /* Fl */
161 pmdoc_Fn, /* Fn */
162 NULL, /* Ft */
163 NULL, /* Ic */
164 pmdoc_In, /* In */
165 NULL, /* Li */
166 pmdoc_Nd, /* Nd */
167 pmdoc_Nm, /* Nm */
168 NULL, /* Op */
169 NULL, /* Ot */
170 pmdoc_Pa, /* Pa */
171 NULL, /* Rv */
172 pmdoc_St, /* St */
173 pmdoc_Vt, /* Va */
174 pmdoc_Vt, /* Vt */
175 pmdoc_Xr, /* Xr */
176 NULL, /* %A */
177 NULL, /* %B */
178 NULL, /* %D */
179 NULL, /* %I */
180 NULL, /* %J */
181 NULL, /* %N */
182 NULL, /* %O */
183 NULL, /* %P */
184 NULL, /* %R */
185 NULL, /* %T */
186 NULL, /* %V */
187 NULL, /* Ac */
188 NULL, /* Ao */
189 NULL, /* Aq */
190 NULL, /* At */
191 NULL, /* Bc */
192 NULL, /* Bf */
193 NULL, /* Bo */
194 NULL, /* Bq */
195 NULL, /* Bsx */
196 NULL, /* Bx */
197 NULL, /* Db */
198 NULL, /* Dc */
199 NULL, /* Do */
200 NULL, /* Dq */
201 NULL, /* Ec */
202 NULL, /* Ef */
203 NULL, /* Em */
204 NULL, /* Eo */
205 NULL, /* Fx */
206 NULL, /* Ms */
207 NULL, /* No */
208 NULL, /* Ns */
209 NULL, /* Nx */
210 NULL, /* Ox */
211 NULL, /* Pc */
212 NULL, /* Pf */
213 NULL, /* Po */
214 NULL, /* Pq */
215 NULL, /* Qc */
216 NULL, /* Ql */
217 NULL, /* Qo */
218 NULL, /* Qq */
219 NULL, /* Re */
220 NULL, /* Rs */
221 NULL, /* Sc */
222 NULL, /* So */
223 NULL, /* Sq */
224 NULL, /* Sm */
225 NULL, /* Sx */
226 NULL, /* Sy */
227 NULL, /* Tn */
228 NULL, /* Ux */
229 NULL, /* Xc */
230 NULL, /* Xo */
231 pmdoc_Fo, /* Fo */
232 NULL, /* Fc */
233 NULL, /* Oo */
234 NULL, /* Oc */
235 NULL, /* Bk */
236 NULL, /* Ek */
237 NULL, /* Bt */
238 NULL, /* Hf */
239 NULL, /* Fr */
240 NULL, /* Ud */
241 NULL, /* Lb */
242 NULL, /* Lp */
243 NULL, /* Lk */
244 NULL, /* Mt */
245 NULL, /* Brq */
246 NULL, /* Bro */
247 NULL, /* Brc */
248 NULL, /* %C */
249 NULL, /* Es */
250 NULL, /* En */
251 NULL, /* Dx */
252 NULL, /* %Q */
253 NULL, /* br */
254 NULL, /* sp */
255 NULL, /* %U */
256 NULL, /* Ta */
257 };
258
259 static const char *progname;
260
261 int
262 main(int argc, char *argv[])
263 {
264 struct mparse *mp; /* parse sequence */
265 enum op op; /* current operation */
266 char ibuf[MAXPATHLEN], /* index fname */
267 fbuf[MAXPATHLEN]; /* btree fname */
268 int ch, i, flags;
269 DB *idx, /* index database */
270 *db, /* keyword database */
271 *hash; /* temporary keyword hashtable */
272 BTREEINFO info; /* btree configuration */
273 recno_t maxrec; /* supremum of all records */
274 recno_t *recs; /* buffer of empty records */
275 size_t recsz, /* buffer size of recs */
276 reccur; /* valid number of recs */
277 struct buf buf, /* keyword buffer */
278 dbuf; /* description buffer */
279 struct of *of;
280 extern int optind;
281 extern char *optarg;
282
283 progname = strrchr(argv[0], '/');
284 if (progname == NULL)
285 progname = argv[0];
286 else
287 ++progname;
288
289 of = NULL;
290 db = idx = NULL;
291 mp = NULL;
292 hash = NULL;
293 recs = NULL;
294 recsz = reccur = 0;
295 maxrec = 0;
296 op = OP_NEW;
297
298 memset(&buf, 0, sizeof(struct buf));
299 memset(&dbuf, 0, sizeof(struct buf));
300
301 while (-1 != (ch = getopt(argc, argv, "")))
302 switch (ch) {
303 default:
304 usage();
305 return((int)MANDOCLEVEL_BADARG);
306 }
307
308 argc -= optind;
309 argv += optind;
310
311 memset(&info, 0, sizeof(BTREEINFO));
312 info.flags = R_DUP;
313
314 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
315
316 flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
317
318 buf.size = dbuf.size = MANDOC_BUFSZ;
319
320 buf.cp = mandoc_malloc(buf.size);
321 dbuf.cp = mandoc_malloc(dbuf.size);
322
323 for (i = 0; i < argc; i++) {
324 ibuf[0] = ibuf[MAXPATHLEN - 2] =
325 fbuf[0] = fbuf[MAXPATHLEN - 2] = '\0';
326
327 strlcat(fbuf, argv[i], MAXPATHLEN);
328 strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
329
330 strlcat(ibuf, argv[i], MAXPATHLEN);
331 strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
332
333 if ('\0' != fbuf[MAXPATHLEN - 2] ||
334 '\0' != ibuf[MAXPATHLEN - 2]) {
335 fprintf(stderr, "%s: Path too long\n", argv[i]);
336 break;
337 }
338
339 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
340 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
341
342 if (NULL == db) {
343 perror(fbuf);
344 break;
345 } else if (NULL == db) {
346 perror(ibuf);
347 break;
348 }
349
350 ofile_free(of);
351 of = NULL;
352
353 if ( ! ofile_build(argv[i], &of))
354 break;
355 of = of->first;
356
357 if (OP_DELETE == op || OP_UPDATE == op)
358 index_prune(of, db, fbuf, idx, ibuf,
359 &maxrec, &recs, &recsz);
360
361 if (OP_DELETE == op)
362 continue;
363
364 index_merge(of, mp, &dbuf, &buf, hash, db,
365 fbuf, idx, ibuf, maxrec, recs, reccur);
366 }
367
368 if (db)
369 (*db->close)(db);
370 if (idx)
371 (*idx->close)(idx);
372 if (hash)
373 (*hash->close)(hash);
374 if (mp)
375 mparse_free(mp);
376
377 ofile_free(of);
378 free(buf.cp);
379 free(dbuf.cp);
380 free(recs);
381
382 return(i < argc ? MANDOCLEVEL_SYSERR : MANDOCLEVEL_OK);
383 }
384
385 void
386 index_merge(const struct of *of, struct mparse *mp,
387 struct buf *dbuf, struct buf *buf,
388 DB *hash, DB *db, const char *dbf,
389 DB *idx, const char *idxf,
390 recno_t maxrec, const recno_t *recs, size_t reccur)
391 {
392 recno_t rec;
393 int ch;
394 DBT key, val;
395 struct mdoc *mdoc;
396 struct man *man;
397 const char *fn, *msec, *mtitle, *arch;
398 size_t sv;
399 unsigned seq;
400 char vbuf[8];
401
402 for (rec = 0; of; of = of->next) {
403 fn = of->fname;
404 if (reccur > 0) {
405 --reccur;
406 rec = recs[(int)reccur];
407 } else if (maxrec > 0) {
408 rec = maxrec;
409 maxrec = 0;
410 } else
411 rec++;
412
413 mparse_reset(mp);
414 hash_reset(&hash);
415
416 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
417 fprintf(stderr, "%s: Parse failure\n", fn);
418 continue;
419 }
420
421 mparse_result(mp, &mdoc, &man);
422 if (NULL == mdoc && NULL == man)
423 continue;
424
425 msec = NULL != mdoc ?
426 mdoc_meta(mdoc)->msec : man_meta(man)->msec;
427 mtitle = NULL != mdoc ?
428 mdoc_meta(mdoc)->title : man_meta(man)->title;
429 arch = NULL != mdoc ?
430 mdoc_meta(mdoc)->arch : NULL;
431
432 if (NULL == arch)
433 arch = "";
434
435 /*
436 * The index record value consists of a nil-terminated
437 * filename, a nil-terminated manual section, and a
438 * nil-terminated description. Since the description
439 * may not be set, we set a sentinel to see if we're
440 * going to write a nil byte in its place.
441 */
442
443 dbuf->len = 0;
444 buf_appendb(dbuf, fn, strlen(fn) + 1);
445 buf_appendb(dbuf, msec, strlen(msec) + 1);
446 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
447 buf_appendb(dbuf, arch, strlen(arch) + 1);
448
449 sv = dbuf->len;
450
451 /* Fix the record number in the btree value. */
452
453 if (mdoc)
454 pmdoc_node(hash, buf, dbuf,
455 mdoc_node(mdoc), mdoc_meta(mdoc));
456 else
457 pman_node(hash, buf, dbuf, man_node(man));
458
459 /*
460 * Copy from the in-memory hashtable of pending keywords
461 * into the database.
462 */
463
464 memset(vbuf, 0, sizeof(uint32_t));
465 memcpy(vbuf + 4, &rec, sizeof(uint32_t));
466
467 seq = R_FIRST;
468 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
469 seq = R_NEXT;
470
471 memcpy(vbuf, val.data, sizeof(uint32_t));
472 val.size = sizeof(vbuf);
473 val.data = vbuf;
474
475 printf("%s: Added keyword: %s\n",
476 fn, (char *)key.data);
477 dbt_put(db, dbf, &key, &val);
478 }
479 if (ch < 0) {
480 perror("hash");
481 exit((int)MANDOCLEVEL_SYSERR);
482 }
483
484 /*
485 * Apply to the index. If we haven't had a description
486 * set, put an empty one in now.
487 */
488
489 if (dbuf->len == sv)
490 buf_appendb(dbuf, "", 1);
491
492 key.data = &rec;
493 key.size = sizeof(recno_t);
494
495 val.data = dbuf->cp;
496 val.size = dbuf->len;
497
498 printf("%s: Added index\n", fn);
499 dbt_put(idx, idxf, &key, &val);
500 }
501 }
502
503 /*
504 * Scan through all entries in the index file `idx' and prune those
505 * entries in `ofile'.
506 * Pruning consists of removing from `db', then invalidating the entry
507 * in `idx' (zeroing its value size).
508 */
509 static void
510 index_prune(const struct of *ofile, DB *db, const char *dbf,
511 DB *idx, const char *idxf,
512 recno_t *maxrec, recno_t **recs, size_t *recsz)
513 {
514 const struct of *of;
515 const char *fn;
516 unsigned seq, sseq;
517 DBT key, val;
518 size_t reccur;
519 int ch;
520
521 reccur = 0;
522 seq = R_FIRST;
523 while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
524 seq = R_NEXT;
525 *maxrec = *(recno_t *)key.data;
526 if (0 == val.size) {
527 if (reccur >= *recsz) {
528 *recsz += MANDOC_SLOP;
529 *recs = mandoc_realloc(*recs,
530 *recsz * sizeof(recno_t));
531 }
532 (*recs)[(int)reccur] = *maxrec;
533 reccur++;
534 continue;
535 }
536
537 fn = (char *)val.data;
538 for (of = ofile; of; of = of->next)
539 if (0 == strcmp(fn, of->fname))
540 break;
541
542 if (NULL == of)
543 continue;
544
545 sseq = R_FIRST;
546 while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
547 sseq = R_NEXT;
548 assert(8 == val.size);
549 if (*maxrec != *(recno_t *)(val.data + 4))
550 continue;
551 printf("%s: Deleted keyword: %s\n",
552 fn, (char *)key.data);
553 ch = (*db->del)(db, &key, R_CURSOR);
554 if (ch < 0)
555 break;
556 }
557 if (ch < 0) {
558 perror(dbf);
559 exit((int)MANDOCLEVEL_SYSERR);
560 }
561
562 printf("%s: Deleted index\n", fn);
563
564 val.size = 0;
565 ch = (*idx->put)(idx, &key, &val, R_CURSOR);
566 if (ch < 0) {
567 perror(idxf);
568 exit((int)MANDOCLEVEL_SYSERR);
569 }
570
571 if (reccur >= *recsz) {
572 *recsz += MANDOC_SLOP;
573 *recs = mandoc_realloc
574 (*recs, *recsz * sizeof(recno_t));
575 }
576
577 (*recs)[(int)reccur] = *maxrec;
578 reccur++;
579 }
580 (*maxrec)++;
581 }
582
583 /*
584 * Grow the buffer (if necessary) and copy in a binary string.
585 */
586 static void
587 buf_appendb(struct buf *buf, const void *cp, size_t sz)
588 {
589
590 /* Overshoot by MANDOC_BUFSZ. */
591
592 while (buf->len + sz >= buf->size) {
593 buf->size = buf->len + sz + MANDOC_BUFSZ;
594 buf->cp = mandoc_realloc(buf->cp, buf->size);
595 }
596
597 memcpy(buf->cp + (int)buf->len, cp, sz);
598 buf->len += sz;
599 }
600
601 /*
602 * Append a nil-terminated string to the buffer.
603 * This can be invoked multiple times.
604 * The buffer string will be nil-terminated.
605 * If invoked multiple times, a space is put between strings.
606 */
607 static void
608 buf_append(struct buf *buf, const char *cp)
609 {
610 size_t sz;
611
612 if (0 == (sz = strlen(cp)))
613 return;
614
615 if (buf->len)
616 buf->cp[(int)buf->len - 1] = ' ';
617
618 buf_appendb(buf, cp, sz + 1);
619 }
620
621 /*
622 * Recursively add all text from a given node.
623 * This is optimised for general mdoc nodes in this context, which do
624 * not consist of subexpressions and having a recursive call for n->next
625 * would be wasteful.
626 * The "f" variable should be 0 unless called from pmdoc_Nd for the
627 * description buffer, which does not start at the beginning of the
628 * buffer.
629 */
630 static void
631 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
632 {
633
634 for ( ; n; n = n->next) {
635 if (n->child)
636 buf_appendmdoc(buf, n->child, f);
637
638 if (MDOC_TEXT == n->type && f) {
639 f = 0;
640 buf_appendb(buf, n->string,
641 strlen(n->string) + 1);
642 } else if (MDOC_TEXT == n->type)
643 buf_append(buf, n->string);
644
645 }
646 }
647
648 /* ARGSUSED */
649 static void
650 pmdoc_An(MDOC_ARGS)
651 {
652
653 if (SEC_AUTHORS != n->sec)
654 return;
655
656 buf_appendmdoc(buf, n->child, 0);
657 hash_put(hash, buf, TYPE_AUTHOR);
658 }
659
660 static void
661 hash_reset(DB **db)
662 {
663 DB *hash;
664
665 if (NULL != (hash = *db))
666 (*hash->close)(hash);
667
668 *db = dbopen(NULL, MANDOC_FLAGS, 0644, DB_HASH, NULL);
669 if (NULL == *db) {
670 perror("hash");
671 exit((int)MANDOCLEVEL_SYSERR);
672 }
673 }
674
675 /* ARGSUSED */
676 static void
677 pmdoc_Fd(MDOC_ARGS)
678 {
679 const char *start, *end;
680 size_t sz;
681
682 if (SEC_SYNOPSIS != n->sec)
683 return;
684 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
685 return;
686
687 /*
688 * Only consider those `Fd' macro fields that begin with an
689 * "inclusion" token (versus, e.g., #define).
690 */
691 if (strcmp("#include", n->string))
692 return;
693
694 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
695 return;
696
697 /*
698 * Strip away the enclosing angle brackets and make sure we're
699 * not zero-length.
700 */
701
702 start = n->string;
703 if ('<' == *start || '"' == *start)
704 start++;
705
706 if (0 == (sz = strlen(start)))
707 return;
708
709 end = &start[(int)sz - 1];
710 if ('>' == *end || '"' == *end)
711 end--;
712
713 assert(end >= start);
714
715 buf_appendb(buf, start, (size_t)(end - start + 1));
716 buf_appendb(buf, "", 1);
717
718 hash_put(hash, buf, TYPE_INCLUDES);
719 }
720
721 /* ARGSUSED */
722 static void
723 pmdoc_Cd(MDOC_ARGS)
724 {
725
726 if (SEC_SYNOPSIS != n->sec)
727 return;
728
729 buf_appendmdoc(buf, n->child, 0);
730 hash_put(hash, buf, TYPE_CONFIG);
731 }
732
733 /* ARGSUSED */
734 static void
735 pmdoc_In(MDOC_ARGS)
736 {
737
738 if (SEC_SYNOPSIS != n->sec)
739 return;
740 if (NULL == n->child || MDOC_TEXT != n->child->type)
741 return;
742
743 buf_append(buf, n->child->string);
744 hash_put(hash, buf, TYPE_INCLUDES);
745 }
746
747 /* ARGSUSED */
748 static void
749 pmdoc_Fn(MDOC_ARGS)
750 {
751 const char *cp;
752
753 if (SEC_SYNOPSIS != n->sec)
754 return;
755 if (NULL == n->child || MDOC_TEXT != n->child->type)
756 return;
757
758 /* .Fn "struct type *arg" "foo" */
759
760 cp = strrchr(n->child->string, ' ');
761 if (NULL == cp)
762 cp = n->child->string;
763
764 /* Strip away pointer symbol. */
765
766 while ('*' == *cp)
767 cp++;
768
769 buf_append(buf, cp);
770 hash_put(hash, buf, TYPE_FUNCTION);
771 }
772
773 /* ARGSUSED */
774 static void
775 pmdoc_St(MDOC_ARGS)
776 {
777
778 if (SEC_STANDARDS != n->sec)
779 return;
780 if (NULL == n->child || MDOC_TEXT != n->child->type)
781 return;
782
783 buf_append(buf, n->child->string);
784 hash_put(hash, buf, TYPE_STANDARD);
785 }
786
787 /* ARGSUSED */
788 static void
789 pmdoc_Xr(MDOC_ARGS)
790 {
791
792 if (NULL == (n = n->child))
793 return;
794
795 buf_appendb(buf, n->string, strlen(n->string));
796
797 if (NULL != (n = n->next)) {
798 buf_appendb(buf, ".", 1);
799 buf_appendb(buf, n->string, strlen(n->string) + 1);
800 } else
801 buf_appendb(buf, ".", 2);
802
803 hash_put(hash, buf, TYPE_XREF);
804 }
805
806 /* ARGSUSED */
807 static void
808 pmdoc_Vt(MDOC_ARGS)
809 {
810 const char *start;
811 size_t sz;
812
813 if (SEC_SYNOPSIS != n->sec)
814 return;
815 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
816 return;
817 if (NULL == n->last || MDOC_TEXT != n->last->type)
818 return;
819
820 /*
821 * Strip away leading pointer symbol '*' and trailing ';'.
822 */
823
824 start = n->last->string;
825
826 while ('*' == *start)
827 start++;
828
829 if (0 == (sz = strlen(start)))
830 return;
831
832 if (';' == start[(int)sz - 1])
833 sz--;
834
835 if (0 == sz)
836 return;
837
838 buf_appendb(buf, start, sz);
839 buf_appendb(buf, "", 1);
840 hash_put(hash, buf, TYPE_VARIABLE);
841 }
842
843 /* ARGSUSED */
844 static void
845 pmdoc_Fo(MDOC_ARGS)
846 {
847
848 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
849 return;
850 if (NULL == n->child || MDOC_TEXT != n->child->type)
851 return;
852
853 buf_append(buf, n->child->string);
854 hash_put(hash, buf, TYPE_FUNCTION);
855 }
856
857
858 /* ARGSUSED */
859 static void
860 pmdoc_Nd(MDOC_ARGS)
861 {
862
863 if (MDOC_BODY != n->type)
864 return;
865
866 buf_appendmdoc(dbuf, n->child, 1);
867 buf_appendmdoc(buf, n->child, 0);
868
869 hash_put(hash, buf, TYPE_DESC);
870 }
871
872 /* ARGSUSED */
873 static void
874 pmdoc_Er(MDOC_ARGS)
875 {
876
877 if (SEC_ERRORS != n->sec)
878 return;
879
880 buf_appendmdoc(buf, n->child, 0);
881 hash_put(hash, buf, TYPE_ERR);
882 }
883
884 /* ARGSUSED */
885 static void
886 pmdoc_Ev(MDOC_ARGS)
887 {
888
889 if (SEC_ENVIRONMENT != n->sec)
890 return;
891
892 buf_appendmdoc(buf, n->child, 0);
893 hash_put(hash, buf, TYPE_ENV);
894 }
895
896 /* ARGSUSED */
897 static void
898 pmdoc_Pa(MDOC_ARGS)
899 {
900
901 if (SEC_FILES != n->sec)
902 return;
903
904 buf_appendmdoc(buf, n->child, 0);
905 hash_put(hash, buf, TYPE_PATH);
906 }
907
908 /* ARGSUSED */
909 static void
910 pmdoc_Nm(MDOC_ARGS)
911 {
912
913 if (SEC_NAME == n->sec) {
914 buf_appendmdoc(buf, n->child, 0);
915 hash_put(hash, buf, TYPE_NAME);
916 return;
917 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
918 return;
919
920 if (NULL == n->child)
921 buf_append(buf, m->name);
922
923 buf_appendmdoc(buf, n->child, 0);
924 hash_put(hash, buf, TYPE_UTILITY);
925 }
926
927 static void
928 hash_put(DB *db, const struct buf *buf, int mask)
929 {
930 DBT key, val;
931 int rc;
932
933 if (buf->len < 2)
934 return;
935
936 key.data = buf->cp;
937 key.size = buf->len;
938
939 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
940 perror("hash");
941 exit((int)MANDOCLEVEL_SYSERR);
942 } else if (0 == rc)
943 mask |= *(int *)val.data;
944
945 val.data = &mask;
946 val.size = sizeof(int);
947
948 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
949 perror("hash");
950 exit((int)MANDOCLEVEL_SYSERR);
951 }
952 }
953
954 static void
955 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
956 {
957
958 assert(key->size);
959 assert(val->size);
960
961 if (0 == (*db->put)(db, key, val, 0))
962 return;
963
964 perror(dbn);
965 exit((int)MANDOCLEVEL_SYSERR);
966 /* NOTREACHED */
967 }
968
969 /*
970 * Call out to per-macro handlers after clearing the persistent database
971 * key. If the macro sets the database key, flush it to the database.
972 */
973 static void
974 pmdoc_node(MDOC_ARGS)
975 {
976
977 if (NULL == n)
978 return;
979
980 switch (n->type) {
981 case (MDOC_HEAD):
982 /* FALLTHROUGH */
983 case (MDOC_BODY):
984 /* FALLTHROUGH */
985 case (MDOC_TAIL):
986 /* FALLTHROUGH */
987 case (MDOC_BLOCK):
988 /* FALLTHROUGH */
989 case (MDOC_ELEM):
990 if (NULL == mdocs[n->tok])
991 break;
992
993 buf->len = 0;
994 (*mdocs[n->tok])(hash, buf, dbuf, n, m);
995 break;
996 default:
997 break;
998 }
999
1000 pmdoc_node(hash, buf, dbuf, n->child, m);
1001 pmdoc_node(hash, buf, dbuf, n->next, m);
1002 }
1003
1004 static int
1005 pman_node(MAN_ARGS)
1006 {
1007 const struct man_node *head, *body;
1008 const char *start, *sv;
1009 size_t sz;
1010
1011 if (NULL == n)
1012 return(0);
1013
1014 /*
1015 * We're only searching for one thing: the first text child in
1016 * the BODY of a NAME section. Since we don't keep track of
1017 * sections in -man, run some hoops to find out whether we're in
1018 * the correct section or not.
1019 */
1020
1021 if (MAN_BODY == n->type && MAN_SH == n->tok) {
1022 body = n;
1023 assert(body->parent);
1024 if (NULL != (head = body->parent->head) &&
1025 1 == head->nchild &&
1026 NULL != (head = (head->child)) &&
1027 MAN_TEXT == head->type &&
1028 0 == strcmp(head->string, "NAME") &&
1029 NULL != (body = body->child) &&
1030 MAN_TEXT == body->type) {
1031
1032 assert(body->string);
1033 start = sv = body->string;
1034
1035 /*
1036 * Go through a special heuristic dance here.
1037 * This is why -man manuals are great!
1038 * (I'm being sarcastic: my eyes are bleeding.)
1039 * Conventionally, one or more manual names are
1040 * comma-specified prior to a whitespace, then a
1041 * dash, then a description. Try to puzzle out
1042 * the name parts here.
1043 */
1044
1045 for ( ;; ) {
1046 sz = strcspn(start, " ,");
1047 if ('\0' == start[(int)sz])
1048 break;
1049
1050 buf->len = 0;
1051 buf_appendb(buf, start, sz);
1052 buf_appendb(buf, "", 1);
1053
1054 hash_put(hash, buf, TYPE_NAME);
1055
1056 if (' ' == start[(int)sz]) {
1057 start += (int)sz + 1;
1058 break;
1059 }
1060
1061 assert(',' == start[(int)sz]);
1062 start += (int)sz + 1;
1063 while (' ' == *start)
1064 start++;
1065 }
1066
1067 buf->len = 0;
1068
1069 if (sv == start) {
1070 buf_append(buf, start);
1071 return(1);
1072 }
1073
1074 while (' ' == *start)
1075 start++;
1076
1077 if (0 == strncmp(start, "-", 1))
1078 start += 1;
1079 else if (0 == strncmp(start, "\\-", 2))
1080 start += 2;
1081 else if (0 == strncmp(start, "\\(en", 4))
1082 start += 4;
1083 else if (0 == strncmp(start, "\\(em", 4))
1084 start += 4;
1085
1086 while (' ' == *start)
1087 start++;
1088
1089 sz = strlen(start) + 1;
1090 buf_appendb(dbuf, start, sz);
1091 buf_appendb(buf, start, sz);
1092
1093 hash_put(hash, buf, TYPE_DESC);
1094 }
1095 }
1096
1097 if (pman_node(hash, buf, dbuf, n->child))
1098 return(1);
1099 if (pman_node(hash, buf, dbuf, n->next))
1100 return(1);
1101
1102 return(0);
1103 }
1104
1105 /*
1106 * Recursively build up a list of files to parse.
1107 * We use this instead of ftw() and so on because I don't want global
1108 * variables hanging around.
1109 * This ignores the mandoc.db and mandoc.index files, but assumes that
1110 * everything else is a manual.
1111 * Pass in a pointer to a NULL structure for the first invocation.
1112 */
1113 static int
1114 ofile_build(const char *dir, struct of **of)
1115 {
1116 DIR *d;
1117 const char *fn;
1118 struct of *nof;
1119 struct dirent *dp;
1120
1121 if (NULL == (d = opendir(dir))) {
1122 perror(dir);
1123 return(0);
1124 }
1125
1126 while (NULL != (dp = readdir(d))) {
1127 fn = dp->d_name;
1128 if (DT_DIR == dp->d_type) {
1129 if (strcmp(".", fn) && strcmp("..", fn))
1130 if ( ! ofile_build(dp->d_name, of))
1131 return(0);
1132 continue;
1133 } else if (DT_REG != dp->d_type)
1134 continue;
1135
1136 if (0 == strcmp(MANDOC_DB, fn) ||
1137 0 == strcmp(MANDOC_IDX, fn))
1138 continue;
1139
1140 nof = mandoc_calloc(1, sizeof(struct of));
1141 nof->fname = mandoc_strdup(fn);
1142
1143 if (NULL == *of) {
1144 *of = nof;
1145 (*of)->first = nof;
1146 } else {
1147 (*of)->next = nof;
1148 *of = nof;
1149 }
1150 }
1151
1152 return(1);
1153 }
1154
1155 static void
1156 ofile_free(struct of *of)
1157 {
1158 struct of *nof;
1159
1160 while (of) {
1161 nof = of->next;
1162 free(of->fname);
1163 free(of);
1164 of = nof;
1165 }
1166 }
1167
1168 static void
1169 usage(void)
1170 {
1171
1172 fprintf(stderr, "usage: %s [dir...]\n", progname);
1173 }