]> git.cameronkatri.com Git - mandoc.git/blob - mandoc-db.c
Use a little more horsepower in parsing out NAME sections from -man
[mandoc.git] / mandoc-db.c
1 /* $Id: mandoc-db.c,v 1.8 2011/04/05 14:10:52 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #ifdef __linux__
25 # include <db_185.h>
26 #else
27 # include <db.h>
28 #endif
29 #include <fcntl.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include "man.h"
37 #include "mdoc.h"
38 #include "mandoc.h"
39
/* Final file names of the keyword database and of the filename index. */
#define	MANDOC_DB	 "mandoc.db"
#define	MANDOC_IDX	 "mandoc.index"
/* Initial size and growth increment of the shared key buffer. */
#define	MANDOC_BUFSZ	 BUFSIZ
/*
 * Flags passed to dbopen(): create and truncate, read-write.
 * Parenthesized so the macro is safe inside any larger expression.
 */
#define	MANDOC_FLAGS	 (O_CREAT|O_TRUNC|O_RDWR)
44
/*
 * Kind of keyword stored in the first 32-bit word of each database
 * value (the per-macro handlers copy one of these into val->data).
 */
enum	type {
	MANDOC_NONE	= 0,	/* no type assigned */
	MANDOC_NAME	= 1,	/* name from the NAME section */
	MANDOC_FUNCTION	= 2,	/* function name (Fn, Fo) */
	MANDOC_UTILITY	= 3,	/* utility name (Nm in SYNOPSIS) */
	MANDOC_INCLUDES	= 4,	/* included header (In, Fd) */
	MANDOC_VARIABLE	= 5	/* variable or type (Va, Vt) */
};
53
/*
 * Parameter lists shared by all node handlers: the keyword database and
 * its file name (for error messages), the reusable key with its buffer
 * capacity, the value to store, and the syntax-tree node to examine.
 */
#define	MANDOC_DB_ARGS	  DB *db, const char *dbn, \
			  DBT *key, size_t *ksz, DBT *val
#define	MAN_ARGS	  MANDOC_DB_ARGS, const struct man_node *n
#define	MDOC_ARGS	  MANDOC_DB_ARGS, const struct mdoc_node *n
64
65 static void dbt_append(DBT *, size_t *, const char *);
66 static void dbt_appendb(DBT *, size_t *,
67 const void *, size_t);
68 static void dbt_init(DBT *, size_t *);
69 static void dbt_put(DB *, const char *, DBT *, DBT *);
70 static void usage(void);
71 static void pman(DB *, const char *,
72 DBT *, size_t *, DBT *,
73 const char *, struct man *);
74 static int pman_node(MAN_ARGS);
75 static void pmdoc(DB *, const char *,
76 DBT *, size_t *, DBT *,
77 const char *, struct mdoc *);
78 static void pmdoc_node(MDOC_ARGS);
79 static void pmdoc_Fd(MDOC_ARGS);
80 static void pmdoc_In(MDOC_ARGS);
81 static void pmdoc_Fn(MDOC_ARGS);
82 static void pmdoc_Fo(MDOC_ARGS);
83 static void pmdoc_Nm(MDOC_ARGS);
84 static void pmdoc_Vt(MDOC_ARGS);
85
86 typedef void (*pmdoc_nf)(MDOC_ARGS);
87
88 static const char *progname;
89
90 static const pmdoc_nf mdocs[MDOC_MAX] = {
91 NULL, /* Ap */
92 NULL, /* Dd */
93 NULL, /* Dt */
94 NULL, /* Os */
95 NULL, /* Sh */
96 NULL, /* Ss */
97 NULL, /* Pp */
98 NULL, /* D1 */
99 NULL, /* Dl */
100 NULL, /* Bd */
101 NULL, /* Ed */
102 NULL, /* Bl */
103 NULL, /* El */
104 NULL, /* It */
105 NULL, /* Ad */
106 NULL, /* An */
107 NULL, /* Ar */
108 NULL, /* Cd */
109 NULL, /* Cm */
110 NULL, /* Dv */
111 NULL, /* Er */
112 NULL, /* Ev */
113 NULL, /* Ex */
114 NULL, /* Fa */
115 pmdoc_Fd, /* Fd */
116 NULL, /* Fl */
117 pmdoc_Fn, /* Fn */
118 NULL, /* Ft */
119 NULL, /* Ic */
120 pmdoc_In, /* In */
121 NULL, /* Li */
122 NULL, /* Nd */
123 pmdoc_Nm, /* Nm */
124 NULL, /* Op */
125 NULL, /* Ot */
126 NULL, /* Pa */
127 NULL, /* Rv */
128 NULL, /* St */
129 pmdoc_Vt, /* Va */
130 pmdoc_Vt, /* Vt */
131 NULL, /* Xr */
132 NULL, /* %A */
133 NULL, /* %B */
134 NULL, /* %D */
135 NULL, /* %I */
136 NULL, /* %J */
137 NULL, /* %N */
138 NULL, /* %O */
139 NULL, /* %P */
140 NULL, /* %R */
141 NULL, /* %T */
142 NULL, /* %V */
143 NULL, /* Ac */
144 NULL, /* Ao */
145 NULL, /* Aq */
146 NULL, /* At */
147 NULL, /* Bc */
148 NULL, /* Bf */
149 NULL, /* Bo */
150 NULL, /* Bq */
151 NULL, /* Bsx */
152 NULL, /* Bx */
153 NULL, /* Db */
154 NULL, /* Dc */
155 NULL, /* Do */
156 NULL, /* Dq */
157 NULL, /* Ec */
158 NULL, /* Ef */
159 NULL, /* Em */
160 NULL, /* Eo */
161 NULL, /* Fx */
162 NULL, /* Ms */
163 NULL, /* No */
164 NULL, /* Ns */
165 NULL, /* Nx */
166 NULL, /* Ox */
167 NULL, /* Pc */
168 NULL, /* Pf */
169 NULL, /* Po */
170 NULL, /* Pq */
171 NULL, /* Qc */
172 NULL, /* Ql */
173 NULL, /* Qo */
174 NULL, /* Qq */
175 NULL, /* Re */
176 NULL, /* Rs */
177 NULL, /* Sc */
178 NULL, /* So */
179 NULL, /* Sq */
180 NULL, /* Sm */
181 NULL, /* Sx */
182 NULL, /* Sy */
183 NULL, /* Tn */
184 NULL, /* Ux */
185 NULL, /* Xc */
186 NULL, /* Xo */
187 pmdoc_Fo, /* Fo */
188 NULL, /* Fc */
189 NULL, /* Oo */
190 NULL, /* Oc */
191 NULL, /* Bk */
192 NULL, /* Ek */
193 NULL, /* Bt */
194 NULL, /* Hf */
195 NULL, /* Fr */
196 NULL, /* Ud */
197 NULL, /* Lb */
198 NULL, /* Lp */
199 NULL, /* Lk */
200 NULL, /* Mt */
201 NULL, /* Brq */
202 NULL, /* Bro */
203 NULL, /* Brc */
204 NULL, /* %C */
205 NULL, /* Es */
206 NULL, /* En */
207 NULL, /* Dx */
208 NULL, /* %Q */
209 NULL, /* br */
210 NULL, /* sp */
211 NULL, /* %U */
212 NULL, /* Ta */
213 };
214
215 int
216 main(int argc, char *argv[])
217 {
218 struct mparse *mp; /* parse sequence */
219 struct mdoc *mdoc; /* resulting mdoc */
220 struct man *man; /* resulting man */
221 char *fn;
222 const char *dir; /* result dir (default: cwd) */
223 char ibuf[MAXPATHLEN], /* index fname */
224 ibbuf[MAXPATHLEN], /* index backup fname */
225 fbuf[MAXPATHLEN], /* btree fname */
226 fbbuf[MAXPATHLEN]; /* btree backup fname */
227 int c;
228 DB *index, /* index database */
229 *db; /* keyword database */
230 DBT rkey, rval, /* recno entries */
231 key, val; /* persistent keyword entries */
232 size_t ksz; /* entry buffer size */
233 char vbuf[8];
234 BTREEINFO info; /* btree configuration */
235 recno_t rec;
236 extern int optind;
237 extern char *optarg;
238
239 progname = strrchr(argv[0], '/');
240 if (progname == NULL)
241 progname = argv[0];
242 else
243 ++progname;
244
245 dir = "";
246
247 while (-1 != (c = getopt(argc, argv, "d:")))
248 switch (c) {
249 case ('d'):
250 dir = optarg;
251 break;
252 default:
253 usage();
254 return((int)MANDOCLEVEL_BADARG);
255 }
256
257 argc -= optind;
258 argv += optind;
259
260 /*
261 * Set up temporary file-names into which we're going to write
262 * all of our data (both for the index and database). These
263 * will be securely renamed to the real file-names after we've
264 * written all of our data.
265 */
266
267 ibuf[0] = ibuf[MAXPATHLEN - 2] =
268 ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
269 fbuf[0] = fbuf[MAXPATHLEN - 2] =
270 fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
271
272 strlcat(fbuf, dir, MAXPATHLEN);
273 strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
274
275 strlcat(fbbuf, fbuf, MAXPATHLEN);
276 strlcat(fbbuf, "~", MAXPATHLEN);
277
278 strlcat(ibuf, dir, MAXPATHLEN);
279 strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
280
281 strlcat(ibbuf, ibuf, MAXPATHLEN);
282 strlcat(ibbuf, "~", MAXPATHLEN);
283
284 if ('\0' != fbuf[MAXPATHLEN - 2] ||
285 '\0' != fbbuf[MAXPATHLEN - 2] ||
286 '\0' != ibuf[MAXPATHLEN - 2] ||
287 '\0' != ibbuf[MAXPATHLEN - 2]) {
288 fprintf(stderr, "%s: Path too long\n", progname);
289 exit((int)MANDOCLEVEL_SYSERR);
290 }
291
292 /*
293 * For the keyword database, open a BTREE database that allows
294 * duplicates. For the index database, use a standard RECNO
295 * database type.
296 */
297
298 memset(&info, 0, sizeof(BTREEINFO));
299 info.flags = R_DUP;
300 db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
301
302 if (NULL == db) {
303 perror(fbbuf);
304 exit((int)MANDOCLEVEL_SYSERR);
305 }
306
307 index = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
308
309 if (NULL == db) {
310 perror(ibbuf);
311 (*db->close)(db);
312 exit((int)MANDOCLEVEL_SYSERR);
313 }
314
315 /*
316 * Try parsing the manuals given on the command line. If we
317 * totally fail, then just keep on going. Take resulting trees
318 * and push them down into the database code.
319 * Use the auto-parser and don't report any errors.
320 */
321
322 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
323
324 memset(&key, 0, sizeof(DBT));
325 memset(&val, 0, sizeof(DBT));
326 memset(&rkey, 0, sizeof(DBT));
327 memset(&rval, 0, sizeof(DBT));
328
329 val.size = sizeof(vbuf);
330 val.data = vbuf;
331 rkey.size = sizeof(recno_t);
332
333 rec = 1;
334 ksz = 0;
335
336 while (NULL != (fn = *argv++)) {
337 mparse_reset(mp);
338
339 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
340 fprintf(stderr, "%s: Parse failure\n", fn);
341 continue;
342 }
343
344 mparse_result(mp, &mdoc, &man);
345 if (NULL == mdoc && NULL == man)
346 continue;
347
348 rkey.data = &rec;
349 rval.data = fn;
350 rval.size = strlen(fn) + 1;
351
352 if (-1 == (*index->put)(index, &rkey, &rval, 0)) {
353 perror(ibbuf);
354 break;
355 }
356
357 memset(val.data, 0, sizeof(uint32_t));
358 memcpy(val.data + 4, &rec, sizeof(uint32_t));
359
360 if (mdoc)
361 pmdoc(db, fbbuf, &key, &ksz, &val, fn, mdoc);
362 else
363 pman(db, fbbuf, &key, &ksz, &val, fn, man);
364 rec++;
365 }
366
367 (*db->close)(db);
368 (*index->close)(index);
369
370 mparse_free(mp);
371
372 free(key.data);
373
374 /* Atomically replace the file with our temporary one. */
375
376 if (-1 == rename(fbbuf, fbuf))
377 perror(fbuf);
378 if (-1 == rename(ibbuf, ibuf))
379 perror(fbuf);
380
381 return((int)MANDOCLEVEL_OK);
382 }
383
384 /*
385 * Initialise the stored database key whose data buffer is shared
386 * between uses (as the key must sometimes be constructed from an array
387 * of
388 */
389 static void
390 dbt_init(DBT *key, size_t *ksz)
391 {
392
393 if (0 == *ksz) {
394 assert(0 == key->size);
395 assert(NULL == key->data);
396 key->data = mandoc_malloc(MANDOC_BUFSZ);
397 *ksz = MANDOC_BUFSZ;
398 }
399
400 key->size = 0;
401 }
402
403 /*
404 * Append a binary value to a database entry. This can be invoked
405 * multiple times; the buffer is automatically resized.
406 */
407 static void
408 dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
409 {
410
411 assert(key->data);
412
413 /* Overshoot by MANDOC_BUFSZ. */
414
415 while (key->size + sz >= *ksz) {
416 *ksz = key->size + sz + MANDOC_BUFSZ;
417 key->data = mandoc_realloc(key->data, *ksz);
418 }
419
420 memcpy(key->data + (int)key->size, cp, sz);
421 key->size += sz;
422 }
423
424 /*
425 * Append a nil-terminated string to the database entry. This can be
426 * invoked multiple times. The database entry will be nil-terminated as
427 * well; if invoked multiple times, a space is put between strings.
428 */
429 static void
430 dbt_append(DBT *key, size_t *ksz, const char *cp)
431 {
432 size_t sz;
433
434 if (0 == (sz = strlen(cp)))
435 return;
436
437 assert(key->data);
438
439 if (key->size)
440 ((char *)key->data)[(int)key->size - 1] = ' ';
441
442 dbt_appendb(key, ksz, cp, sz + 1);
443 }
444
445 /* ARGSUSED */
446 static void
447 pmdoc_Fd(MDOC_ARGS)
448 {
449 uint32_t fl;
450 const char *start, *end;
451 size_t sz;
452 char nil;
453
454 if (SEC_SYNOPSIS != n->sec)
455 return;
456 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
457 return;
458
459 /*
460 * Only consider those `Fd' macro fields that begin with an
461 * "inclusion" token (versus, e.g., #define).
462 */
463 if (strcmp("#include", n->string))
464 return;
465
466 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
467 return;
468
469 /*
470 * Strip away the enclosing angle brackets and make sure we're
471 * not zero-length.
472 */
473
474 start = n->string;
475 if ('<' == *start || '"' == *start)
476 start++;
477
478 if (0 == (sz = strlen(start)))
479 return;
480
481 end = &start[(int)sz - 1];
482 if ('>' == *end || '"' == *end)
483 end--;
484
485 nil = '\0';
486 dbt_appendb(key, ksz, start, end - start + 1);
487 dbt_appendb(key, ksz, &nil, 1);
488
489 fl = MANDOC_INCLUDES;
490 memcpy(val->data, &fl, 4);
491 }
492
493 /* ARGSUSED */
494 static void
495 pmdoc_In(MDOC_ARGS)
496 {
497 uint32_t fl;
498
499 if (SEC_SYNOPSIS != n->sec)
500 return;
501 if (NULL == n->child || MDOC_TEXT != n->child->type)
502 return;
503
504 dbt_append(key, ksz, n->child->string);
505 fl = MANDOC_INCLUDES;
506 memcpy(val->data, &fl, 4);
507 }
508
509 /* ARGSUSED */
510 static void
511 pmdoc_Fn(MDOC_ARGS)
512 {
513 uint32_t fl;
514 const char *cp;
515
516 if (SEC_SYNOPSIS != n->sec)
517 return;
518 if (NULL == n->child || MDOC_TEXT != n->child->type)
519 return;
520
521 /* .Fn "struct type *arg" "foo" */
522
523 cp = strrchr(n->child->string, ' ');
524 if (NULL == cp)
525 cp = n->child->string;
526
527 /* Strip away pointer symbol. */
528
529 while ('*' == *cp)
530 cp++;
531
532 dbt_append(key, ksz, cp);
533 fl = MANDOC_FUNCTION;
534 memcpy(val->data, &fl, 4);
535 }
536
537 /* ARGSUSED */
538 static void
539 pmdoc_Vt(MDOC_ARGS)
540 {
541 uint32_t fl;
542 const char *start, *end;
543 size_t sz;
544 char nil;
545
546 if (SEC_SYNOPSIS != n->sec)
547 return;
548 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
549 return;
550 if (NULL == n->last || MDOC_TEXT != n->last->type)
551 return;
552
553 /*
554 * Strip away leading pointer symbol '*' and trailing ';'.
555 */
556
557 start = n->last->string;
558
559 while ('*' == *start)
560 start++;
561
562 if (0 == (sz = strlen(start)))
563 return;
564
565 end = &start[sz - 1];
566 while (end > start && ';' == *end)
567 end--;
568
569 if (end == start)
570 return;
571
572 nil = '\0';
573 dbt_appendb(key, ksz, start, end - start + 1);
574 dbt_appendb(key, ksz, &nil, 1);
575 fl = MANDOC_VARIABLE;
576 memcpy(val->data, &fl, 4);
577 }
578
579 /* ARGSUSED */
580 static void
581 pmdoc_Fo(MDOC_ARGS)
582 {
583 uint32_t fl;
584
585 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
586 return;
587 if (NULL == n->child || MDOC_TEXT != n->child->type)
588 return;
589
590 dbt_append(key, ksz, n->child->string);
591 fl = MANDOC_FUNCTION;
592 memcpy(val->data, &fl, 4);
593 }
594
595 /* ARGSUSED */
596 static void
597 pmdoc_Nm(MDOC_ARGS)
598 {
599 uint32_t fl;
600
601 if (SEC_NAME == n->sec) {
602 for (n = n->child; n; n = n->next) {
603 if (MDOC_TEXT != n->type)
604 continue;
605 dbt_append(key, ksz, n->string);
606 }
607 fl = MANDOC_NAME;
608 memcpy(val->data, &fl, 4);
609 return;
610 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
611 return;
612
613 for (n = n->child; n; n = n->next) {
614 if (MDOC_TEXT != n->type)
615 continue;
616 dbt_append(key, ksz, n->string);
617 }
618
619 fl = MANDOC_UTILITY;
620 memcpy(val->data, &fl, 4);
621 }
622
623 static void
624 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
625 {
626
627 if (0 == key->size)
628 return;
629
630 assert(key->data);
631 assert(8 == val->size);
632 assert(val->data);
633
634 if (0 == (*db->put)(db, key, val, 0))
635 return;
636
637 perror(dbn);
638 exit((int)MANDOCLEVEL_SYSERR);
639 /* NOTREACHED */
640 }
641
642 /*
643 * Call out to per-macro handlers after clearing the persistent database
644 * key. If the macro sets the database key, flush it to the database.
645 */
646 static void
647 pmdoc_node(MDOC_ARGS)
648 {
649
650 if (NULL == n)
651 return;
652
653 switch (n->type) {
654 case (MDOC_HEAD):
655 /* FALLTHROUGH */
656 case (MDOC_BODY):
657 /* FALLTHROUGH */
658 case (MDOC_TAIL):
659 /* FALLTHROUGH */
660 case (MDOC_BLOCK):
661 /* FALLTHROUGH */
662 case (MDOC_ELEM):
663 if (NULL == mdocs[n->tok])
664 break;
665
666 dbt_init(key, ksz);
667 (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
668
669 dbt_put(db, dbn, key, val);
670 break;
671 default:
672 break;
673 }
674
675 pmdoc_node(db, dbn, key, ksz, val, n->child);
676 pmdoc_node(db, dbn, key, ksz, val, n->next);
677 }
678
679 static int
680 pman_node(MAN_ARGS)
681 {
682 const struct man_node *head, *body;
683 const char *start;
684 char nil;
685 size_t sz;
686 uint32_t fl;
687
688 if (NULL == n)
689 return(0);
690
691 /*
692 * We're only searching for one thing: the first text child in
693 * the BODY of a NAME section. Since we don't keep track of
694 * sections in -man, run some hoops to find out whether we're in
695 * the correct section or not.
696 */
697
698 if (MAN_BODY == n->type && MAN_SH == n->tok) {
699 body = n;
700 assert(body->parent);
701 if (NULL != (head = body->parent->head) &&
702 1 == head->nchild &&
703 NULL != (head = (head->child)) &&
704 MAN_TEXT == head->type &&
705 0 == strcmp(head->string, "NAME") &&
706 NULL != (body = body->child) &&
707 MAN_TEXT == body->type) {
708 nil = '\0';
709
710 fl = MANDOC_NAME;
711 memcpy(val->data, &fl, 4);
712
713 start = body->string;
714
715 /*
716 * Go through a special heuristic dance here.
717 * This is why -man manuals are great!
718 * Conventionally, one or more manual names are
719 * comma-specified prior to a whitespace, then a
720 * dash, then a description. Try to puzzle out
721 * the name parts here.
722 */
723
724 while (start) {
725 sz = strcspn(start, " ,");
726 if ('\0' == start[(int)sz])
727 break;
728
729 dbt_init(key, ksz);
730 dbt_appendb(key, ksz, start, sz);
731 dbt_appendb(key, ksz, &nil, 1);
732
733 dbt_put(db, dbn, key, val);
734
735 if (' ' == start[(int)sz])
736 break;
737
738 assert(',' == start[(int)sz]);
739 start += (int)sz + 1;
740 while (' ' == *start)
741 start++;
742 }
743
744 return(1);
745 }
746 }
747
748 if (pman_node(db, dbn, key, ksz, val, n->child))
749 return(1);
750 if (pman_node(db, dbn, key, ksz, val, n->next))
751 return(1);
752
753 return(0);
754 }
755
756 static void
757 pman(DB *db, const char *dbn,
758 DBT *key, size_t *ksz, DBT *val,
759 const char *path, struct man *m)
760 {
761
762 pman_node(db, dbn, key, ksz, val, man_node(m));
763 }
764
765
766 static void
767 pmdoc(DB *db, const char *dbn,
768 DBT *key, size_t *ksz, DBT *val,
769 const char *path, struct mdoc *m)
770 {
771
772 pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
773 }
774
775 static void
776 usage(void)
777 {
778
779 fprintf(stderr, "usage: %s "
780 "[-d path] "
781 "[file...]\n",
782 progname);
783 }