]> git.cameronkatri.com Git - mandoc.git/blob - mandoc-db.c
First, properly escape periods with \&. Then consistently refer to
[mandoc.git] / mandoc-db.c
1 /* $Id: mandoc-db.c,v 1.9 2011/04/05 14:16:05 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #ifdef __linux__
25 # include <db_185.h>
26 #else
27 # include <db.h>
28 #endif
29 #include <fcntl.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include "man.h"
37 #include "mdoc.h"
38 #include "mandoc.h"
39
/* File name of the keyword (btree) database. */
#define	MANDOC_DB	 "mandoc.db"
/* File name of the manual index (recno) database. */
#define	MANDOC_IDX	 "mandoc.index"
/* Initial size and growth increment of the shared key buffer. */
#define	MANDOC_BUFSZ	  BUFSIZ
/*
 * Flags passed to dbopen(): always start from an empty file.
 * Parenthesised so the macro is safe inside larger expressions.
 */
#define	MANDOC_FLAGS	 (O_CREAT|O_TRUNC|O_RDWR)
44
/*
 * Keyword categories.  The handlers below copy one of these values, as
 * a 32-bit integer, into the first four bytes of each keyword value.
 */
enum	type {
	MANDOC_NONE	= 0,	/* no category */
	MANDOC_NAME	= 1,	/* manual name (NAME section) */
	MANDOC_FUNCTION	= 2,	/* function name (Fn, Fo) */
	MANDOC_UTILITY	= 3,	/* utility name (Nm in SYNOPSIS) */
	MANDOC_INCLUDES	= 4,	/* included header (Fd, In) */
	MANDOC_VARIABLE	= 5	/* variable or type (Va, Vt) */
};
53
/*
 * Common parameter lists of the tree-walking functions: the keyword
 * database and its file name (for diagnostics), the shared key with
 * its allocated buffer size, the value to store, and the current node.
 */
#define	MAN_ARGS	DB *db, const char *dbn, DBT *key, size_t *ksz, \
			DBT *val, const struct man_node *n
#define	MDOC_ARGS	DB *db, const char *dbn, DBT *key, size_t *ksz, \
			DBT *val, const struct mdoc_node *n
64
65 static void dbt_append(DBT *, size_t *, const char *);
66 static void dbt_appendb(DBT *, size_t *,
67 const void *, size_t);
68 static void dbt_init(DBT *, size_t *);
69 static void dbt_put(DB *, const char *, DBT *, DBT *);
70 static void usage(void);
71 static void pman(DB *, const char *, DBT *,
72 size_t *, DBT *, struct man *);
73 static int pman_node(MAN_ARGS);
74 static void pmdoc(DB *, const char *, DBT *,
75 size_t *, DBT *, struct mdoc *);
76 static void pmdoc_node(MDOC_ARGS);
77 static void pmdoc_Fd(MDOC_ARGS);
78 static void pmdoc_In(MDOC_ARGS);
79 static void pmdoc_Fn(MDOC_ARGS);
80 static void pmdoc_Fo(MDOC_ARGS);
81 static void pmdoc_Nm(MDOC_ARGS);
82 static void pmdoc_Vt(MDOC_ARGS);
83
84 typedef void (*pmdoc_nf)(MDOC_ARGS);
85
86 static const char *progname;
87
/*
 * Keyword extractor for each -mdoc macro, looked up by macro token
 * (pmdoc_node() indexes this table with n->tok).  A NULL entry means
 * that no keyword is extracted for that macro.  The table is filled
 * positionally; the trailing comment on each entry names the macro the
 * slot is meant for (presumably the token order declared in mdoc.h --
 * confirm when adding or reordering macros).
 */
static const pmdoc_nf mdocs[MDOC_MAX] = {
	NULL, /* Ap */
	NULL, /* Dd */
	NULL, /* Dt */
	NULL, /* Os */
	NULL, /* Sh */
	NULL, /* Ss */
	NULL, /* Pp */
	NULL, /* D1 */
	NULL, /* Dl */
	NULL, /* Bd */
	NULL, /* Ed */
	NULL, /* Bl */
	NULL, /* El */
	NULL, /* It */
	NULL, /* Ad */
	NULL, /* An */
	NULL, /* Ar */
	NULL, /* Cd */
	NULL, /* Cm */
	NULL, /* Dv */
	NULL, /* Er */
	NULL, /* Ev */
	NULL, /* Ex */
	NULL, /* Fa */
	pmdoc_Fd, /* Fd */
	NULL, /* Fl */
	pmdoc_Fn, /* Fn */
	NULL, /* Ft */
	NULL, /* Ic */
	pmdoc_In, /* In */
	NULL, /* Li */
	NULL, /* Nd */
	pmdoc_Nm, /* Nm */
	NULL, /* Op */
	NULL, /* Ot */
	NULL, /* Pa */
	NULL, /* Rv */
	NULL, /* St */
	pmdoc_Vt, /* Va */
	pmdoc_Vt, /* Vt */
	NULL, /* Xr */
	NULL, /* %A */
	NULL, /* %B */
	NULL, /* %D */
	NULL, /* %I */
	NULL, /* %J */
	NULL, /* %N */
	NULL, /* %O */
	NULL, /* %P */
	NULL, /* %R */
	NULL, /* %T */
	NULL, /* %V */
	NULL, /* Ac */
	NULL, /* Ao */
	NULL, /* Aq */
	NULL, /* At */
	NULL, /* Bc */
	NULL, /* Bf */
	NULL, /* Bo */
	NULL, /* Bq */
	NULL, /* Bsx */
	NULL, /* Bx */
	NULL, /* Db */
	NULL, /* Dc */
	NULL, /* Do */
	NULL, /* Dq */
	NULL, /* Ec */
	NULL, /* Ef */
	NULL, /* Em */
	NULL, /* Eo */
	NULL, /* Fx */
	NULL, /* Ms */
	NULL, /* No */
	NULL, /* Ns */
	NULL, /* Nx */
	NULL, /* Ox */
	NULL, /* Pc */
	NULL, /* Pf */
	NULL, /* Po */
	NULL, /* Pq */
	NULL, /* Qc */
	NULL, /* Ql */
	NULL, /* Qo */
	NULL, /* Qq */
	NULL, /* Re */
	NULL, /* Rs */
	NULL, /* Sc */
	NULL, /* So */
	NULL, /* Sq */
	NULL, /* Sm */
	NULL, /* Sx */
	NULL, /* Sy */
	NULL, /* Tn */
	NULL, /* Ux */
	NULL, /* Xc */
	NULL, /* Xo */
	pmdoc_Fo, /* Fo */
	NULL, /* Fc */
	NULL, /* Oo */
	NULL, /* Oc */
	NULL, /* Bk */
	NULL, /* Ek */
	NULL, /* Bt */
	NULL, /* Hf */
	NULL, /* Fr */
	NULL, /* Ud */
	NULL, /* Lb */
	NULL, /* Lp */
	NULL, /* Lk */
	NULL, /* Mt */
	NULL, /* Brq */
	NULL, /* Bro */
	NULL, /* Brc */
	NULL, /* %C */
	NULL, /* Es */
	NULL, /* En */
	NULL, /* Dx */
	NULL, /* %Q */
	NULL, /* br */
	NULL, /* sp */
	NULL, /* %U */
	NULL, /* Ta */
};
212
213 int
214 main(int argc, char *argv[])
215 {
216 struct mparse *mp; /* parse sequence */
217 struct mdoc *mdoc; /* resulting mdoc */
218 struct man *man; /* resulting man */
219 char *fn;
220 const char *dir; /* result dir (default: cwd) */
221 char ibuf[MAXPATHLEN], /* index fname */
222 ibbuf[MAXPATHLEN], /* index backup fname */
223 fbuf[MAXPATHLEN], /* btree fname */
224 fbbuf[MAXPATHLEN]; /* btree backup fname */
225 int c;
226 DB *idx, /* index database */
227 *db; /* keyword database */
228 DBT rkey, rval, /* recno entries */
229 key, val; /* persistent keyword entries */
230 size_t ksz; /* entry buffer size */
231 char vbuf[8];
232 BTREEINFO info; /* btree configuration */
233 recno_t rec;
234 extern int optind;
235 extern char *optarg;
236
237 progname = strrchr(argv[0], '/');
238 if (progname == NULL)
239 progname = argv[0];
240 else
241 ++progname;
242
243 dir = "";
244
245 while (-1 != (c = getopt(argc, argv, "d:")))
246 switch (c) {
247 case ('d'):
248 dir = optarg;
249 break;
250 default:
251 usage();
252 return((int)MANDOCLEVEL_BADARG);
253 }
254
255 argc -= optind;
256 argv += optind;
257
258 /*
259 * Set up temporary file-names into which we're going to write
260 * all of our data (both for the index and database). These
261 * will be securely renamed to the real file-names after we've
262 * written all of our data.
263 */
264
265 ibuf[0] = ibuf[MAXPATHLEN - 2] =
266 ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
267 fbuf[0] = fbuf[MAXPATHLEN - 2] =
268 fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
269
270 strlcat(fbuf, dir, MAXPATHLEN);
271 strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
272
273 strlcat(fbbuf, fbuf, MAXPATHLEN);
274 strlcat(fbbuf, "~", MAXPATHLEN);
275
276 strlcat(ibuf, dir, MAXPATHLEN);
277 strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
278
279 strlcat(ibbuf, ibuf, MAXPATHLEN);
280 strlcat(ibbuf, "~", MAXPATHLEN);
281
282 if ('\0' != fbuf[MAXPATHLEN - 2] ||
283 '\0' != fbbuf[MAXPATHLEN - 2] ||
284 '\0' != ibuf[MAXPATHLEN - 2] ||
285 '\0' != ibbuf[MAXPATHLEN - 2]) {
286 fprintf(stderr, "%s: Path too long\n", progname);
287 exit((int)MANDOCLEVEL_SYSERR);
288 }
289
290 /*
291 * For the keyword database, open a BTREE database that allows
292 * duplicates. For the index database, use a standard RECNO
293 * database type.
294 */
295
296 memset(&info, 0, sizeof(BTREEINFO));
297 info.flags = R_DUP;
298 db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
299
300 if (NULL == db) {
301 perror(fbbuf);
302 exit((int)MANDOCLEVEL_SYSERR);
303 }
304
305 idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
306
307 if (NULL == db) {
308 perror(ibbuf);
309 (*db->close)(db);
310 exit((int)MANDOCLEVEL_SYSERR);
311 }
312
313 /*
314 * Try parsing the manuals given on the command line. If we
315 * totally fail, then just keep on going. Take resulting trees
316 * and push them down into the database code.
317 * Use the auto-parser and don't report any errors.
318 */
319
320 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
321
322 memset(&key, 0, sizeof(DBT));
323 memset(&val, 0, sizeof(DBT));
324 memset(&rkey, 0, sizeof(DBT));
325 memset(&rval, 0, sizeof(DBT));
326
327 val.size = sizeof(vbuf);
328 val.data = vbuf;
329 rkey.size = sizeof(recno_t);
330
331 rec = 1;
332 ksz = 0;
333
334 while (NULL != (fn = *argv++)) {
335 mparse_reset(mp);
336
337 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
338 fprintf(stderr, "%s: Parse failure\n", fn);
339 continue;
340 }
341
342 mparse_result(mp, &mdoc, &man);
343 if (NULL == mdoc && NULL == man)
344 continue;
345
346 rkey.data = &rec;
347 rval.data = fn;
348 rval.size = strlen(fn) + 1;
349
350 if (-1 == (*idx->put)(idx, &rkey, &rval, 0)) {
351 perror(ibbuf);
352 break;
353 }
354
355 memset(val.data, 0, sizeof(uint32_t));
356 memcpy(val.data + 4, &rec, sizeof(uint32_t));
357
358 if (mdoc)
359 pmdoc(db, fbbuf, &key, &ksz, &val, mdoc);
360 else
361 pman(db, fbbuf, &key, &ksz, &val, man);
362 rec++;
363 }
364
365 (*db->close)(db);
366 (*idx->close)(idx);
367
368 mparse_free(mp);
369
370 free(key.data);
371
372 /* Atomically replace the file with our temporary one. */
373
374 if (-1 == rename(fbbuf, fbuf))
375 perror(fbuf);
376 if (-1 == rename(ibbuf, ibuf))
377 perror(fbuf);
378
379 return((int)MANDOCLEVEL_OK);
380 }
381
382 /*
383 * Initialise the stored database key whose data buffer is shared
384 * between uses (as the key must sometimes be constructed from an array
385 * of
386 */
387 static void
388 dbt_init(DBT *key, size_t *ksz)
389 {
390
391 if (0 == *ksz) {
392 assert(0 == key->size);
393 assert(NULL == key->data);
394 key->data = mandoc_malloc(MANDOC_BUFSZ);
395 *ksz = MANDOC_BUFSZ;
396 }
397
398 key->size = 0;
399 }
400
401 /*
402 * Append a binary value to a database entry. This can be invoked
403 * multiple times; the buffer is automatically resized.
404 */
405 static void
406 dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
407 {
408
409 assert(key->data);
410
411 /* Overshoot by MANDOC_BUFSZ. */
412
413 while (key->size + sz >= *ksz) {
414 *ksz = key->size + sz + MANDOC_BUFSZ;
415 key->data = mandoc_realloc(key->data, *ksz);
416 }
417
418 memcpy(key->data + (int)key->size, cp, sz);
419 key->size += sz;
420 }
421
422 /*
423 * Append a nil-terminated string to the database entry. This can be
424 * invoked multiple times. The database entry will be nil-terminated as
425 * well; if invoked multiple times, a space is put between strings.
426 */
427 static void
428 dbt_append(DBT *key, size_t *ksz, const char *cp)
429 {
430 size_t sz;
431
432 if (0 == (sz = strlen(cp)))
433 return;
434
435 assert(key->data);
436
437 if (key->size)
438 ((char *)key->data)[(int)key->size - 1] = ' ';
439
440 dbt_appendb(key, ksz, cp, sz + 1);
441 }
442
443 /* ARGSUSED */
444 static void
445 pmdoc_Fd(MDOC_ARGS)
446 {
447 uint32_t fl;
448 const char *start, *end;
449 size_t sz;
450 char nil;
451
452 if (SEC_SYNOPSIS != n->sec)
453 return;
454 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
455 return;
456
457 /*
458 * Only consider those `Fd' macro fields that begin with an
459 * "inclusion" token (versus, e.g., #define).
460 */
461 if (strcmp("#include", n->string))
462 return;
463
464 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
465 return;
466
467 /*
468 * Strip away the enclosing angle brackets and make sure we're
469 * not zero-length.
470 */
471
472 start = n->string;
473 if ('<' == *start || '"' == *start)
474 start++;
475
476 if (0 == (sz = strlen(start)))
477 return;
478
479 end = &start[(int)sz - 1];
480 if ('>' == *end || '"' == *end)
481 end--;
482
483 nil = '\0';
484 dbt_appendb(key, ksz, start, end - start + 1);
485 dbt_appendb(key, ksz, &nil, 1);
486
487 fl = MANDOC_INCLUDES;
488 memcpy(val->data, &fl, 4);
489 }
490
491 /* ARGSUSED */
492 static void
493 pmdoc_In(MDOC_ARGS)
494 {
495 uint32_t fl;
496
497 if (SEC_SYNOPSIS != n->sec)
498 return;
499 if (NULL == n->child || MDOC_TEXT != n->child->type)
500 return;
501
502 dbt_append(key, ksz, n->child->string);
503 fl = MANDOC_INCLUDES;
504 memcpy(val->data, &fl, 4);
505 }
506
507 /* ARGSUSED */
508 static void
509 pmdoc_Fn(MDOC_ARGS)
510 {
511 uint32_t fl;
512 const char *cp;
513
514 if (SEC_SYNOPSIS != n->sec)
515 return;
516 if (NULL == n->child || MDOC_TEXT != n->child->type)
517 return;
518
519 /* .Fn "struct type *arg" "foo" */
520
521 cp = strrchr(n->child->string, ' ');
522 if (NULL == cp)
523 cp = n->child->string;
524
525 /* Strip away pointer symbol. */
526
527 while ('*' == *cp)
528 cp++;
529
530 dbt_append(key, ksz, cp);
531 fl = MANDOC_FUNCTION;
532 memcpy(val->data, &fl, 4);
533 }
534
535 /* ARGSUSED */
536 static void
537 pmdoc_Vt(MDOC_ARGS)
538 {
539 uint32_t fl;
540 const char *start, *end;
541 size_t sz;
542 char nil;
543
544 if (SEC_SYNOPSIS != n->sec)
545 return;
546 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
547 return;
548 if (NULL == n->last || MDOC_TEXT != n->last->type)
549 return;
550
551 /*
552 * Strip away leading pointer symbol '*' and trailing ';'.
553 */
554
555 start = n->last->string;
556
557 while ('*' == *start)
558 start++;
559
560 if (0 == (sz = strlen(start)))
561 return;
562
563 end = &start[sz - 1];
564 while (end > start && ';' == *end)
565 end--;
566
567 if (end == start)
568 return;
569
570 nil = '\0';
571 dbt_appendb(key, ksz, start, end - start + 1);
572 dbt_appendb(key, ksz, &nil, 1);
573 fl = MANDOC_VARIABLE;
574 memcpy(val->data, &fl, 4);
575 }
576
577 /* ARGSUSED */
578 static void
579 pmdoc_Fo(MDOC_ARGS)
580 {
581 uint32_t fl;
582
583 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
584 return;
585 if (NULL == n->child || MDOC_TEXT != n->child->type)
586 return;
587
588 dbt_append(key, ksz, n->child->string);
589 fl = MANDOC_FUNCTION;
590 memcpy(val->data, &fl, 4);
591 }
592
593 /* ARGSUSED */
594 static void
595 pmdoc_Nm(MDOC_ARGS)
596 {
597 uint32_t fl;
598
599 if (SEC_NAME == n->sec) {
600 for (n = n->child; n; n = n->next) {
601 if (MDOC_TEXT != n->type)
602 continue;
603 dbt_append(key, ksz, n->string);
604 }
605 fl = MANDOC_NAME;
606 memcpy(val->data, &fl, 4);
607 return;
608 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
609 return;
610
611 for (n = n->child; n; n = n->next) {
612 if (MDOC_TEXT != n->type)
613 continue;
614 dbt_append(key, ksz, n->string);
615 }
616
617 fl = MANDOC_UTILITY;
618 memcpy(val->data, &fl, 4);
619 }
620
621 static void
622 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
623 {
624
625 if (0 == key->size)
626 return;
627
628 assert(key->data);
629 assert(8 == val->size);
630 assert(val->data);
631
632 if (0 == (*db->put)(db, key, val, 0))
633 return;
634
635 perror(dbn);
636 exit((int)MANDOCLEVEL_SYSERR);
637 /* NOTREACHED */
638 }
639
640 /*
641 * Call out to per-macro handlers after clearing the persistent database
642 * key. If the macro sets the database key, flush it to the database.
643 */
644 static void
645 pmdoc_node(MDOC_ARGS)
646 {
647
648 if (NULL == n)
649 return;
650
651 switch (n->type) {
652 case (MDOC_HEAD):
653 /* FALLTHROUGH */
654 case (MDOC_BODY):
655 /* FALLTHROUGH */
656 case (MDOC_TAIL):
657 /* FALLTHROUGH */
658 case (MDOC_BLOCK):
659 /* FALLTHROUGH */
660 case (MDOC_ELEM):
661 if (NULL == mdocs[n->tok])
662 break;
663
664 dbt_init(key, ksz);
665 (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
666
667 dbt_put(db, dbn, key, val);
668 break;
669 default:
670 break;
671 }
672
673 pmdoc_node(db, dbn, key, ksz, val, n->child);
674 pmdoc_node(db, dbn, key, ksz, val, n->next);
675 }
676
677 static int
678 pman_node(MAN_ARGS)
679 {
680 const struct man_node *head, *body;
681 const char *start;
682 char nil;
683 size_t sz;
684 uint32_t fl;
685
686 if (NULL == n)
687 return(0);
688
689 /*
690 * We're only searching for one thing: the first text child in
691 * the BODY of a NAME section. Since we don't keep track of
692 * sections in -man, run some hoops to find out whether we're in
693 * the correct section or not.
694 */
695
696 if (MAN_BODY == n->type && MAN_SH == n->tok) {
697 body = n;
698 assert(body->parent);
699 if (NULL != (head = body->parent->head) &&
700 1 == head->nchild &&
701 NULL != (head = (head->child)) &&
702 MAN_TEXT == head->type &&
703 0 == strcmp(head->string, "NAME") &&
704 NULL != (body = body->child) &&
705 MAN_TEXT == body->type) {
706 nil = '\0';
707
708 fl = MANDOC_NAME;
709 memcpy(val->data, &fl, 4);
710
711 start = body->string;
712
713 /*
714 * Go through a special heuristic dance here.
715 * This is why -man manuals are great!
716 * Conventionally, one or more manual names are
717 * comma-specified prior to a whitespace, then a
718 * dash, then a description. Try to puzzle out
719 * the name parts here.
720 */
721
722 while (start) {
723 sz = strcspn(start, " ,");
724 if ('\0' == start[(int)sz])
725 break;
726
727 dbt_init(key, ksz);
728 dbt_appendb(key, ksz, start, sz);
729 dbt_appendb(key, ksz, &nil, 1);
730
731 dbt_put(db, dbn, key, val);
732
733 if (' ' == start[(int)sz])
734 break;
735
736 assert(',' == start[(int)sz]);
737 start += (int)sz + 1;
738 while (' ' == *start)
739 start++;
740 }
741
742 return(1);
743 }
744 }
745
746 if (pman_node(db, dbn, key, ksz, val, n->child))
747 return(1);
748 if (pman_node(db, dbn, key, ksz, val, n->next))
749 return(1);
750
751 return(0);
752 }
753
754 static void
755 pman(DB *db, const char *dbn, DBT *key,
756 size_t *ksz, DBT *val, struct man *m)
757 {
758
759 pman_node(db, dbn, key, ksz, val, man_node(m));
760 }
761
762
763 static void
764 pmdoc(DB *db, const char *dbn, DBT *key,
765 size_t *ksz, DBT *val, struct mdoc *m)
766 {
767
768 pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
769 }
770
771 static void
772 usage(void)
773 {
774
775 fprintf(stderr, "usage: %s "
776 "[-d path] "
777 "[file...]\n",
778 progname);
779 }