]> git.cameronkatri.com Git - mandoc.git/blob - mandoc-db.c
Fix type- and NULL-check to be correct node (last, not child). Prevents
[mandoc.git] / mandoc-db.c
1 /* $Id: mandoc-db.c,v 1.7 2011/04/05 13:12:34 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #ifdef __linux__
25 # include <db_185.h>
26 #else
27 # include <db.h>
28 #endif
29 #include <fcntl.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include "man.h"
37 #include "mdoc.h"
38 #include "mandoc.h"
39
/* Output file names; main() appends them to the -d prefix. */
#define	MANDOC_DB	 "mandoc.db"
#define	MANDOC_IDX	 "mandoc.index"
/* Initial size and growth increment of the shared key buffer. */
#define	MANDOC_BUFSZ	 BUFSIZ
/*
 * dbopen() open flags.  Parenthesised so that the expansion stays a
 * single operand wherever the macro is used.
 */
#define	MANDOC_FLAGS	 (O_CREAT|O_TRUNC|O_RDWR)
44
/*
 * Keyword categories.  The handlers below copy one of these into the
 * first four bytes of the value stored with each keyword.
 */
enum	type {
	MANDOC_NONE,		/* zero: no category */
	MANDOC_NAME,		/* name taken from a NAME section */
	MANDOC_FUNCTION,	/* `Fn' or `Fo' in the SYNOPSIS */
	MANDOC_UTILITY,		/* `Nm' in the SYNOPSIS */
	MANDOC_INCLUDES,	/* `In', or `Fd' #include, in the SYNOPSIS */
	MANDOC_VARIABLE		/* `Va' or `Vt' in the SYNOPSIS */
};
53
/*
 * Parameter lists shared by the -man and -mdoc node handlers: the
 * keyword database and its file name (the name is used for error
 * messages), the reusable key together with the allocated size of its
 * buffer, the value to store, and the node being examined.
 */
#define	MAN_ARGS	  DB *db, \
			  const char *dbn, \
			  DBT *key, size_t *ksz, \
			  DBT *val, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *db, \
			  const char *dbn, \
			  DBT *key, size_t *ksz, \
			  DBT *val, \
			  const struct mdoc_node *n
64
65 static void dbt_append(DBT *, size_t *, const char *);
66 static void dbt_appendb(DBT *, size_t *,
67 const void *, size_t);
68 static void dbt_init(DBT *, size_t *);
69 static void usage(void);
70 static void pman(DB *, const char *,
71 DBT *, size_t *, DBT *,
72 const char *, struct man *);
73 static int pman_node(MAN_ARGS);
74 static void pmdoc(DB *, const char *,
75 DBT *, size_t *, DBT *,
76 const char *, struct mdoc *);
77 static void pmdoc_node(MDOC_ARGS);
78 static void pmdoc_Fd(MDOC_ARGS);
79 static void pmdoc_In(MDOC_ARGS);
80 static void pmdoc_Fn(MDOC_ARGS);
81 static void pmdoc_Fo(MDOC_ARGS);
82 static void pmdoc_Nm(MDOC_ARGS);
83 static void pmdoc_Vt(MDOC_ARGS);
84
/* Signature of a per-macro -mdoc keyword handler. */
typedef	void		(*pmdoc_nf)(MDOC_ARGS);

/* Base name of argv[0]; set in main() and printed by usage(). */
static	const char	 *progname;

/*
 * Keyword handlers, indexed by the node's macro token (n->tok) in
 * pmdoc_node().  A NULL entry means the macro contributes no keyword.
 * The table is positional: each entry's trailing comment names the
 * macro it is meant to line up with.
 */
static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
	NULL, /* Ap */
	NULL, /* Dd */
	NULL, /* Dt */
	NULL, /* Os */
	NULL, /* Sh */
	NULL, /* Ss */
	NULL, /* Pp */
	NULL, /* D1 */
	NULL, /* Dl */
	NULL, /* Bd */
	NULL, /* Ed */
	NULL, /* Bl */
	NULL, /* El */
	NULL, /* It */
	NULL, /* Ad */
	NULL, /* An */
	NULL, /* Ar */
	NULL, /* Cd */
	NULL, /* Cm */
	NULL, /* Dv */
	NULL, /* Er */
	NULL, /* Ev */
	NULL, /* Ex */
	NULL, /* Fa */
	pmdoc_Fd, /* Fd */
	NULL, /* Fl */
	pmdoc_Fn, /* Fn */
	NULL, /* Ft */
	NULL, /* Ic */
	pmdoc_In, /* In */
	NULL, /* Li */
	NULL, /* Nd */
	pmdoc_Nm, /* Nm */
	NULL, /* Op */
	NULL, /* Ot */
	NULL, /* Pa */
	NULL, /* Rv */
	NULL, /* St */
	pmdoc_Vt, /* Va */
	pmdoc_Vt, /* Vt */
	NULL, /* Xr */
	NULL, /* %A */
	NULL, /* %B */
	NULL, /* %D */
	NULL, /* %I */
	NULL, /* %J */
	NULL, /* %N */
	NULL, /* %O */
	NULL, /* %P */
	NULL, /* %R */
	NULL, /* %T */
	NULL, /* %V */
	NULL, /* Ac */
	NULL, /* Ao */
	NULL, /* Aq */
	NULL, /* At */
	NULL, /* Bc */
	NULL, /* Bf */
	NULL, /* Bo */
	NULL, /* Bq */
	NULL, /* Bsx */
	NULL, /* Bx */
	NULL, /* Db */
	NULL, /* Dc */
	NULL, /* Do */
	NULL, /* Dq */
	NULL, /* Ec */
	NULL, /* Ef */
	NULL, /* Em */
	NULL, /* Eo */
	NULL, /* Fx */
	NULL, /* Ms */
	NULL, /* No */
	NULL, /* Ns */
	NULL, /* Nx */
	NULL, /* Ox */
	NULL, /* Pc */
	NULL, /* Pf */
	NULL, /* Po */
	NULL, /* Pq */
	NULL, /* Qc */
	NULL, /* Ql */
	NULL, /* Qo */
	NULL, /* Qq */
	NULL, /* Re */
	NULL, /* Rs */
	NULL, /* Sc */
	NULL, /* So */
	NULL, /* Sq */
	NULL, /* Sm */
	NULL, /* Sx */
	NULL, /* Sy */
	NULL, /* Tn */
	NULL, /* Ux */
	NULL, /* Xc */
	NULL, /* Xo */
	pmdoc_Fo, /* Fo */
	NULL, /* Fc */
	NULL, /* Oo */
	NULL, /* Oc */
	NULL, /* Bk */
	NULL, /* Ek */
	NULL, /* Bt */
	NULL, /* Hf */
	NULL, /* Fr */
	NULL, /* Ud */
	NULL, /* Lb */
	NULL, /* Lp */
	NULL, /* Lk */
	NULL, /* Mt */
	NULL, /* Brq */
	NULL, /* Bro */
	NULL, /* Brc */
	NULL, /* %C */
	NULL, /* Es */
	NULL, /* En */
	NULL, /* Dx */
	NULL, /* %Q */
	NULL, /* br */
	NULL, /* sp */
	NULL, /* %U */
	NULL, /* Ta */
};
213
214 int
215 main(int argc, char *argv[])
216 {
217 struct mparse *mp; /* parse sequence */
218 struct mdoc *mdoc; /* resulting mdoc */
219 struct man *man; /* resulting man */
220 char *fn;
221 const char *dir; /* result dir (default: cwd) */
222 char ibuf[MAXPATHLEN], /* index fname */
223 ibbuf[MAXPATHLEN], /* index backup fname */
224 fbuf[MAXPATHLEN], /* btree fname */
225 fbbuf[MAXPATHLEN]; /* btree backup fname */
226 int c;
227 DB *index, /* index database */
228 *db; /* keyword database */
229 DBT rkey, rval, /* recno entries */
230 key, val; /* persistent keyword entries */
231 size_t ksz; /* entry buffer size */
232 char vbuf[8];
233 BTREEINFO info; /* btree configuration */
234 recno_t rec;
235 extern int optind;
236 extern char *optarg;
237
238 progname = strrchr(argv[0], '/');
239 if (progname == NULL)
240 progname = argv[0];
241 else
242 ++progname;
243
244 dir = "";
245
246 while (-1 != (c = getopt(argc, argv, "d:")))
247 switch (c) {
248 case ('d'):
249 dir = optarg;
250 break;
251 default:
252 usage();
253 return((int)MANDOCLEVEL_BADARG);
254 }
255
256 argc -= optind;
257 argv += optind;
258
259 /*
260 * Set up temporary file-names into which we're going to write
261 * all of our data (both for the index and database). These
262 * will be securely renamed to the real file-names after we've
263 * written all of our data.
264 */
265
266 ibuf[0] = ibuf[MAXPATHLEN - 2] =
267 ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
268 fbuf[0] = fbuf[MAXPATHLEN - 2] =
269 fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
270
271 strlcat(fbuf, dir, MAXPATHLEN);
272 strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
273
274 strlcat(fbbuf, fbuf, MAXPATHLEN);
275 strlcat(fbbuf, "~", MAXPATHLEN);
276
277 strlcat(ibuf, dir, MAXPATHLEN);
278 strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
279
280 strlcat(ibbuf, ibuf, MAXPATHLEN);
281 strlcat(ibbuf, "~", MAXPATHLEN);
282
283 if ('\0' != fbuf[MAXPATHLEN - 2] ||
284 '\0' != fbbuf[MAXPATHLEN - 2] ||
285 '\0' != ibuf[MAXPATHLEN - 2] ||
286 '\0' != ibbuf[MAXPATHLEN - 2]) {
287 fprintf(stderr, "%s: Path too long\n", progname);
288 exit((int)MANDOCLEVEL_SYSERR);
289 }
290
291 /*
292 * For the keyword database, open a BTREE database that allows
293 * duplicates. For the index database, use a standard RECNO
294 * database type.
295 */
296
297 memset(&info, 0, sizeof(BTREEINFO));
298 info.flags = R_DUP;
299 db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
300
301 if (NULL == db) {
302 perror(fbbuf);
303 exit((int)MANDOCLEVEL_SYSERR);
304 }
305
306 index = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
307
308 if (NULL == db) {
309 perror(ibbuf);
310 (*db->close)(db);
311 exit((int)MANDOCLEVEL_SYSERR);
312 }
313
314 /*
315 * Try parsing the manuals given on the command line. If we
316 * totally fail, then just keep on going. Take resulting trees
317 * and push them down into the database code.
318 * Use the auto-parser and don't report any errors.
319 */
320
321 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
322
323 memset(&key, 0, sizeof(DBT));
324 memset(&val, 0, sizeof(DBT));
325 memset(&rkey, 0, sizeof(DBT));
326 memset(&rval, 0, sizeof(DBT));
327
328 val.size = sizeof(vbuf);
329 val.data = vbuf;
330 rkey.size = sizeof(recno_t);
331
332 rec = 1;
333 ksz = 0;
334
335 while (NULL != (fn = *argv++)) {
336 mparse_reset(mp);
337
338 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
339 fprintf(stderr, "%s: Parse failure\n", fn);
340 continue;
341 }
342
343 mparse_result(mp, &mdoc, &man);
344 if (NULL == mdoc && NULL == man)
345 continue;
346
347 rkey.data = &rec;
348 rval.data = fn;
349 rval.size = strlen(fn) + 1;
350
351 if (-1 == (*index->put)(index, &rkey, &rval, 0)) {
352 perror(ibbuf);
353 break;
354 }
355
356 memset(val.data, 0, sizeof(uint32_t));
357 memcpy(val.data + 4, &rec, sizeof(uint32_t));
358
359 if (mdoc)
360 pmdoc(db, fbbuf, &key, &ksz, &val, fn, mdoc);
361 else
362 pman(db, fbbuf, &key, &ksz, &val, fn, man);
363 rec++;
364 }
365
366 (*db->close)(db);
367 (*index->close)(index);
368
369 mparse_free(mp);
370
371 free(key.data);
372
373 /* Atomically replace the file with our temporary one. */
374
375 if (-1 == rename(fbbuf, fbuf))
376 perror(fbuf);
377 if (-1 == rename(ibbuf, ibuf))
378 perror(fbuf);
379
380 return((int)MANDOCLEVEL_OK);
381 }
382
383 /*
384 * Initialise the stored database key whose data buffer is shared
385 * between uses (as the key must sometimes be constructed from an array
386 * of
387 */
388 static void
389 dbt_init(DBT *key, size_t *ksz)
390 {
391
392 if (0 == *ksz) {
393 assert(0 == key->size);
394 assert(NULL == key->data);
395 key->data = mandoc_malloc(MANDOC_BUFSZ);
396 *ksz = MANDOC_BUFSZ;
397 }
398
399 key->size = 0;
400 }
401
402 /*
403 * Append a binary value to a database entry. This can be invoked
404 * multiple times; the buffer is automatically resized.
405 */
406 static void
407 dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
408 {
409
410 assert(key->data);
411
412 /* Overshoot by MANDOC_BUFSZ. */
413
414 while (key->size + sz >= *ksz) {
415 *ksz = key->size + sz + MANDOC_BUFSZ;
416 key->data = mandoc_realloc(key->data, *ksz);
417 }
418
419 memcpy(key->data + (int)key->size, cp, sz);
420 key->size += sz;
421 }
422
423 /*
424 * Append a nil-terminated string to the database entry. This can be
425 * invoked multiple times. The database entry will be nil-terminated as
426 * well; if invoked multiple times, a space is put between strings.
427 */
428 static void
429 dbt_append(DBT *key, size_t *ksz, const char *cp)
430 {
431 size_t sz;
432
433 if (0 == (sz = strlen(cp)))
434 return;
435
436 assert(key->data);
437
438 if (key->size)
439 ((char *)key->data)[(int)key->size - 1] = ' ';
440
441 dbt_appendb(key, ksz, cp, sz + 1);
442 }
443
444 /* ARGSUSED */
445 static void
446 pmdoc_Fd(MDOC_ARGS)
447 {
448 uint32_t fl;
449 const char *start, *end;
450 size_t sz;
451 char nil;
452
453 if (SEC_SYNOPSIS != n->sec)
454 return;
455 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
456 return;
457
458 /*
459 * Only consider those `Fd' macro fields that begin with an
460 * "inclusion" token (versus, e.g., #define).
461 */
462 if (strcmp("#include", n->string))
463 return;
464
465 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
466 return;
467
468 /*
469 * Strip away the enclosing angle brackets and make sure we're
470 * not zero-length.
471 */
472
473 start = n->string;
474 if ('<' == *start || '"' == *start)
475 start++;
476
477 if (0 == (sz = strlen(start)))
478 return;
479
480 end = &start[(int)sz - 1];
481 if ('>' == *end || '"' == *end)
482 end--;
483
484 nil = '\0';
485 dbt_appendb(key, ksz, start, end - start + 1);
486 dbt_appendb(key, ksz, &nil, 1);
487
488 fl = MANDOC_INCLUDES;
489 memcpy(val->data, &fl, 4);
490 }
491
492 /* ARGSUSED */
493 static void
494 pmdoc_In(MDOC_ARGS)
495 {
496 uint32_t fl;
497
498 if (SEC_SYNOPSIS != n->sec)
499 return;
500 if (NULL == n->child || MDOC_TEXT != n->child->type)
501 return;
502
503 dbt_append(key, ksz, n->child->string);
504 fl = MANDOC_INCLUDES;
505 memcpy(val->data, &fl, 4);
506 }
507
508 /* ARGSUSED */
509 static void
510 pmdoc_Fn(MDOC_ARGS)
511 {
512 uint32_t fl;
513 const char *cp;
514
515 if (SEC_SYNOPSIS != n->sec)
516 return;
517 if (NULL == n->child || MDOC_TEXT != n->child->type)
518 return;
519
520 /* .Fn "struct type *arg" "foo" */
521
522 cp = strrchr(n->child->string, ' ');
523 if (NULL == cp)
524 cp = n->child->string;
525
526 /* Strip away pointer symbol. */
527
528 while ('*' == *cp)
529 cp++;
530
531 dbt_append(key, ksz, cp);
532 fl = MANDOC_FUNCTION;
533 memcpy(val->data, &fl, 4);
534 }
535
536 /* ARGSUSED */
537 static void
538 pmdoc_Vt(MDOC_ARGS)
539 {
540 uint32_t fl;
541 const char *start, *end;
542 size_t sz;
543 char nil;
544
545 if (SEC_SYNOPSIS != n->sec)
546 return;
547 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
548 return;
549 if (NULL == n->last || MDOC_TEXT != n->last->type)
550 return;
551
552 /*
553 * Strip away leading pointer symbol '*' and trailing ';'.
554 */
555
556 start = n->last->string;
557
558 while ('*' == *start)
559 start++;
560
561 if (0 == (sz = strlen(start)))
562 return;
563
564 end = &start[sz - 1];
565 while (end > start && ';' == *end)
566 end--;
567
568 if (end == start)
569 return;
570
571 nil = '\0';
572 dbt_appendb(key, ksz, start, end - start + 1);
573 dbt_appendb(key, ksz, &nil, 1);
574 fl = MANDOC_VARIABLE;
575 memcpy(val->data, &fl, 4);
576 }
577
578 /* ARGSUSED */
579 static void
580 pmdoc_Fo(MDOC_ARGS)
581 {
582 uint32_t fl;
583
584 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
585 return;
586 if (NULL == n->child || MDOC_TEXT != n->child->type)
587 return;
588
589 dbt_append(key, ksz, n->child->string);
590 fl = MANDOC_FUNCTION;
591 memcpy(val->data, &fl, 4);
592 }
593
594 /* ARGSUSED */
595 static void
596 pmdoc_Nm(MDOC_ARGS)
597 {
598 uint32_t fl;
599
600 if (SEC_NAME == n->sec) {
601 for (n = n->child; n; n = n->next) {
602 if (MDOC_TEXT != n->type)
603 continue;
604 dbt_append(key, ksz, n->string);
605 }
606 fl = MANDOC_NAME;
607 memcpy(val->data, &fl, 4);
608 return;
609 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
610 return;
611
612 for (n = n->child; n; n = n->next) {
613 if (MDOC_TEXT != n->type)
614 continue;
615 dbt_append(key, ksz, n->string);
616 }
617
618 fl = MANDOC_UTILITY;
619 memcpy(val->data, &fl, 4);
620 }
621
622 /*
623 * Call out to per-macro handlers after clearing the persistent database
624 * key. If the macro sets the database key, flush it to the database.
625 */
626 static void
627 pmdoc_node(MDOC_ARGS)
628 {
629
630 if (NULL == n)
631 return;
632
633 switch (n->type) {
634 case (MDOC_HEAD):
635 /* FALLTHROUGH */
636 case (MDOC_BODY):
637 /* FALLTHROUGH */
638 case (MDOC_TAIL):
639 /* FALLTHROUGH */
640 case (MDOC_BLOCK):
641 /* FALLTHROUGH */
642 case (MDOC_ELEM):
643 if (NULL == mdocs[n->tok])
644 break;
645
646 dbt_init(key, ksz);
647 (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
648
649 if (0 == key->size)
650 break;
651 if (0 == (*db->put)(db, key, val, 0))
652 break;
653
654 perror(dbn);
655 exit((int)MANDOCLEVEL_SYSERR);
656 /* NOTREACHED */
657 default:
658 break;
659 }
660
661 pmdoc_node(db, dbn, key, ksz, val, n->child);
662 pmdoc_node(db, dbn, key, ksz, val, n->next);
663 }
664
665 static int
666 pman_node(MAN_ARGS)
667 {
668 const struct man_node *head, *body;
669 const char *end, *start;
670 char nil;
671 uint32_t fl;
672
673 if (NULL == n)
674 return(0);
675
676 /*
677 * We're only searching for one thing: the first text child in
678 * the BODY of a NAME section. Since we don't keep track of
679 * sections in -man, run some hoops to find out whether we're in
680 * the correct section or not.
681 */
682
683 if (MAN_BODY == n->type && MAN_SH == n->tok) {
684 body = n;
685 assert(body->parent);
686 if (NULL != (head = body->parent->head) &&
687 1 == head->nchild &&
688 NULL != (head = (head->child)) &&
689 MAN_TEXT == head->type &&
690 0 == strcmp(head->string, "NAME") &&
691 NULL != (body = body->child) &&
692 MAN_TEXT == body->type) {
693 nil = '\0';
694
695 start = body->string;
696 if (NULL == (end = strchr(start, ' ')))
697 end = start + strlen(start);
698
699 dbt_init(key, ksz);
700 dbt_appendb(key, ksz, start, end - start + 1);
701 dbt_appendb(key, ksz, &nil, 1);
702 fl = MANDOC_NAME;
703 memcpy(val->data, &fl, 4);
704 return(1);
705 }
706 }
707
708 if (pman_node(db, dbn, key, ksz, val, n->child))
709 return(1);
710 if (pman_node(db, dbn, key, ksz, val, n->next))
711 return(1);
712
713 return(0);
714 }
715
716 static void
717 pman(DB *db, const char *dbn,
718 DBT *key, size_t *ksz, DBT *val,
719 const char *path, struct man *m)
720 {
721
722 pman_node(db, dbn, key, ksz, val, man_node(m));
723 }
724
725
726 static void
727 pmdoc(DB *db, const char *dbn,
728 DBT *key, size_t *ksz, DBT *val,
729 const char *path, struct mdoc *m)
730 {
731
732 pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
733 }
734
735 static void
736 usage(void)
737 {
738
739 fprintf(stderr, "usage: %s "
740 "[-d path] "
741 "[file...]\n",
742 progname);
743 }