]> git.cameronkatri.com Git - mandoc.git/blob - mandoc-db.c
Have mandoc-db create a recno-addressed index of files alongside the
[mandoc.git] / mandoc-db.c
1 /* $Id: mandoc-db.c,v 1.3 2011/04/03 14:18:29 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #ifdef __linux__
25 # include <db_185.h>
26 #else
27 # include <db.h>
28 #endif
29 #include <fcntl.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include "man.h"
37 #include "mdoc.h"
38 #include "mandoc.h"
39
/*
 * Output file names (appended directly to the -d directory argument),
 * the initial/overshoot size of the shared key buffer, and the open(2)
 * flags used for both databases.  MANDOC_FLAGS is parenthesised so the
 * macro behaves as a single operand wherever it is expanded.
 */
#define	MANDOC_DB	 "mandoc.db"
#define	MANDOC_IDX	 "mandoc.index"
#define	MANDOC_BUFSZ	  BUFSIZ
#define	MANDOC_FLAGS	 (O_CREAT|O_TRUNC|O_RDWR)
44
/*
 * Keyword classification.  The per-macro handlers copy one of these,
 * as a 32-bit value, into the first four bytes of each database value.
 */
enum	type {
	MANDOC_NONE = 0,	/* no classification (value zeroed in main) */
	MANDOC_NAME = 1,	/* set by pmdoc_Nm in the NAME section */
	MANDOC_FUNCTION = 2,	/* set by pmdoc_Fn and pmdoc_Fo */
	MANDOC_UTILITY = 3,	/* set by pmdoc_Nm in the SYNOPSIS section */
	MANDOC_INCLUDES = 4,	/* set by pmdoc_Fd and pmdoc_In */
	MANDOC_VARIABLE = 5	/* set by pmdoc_Vt */
};
53
54 #define MDOC_ARGS DB *db, \
55 const char *dbn, \
56 DBT *key, size_t *ksz, \
57 DBT *val, \
58 const struct mdoc_node *n
59
60 static void dbt_append(DBT *, size_t *, const char *);
61 static void dbt_appendb(DBT *, size_t *,
62 const void *, size_t);
63 static void dbt_init(DBT *, size_t *);
64 static void version(void);
65 static void usage(void);
66 static void pmdoc(DB *, const char *,
67 DBT *, size_t *, DBT *,
68 const char *, struct mdoc *);
69 static void pmdoc_node(MDOC_ARGS);
70 static void pmdoc_Fd(MDOC_ARGS);
71 static void pmdoc_In(MDOC_ARGS);
72 static void pmdoc_Fn(MDOC_ARGS);
73 static void pmdoc_Fo(MDOC_ARGS);
74 static void pmdoc_Nm(MDOC_ARGS);
75 static void pmdoc_Vt(MDOC_ARGS);
76
77 typedef void (*pmdoc_nf)(MDOC_ARGS);
78
79 static const char *progname;
80
81 static const pmdoc_nf mdocs[MDOC_MAX] = {
82 NULL, /* Ap */
83 NULL, /* Dd */
84 NULL, /* Dt */
85 NULL, /* Os */
86 NULL, /* Sh */
87 NULL, /* Ss */
88 NULL, /* Pp */
89 NULL, /* D1 */
90 NULL, /* Dl */
91 NULL, /* Bd */
92 NULL, /* Ed */
93 NULL, /* Bl */
94 NULL, /* El */
95 NULL, /* It */
96 NULL, /* Ad */
97 NULL, /* An */
98 NULL, /* Ar */
99 NULL, /* Cd */
100 NULL, /* Cm */
101 NULL, /* Dv */
102 NULL, /* Er */
103 NULL, /* Ev */
104 NULL, /* Ex */
105 NULL, /* Fa */
106 pmdoc_Fd, /* Fd */
107 NULL, /* Fl */
108 pmdoc_Fn, /* Fn */
109 NULL, /* Ft */
110 NULL, /* Ic */
111 pmdoc_In, /* In */
112 NULL, /* Li */
113 NULL, /* Nd */
114 pmdoc_Nm, /* Nm */
115 NULL, /* Op */
116 NULL, /* Ot */
117 NULL, /* Pa */
118 NULL, /* Rv */
119 NULL, /* St */
120 pmdoc_Vt, /* Va */
121 pmdoc_Vt, /* Vt */
122 NULL, /* Xr */
123 NULL, /* %A */
124 NULL, /* %B */
125 NULL, /* %D */
126 NULL, /* %I */
127 NULL, /* %J */
128 NULL, /* %N */
129 NULL, /* %O */
130 NULL, /* %P */
131 NULL, /* %R */
132 NULL, /* %T */
133 NULL, /* %V */
134 NULL, /* Ac */
135 NULL, /* Ao */
136 NULL, /* Aq */
137 NULL, /* At */
138 NULL, /* Bc */
139 NULL, /* Bf */
140 NULL, /* Bo */
141 NULL, /* Bq */
142 NULL, /* Bsx */
143 NULL, /* Bx */
144 NULL, /* Db */
145 NULL, /* Dc */
146 NULL, /* Do */
147 NULL, /* Dq */
148 NULL, /* Ec */
149 NULL, /* Ef */
150 NULL, /* Em */
151 NULL, /* Eo */
152 NULL, /* Fx */
153 NULL, /* Ms */
154 NULL, /* No */
155 NULL, /* Ns */
156 NULL, /* Nx */
157 NULL, /* Ox */
158 NULL, /* Pc */
159 NULL, /* Pf */
160 NULL, /* Po */
161 NULL, /* Pq */
162 NULL, /* Qc */
163 NULL, /* Ql */
164 NULL, /* Qo */
165 NULL, /* Qq */
166 NULL, /* Re */
167 NULL, /* Rs */
168 NULL, /* Sc */
169 NULL, /* So */
170 NULL, /* Sq */
171 NULL, /* Sm */
172 NULL, /* Sx */
173 NULL, /* Sy */
174 NULL, /* Tn */
175 NULL, /* Ux */
176 NULL, /* Xc */
177 NULL, /* Xo */
178 pmdoc_Fo, /* Fo */
179 NULL, /* Fc */
180 NULL, /* Oo */
181 NULL, /* Oc */
182 NULL, /* Bk */
183 NULL, /* Ek */
184 NULL, /* Bt */
185 NULL, /* Hf */
186 NULL, /* Fr */
187 NULL, /* Ud */
188 NULL, /* Lb */
189 NULL, /* Lp */
190 NULL, /* Lk */
191 NULL, /* Mt */
192 NULL, /* Brq */
193 NULL, /* Bro */
194 NULL, /* Brc */
195 NULL, /* %C */
196 NULL, /* Es */
197 NULL, /* En */
198 NULL, /* Dx */
199 NULL, /* %Q */
200 NULL, /* br */
201 NULL, /* sp */
202 NULL, /* %U */
203 NULL, /* Ta */
204 };
205
206 int
207 main(int argc, char *argv[])
208 {
209 struct mparse *mp; /* parse sequence */
210 struct mdoc *mdoc; /* resulting mdoc */
211 char *fn;
212 const char *dir; /* result dir (default: cwd) */
213 char ibuf[MAXPATHLEN], /* index fname */
214 ibbuf[MAXPATHLEN], /* index backup fname */
215 fbuf[MAXPATHLEN], /* btree fname */
216 fbbuf[MAXPATHLEN]; /* btree backup fname */
217 int c;
218 DB *index, /* index database */
219 *db; /* keyword database */
220 DBT rkey, rval, /* recno entries */
221 key, val; /* persistent keyword entries */
222 size_t ksz; /* entry buffer size */
223 char vbuf[8];
224 BTREEINFO info; /* btree configuration */
225 recno_t rec;
226 extern int optind;
227 extern char *optarg;
228
229 progname = strrchr(argv[0], '/');
230 if (progname == NULL)
231 progname = argv[0];
232 else
233 ++progname;
234
235 dir = "";
236
237 while (-1 != (c = getopt(argc, argv, "d:V")))
238 switch (c) {
239 case ('d'):
240 dir = optarg;
241 break;
242 case ('V'):
243 version();
244 return((int)MANDOCLEVEL_OK);
245 default:
246 usage();
247 return((int)MANDOCLEVEL_BADARG);
248 }
249
250 argc -= optind;
251 argv += optind;
252
253 /*
254 * Set up temporary file-names into which we're going to write
255 * all of our data (both for the index and database). These
256 * will be securely renamed to the real file-names after we've
257 * written all of our data.
258 */
259
260 ibuf[0] = ibuf[MAXPATHLEN - 2] =
261 ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
262 fbuf[0] = fbuf[MAXPATHLEN - 2] =
263 fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
264
265 strlcat(fbuf, dir, MAXPATHLEN);
266 strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
267
268 strlcat(fbbuf, fbuf, MAXPATHLEN);
269 strlcat(fbbuf, "~", MAXPATHLEN);
270
271 strlcat(ibuf, dir, MAXPATHLEN);
272 strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
273
274 strlcat(ibbuf, ibuf, MAXPATHLEN);
275 strlcat(ibbuf, "~", MAXPATHLEN);
276
277 if ('\0' != fbuf[MAXPATHLEN - 2] ||
278 '\0' != fbbuf[MAXPATHLEN - 2] ||
279 '\0' != ibuf[MAXPATHLEN - 2] ||
280 '\0' != ibbuf[MAXPATHLEN - 2]) {
281 fprintf(stderr, "%s: Path too long\n", progname);
282 exit((int)MANDOCLEVEL_SYSERR);
283 }
284
285 /*
286 * For the keyword database, open a BTREE database that allows
287 * duplicates. For the index database, use a standard RECNO
288 * database type.
289 */
290
291 memset(&info, 0, sizeof(BTREEINFO));
292 info.flags = R_DUP;
293 db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
294
295 if (NULL == db) {
296 perror(fbbuf);
297 exit((int)MANDOCLEVEL_SYSERR);
298 }
299
300 index = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
301
302 if (NULL == db) {
303 perror(ibbuf);
304 (*db->close)(db);
305 exit((int)MANDOCLEVEL_SYSERR);
306 }
307
308 /*
309 * Try parsing the manuals given on the command line. If we
310 * totally fail, then just keep on going. Take resulting trees
311 * and push them down into the database code.
312 * Use the auto-parser and don't report any errors.
313 */
314
315 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
316
317 memset(&key, 0, sizeof(DBT));
318 memset(&val, 0, sizeof(DBT));
319 memset(&rkey, 0, sizeof(DBT));
320 memset(&rval, 0, sizeof(DBT));
321
322 val.size = sizeof(vbuf);
323 val.data = vbuf;
324 rkey.size = sizeof(recno_t);
325
326 rec = 1;
327 ksz = 0;
328
329 while (NULL != (fn = *argv++)) {
330 mparse_reset(mp);
331
332 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL)
333 continue;
334
335 mparse_result(mp, &mdoc, NULL);
336 if (NULL == mdoc)
337 continue;
338
339 rkey.data = &rec;
340 rval.data = fn;
341 rval.size = strlen(fn) + 1;
342
343 if (-1 == (*index->put)(index, &rkey, &rval, 0)) {
344 perror(ibbuf);
345 break;
346 }
347
348 memset(val.data, 0, sizeof(uint32_t));
349 memcpy(val.data + 4, &rec, sizeof(uint32_t));
350
351 pmdoc(db, fbbuf, &key, &ksz, &val, fn, mdoc);
352 rec++;
353 }
354
355 (*db->close)(db);
356 (*index->close)(index);
357
358 mparse_free(mp);
359
360 free(key.data);
361
362 /* Atomically replace the file with our temporary one. */
363
364 if (-1 == rename(fbbuf, fbuf))
365 perror(fbuf);
366 if (-1 == rename(ibbuf, ibuf))
367 perror(fbuf);
368
369 return((int)MANDOCLEVEL_OK);
370 }
371
372 /*
373 * Initialise the stored database key whose data buffer is shared
374 * between uses (as the key must sometimes be constructed from an array
375 * of
376 */
377 static void
378 dbt_init(DBT *key, size_t *ksz)
379 {
380
381 if (0 == *ksz) {
382 assert(0 == key->size);
383 assert(NULL == key->data);
384 key->data = mandoc_malloc(MANDOC_BUFSZ);
385 *ksz = MANDOC_BUFSZ;
386 }
387
388 key->size = 0;
389 }
390
391 /*
392 * Append a binary value to a database entry. This can be invoked
393 * multiple times; the buffer is automatically resized.
394 */
395 static void
396 dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
397 {
398
399 assert(key->data);
400
401 /* Overshoot by MANDOC_BUFSZ. */
402
403 while (key->size + sz >= *ksz) {
404 *ksz = key->size + sz + MANDOC_BUFSZ;
405 key->data = mandoc_realloc(key->data, *ksz);
406 }
407
408 memcpy(key->data + (int)key->size, cp, sz);
409 key->size += sz;
410 }
411
412 /*
413 * Append a nil-terminated string to the database entry. This can be
414 * invoked multiple times. The database entry will be nil-terminated as
415 * well; if invoked multiple times, a space is put between strings.
416 */
417 static void
418 dbt_append(DBT *key, size_t *ksz, const char *cp)
419 {
420 size_t sz;
421
422 if (0 == (sz = strlen(cp)))
423 return;
424
425 assert(key->data);
426
427 if (key->size)
428 ((char *)key->data)[(int)key->size - 1] = ' ';
429
430 dbt_appendb(key, ksz, cp, sz + 1);
431 }
432
433 /* ARGSUSED */
434 static void
435 pmdoc_Fd(MDOC_ARGS)
436 {
437 uint32_t fl;
438 const char *start, *end;
439 size_t sz;
440 char nil;
441
442 if (SEC_SYNOPSIS != n->sec)
443 return;
444 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
445 return;
446 if (strcmp("#include", n->string))
447 return;
448 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
449 return;
450
451 start = n->string;
452 if ('<' == *start)
453 start++;
454
455 if (0 == (sz = strlen(start)))
456 return;
457
458 end = &start[(int)sz - 1];
459 if ('>' == *end)
460 end--;
461
462 nil = '\0';
463 dbt_appendb(key, ksz, start, end - start + 1);
464 dbt_appendb(key, ksz, &nil, 1);
465
466 fl = MANDOC_INCLUDES;
467 memcpy(val->data, &fl, 4);
468 }
469
470 /* ARGSUSED */
471 static void
472 pmdoc_In(MDOC_ARGS)
473 {
474 uint32_t fl;
475
476 if (SEC_SYNOPSIS != n->sec)
477 return;
478 if (NULL == n->child || MDOC_TEXT != n->child->type)
479 return;
480
481 dbt_append(key, ksz, n->child->string);
482 fl = MANDOC_INCLUDES;
483 memcpy(val->data, &fl, 4);
484 }
485
486 /* ARGSUSED */
487 static void
488 pmdoc_Fn(MDOC_ARGS)
489 {
490 uint32_t fl;
491 const char *cp;
492
493 if (SEC_SYNOPSIS != n->sec)
494 return;
495 if (NULL == n->child || MDOC_TEXT != n->child->type)
496 return;
497
498 /* .Fn "struct type *arg" "foo" */
499
500 cp = strrchr(n->child->string, ' ');
501 if (NULL == cp)
502 cp = n->child->string;
503
504 /* Ignore pointers. */
505
506 while ('*' == *cp)
507 cp++;
508
509 dbt_append(key, ksz, cp);
510 fl = MANDOC_FUNCTION;
511 memcpy(val->data, &fl, 4);
512 }
513
514 /* ARGSUSED */
515 static void
516 pmdoc_Vt(MDOC_ARGS)
517 {
518 uint32_t fl;
519 const char *start, *end;
520 size_t sz;
521 char nil;
522
523 if (SEC_SYNOPSIS != n->sec)
524 return;
525 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
526 return;
527 if (NULL == n->child || MDOC_TEXT != n->child->type)
528 return;
529
530 /*
531 * Strip away leading '*' and trailing ';'.
532 */
533
534 start = n->last->string;
535
536 while ('*' == *start)
537 start++;
538
539 if (0 == (sz = strlen(start)))
540 return;
541
542 end = &start[sz - 1];
543 while (end > start && ';' == *end)
544 end--;
545
546 if (end == start)
547 return;
548
549 nil = '\0';
550 dbt_appendb(key, ksz, start, end - start + 1);
551 dbt_appendb(key, ksz, &nil, 1);
552 fl = MANDOC_VARIABLE;
553 memcpy(val->data, &fl, 4);
554 }
555
556 /* ARGSUSED */
557 static void
558 pmdoc_Fo(MDOC_ARGS)
559 {
560 uint32_t fl;
561
562 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
563 return;
564 if (NULL == n->child || MDOC_TEXT != n->child->type)
565 return;
566
567 dbt_append(key, ksz, n->child->string);
568 fl = MANDOC_FUNCTION;
569 memcpy(val->data, &fl, 4);
570 }
571
572 /* ARGSUSED */
573 static void
574 pmdoc_Nm(MDOC_ARGS)
575 {
576 uint32_t fl;
577
578 if (SEC_NAME == n->sec) {
579 for (n = n->child; n; n = n->next) {
580 if (MDOC_TEXT != n->type)
581 continue;
582 dbt_append(key, ksz, n->string);
583 }
584 fl = MANDOC_NAME;
585 memcpy(val->data, &fl, 4);
586 return;
587 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
588 return;
589
590 for (n = n->child; n; n = n->next) {
591 if (MDOC_TEXT != n->type)
592 continue;
593 dbt_append(key, ksz, n->string);
594 }
595
596 fl = MANDOC_UTILITY;
597 memcpy(val->data, &fl, 4);
598 }
599
600 /*
601 * Call out to per-macro handlers after clearing the persistent database
602 * key. If the macro sets the database key, flush it to the database.
603 */
604 static void
605 pmdoc_node(MDOC_ARGS)
606 {
607
608 if (NULL == n)
609 return;
610
611 switch (n->type) {
612 case (MDOC_HEAD):
613 /* FALLTHROUGH */
614 case (MDOC_BODY):
615 /* FALLTHROUGH */
616 case (MDOC_TAIL):
617 /* FALLTHROUGH */
618 case (MDOC_BLOCK):
619 /* FALLTHROUGH */
620 case (MDOC_ELEM):
621 if (NULL == mdocs[n->tok])
622 break;
623
624 dbt_init(key, ksz);
625 (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
626
627 if (0 == key->size)
628 break;
629 if (0 == (*db->put)(db, key, val, 0))
630 break;
631
632 perror(dbn);
633 exit((int)MANDOCLEVEL_SYSERR);
634 /* NOTREACHED */
635 default:
636 break;
637 }
638
639 pmdoc_node(db, dbn, key, ksz, val, n->child);
640 pmdoc_node(db, dbn, key, ksz, val, n->next);
641 }
642
643 static void
644 pmdoc(DB *db, const char *dbn,
645 DBT *key, size_t *ksz, DBT *val,
646 const char *path, struct mdoc *m)
647 {
648
649 pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
650 }
651
652 static void
653 version(void)
654 {
655
656 printf("%s %s\n", progname, VERSION);
657 }
658
659 static void
660 usage(void)
661 {
662
663 fprintf(stderr, "usage: %s "
664 "[-V] "
665 "[-d path] "
666 "[file...]\n",
667 progname);
668 }