]> git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
Use the traditional name "whatis.db" for the mandocdb(8) databases.
[mandoc.git] / mandocdb.c
1 /* $Id: mandocdb.c,v 1.40 2011/12/25 16:53:51 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/param.h>
23 #include <sys/types.h>
24
25 #include <assert.h>
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <getopt.h>
29 #include <stdio.h>
30 #include <stdint.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34
35 #if defined(__linux__)
36 # include <endian.h>
37 # include <db_185.h>
38 #elif defined(__APPLE__)
39 # include <libkern/OSByteOrder.h>
40 # include <db.h>
41 #else
42 # include <db.h>
43 #endif
44
45 #include "man.h"
46 #include "mdoc.h"
47 #include "mandoc.h"
48 #include "mandocdb.h"
49 #include "manpath.h"
50
51 #define MANDOC_BUFSZ BUFSIZ
52 #define MANDOC_SLOP 1024
53
54 #define MANDOC_SRC 0x1
55 #define MANDOC_FORM 0x2
56
57 /* Access to the mandoc database on disk. */
58
struct	mdb {
	char		  idxn[MAXPATHLEN]; /* index db filename (dir + "/" + MANDOC_IDX) */
	char		  dbn[MAXPATHLEN]; /* keyword db filename (dir + "/" + MANDOC_DB) */
	DB		 *idx; /* index recno database; NULL while not open */
	DB		 *db; /* keyword btree database; NULL while not open */
};
65
66 /* Stack of temporarily unused index records. */
67
struct	recs {
	recno_t		 *stack; /* pointer to a malloc'ed array */
	size_t		  size; /* number of allocated slots */
	size_t		  cur; /* current number of empty records */
	/*
	 * While index_prune() scans, this holds the record number just
	 * read; when it returns, it is one past the last record seen.
	 * index_merge() consumes it once and resets it to zero.
	 */
	recno_t		  last; /* last record number in the index */
};
74
75 /* Tiny list for files. No need to bring in QUEUE. */
76
struct	of {
	char		 *fname; /* heap-allocated */
	char		 *sec; /* section to compare against the one in the file */
	char		 *arch; /* architecture to compare against the one in the file */
	char		 *title; /* title to compare against the one in the file */
	int		  src_form; /* bitmask of MANDOC_SRC and MANDOC_FORM */
	struct of	 *next; /* NULL for last one */
	struct of	 *first; /* first in list */
};
86
87 /* Buffer for storing growable data. */
88
struct	buf {
	char		 *cp; /* heap storage, grown by buf_appendb() */
	size_t		  len; /* current length */
	size_t		  size; /* total buffer size */
};
94
95 /* Operation we're going to perform. */
96
/*
 * OP_DEFAULT must stay zero: main() detects conflicting options by
 * testing whether an operation has already been chosen ("if (op)").
 */
enum	op {
	OP_DEFAULT = 0, /* new dbs from dir list or default config */
	OP_CONFFILE, /* new databases from custom config file (-C) */
	OP_UPDATE, /* delete/add entries in existing database (-d) */
	OP_DELETE, /* delete entries from existing database (-u) */
	OP_TEST /* change no databases, report potential problems (-t) */
};
104
/*
 * Common parameter lists of the node handlers, so that all of them
 * share one signature:
 *   hash - in-memory table collecting keyword/mask pairs
 *   buf  - keyword buffer
 *   dbuf - description buffer
 *   n    - node being visited
 *   m    - (mdoc only) meta data of the document
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
114
/* Forward declarations; every helper in this file has internal linkage. */

/* Growable buffer helpers. */
static	void		  buf_appendmdoc(struct buf *,
				const struct mdoc_node *, int);
static	void		  buf_append(struct buf *, const char *);
static	void		  buf_appendb(struct buf *,
				const void *, size_t);
/* Database helpers. */
static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
static	void		  hash_put(DB *, const struct buf *, uint64_t);
static	void		  hash_reset(DB **);
static	void		  index_merge(const struct of *, struct mparse *,
				struct buf *, struct buf *, DB *,
				struct mdb *, struct recs *);
static	void		  index_prune(const struct of *, struct mdb *,
				struct recs *);
/* File list construction. */
static	void		  ofile_argbuild(int, char *[], struct of **);
static	void		  ofile_dirbuild(const char *, const char *,
				const char *, int, struct of **);
static	void		  ofile_free(struct of *);
/* Per-format and per-macro keyword extraction. */
static	void		  pformatted(DB *, struct buf *, struct buf *,
				const struct of *);
static	int		  pman_node(MAN_ARGS);
static	void		  pmdoc_node(MDOC_ARGS);
static	int		  pmdoc_head(MDOC_ARGS);
static	int		  pmdoc_body(MDOC_ARGS);
static	int		  pmdoc_Fd(MDOC_ARGS);
static	int		  pmdoc_In(MDOC_ARGS);
static	int		  pmdoc_Fn(MDOC_ARGS);
static	int		  pmdoc_Nd(MDOC_ARGS);
static	int		  pmdoc_Nm(MDOC_ARGS);
static	int		  pmdoc_Sh(MDOC_ARGS);
static	int		  pmdoc_St(MDOC_ARGS);
static	int		  pmdoc_Xr(MDOC_ARGS);
146
#define	MDOCF_CHILD	  0x01  /* Automatically index child nodes. */

/*
 * Per-macro indexing rule, looked up by macro token in pmdoc_node().
 * A NULL handler is treated like a handler returning non-zero.
 */
struct	mdoc_handler {
	int		(*fp)(MDOC_ARGS);  /* Optional handler. */
	uint64_t	  mask;  /* Set unless handler returns 0. */
	int		  flags;  /* For use by pmdoc_node. */
};
154
155 static const struct mdoc_handler mdocs[MDOC_MAX] = {
156 { NULL, 0, 0 }, /* Ap */
157 { NULL, 0, 0 }, /* Dd */
158 { NULL, 0, 0 }, /* Dt */
159 { NULL, 0, 0 }, /* Os */
160 { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
161 { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
162 { NULL, 0, 0 }, /* Pp */
163 { NULL, 0, 0 }, /* D1 */
164 { NULL, 0, 0 }, /* Dl */
165 { NULL, 0, 0 }, /* Bd */
166 { NULL, 0, 0 }, /* Ed */
167 { NULL, 0, 0 }, /* Bl */
168 { NULL, 0, 0 }, /* El */
169 { NULL, 0, 0 }, /* It */
170 { NULL, 0, 0 }, /* Ad */
171 { NULL, TYPE_An, MDOCF_CHILD }, /* An */
172 { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */
173 { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */
174 { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */
175 { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */
176 { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */
177 { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
178 { NULL, 0, 0 }, /* Ex */
179 { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
180 { pmdoc_Fd, TYPE_In, 0 }, /* Fd */
181 { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
182 { pmdoc_Fn, 0, 0 }, /* Fn */
183 { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
184 { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
185 { pmdoc_In, TYPE_In, 0 }, /* In */
186 { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
187 { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
188 { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
189 { NULL, 0, 0 }, /* Op */
190 { NULL, 0, 0 }, /* Ot */
191 { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
192 { NULL, 0, 0 }, /* Rv */
193 { pmdoc_St, TYPE_St, 0 }, /* St */
194 { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
195 { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
196 { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
197 { NULL, 0, 0 }, /* %A */
198 { NULL, 0, 0 }, /* %B */
199 { NULL, 0, 0 }, /* %D */
200 { NULL, 0, 0 }, /* %I */
201 { NULL, 0, 0 }, /* %J */
202 { NULL, 0, 0 }, /* %N */
203 { NULL, 0, 0 }, /* %O */
204 { NULL, 0, 0 }, /* %P */
205 { NULL, 0, 0 }, /* %R */
206 { NULL, 0, 0 }, /* %T */
207 { NULL, 0, 0 }, /* %V */
208 { NULL, 0, 0 }, /* Ac */
209 { NULL, 0, 0 }, /* Ao */
210 { NULL, 0, 0 }, /* Aq */
211 { NULL, TYPE_At, MDOCF_CHILD }, /* At */
212 { NULL, 0, 0 }, /* Bc */
213 { NULL, 0, 0 }, /* Bf */
214 { NULL, 0, 0 }, /* Bo */
215 { NULL, 0, 0 }, /* Bq */
216 { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */
217 { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */
218 { NULL, 0, 0 }, /* Db */
219 { NULL, 0, 0 }, /* Dc */
220 { NULL, 0, 0 }, /* Do */
221 { NULL, 0, 0 }, /* Dq */
222 { NULL, 0, 0 }, /* Ec */
223 { NULL, 0, 0 }, /* Ef */
224 { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */
225 { NULL, 0, 0 }, /* Eo */
226 { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */
227 { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */
228 { NULL, 0, 0 }, /* No */
229 { NULL, 0, 0 }, /* Ns */
230 { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */
231 { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */
232 { NULL, 0, 0 }, /* Pc */
233 { NULL, 0, 0 }, /* Pf */
234 { NULL, 0, 0 }, /* Po */
235 { NULL, 0, 0 }, /* Pq */
236 { NULL, 0, 0 }, /* Qc */
237 { NULL, 0, 0 }, /* Ql */
238 { NULL, 0, 0 }, /* Qo */
239 { NULL, 0, 0 }, /* Qq */
240 { NULL, 0, 0 }, /* Re */
241 { NULL, 0, 0 }, /* Rs */
242 { NULL, 0, 0 }, /* Sc */
243 { NULL, 0, 0 }, /* So */
244 { NULL, 0, 0 }, /* Sq */
245 { NULL, 0, 0 }, /* Sm */
246 { NULL, 0, 0 }, /* Sx */
247 { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */
248 { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */
249 { NULL, 0, 0 }, /* Ux */
250 { NULL, 0, 0 }, /* Xc */
251 { NULL, 0, 0 }, /* Xo */
252 { pmdoc_head, TYPE_Fn, 0 }, /* Fo */
253 { NULL, 0, 0 }, /* Fc */
254 { NULL, 0, 0 }, /* Oo */
255 { NULL, 0, 0 }, /* Oc */
256 { NULL, 0, 0 }, /* Bk */
257 { NULL, 0, 0 }, /* Ek */
258 { NULL, 0, 0 }, /* Bt */
259 { NULL, 0, 0 }, /* Hf */
260 { NULL, 0, 0 }, /* Fr */
261 { NULL, 0, 0 }, /* Ud */
262 { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */
263 { NULL, 0, 0 }, /* Lp */
264 { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */
265 { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */
266 { NULL, 0, 0 }, /* Brq */
267 { NULL, 0, 0 }, /* Bro */
268 { NULL, 0, 0 }, /* Brc */
269 { NULL, 0, 0 }, /* %C */
270 { NULL, 0, 0 }, /* Es */
271 { NULL, 0, 0 }, /* En */
272 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */
273 { NULL, 0, 0 }, /* %Q */
274 { NULL, 0, 0 }, /* br */
275 { NULL, 0, 0 }, /* sp */
276 { NULL, 0, 0 }, /* %U */
277 { NULL, 0, 0 }, /* Ta */
278 };
279
static	const char	 *progname; /* Basename of argv[0], for usage(). */
static	int		  use_all;  /* Use all directories and files. */
static	int		  verb;  /* Output verbosity level. */
static	int		  warnings;  /* Potential problems in manuals. */
284
/*
 * Parse the command line, select one operation (see enum op) and run it:
 *  - OP_TEST parses the given files without opening any database;
 *  - OP_UPDATE and OP_DELETE open the databases below `dir', prune the
 *    given files and, for OP_UPDATE only, merge them back in;
 *  - OP_DEFAULT and OP_CONFFILE truncate and rebuild the databases of
 *    every directory in the manual path.
 * Returns MANDOCLEVEL_OK after cleanup, or MANDOCLEVEL_BADARG after
 * printing the usage message; several fatal errors exit() directly.
 */
int
main(int argc, char *argv[])
{
	struct mparse	*mp; /* parse sequence */
	struct manpaths	 dirs;
	struct mdb	 mdb;
	struct recs	 recs;
	enum op		 op; /* current operation */
	const char	*dir;
	char		*cp;
	char		 pbuf[PATH_MAX];
	int		 ch, i, flags;
	DB		*hash; /* temporary keyword hashtable */
	BTREEINFO	 info; /* btree configuration */
	size_t		 sz1, sz2;
	struct buf	 buf, /* keyword buffer */
			 dbuf; /* description buffer */
	struct of	*of; /* list of files for processing */
	extern int	 optind;
	extern char	*optarg;

	/* Use the basename of argv[0] in the usage message. */

	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	memset(&dirs, 0, sizeof(struct manpaths));
	memset(&mdb, 0, sizeof(struct mdb));
	memset(&recs, 0, sizeof(struct recs));

	of = NULL;
	mp = NULL;
	hash = NULL;
	op = OP_DEFAULT;
	dir = NULL;

	/*
	 * The options -C, -d, -t and -u each select an operation and
	 * are mutually exclusive: a non-zero `op' means that one of
	 * them was already seen.
	 */

	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
		switch (ch) {
		case ('a'):
			use_all = 1;
			break;
		case ('C'):
			if (op) {
				fprintf(stderr,
				    "-C: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_CONFFILE;
			break;
		case ('d'):
			if (op) {
				fprintf(stderr,
				    "-d: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_UPDATE;
			break;
		case ('t'):
			/* From here on, stderr refers to stdout. */
			dup2(STDOUT_FILENO, STDERR_FILENO);
			if (op) {
				fprintf(stderr,
				    "-t: conflicting options\n");
				goto usage;
			}
			op = OP_TEST;
			use_all = 1;
			warnings = 1;
			break;
		case ('u'):
			if (op) {
				fprintf(stderr,
				    "-u: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_DELETE;
			break;
		case ('v'):
			verb++;
			break;
		case ('W'):
			warnings = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (OP_CONFFILE == op && argc > 0) {
		fprintf(stderr, "-C: too many arguments\n");
		goto usage;
	}

	/* The keyword btree may hold several values per key. */

	memset(&info, 0, sizeof(BTREEINFO));
	info.flags = R_DUP;

	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);

	memset(&buf, 0, sizeof(struct buf));
	memset(&dbuf, 0, sizeof(struct buf));

	buf.size = dbuf.size = MANDOC_BUFSZ;

	buf.cp = mandoc_malloc(buf.size);
	dbuf.cp = mandoc_malloc(dbuf.size);

	/* Building from scratch truncates any existing databases. */

	flags = O_CREAT | O_RDWR;
	if (OP_DEFAULT == op || OP_CONFFILE == op)
		flags |= O_TRUNC;

	/*
	 * Test mode: mdb.db and mdb.idx stay NULL, which makes
	 * index_merge() parse the files without writing anything.
	 */

	if (OP_TEST == op) {
		ofile_argbuild(argc, argv, &of);
		if (NULL == of)
			goto out;
		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
		goto out;
	}

	if (OP_UPDATE == op || OP_DELETE == op) {
		/* mdb was zeroed above, so strlcat() starts from "". */
		strlcat(mdb.dbn, dir, MAXPATHLEN);
		strlcat(mdb.dbn, "/", MAXPATHLEN);
		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);

		strlcat(mdb.idxn, dir, MAXPATHLEN);
		strlcat(mdb.idxn, "/", MAXPATHLEN);
		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);

		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
			fprintf(stderr, "%s: path too long\n", dir);
			exit((int)MANDOCLEVEL_BADARG);
		}

		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);

		if (NULL == mdb.db) {
			perror(mdb.dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (NULL == mdb.idx) {
			perror(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		ofile_argbuild(argc, argv, &of);

		if (NULL == of)
			goto out;

		/* Both operations first drop the old entries. */

		index_prune(of, &mdb, &recs);

		/*
		 * Go to the root of the respective manual tree.
		 * This must work or no manuals may be found (they're
		 * indexed relative to the root).
		 */

		if (OP_UPDATE == op) {
			if (-1 == chdir(dir)) {
				perror(dir);
				exit((int)MANDOCLEVEL_SYSERR);
			}
			index_merge(of, mp, &dbuf, &buf, hash,
					&mdb, &recs);
		}

		goto out;
	}

	/*
	 * Configure the directories we're going to scan.
	 * If we have command-line arguments, use them.
	 * If not, we use man(1)'s method (see mandocdb.8).
	 */

	if (argc > 0) {
		dirs.paths = mandoc_calloc(argc, sizeof(char *));
		dirs.sz = argc;
		for (i = 0; i < argc; i++) {
			if (NULL == (cp = realpath(argv[i], pbuf))) {
				perror(argv[i]);
				goto out;
			}
			dirs.paths[i] = mandoc_strdup(cp);
		}
	} else
		manpath_parse(&dirs, dir, NULL, NULL);

	/* Rebuild one pair of databases per directory. */

	for (i = 0; i < dirs.sz; i++) {
		mdb.idxn[0] = mdb.dbn[0] = '\0';

		strlcat(mdb.dbn, dirs.paths[i], MAXPATHLEN);
		strlcat(mdb.dbn, "/", MAXPATHLEN);
		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);

		strlcat(mdb.idxn, dirs.paths[i], MAXPATHLEN);
		strlcat(mdb.idxn, "/", MAXPATHLEN);
		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);

		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
			fprintf(stderr, "%s: path too long\n",
					dirs.paths[i]);
			exit((int)MANDOCLEVEL_BADARG);
		}

		/* Close the databases of the previous directory. */

		if (mdb.db)
			(*mdb.db->close)(mdb.db);
		if (mdb.idx)
			(*mdb.idx->close)(mdb.idx);

		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);

		if (NULL == mdb.db) {
			perror(mdb.dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (NULL == mdb.idx) {
			perror(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		ofile_free(of);
		of = NULL;

		if (-1 == chdir(dirs.paths[i])) {
			perror(dirs.paths[i]);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		ofile_dirbuild(".", "", "", 0, &of);
		if (NULL == of)
			continue;

		/*
		 * Go to the root of the respective manual tree.
		 * This must work or no manuals may be found (they're
		 * indexed relative to the root).
		 */

		if (-1 == chdir(dirs.paths[i])) {
			perror(dirs.paths[i]);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
	}

out:
	/*
	 * Common cleanup.  Note that `hash' is only ever handed to
	 * index_merge() by value, so it is still NULL at this point.
	 */
	if (mdb.db)
		(*mdb.db->close)(mdb.db);
	if (mdb.idx)
		(*mdb.idx->close)(mdb.idx);
	if (hash)
		(*hash->close)(hash);
	if (mp)
		mparse_free(mp);

	manpath_free(&dirs);
	ofile_free(of);
	free(buf.cp);
	free(dbuf.cp);
	free(recs.stack);

	return(MANDOCLEVEL_OK);

usage:
	fprintf(stderr,
		"usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
		"                        -d dir [file ...] | "
		"-u dir [file ...]\n",
		progname);

	return((int)MANDOCLEVEL_BADARG);
}
563
564 void
565 index_merge(const struct of *of, struct mparse *mp,
566 struct buf *dbuf, struct buf *buf, DB *hash,
567 struct mdb *mdb, struct recs *recs)
568 {
569 recno_t rec;
570 int ch, skip;
571 DBT key, val;
572 struct mdoc *mdoc;
573 struct man *man;
574 const char *fn, *msec, *march, *mtitle;
575 uint64_t mask;
576 size_t sv;
577 unsigned seq;
578 uint64_t vbuf[2];
579 char type;
580
581 rec = 0;
582 for (of = of->first; of; of = of->next) {
583 fn = of->fname;
584
585 /*
586 * Try interpreting the file as mdoc(7) or man(7)
587 * source code, unless it is already known to be
588 * formatted. Fall back to formatted mode.
589 */
590
591 mparse_reset(mp);
592 mdoc = NULL;
593 man = NULL;
594
595 if ((MANDOC_SRC & of->src_form ||
596 ! (MANDOC_FORM & of->src_form)) &&
597 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
598 mparse_result(mp, &mdoc, &man);
599
600 if (NULL != mdoc) {
601 msec = mdoc_meta(mdoc)->msec;
602 march = mdoc_meta(mdoc)->arch;
603 if (NULL == march)
604 march = "";
605 mtitle = mdoc_meta(mdoc)->title;
606 } else if (NULL != man) {
607 msec = man_meta(man)->msec;
608 march = "";
609 mtitle = man_meta(man)->title;
610 } else {
611 msec = of->sec;
612 march = of->arch;
613 mtitle = of->title;
614 }
615
616 /*
617 * By default, skip a file if the manual section
618 * and architecture given in the file disagree
619 * with the directory where the file is located.
620 */
621
622 skip = 0;
623 assert(of->sec);
624 assert(msec);
625 if (strcasecmp(msec, of->sec)) {
626 if (warnings)
627 fprintf(stderr, "%s: "
628 "section \"%s\" manual "
629 "in \"%s\" directory\n",
630 fn, msec, of->sec);
631 skip = 1;
632 }
633
634 assert(of->arch);
635 assert(march);
636 if (strcasecmp(march, of->arch)) {
637 if (warnings)
638 fprintf(stderr, "%s: "
639 "architecture \"%s\" manual "
640 "in \"%s\" directory\n",
641 fn, march, of->arch);
642 skip = 1;
643 }
644
645 /*
646 * By default, skip a file if the title given
647 * in the file disagrees with the file name.
648 * If both agree, use the file name as the title,
649 * because the one in the file usually is all caps.
650 */
651
652 assert(of->title);
653 assert(mtitle);
654 if (strcasecmp(mtitle, of->title)) {
655 if (warnings)
656 fprintf(stderr, "%s: "
657 "title \"%s\" in file "
658 "but \"%s\" in filename\n",
659 fn, mtitle, of->title);
660 skip = 1;
661 } else
662 mtitle = of->title;
663
664 if (skip && !use_all)
665 continue;
666
667 /*
668 * The index record value consists of a nil-terminated
669 * filename, a nil-terminated manual section, and a
670 * nil-terminated description. Since the description
671 * may not be set, we set a sentinel to see if we're
672 * going to write a nil byte in its place.
673 */
674
675 dbuf->len = 0;
676 type = mdoc ? 'd' : (man ? 'a' : 'c');
677 buf_appendb(dbuf, &type, 1);
678 buf_appendb(dbuf, fn, strlen(fn) + 1);
679 buf_appendb(dbuf, msec, strlen(msec) + 1);
680 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
681 buf_appendb(dbuf, march, strlen(march) + 1);
682
683 sv = dbuf->len;
684
685 /*
686 * Collect keyword/mask pairs.
687 * Each pair will become a new btree node.
688 */
689
690 hash_reset(&hash);
691 if (mdoc)
692 pmdoc_node(hash, buf, dbuf,
693 mdoc_node(mdoc), mdoc_meta(mdoc));
694 else if (man)
695 pman_node(hash, buf, dbuf, man_node(man));
696 else
697 pformatted(hash, buf, dbuf, of);
698
699 /* Test mode, do not access any database. */
700
701 if (NULL == mdb->db || NULL == mdb->idx)
702 continue;
703
704 /*
705 * Reclaim an empty index record, if available.
706 * Use its record number for all new btree nodes.
707 */
708
709 if (recs->cur > 0) {
710 recs->cur--;
711 rec = recs->stack[(int)recs->cur];
712 } else if (recs->last > 0) {
713 rec = recs->last;
714 recs->last = 0;
715 } else
716 rec++;
717 vbuf[1] = htobe64(rec);
718
719 /*
720 * Copy from the in-memory hashtable of pending
721 * keyword/mask pairs into the database.
722 */
723
724 seq = R_FIRST;
725 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
726 seq = R_NEXT;
727 assert(sizeof(uint64_t) == val.size);
728 memcpy(&mask, val.data, val.size);
729 vbuf[0] = htobe64(mask);
730 val.size = sizeof(vbuf);
731 val.data = &vbuf;
732 dbt_put(mdb->db, mdb->dbn, &key, &val);
733 }
734 if (ch < 0) {
735 perror("hash");
736 exit((int)MANDOCLEVEL_SYSERR);
737 }
738
739 /*
740 * Apply to the index. If we haven't had a description
741 * set, put an empty one in now.
742 */
743
744 if (dbuf->len == sv)
745 buf_appendb(dbuf, "", 1);
746
747 key.data = &rec;
748 key.size = sizeof(recno_t);
749
750 val.data = dbuf->cp;
751 val.size = dbuf->len;
752
753 if (verb)
754 printf("%s: adding to index\n", fn);
755
756 dbt_put(mdb->idx, mdb->idxn, &key, &val);
757 }
758 }
759
/*
 * Scan through all entries in the index file `idx' and prune those
 * entries in `ofile'.
 * Pruning consists of removing from `db', then invalidating the entry
 * in `idx' (zeroing its value size).
 * The record numbers of all empty index records, both those found
 * empty and those emptied here, are pushed onto `recs->stack';
 * on return, `recs->last' is one past the last record number seen.
 * Exits the program on database errors or on a corrupt database.
 */
static void
index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
{
	const struct of	*of;
	const char	*fn;
	uint64_t	 vbuf[2];
	unsigned	 seq, sseq;
	DBT		 key, val;
	int		 ch;

	recs->cur = 0;
	seq = R_FIRST;
	while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
		seq = R_NEXT;
		assert(sizeof(recno_t) == key.size);
		memcpy(&recs->last, key.data, key.size);

		/* Deleted records are zero-sized.  Skip them. */

		if (0 == val.size)
			goto cont;

		/*
		 * Make sure we're sane.
		 * Read past our mdoc/man/cat type to the next string,
		 * then make sure it's bounded by a NUL.
		 * Failing any of these, we go into our error handler.
		 */

		fn = (char *)val.data + 1;
		if (NULL == memchr(fn, '\0', val.size - 1))
			break;

		/*
		 * Search for the file in those we care about.
		 * XXX: build this into a tree.  Too slow.
		 */

		for (of = ofile->first; of; of = of->next)
			if (0 == strcmp(fn, of->fname))
				break;

		/* Not one of our files: the record stays in use. */

		if (NULL == of)
			continue;

		/*
		 * Search through the keyword database, throwing out all
		 * references to our file.
		 * Each value is a pair of 64-bit integers, the second
		 * being the index record number in big-endian order.
		 */

		sseq = R_FIRST;
		while (0 == (ch = (*mdb->db->seq)(mdb->db,
					&key, &val, sseq))) {
			sseq = R_NEXT;
			if (sizeof(vbuf) != val.size)
				break;

			memcpy(vbuf, val.data, val.size);
			if (recs->last != betoh64(vbuf[1]))
				continue;

			if ((ch = (*mdb->db->del)(mdb->db,
					&key, R_CURSOR)) < 0)
				break;
		}

		/*
		 * Only ch == 1 is acceptable here; ch == 0 means
		 * the scan stopped at a value of unexpected size.
		 */

		if (ch < 0) {
			perror(mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (1 != ch) {
			fprintf(stderr, "%s: corrupt database\n",
					mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		if (verb)
			printf("%s: deleting from index\n", fn);

		/* Invalidate the index record at the cursor. */

		val.size = 0;
		ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);

		if (ch < 0)
			break;
cont:
		/* Remember this record number as reusable. */

		if (recs->cur >= recs->size) {
			recs->size += MANDOC_SLOP;
			recs->stack = mandoc_realloc(recs->stack,
					recs->size * sizeof(recno_t));
		}

		recs->stack[(int)recs->cur] = recs->last;
		recs->cur++;
	}

	/* As above: anything but ch == 1 is an error. */

	if (ch < 0) {
		perror(mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	} else if (1 != ch) {
		fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	recs->last++;
}
870
871 /*
872 * Grow the buffer (if necessary) and copy in a binary string.
873 */
874 static void
875 buf_appendb(struct buf *buf, const void *cp, size_t sz)
876 {
877
878 /* Overshoot by MANDOC_BUFSZ. */
879
880 while (buf->len + sz >= buf->size) {
881 buf->size = buf->len + sz + MANDOC_BUFSZ;
882 buf->cp = mandoc_realloc(buf->cp, buf->size);
883 }
884
885 memcpy(buf->cp + (int)buf->len, cp, sz);
886 buf->len += sz;
887 }
888
889 /*
890 * Append a nil-terminated string to the buffer.
891 * This can be invoked multiple times.
892 * The buffer string will be nil-terminated.
893 * If invoked multiple times, a space is put between strings.
894 */
895 static void
896 buf_append(struct buf *buf, const char *cp)
897 {
898 size_t sz;
899
900 if (0 == (sz = strlen(cp)))
901 return;
902
903 if (buf->len)
904 buf->cp[(int)buf->len - 1] = ' ';
905
906 buf_appendb(buf, cp, sz + 1);
907 }
908
909 /*
910 * Recursively add all text from a given node.
911 * This is optimised for general mdoc nodes in this context, which do
912 * not consist of subexpressions and having a recursive call for n->next
913 * would be wasteful.
914 * The "f" variable should be 0 unless called from pmdoc_Nd for the
915 * description buffer, which does not start at the beginning of the
916 * buffer.
917 */
918 static void
919 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
920 {
921
922 for ( ; n; n = n->next) {
923 if (n->child)
924 buf_appendmdoc(buf, n->child, f);
925
926 if (MDOC_TEXT == n->type && f) {
927 f = 0;
928 buf_appendb(buf, n->string,
929 strlen(n->string) + 1);
930 } else if (MDOC_TEXT == n->type)
931 buf_append(buf, n->string);
932
933 }
934 }
935
936 static void
937 hash_reset(DB **db)
938 {
939 DB *hash;
940
941 if (NULL != (hash = *db))
942 (*hash->close)(hash);
943
944 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
945 if (NULL == *db) {
946 perror("hash");
947 exit((int)MANDOCLEVEL_SYSERR);
948 }
949 }
950
951 /* ARGSUSED */
952 static int
953 pmdoc_head(MDOC_ARGS)
954 {
955
956 return(MDOC_HEAD == n->type);
957 }
958
959 /* ARGSUSED */
960 static int
961 pmdoc_body(MDOC_ARGS)
962 {
963
964 return(MDOC_BODY == n->type);
965 }
966
967 /* ARGSUSED */
968 static int
969 pmdoc_Fd(MDOC_ARGS)
970 {
971 const char *start, *end;
972 size_t sz;
973
974 if (SEC_SYNOPSIS != n->sec)
975 return(0);
976 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
977 return(0);
978
979 /*
980 * Only consider those `Fd' macro fields that begin with an
981 * "inclusion" token (versus, e.g., #define).
982 */
983 if (strcmp("#include", n->string))
984 return(0);
985
986 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
987 return(0);
988
989 /*
990 * Strip away the enclosing angle brackets and make sure we're
991 * not zero-length.
992 */
993
994 start = n->string;
995 if ('<' == *start || '"' == *start)
996 start++;
997
998 if (0 == (sz = strlen(start)))
999 return(0);
1000
1001 end = &start[(int)sz - 1];
1002 if ('>' == *end || '"' == *end)
1003 end--;
1004
1005 assert(end >= start);
1006
1007 buf_appendb(buf, start, (size_t)(end - start + 1));
1008 buf_appendb(buf, "", 1);
1009 return(1);
1010 }
1011
1012 /* ARGSUSED */
1013 static int
1014 pmdoc_In(MDOC_ARGS)
1015 {
1016
1017 if (NULL == n->child || MDOC_TEXT != n->child->type)
1018 return(0);
1019
1020 buf_append(buf, n->child->string);
1021 return(1);
1022 }
1023
1024 /* ARGSUSED */
1025 static int
1026 pmdoc_Fn(MDOC_ARGS)
1027 {
1028 struct mdoc_node *nn;
1029 const char *cp;
1030
1031 nn = n->child;
1032
1033 if (NULL == nn || MDOC_TEXT != nn->type)
1034 return(0);
1035
1036 /* .Fn "struct type *name" "char *arg" */
1037
1038 cp = strrchr(nn->string, ' ');
1039 if (NULL == cp)
1040 cp = nn->string;
1041
1042 /* Strip away pointer symbol. */
1043
1044 while ('*' == *cp)
1045 cp++;
1046
1047 /* Store the function name. */
1048
1049 buf_append(buf, cp);
1050 hash_put(hash, buf, TYPE_Fn);
1051
1052 /* Store the function type. */
1053
1054 if (nn->string < cp) {
1055 buf->len = 0;
1056 buf_appendb(buf, nn->string, cp - nn->string);
1057 buf_appendb(buf, "", 1);
1058 hash_put(hash, buf, TYPE_Ft);
1059 }
1060
1061 /* Store the arguments. */
1062
1063 for (nn = nn->next; nn; nn = nn->next) {
1064 if (MDOC_TEXT != nn->type)
1065 continue;
1066 buf->len = 0;
1067 buf_append(buf, nn->string);
1068 hash_put(hash, buf, TYPE_Fa);
1069 }
1070
1071 return(0);
1072 }
1073
1074 /* ARGSUSED */
1075 static int
1076 pmdoc_St(MDOC_ARGS)
1077 {
1078
1079 if (NULL == n->child || MDOC_TEXT != n->child->type)
1080 return(0);
1081
1082 buf_append(buf, n->child->string);
1083 return(1);
1084 }
1085
1086 /* ARGSUSED */
1087 static int
1088 pmdoc_Xr(MDOC_ARGS)
1089 {
1090
1091 if (NULL == (n = n->child))
1092 return(0);
1093
1094 buf_appendb(buf, n->string, strlen(n->string));
1095
1096 if (NULL != (n = n->next)) {
1097 buf_appendb(buf, ".", 1);
1098 buf_appendb(buf, n->string, strlen(n->string) + 1);
1099 } else
1100 buf_appendb(buf, ".", 2);
1101
1102 return(1);
1103 }
1104
1105 /* ARGSUSED */
1106 static int
1107 pmdoc_Nd(MDOC_ARGS)
1108 {
1109
1110 if (MDOC_BODY != n->type)
1111 return(0);
1112
1113 buf_appendmdoc(dbuf, n->child, 1);
1114 return(1);
1115 }
1116
1117 /* ARGSUSED */
1118 static int
1119 pmdoc_Nm(MDOC_ARGS)
1120 {
1121
1122 if (SEC_NAME == n->sec)
1123 return(1);
1124 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1125 return(0);
1126
1127 if (NULL == n->child)
1128 buf_append(buf, m->name);
1129
1130 return(1);
1131 }
1132
1133 /* ARGSUSED */
1134 static int
1135 pmdoc_Sh(MDOC_ARGS)
1136 {
1137
1138 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1139 }
1140
1141 static void
1142 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1143 {
1144 uint64_t oldmask;
1145 DBT key, val;
1146 int rc;
1147
1148 if (buf->len < 2)
1149 return;
1150
1151 key.data = buf->cp;
1152 key.size = buf->len;
1153
1154 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1155 perror("hash");
1156 exit((int)MANDOCLEVEL_SYSERR);
1157 } else if (0 == rc) {
1158 assert(sizeof(uint64_t) == val.size);
1159 memcpy(&oldmask, val.data, val.size);
1160 mask |= oldmask;
1161 }
1162
1163 val.data = &mask;
1164 val.size = sizeof(uint64_t);
1165
1166 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1167 perror("hash");
1168 exit((int)MANDOCLEVEL_SYSERR);
1169 }
1170 }
1171
1172 static void
1173 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1174 {
1175
1176 assert(key->size);
1177 assert(val->size);
1178
1179 if (0 == (*db->put)(db, key, val, 0))
1180 return;
1181
1182 perror(dbn);
1183 exit((int)MANDOCLEVEL_SYSERR);
1184 /* NOTREACHED */
1185 }
1186
1187 /*
1188 * Call out to per-macro handlers after clearing the persistent database
1189 * key. If the macro sets the database key, flush it to the database.
1190 */
1191 static void
1192 pmdoc_node(MDOC_ARGS)
1193 {
1194
1195 if (NULL == n)
1196 return;
1197
1198 switch (n->type) {
1199 case (MDOC_HEAD):
1200 /* FALLTHROUGH */
1201 case (MDOC_BODY):
1202 /* FALLTHROUGH */
1203 case (MDOC_TAIL):
1204 /* FALLTHROUGH */
1205 case (MDOC_BLOCK):
1206 /* FALLTHROUGH */
1207 case (MDOC_ELEM):
1208 buf->len = 0;
1209
1210 /*
1211 * Both NULL handlers and handlers returning true
1212 * request using the data. Only skip the element
1213 * when the handler returns false.
1214 */
1215
1216 if (NULL != mdocs[n->tok].fp &&
1217 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1218 break;
1219
1220 /*
1221 * For many macros, use the text from all children.
1222 * Set zero flags for macros not needing this.
1223 * In that case, the handler must fill the buffer.
1224 */
1225
1226 if (MDOCF_CHILD & mdocs[n->tok].flags)
1227 buf_appendmdoc(buf, n->child, 0);
1228
1229 /*
1230 * Cover the most common case:
1231 * Automatically stage one string per element.
1232 * Set a zero mask for macros not needing this.
1233 * Additional staging can be done in the handler.
1234 */
1235
1236 if (mdocs[n->tok].mask)
1237 hash_put(hash, buf, mdocs[n->tok].mask);
1238 break;
1239 default:
1240 break;
1241 }
1242
1243 pmdoc_node(hash, buf, dbuf, n->child, m);
1244 pmdoc_node(hash, buf, dbuf, n->next, m);
1245 }
1246
/*
 * Search the man(7) tree below `n' for a NAME section and extract
 * manual names (staged as TYPE_Nm) and the description (appended to
 * `dbuf' and staged as TYPE_Nd) from its first text line.
 * Returns 1 only if the first text line of a NAME section contains
 * neither a blank nor a comma, here or in any descendant; in all
 * other cases, including after a description was found, the children
 * are searched as well and 0 is returned if none of them returns 1.
 */
static int
pman_node(MAN_ARGS)
{
	const struct man_node *head, *body;
	const char	*start, *sv;
	size_t		 sz;

	if (NULL == n)
		return(0);

	/*
	 * We're only searching for one thing: the first text child in
	 * the BODY of a NAME section.  Since we don't keep track of
	 * sections in -man, run some hoops to find out whether we're in
	 * the correct section or not.
	 */

	if (MAN_BODY == n->type && MAN_SH == n->tok) {
		body = n;
		assert(body->parent);
		/*
		 * The section title must be the single text node
		 * "NAME", and the body must start with a text node.
		 */
		if (NULL != (head = body->parent->head) &&
				1 == head->nchild &&
				NULL != (head = (head->child)) &&
				MAN_TEXT == head->type &&
				0 == strcmp(head->string, "NAME") &&
				NULL != (body = body->child) &&
				MAN_TEXT == body->type) {

			assert(body->string);
			start = sv = body->string;

			/*
			 * Go through a special heuristic dance here.
			 * This is why -man manuals are great!
			 * (I'm being sarcastic: my eyes are bleeding.)
			 * Conventionally, one or more manual names are
			 * comma-specified prior to a whitespace, then a
			 * dash, then a description.  Try to puzzle out
			 * the name parts here.
			 */

			for ( ;; ) {
				sz = strcspn(start, " ,");
				/* No more delimiters: not a name. */
				if ('\0' == start[(int)sz])
					break;

				buf->len = 0;
				buf_appendb(buf, start, sz);
				buf_appendb(buf, "", 1);

				hash_put(hash, buf, TYPE_Nm);

				/* A blank ends the list of names. */
				if (' ' == start[(int)sz]) {
					start += (int)sz + 1;
					break;
				}

				assert(',' == start[(int)sz]);
				start += (int)sz + 1;
				while (' ' == *start)
					start++;
			}

			buf->len = 0;

			/*
			 * If start never moved, the line contains
			 * neither a blank nor a comma.  Note that
			 * the line is put into the keyword buffer
			 * but not staged in the hash table here.
			 */

			if (sv == start) {
				buf_append(buf, start);
				return(1);
			}

			while (' ' == *start)
				start++;

			/* Skip one dash, in any of these spellings. */

			if (0 == strncmp(start, "-", 1))
				start += 1;
			else if (0 == strncmp(start, "\\-", 2))
				start += 2;
			else if (0 == strncmp(start, "\\(en", 4))
				start += 4;
			else if (0 == strncmp(start, "\\(em", 4))
				start += 4;

			while (' ' == *start)
				start++;

			/* The rest of the line is the description. */

			sz = strlen(start) + 1;
			buf_appendb(dbuf, start, sz);
			buf_appendb(buf, start, sz);

			hash_put(hash, buf, TYPE_Nd);
		}
	}

	for (n = n->child; n; n = n->next)
		if (pman_node(hash, buf, dbuf, n))
			return(1);

	return(0);
}
1346
1347 /*
1348 * Parse a formatted manual page.
1349 * By necessity, this involves rather crude guesswork.
1350 */
1351 static void
1352 pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
1353 const struct of *of)
1354 {
1355 FILE *stream;
1356 char *line, *p;
1357 size_t len, plen;
1358
1359 if (NULL == (stream = fopen(of->fname, "r"))) {
1360 if (warnings)
1361 perror(of->fname);
1362 return;
1363 }
1364
1365 /*
1366 * Always use the title derived from the filename up front,
1367 * do not even try to find it in the file. This also makes
1368 * sure we don't end up with an orphan index record, even if
1369 * the file content turns out to be completely unintelligible.
1370 */
1371
1372 buf->len = 0;
1373 buf_append(buf, of->title);
1374 hash_put(hash, buf, TYPE_Nm);
1375
1376 /* Skip to first blank line. */
1377
1378 while (NULL != (line = fgetln(stream, &len)))
1379 if ('\n' == *line)
1380 break;
1381
1382 /*
1383 * Assume the first line that is not indented
1384 * is the first section header. Skip to it.
1385 */
1386
1387 while (NULL != (line = fgetln(stream, &len)))
1388 if ('\n' != *line && ' ' != *line)
1389 break;
1390
1391 /*
1392 * If no page content can be found, or the input line
1393 * is already the next section header, or there is no
1394 * trailing newline, reuse the page title as the page
1395 * description.
1396 */
1397
1398 line = fgetln(stream, &len);
1399 if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) {
1400 if (warnings)
1401 fprintf(stderr, "%s: cannot find NAME section\n",
1402 of->fname);
1403 buf_appendb(dbuf, buf->cp, buf->size);
1404 hash_put(hash, buf, TYPE_Nd);
1405 fclose(stream);
1406 return;
1407 }
1408
1409 line[(int)--len] = '\0';
1410
1411 /*
1412 * Skip to the first dash.
1413 * Use the remaining line as the description (no more than 70
1414 * bytes).
1415 */
1416
1417 if (NULL != (p = strstr(line, "- "))) {
1418 for (p += 2; ' ' == *p || '\b' == *p; p++)
1419 /* Skip to next word. */ ;
1420 } else {
1421 if (warnings)
1422 fprintf(stderr, "%s: no dash in title line\n",
1423 of->fname);
1424 p = line;
1425 }
1426
1427 if ((plen = strlen(p)) > 70) {
1428 plen = 70;
1429 p[plen] = '\0';
1430 }
1431
1432 /* Strip backspace-encoding from line. */
1433
1434 while (NULL != (line = memchr(p, '\b', plen))) {
1435 len = line - p;
1436 if (0 == len) {
1437 memmove(line, line + 1, plen--);
1438 continue;
1439 }
1440 memmove(line - 1, line + 1, plen - len);
1441 plen -= 2;
1442 }
1443
1444 buf_appendb(dbuf, p, plen + 1);
1445 buf->len = 0;
1446 buf_appendb(buf, p, plen + 1);
1447 hash_put(hash, buf, TYPE_Nd);
1448 fclose(stream);
1449 }
1450
1451 static void
1452 ofile_argbuild(int argc, char *argv[], struct of **of)
1453 {
1454 char buf[MAXPATHLEN];
1455 char *sec, *arch, *title, *p;
1456 int i, src_form;
1457 struct of *nof;
1458
1459 for (i = 0; i < argc; i++) {
1460
1461 /*
1462 * Try to infer the manual section, architecture and
1463 * page title from the path, assuming it looks like
1464 * man*[/<arch>]/<title>.<section> or
1465 * cat<section>[/<arch>]/<title>.0
1466 */
1467
1468 if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
1469 fprintf(stderr, "%s: path too long\n", argv[i]);
1470 continue;
1471 }
1472 sec = arch = title = "";
1473 src_form = 0;
1474 p = strrchr(buf, '\0');
1475 while (p-- > buf) {
1476 if ('\0' == *sec && '.' == *p) {
1477 sec = p + 1;
1478 *p = '\0';
1479 if ('0' == *sec)
1480 src_form |= MANDOC_FORM;
1481 else if ('1' <= *sec && '9' >= *sec)
1482 src_form |= MANDOC_SRC;
1483 continue;
1484 }
1485 if ('/' != *p)
1486 continue;
1487 if ('\0' == *title) {
1488 title = p + 1;
1489 *p = '\0';
1490 continue;
1491 }
1492 if (0 == strncmp("man", p + 1, 3))
1493 src_form |= MANDOC_SRC;
1494 else if (0 == strncmp("cat", p + 1, 3))
1495 src_form |= MANDOC_FORM;
1496 else
1497 arch = p + 1;
1498 break;
1499 }
1500 if ('\0' == *title) {
1501 if (warnings)
1502 fprintf(stderr,
1503 "%s: cannot deduce title "
1504 "from filename\n",
1505 argv[i]);
1506 title = buf;
1507 }
1508
1509 /*
1510 * Build the file structure.
1511 */
1512
1513 nof = mandoc_calloc(1, sizeof(struct of));
1514 nof->fname = mandoc_strdup(argv[i]);
1515 nof->sec = mandoc_strdup(sec);
1516 nof->arch = mandoc_strdup(arch);
1517 nof->title = mandoc_strdup(title);
1518 nof->src_form = src_form;
1519
1520 /*
1521 * Add the structure to the list.
1522 */
1523
1524 if (verb > 1)
1525 printf("%s: scheduling\n", argv[i]);
1526 if (NULL == *of) {
1527 *of = nof;
1528 (*of)->first = nof;
1529 } else {
1530 nof->first = (*of)->first;
1531 (*of)->next = nof;
1532 *of = nof;
1533 }
1534 }
1535 }
1536
1537 /*
1538 * Recursively build up a list of files to parse.
1539 * We use this instead of ftw() and so on because I don't want global
1540 * variables hanging around.
1541 * This ignores the whatis.db and whatis.index files, but assumes that
1542 * everything else is a manual.
1543 * Pass in a pointer to a NULL structure for the first invocation.
1544 */
1545 static void
1546 ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1547 int p_src_form, struct of **of)
1548 {
1549 char buf[MAXPATHLEN];
1550 size_t sz;
1551 DIR *d;
1552 const char *fn, *sec, *arch;
1553 char *p, *q, *suffix;
1554 struct of *nof;
1555 struct dirent *dp;
1556 int src_form;
1557
1558 if (NULL == (d = opendir(dir))) {
1559 if (warnings)
1560 perror(dir);
1561 return;
1562 }
1563
1564 while (NULL != (dp = readdir(d))) {
1565 fn = dp->d_name;
1566
1567 if ('.' == *fn)
1568 continue;
1569
1570 src_form = p_src_form;
1571
1572 if (DT_DIR == dp->d_type) {
1573 sec = psec;
1574 arch = parch;
1575
1576 /*
1577 * By default, only use directories called:
1578 * man<section>/[<arch>/] or
1579 * cat<section>/[<arch>/]
1580 */
1581
1582 if ('\0' == *sec) {
1583 if(0 == strncmp("man", fn, 3)) {
1584 src_form |= MANDOC_SRC;
1585 sec = fn + 3;
1586 } else if (0 == strncmp("cat", fn, 3)) {
1587 src_form |= MANDOC_FORM;
1588 sec = fn + 3;
1589 } else {
1590 if (warnings) fprintf(stderr,
1591 "%s/%s: bad section\n",
1592 dir, fn);
1593 if (use_all)
1594 sec = fn;
1595 else
1596 continue;
1597 }
1598 } else if ('\0' == *arch) {
1599 if (NULL != strchr(fn, '.')) {
1600 if (warnings) fprintf(stderr,
1601 "%s/%s: bad architecture\n",
1602 dir, fn);
1603 if (0 == use_all)
1604 continue;
1605 }
1606 arch = fn;
1607 } else {
1608 if (warnings) fprintf(stderr, "%s/%s: "
1609 "excessive subdirectory\n", dir, fn);
1610 if (0 == use_all)
1611 continue;
1612 }
1613
1614 buf[0] = '\0';
1615 strlcat(buf, dir, MAXPATHLEN);
1616 strlcat(buf, "/", MAXPATHLEN);
1617 sz = strlcat(buf, fn, MAXPATHLEN);
1618
1619 if (MAXPATHLEN <= sz) {
1620 if (warnings) fprintf(stderr, "%s/%s: "
1621 "path too long\n", dir, fn);
1622 continue;
1623 }
1624
1625 if (verb > 1)
1626 printf("%s: scanning\n", buf);
1627
1628 ofile_dirbuild(buf, sec, arch, src_form, of);
1629 continue;
1630 }
1631
1632 if (DT_REG != dp->d_type) {
1633 if (warnings)
1634 fprintf(stderr,
1635 "%s/%s: not a regular file\n",
1636 dir, fn);
1637 continue;
1638 }
1639 if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1640 continue;
1641 if ('\0' == *psec) {
1642 if (warnings)
1643 fprintf(stderr,
1644 "%s/%s: file outside section\n",
1645 dir, fn);
1646 if (0 == use_all)
1647 continue;
1648 }
1649
1650 /*
1651 * By default, skip files where the file name suffix
1652 * does not agree with the section directory
1653 * they are located in.
1654 */
1655
1656 suffix = strrchr(fn, '.');
1657 if (NULL == suffix) {
1658 if (warnings)
1659 fprintf(stderr,
1660 "%s/%s: no filename suffix\n",
1661 dir, fn);
1662 if (0 == use_all)
1663 continue;
1664 } else if ((MANDOC_SRC & src_form &&
1665 strcmp(suffix + 1, psec)) ||
1666 (MANDOC_FORM & src_form &&
1667 strcmp(suffix + 1, "0"))) {
1668 if (warnings)
1669 fprintf(stderr,
1670 "%s/%s: wrong filename suffix\n",
1671 dir, fn);
1672 if (0 == use_all)
1673 continue;
1674 if ('0' == suffix[1])
1675 src_form |= MANDOC_FORM;
1676 else if ('1' <= suffix[1] && '9' >= suffix[1])
1677 src_form |= MANDOC_SRC;
1678 }
1679
1680 /*
1681 * Skip formatted manuals if a source version is
1682 * available. Ignore the age: it is very unlikely
1683 * that people install newer formatted base manuals
1684 * when they used to have source manuals before,
1685 * and in ports, old manuals get removed on update.
1686 */
1687 if (0 == use_all && MANDOC_FORM & src_form &&
1688 '\0' != *psec) {
1689 buf[0] = '\0';
1690 strlcat(buf, dir, MAXPATHLEN);
1691 p = strrchr(buf, '/');
1692 if ('\0' != *parch && NULL != p)
1693 for (p--; p > buf; p--)
1694 if ('/' == *p)
1695 break;
1696 if (NULL == p)
1697 p = buf;
1698 else
1699 p++;
1700 if (0 == strncmp("cat", p, 3))
1701 memcpy(p, "man", 3);
1702 strlcat(buf, "/", MAXPATHLEN);
1703 sz = strlcat(buf, fn, MAXPATHLEN);
1704 if (sz >= MAXPATHLEN) {
1705 if (warnings) fprintf(stderr,
1706 "%s/%s: path too long\n",
1707 dir, fn);
1708 continue;
1709 }
1710 q = strrchr(buf, '.');
1711 if (NULL != q && p < q++) {
1712 *q = '\0';
1713 sz = strlcat(buf, psec, MAXPATHLEN);
1714 if (sz >= MAXPATHLEN) {
1715 if (warnings) fprintf(stderr,
1716 "%s/%s: path too long\n",
1717 dir, fn);
1718 continue;
1719 }
1720 if (0 == access(buf, R_OK))
1721 continue;
1722 }
1723 }
1724
1725 buf[0] = '\0';
1726 assert('.' == dir[0]);
1727 if ('/' == dir[1]) {
1728 strlcat(buf, dir + 2, MAXPATHLEN);
1729 strlcat(buf, "/", MAXPATHLEN);
1730 }
1731 sz = strlcat(buf, fn, MAXPATHLEN);
1732 if (sz >= MAXPATHLEN) {
1733 if (warnings) fprintf(stderr,
1734 "%s/%s: path too long\n", dir, fn);
1735 continue;
1736 }
1737
1738 nof = mandoc_calloc(1, sizeof(struct of));
1739 nof->fname = mandoc_strdup(buf);
1740 nof->sec = mandoc_strdup(psec);
1741 nof->arch = mandoc_strdup(parch);
1742 nof->src_form = src_form;
1743
1744 /*
1745 * Remember the file name without the extension,
1746 * to be used as the page title in the database.
1747 */
1748
1749 if (NULL != suffix)
1750 *suffix = '\0';
1751 nof->title = mandoc_strdup(fn);
1752
1753 /*
1754 * Add the structure to the list.
1755 */
1756
1757 if (verb > 1)
1758 printf("%s: scheduling\n", buf);
1759 if (NULL == *of) {
1760 *of = nof;
1761 (*of)->first = nof;
1762 } else {
1763 nof->first = (*of)->first;
1764 (*of)->next = nof;
1765 *of = nof;
1766 }
1767 }
1768
1769 closedir(d);
1770 }
1771
1772 static void
1773 ofile_free(struct of *of)
1774 {
1775 struct of *nof;
1776
1777 while (of) {
1778 nof = of->next;
1779 free(of->fname);
1780 free(of->sec);
1781 free(of->arch);
1782 free(of->title);
1783 free(of);
1784 of = nof;
1785 }
1786 }