]> git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
Add libquota
[mandoc.git] / mandocdb.c
1 /* $Id: mandocdb.c,v 1.43 2011/12/31 18:47:52 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/param.h>
23 #include <sys/types.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <fcntl.h>
29 #include <getopt.h>
30 #include <stdio.h>
31 #include <stdint.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #if defined(__linux__)
37 # include <endian.h>
38 # include <db_185.h>
39 #elif defined(__APPLE__)
40 # include <libkern/OSByteOrder.h>
41 # include <db.h>
42 #else
43 # include <db.h>
44 #endif
45
46 #include "man.h"
47 #include "mdoc.h"
48 #include "mandoc.h"
49 #include "mandocdb.h"
50 #include "manpath.h"
51
52 #define MANDOC_BUFSZ BUFSIZ
53 #define MANDOC_SLOP 1024
54
55 #define MANDOC_SRC 0x1
56 #define MANDOC_FORM 0x2
57
58 /* Access to the mandoc database on disk. */
59
60 struct mdb {
61 char idxn[MAXPATHLEN]; /* index db filename */
62 char dbn[MAXPATHLEN]; /* keyword db filename */
63 DB *idx; /* index recno database */
64 DB *db; /* keyword btree database */
65 };
66
67 /* Stack of temporarily unused index records. */
68
69 struct recs {
70 recno_t *stack; /* pointer to a malloc'ed array */
71 size_t size; /* number of allocated slots */
72 size_t cur; /* current number of empty records */
73 recno_t last; /* last record number in the index */
74 };
75
/*
 * One node in a minimal singly-linked list of manual files.
 * Kept hand-rolled so that QUEUE does not have to be pulled in.
 */
struct	of {
	/* File name; heap-allocated. */
	char		 *fname;
	/* Section, compared against the section named in the manual. */
	char		 *sec;
	/* Architecture, compared against the one named in the manual. */
	char		 *arch;
	/* Title, compared against the title named in the manual. */
	char		 *title;
	/* Bitwise OR of MANDOC_SRC and MANDOC_FORM. */
	int		  src_form;
	/* Following node, or NULL for the last one. */
	struct of	 *next;
	/* First node of the list this node belongs to. */
	struct of	 *first;
};
87
/*
 * Growable byte buffer.
 */
struct	buf {
	/* Start of the storage. */
	char		 *cp;
	/* Number of bytes currently in use. */
	size_t		  len;
	/* Number of bytes allocated. */
	size_t		  size;
};
95
/*
 * The operation selected on the command line.
 * OP_DEFAULT must stay zero: option parsing treats any non-zero
 * value as "an operation has already been chosen".
 */
enum	op {
	/* Create new databases from a directory list or default config. */
	OP_DEFAULT = 0,
	/* Create new databases from a custom configuration file. */
	OP_CONFFILE,
	/* Delete and re-add entries in an existing database. */
	OP_UPDATE,
	/* Delete entries from an existing database. */
	OP_DELETE,
	/* Touch no database; only report potential problems. */
	OP_TEST
};
105
106 #define MAN_ARGS DB *hash, \
107 struct buf *buf, \
108 struct buf *dbuf, \
109 const struct man_node *n
110 #define MDOC_ARGS DB *hash, \
111 struct buf *buf, \
112 struct buf *dbuf, \
113 const struct mdoc_node *n, \
114 const struct mdoc_meta *m
115
116 static void buf_appendmdoc(struct buf *,
117 const struct mdoc_node *, int);
118 static void buf_append(struct buf *, const char *);
119 static void buf_appendb(struct buf *,
120 const void *, size_t);
121 static void dbt_put(DB *, const char *, DBT *, DBT *);
122 static void hash_put(DB *, const struct buf *, uint64_t);
123 static void hash_reset(DB **);
124 static void index_merge(const struct of *, struct mparse *,
125 struct buf *, struct buf *, DB *,
126 struct mdb *, struct recs *);
127 static void index_prune(const struct of *, struct mdb *,
128 struct recs *);
129 static void ofile_argbuild(int, char *[], struct of **);
130 static void ofile_dirbuild(const char *, const char *,
131 const char *, int, struct of **);
132 static void ofile_free(struct of *);
133 static void pformatted(DB *, struct buf *,
134 struct buf *, const struct of *);
135 static int pman_node(MAN_ARGS);
136 static void pmdoc_node(MDOC_ARGS);
137 static int pmdoc_head(MDOC_ARGS);
138 static int pmdoc_body(MDOC_ARGS);
139 static int pmdoc_Fd(MDOC_ARGS);
140 static int pmdoc_In(MDOC_ARGS);
141 static int pmdoc_Fn(MDOC_ARGS);
142 static int pmdoc_Nd(MDOC_ARGS);
143 static int pmdoc_Nm(MDOC_ARGS);
144 static int pmdoc_Sh(MDOC_ARGS);
145 static int pmdoc_St(MDOC_ARGS);
146 static int pmdoc_Xr(MDOC_ARGS);
147
148 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
149
/*
 * Indexing rule for a single mdoc macro, as consulted by pmdoc_node().
 */
struct	mdoc_handler {
	/* Optional handler; a zero return makes pmdoc_node skip the node. */
	int		(*fp)(MDOC_ARGS);
	/* Keyword type staged for the node unless the handler returns 0. */
	uint64_t	  mask;
	/* MDOCF_* bits interpreted by pmdoc_node. */
	int		  flags;
};
155
156 static const struct mdoc_handler mdocs[MDOC_MAX] = {
157 { NULL, 0, 0 }, /* Ap */
158 { NULL, 0, 0 }, /* Dd */
159 { NULL, 0, 0 }, /* Dt */
160 { NULL, 0, 0 }, /* Os */
161 { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
162 { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
163 { NULL, 0, 0 }, /* Pp */
164 { NULL, 0, 0 }, /* D1 */
165 { NULL, 0, 0 }, /* Dl */
166 { NULL, 0, 0 }, /* Bd */
167 { NULL, 0, 0 }, /* Ed */
168 { NULL, 0, 0 }, /* Bl */
169 { NULL, 0, 0 }, /* El */
170 { NULL, 0, 0 }, /* It */
171 { NULL, 0, 0 }, /* Ad */
172 { NULL, TYPE_An, MDOCF_CHILD }, /* An */
173 { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */
174 { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */
175 { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */
176 { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */
177 { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */
178 { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
179 { NULL, 0, 0 }, /* Ex */
180 { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
181 { pmdoc_Fd, TYPE_In, 0 }, /* Fd */
182 { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
183 { pmdoc_Fn, 0, 0 }, /* Fn */
184 { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
185 { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
186 { pmdoc_In, TYPE_In, 0 }, /* In */
187 { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
188 { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
189 { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
190 { NULL, 0, 0 }, /* Op */
191 { NULL, 0, 0 }, /* Ot */
192 { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
193 { NULL, 0, 0 }, /* Rv */
194 { pmdoc_St, TYPE_St, 0 }, /* St */
195 { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
196 { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
197 { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
198 { NULL, 0, 0 }, /* %A */
199 { NULL, 0, 0 }, /* %B */
200 { NULL, 0, 0 }, /* %D */
201 { NULL, 0, 0 }, /* %I */
202 { NULL, 0, 0 }, /* %J */
203 { NULL, 0, 0 }, /* %N */
204 { NULL, 0, 0 }, /* %O */
205 { NULL, 0, 0 }, /* %P */
206 { NULL, 0, 0 }, /* %R */
207 { NULL, 0, 0 }, /* %T */
208 { NULL, 0, 0 }, /* %V */
209 { NULL, 0, 0 }, /* Ac */
210 { NULL, 0, 0 }, /* Ao */
211 { NULL, 0, 0 }, /* Aq */
212 { NULL, TYPE_At, MDOCF_CHILD }, /* At */
213 { NULL, 0, 0 }, /* Bc */
214 { NULL, 0, 0 }, /* Bf */
215 { NULL, 0, 0 }, /* Bo */
216 { NULL, 0, 0 }, /* Bq */
217 { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */
218 { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */
219 { NULL, 0, 0 }, /* Db */
220 { NULL, 0, 0 }, /* Dc */
221 { NULL, 0, 0 }, /* Do */
222 { NULL, 0, 0 }, /* Dq */
223 { NULL, 0, 0 }, /* Ec */
224 { NULL, 0, 0 }, /* Ef */
225 { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */
226 { NULL, 0, 0 }, /* Eo */
227 { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */
228 { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */
229 { NULL, 0, 0 }, /* No */
230 { NULL, 0, 0 }, /* Ns */
231 { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */
232 { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */
233 { NULL, 0, 0 }, /* Pc */
234 { NULL, 0, 0 }, /* Pf */
235 { NULL, 0, 0 }, /* Po */
236 { NULL, 0, 0 }, /* Pq */
237 { NULL, 0, 0 }, /* Qc */
238 { NULL, 0, 0 }, /* Ql */
239 { NULL, 0, 0 }, /* Qo */
240 { NULL, 0, 0 }, /* Qq */
241 { NULL, 0, 0 }, /* Re */
242 { NULL, 0, 0 }, /* Rs */
243 { NULL, 0, 0 }, /* Sc */
244 { NULL, 0, 0 }, /* So */
245 { NULL, 0, 0 }, /* Sq */
246 { NULL, 0, 0 }, /* Sm */
247 { NULL, 0, 0 }, /* Sx */
248 { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */
249 { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */
250 { NULL, 0, 0 }, /* Ux */
251 { NULL, 0, 0 }, /* Xc */
252 { NULL, 0, 0 }, /* Xo */
253 { pmdoc_head, TYPE_Fn, 0 }, /* Fo */
254 { NULL, 0, 0 }, /* Fc */
255 { NULL, 0, 0 }, /* Oo */
256 { NULL, 0, 0 }, /* Oc */
257 { NULL, 0, 0 }, /* Bk */
258 { NULL, 0, 0 }, /* Ek */
259 { NULL, 0, 0 }, /* Bt */
260 { NULL, 0, 0 }, /* Hf */
261 { NULL, 0, 0 }, /* Fr */
262 { NULL, 0, 0 }, /* Ud */
263 { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */
264 { NULL, 0, 0 }, /* Lp */
265 { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */
266 { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */
267 { NULL, 0, 0 }, /* Brq */
268 { NULL, 0, 0 }, /* Bro */
269 { NULL, 0, 0 }, /* Brc */
270 { NULL, 0, 0 }, /* %C */
271 { NULL, 0, 0 }, /* Es */
272 { NULL, 0, 0 }, /* En */
273 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */
274 { NULL, 0, 0 }, /* %Q */
275 { NULL, 0, 0 }, /* br */
276 { NULL, 0, 0 }, /* sp */
277 { NULL, 0, 0 }, /* %U */
278 { NULL, 0, 0 }, /* Ta */
279 };
280
281 static const char *progname;
282 static int use_all; /* Use all directories and files. */
283 static int verb; /* Output verbosity level. */
284 static int warnings; /* Potential problems in manuals. */
285
286 int
287 main(int argc, char *argv[])
288 {
289 struct mparse *mp; /* parse sequence */
290 struct manpaths dirs;
291 struct mdb mdb;
292 struct recs recs;
293 enum op op; /* current operation */
294 const char *dir;
295 char *cp;
296 char pbuf[PATH_MAX];
297 int ch, i, flags;
298 DB *hash; /* temporary keyword hashtable */
299 BTREEINFO info; /* btree configuration */
300 size_t sz1, sz2;
301 struct buf buf, /* keyword buffer */
302 dbuf; /* description buffer */
303 struct of *of; /* list of files for processing */
304 extern int optind;
305 extern char *optarg;
306
307 progname = strrchr(argv[0], '/');
308 if (progname == NULL)
309 progname = argv[0];
310 else
311 ++progname;
312
313 memset(&dirs, 0, sizeof(struct manpaths));
314 memset(&mdb, 0, sizeof(struct mdb));
315 memset(&recs, 0, sizeof(struct recs));
316
317 of = NULL;
318 mp = NULL;
319 hash = NULL;
320 op = OP_DEFAULT;
321 dir = NULL;
322
323 while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
324 switch (ch) {
325 case ('a'):
326 use_all = 1;
327 break;
328 case ('C'):
329 if (op) {
330 fprintf(stderr,
331 "-C: conflicting options\n");
332 goto usage;
333 }
334 dir = optarg;
335 op = OP_CONFFILE;
336 break;
337 case ('d'):
338 if (op) {
339 fprintf(stderr,
340 "-d: conflicting options\n");
341 goto usage;
342 }
343 dir = optarg;
344 op = OP_UPDATE;
345 break;
346 case ('t'):
347 dup2(STDOUT_FILENO, STDERR_FILENO);
348 if (op) {
349 fprintf(stderr,
350 "-t: conflicting options\n");
351 goto usage;
352 }
353 op = OP_TEST;
354 use_all = 1;
355 warnings = 1;
356 break;
357 case ('u'):
358 if (op) {
359 fprintf(stderr,
360 "-u: conflicting options\n");
361 goto usage;
362 }
363 dir = optarg;
364 op = OP_DELETE;
365 break;
366 case ('v'):
367 verb++;
368 break;
369 case ('W'):
370 warnings = 1;
371 break;
372 default:
373 goto usage;
374 }
375
376 argc -= optind;
377 argv += optind;
378
379 if (OP_CONFFILE == op && argc > 0) {
380 fprintf(stderr, "-C: too many arguments\n");
381 goto usage;
382 }
383
384 memset(&info, 0, sizeof(BTREEINFO));
385 info.flags = R_DUP;
386
387 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
388
389 memset(&buf, 0, sizeof(struct buf));
390 memset(&dbuf, 0, sizeof(struct buf));
391
392 buf.size = dbuf.size = MANDOC_BUFSZ;
393
394 buf.cp = mandoc_malloc(buf.size);
395 dbuf.cp = mandoc_malloc(dbuf.size);
396
397 flags = O_CREAT | O_RDWR;
398 if (OP_DEFAULT == op || OP_CONFFILE == op)
399 flags |= O_TRUNC;
400
401 if (OP_TEST == op) {
402 ofile_argbuild(argc, argv, &of);
403 if (NULL == of)
404 goto out;
405 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
406 goto out;
407 }
408
409 if (OP_UPDATE == op || OP_DELETE == op) {
410 strlcat(mdb.dbn, dir, MAXPATHLEN);
411 strlcat(mdb.dbn, "/", MAXPATHLEN);
412 sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
413
414 strlcat(mdb.idxn, dir, MAXPATHLEN);
415 strlcat(mdb.idxn, "/", MAXPATHLEN);
416 sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
417
418 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
419 fprintf(stderr, "%s: path too long\n", dir);
420 exit((int)MANDOCLEVEL_BADARG);
421 }
422
423 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
424 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
425
426 if (NULL == mdb.db) {
427 perror(mdb.dbn);
428 exit((int)MANDOCLEVEL_SYSERR);
429 } else if (NULL == mdb.idx) {
430 perror(mdb.idxn);
431 exit((int)MANDOCLEVEL_SYSERR);
432 }
433
434 ofile_argbuild(argc, argv, &of);
435
436 if (NULL == of)
437 goto out;
438
439 index_prune(of, &mdb, &recs);
440
441 /*
442 * Go to the root of the respective manual tree.
443 * This must work or no manuals may be found (they're
444 * indexed relative to the root).
445 */
446
447 if (OP_UPDATE == op) {
448 if (-1 == chdir(dir)) {
449 perror(dir);
450 exit((int)MANDOCLEVEL_SYSERR);
451 }
452 index_merge(of, mp, &dbuf, &buf, hash,
453 &mdb, &recs);
454 }
455
456 goto out;
457 }
458
459 /*
460 * Configure the directories we're going to scan.
461 * If we have command-line arguments, use them.
462 * If not, we use man(1)'s method (see mandocdb.8).
463 */
464
465 if (argc > 0) {
466 dirs.paths = mandoc_calloc(argc, sizeof(char *));
467 dirs.sz = argc;
468 for (i = 0; i < argc; i++) {
469 if (NULL == (cp = realpath(argv[i], pbuf))) {
470 perror(argv[i]);
471 goto out;
472 }
473 dirs.paths[i] = mandoc_strdup(cp);
474 }
475 } else
476 manpath_parse(&dirs, dir, NULL, NULL);
477
478 for (i = 0; i < dirs.sz; i++) {
479 mdb.idxn[0] = mdb.dbn[0] = '\0';
480
481 strlcat(mdb.dbn, dirs.paths[i], MAXPATHLEN);
482 strlcat(mdb.dbn, "/", MAXPATHLEN);
483 sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
484
485 strlcat(mdb.idxn, dirs.paths[i], MAXPATHLEN);
486 strlcat(mdb.idxn, "/", MAXPATHLEN);
487 sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
488
489 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
490 fprintf(stderr, "%s: path too long\n",
491 dirs.paths[i]);
492 exit((int)MANDOCLEVEL_BADARG);
493 }
494
495 if (mdb.db)
496 (*mdb.db->close)(mdb.db);
497 if (mdb.idx)
498 (*mdb.idx->close)(mdb.idx);
499
500 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
501 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
502
503 if (NULL == mdb.db) {
504 perror(mdb.dbn);
505 exit((int)MANDOCLEVEL_SYSERR);
506 } else if (NULL == mdb.idx) {
507 perror(mdb.idxn);
508 exit((int)MANDOCLEVEL_SYSERR);
509 }
510
511 ofile_free(of);
512 of = NULL;
513
514 if (-1 == chdir(dirs.paths[i])) {
515 perror(dirs.paths[i]);
516 exit((int)MANDOCLEVEL_SYSERR);
517 }
518
519 ofile_dirbuild(".", "", "", 0, &of);
520 if (NULL == of)
521 continue;
522
523 /*
524 * Go to the root of the respective manual tree.
525 * This must work or no manuals may be found (they're
526 * indexed relative to the root).
527 */
528
529 if (-1 == chdir(dirs.paths[i])) {
530 perror(dirs.paths[i]);
531 exit((int)MANDOCLEVEL_SYSERR);
532 }
533
534 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
535 }
536
537 out:
538 if (mdb.db)
539 (*mdb.db->close)(mdb.db);
540 if (mdb.idx)
541 (*mdb.idx->close)(mdb.idx);
542 if (hash)
543 (*hash->close)(hash);
544 if (mp)
545 mparse_free(mp);
546
547 manpath_free(&dirs);
548 ofile_free(of);
549 free(buf.cp);
550 free(dbuf.cp);
551 free(recs.stack);
552
553 return(MANDOCLEVEL_OK);
554
555 usage:
556 fprintf(stderr,
557 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
558 " -d dir [file ...] | "
559 "-u dir [file ...]\n",
560 progname);
561
562 return((int)MANDOCLEVEL_BADARG);
563 }
564
565 void
566 index_merge(const struct of *of, struct mparse *mp,
567 struct buf *dbuf, struct buf *buf, DB *hash,
568 struct mdb *mdb, struct recs *recs)
569 {
570 recno_t rec;
571 int ch, skip;
572 DBT key, val;
573 struct mdoc *mdoc;
574 struct man *man;
575 const char *fn, *msec, *march, *mtitle;
576 uint64_t mask;
577 size_t sv;
578 unsigned seq;
579 uint64_t vbuf[2];
580 char type;
581
582 rec = 0;
583 for (of = of->first; of; of = of->next) {
584 fn = of->fname;
585
586 /*
587 * Try interpreting the file as mdoc(7) or man(7)
588 * source code, unless it is already known to be
589 * formatted. Fall back to formatted mode.
590 */
591
592 mparse_reset(mp);
593 mdoc = NULL;
594 man = NULL;
595
596 if ((MANDOC_SRC & of->src_form ||
597 ! (MANDOC_FORM & of->src_form)) &&
598 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
599 mparse_result(mp, &mdoc, &man);
600
601 if (NULL != mdoc) {
602 msec = mdoc_meta(mdoc)->msec;
603 march = mdoc_meta(mdoc)->arch;
604 if (NULL == march)
605 march = "";
606 mtitle = mdoc_meta(mdoc)->title;
607 } else if (NULL != man) {
608 msec = man_meta(man)->msec;
609 march = "";
610 mtitle = man_meta(man)->title;
611 } else {
612 msec = of->sec;
613 march = of->arch;
614 mtitle = of->title;
615 }
616
617 /*
618 * By default, skip a file if the manual section
619 * given in the file disagrees with the directory
620 * where the file is located.
621 */
622
623 skip = 0;
624 assert(of->sec);
625 assert(msec);
626 if (strcasecmp(msec, of->sec)) {
627 if (warnings)
628 fprintf(stderr, "%s: "
629 "section \"%s\" manual "
630 "in \"%s\" directory\n",
631 fn, msec, of->sec);
632 skip = 1;
633 }
634
635 /*
636 * Manual page directories exist for each kernel
637 * architecture as returned by machine(1).
638 * However, many manuals only depend on the
639 * application architecture as returned by arch(1).
640 * For example, some (2/ARM) manuals are shared
641 * across the "armish" and "zaurus" kernel
642 * architectures.
643 * A few manuals are even shared across completely
644 * different architectures, for example fdformat(1)
645 * on amd64, i386, sparc, and sparc64.
646 * Thus, warn about architecture mismatches,
647 * but don't skip manuals for this reason.
648 */
649
650 assert(of->arch);
651 assert(march);
652 if (strcasecmp(march, of->arch)) {
653 if (warnings)
654 fprintf(stderr, "%s: "
655 "architecture \"%s\" manual "
656 "in \"%s\" directory\n",
657 fn, march, of->arch);
658 march = of->arch;
659 }
660
661 /*
662 * By default, skip a file if the title given
663 * in the file disagrees with the file name.
664 * If both agree, use the file name as the title,
665 * because the one in the file usually is all caps.
666 */
667
668 assert(of->title);
669 assert(mtitle);
670 if (strcasecmp(mtitle, of->title)) {
671 if (warnings)
672 fprintf(stderr, "%s: "
673 "title \"%s\" in file "
674 "but \"%s\" in filename\n",
675 fn, mtitle, of->title);
676 skip = 1;
677 } else
678 mtitle = of->title;
679
680 if (skip && !use_all)
681 continue;
682
683 /*
684 * The index record value consists of a nil-terminated
685 * filename, a nil-terminated manual section, and a
686 * nil-terminated description. Since the description
687 * may not be set, we set a sentinel to see if we're
688 * going to write a nil byte in its place.
689 */
690
691 dbuf->len = 0;
692 type = mdoc ? 'd' : (man ? 'a' : 'c');
693 buf_appendb(dbuf, &type, 1);
694 buf_appendb(dbuf, fn, strlen(fn) + 1);
695 buf_appendb(dbuf, msec, strlen(msec) + 1);
696 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
697 buf_appendb(dbuf, march, strlen(march) + 1);
698
699 sv = dbuf->len;
700
701 /*
702 * Collect keyword/mask pairs.
703 * Each pair will become a new btree node.
704 */
705
706 hash_reset(&hash);
707 if (mdoc)
708 pmdoc_node(hash, buf, dbuf,
709 mdoc_node(mdoc), mdoc_meta(mdoc));
710 else if (man)
711 pman_node(hash, buf, dbuf, man_node(man));
712 else
713 pformatted(hash, buf, dbuf, of);
714
715 /* Test mode, do not access any database. */
716
717 if (NULL == mdb->db || NULL == mdb->idx)
718 continue;
719
720 /*
721 * Reclaim an empty index record, if available.
722 * Use its record number for all new btree nodes.
723 */
724
725 if (recs->cur > 0) {
726 recs->cur--;
727 rec = recs->stack[(int)recs->cur];
728 } else if (recs->last > 0) {
729 rec = recs->last;
730 recs->last = 0;
731 } else
732 rec++;
733 vbuf[1] = htobe64(rec);
734
735 /*
736 * Copy from the in-memory hashtable of pending
737 * keyword/mask pairs into the database.
738 */
739
740 seq = R_FIRST;
741 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
742 seq = R_NEXT;
743 assert(sizeof(uint64_t) == val.size);
744 memcpy(&mask, val.data, val.size);
745 vbuf[0] = htobe64(mask);
746 val.size = sizeof(vbuf);
747 val.data = &vbuf;
748 dbt_put(mdb->db, mdb->dbn, &key, &val);
749 }
750 if (ch < 0) {
751 perror("hash");
752 exit((int)MANDOCLEVEL_SYSERR);
753 }
754
755 /*
756 * Apply to the index. If we haven't had a description
757 * set, put an empty one in now.
758 */
759
760 if (dbuf->len == sv)
761 buf_appendb(dbuf, "", 1);
762
763 key.data = &rec;
764 key.size = sizeof(recno_t);
765
766 val.data = dbuf->cp;
767 val.size = dbuf->len;
768
769 if (verb)
770 printf("%s: adding to index\n", fn);
771
772 dbt_put(mdb->idx, mdb->idxn, &key, &val);
773 }
774 }
775
776 /*
777 * Scan through all entries in the index file `idx' and prune those
778 * entries in `ofile'.
779 * Pruning consists of removing from `db', then invalidating the entry
780 * in `idx' (zeroing its value size).
781 */
782 static void
783 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
784 {
785 const struct of *of;
786 const char *fn;
787 uint64_t vbuf[2];
788 unsigned seq, sseq;
789 DBT key, val;
790 int ch;
791
792 recs->cur = 0;
793 seq = R_FIRST;
794 while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
795 seq = R_NEXT;
796 assert(sizeof(recno_t) == key.size);
797 memcpy(&recs->last, key.data, key.size);
798
799 /* Deleted records are zero-sized. Skip them. */
800
801 if (0 == val.size)
802 goto cont;
803
804 /*
805 * Make sure we're sane.
806 * Read past our mdoc/man/cat type to the next string,
807 * then make sure it's bounded by a NUL.
808 * Failing any of these, we go into our error handler.
809 */
810
811 fn = (char *)val.data + 1;
812 if (NULL == memchr(fn, '\0', val.size - 1))
813 break;
814
815 /*
816 * Search for the file in those we care about.
817 * XXX: build this into a tree. Too slow.
818 */
819
820 for (of = ofile->first; of; of = of->next)
821 if (0 == strcmp(fn, of->fname))
822 break;
823
824 if (NULL == of)
825 continue;
826
827 /*
828 * Search through the keyword database, throwing out all
829 * references to our file.
830 */
831
832 sseq = R_FIRST;
833 while (0 == (ch = (*mdb->db->seq)(mdb->db,
834 &key, &val, sseq))) {
835 sseq = R_NEXT;
836 if (sizeof(vbuf) != val.size)
837 break;
838
839 memcpy(vbuf, val.data, val.size);
840 if (recs->last != betoh64(vbuf[1]))
841 continue;
842
843 if ((ch = (*mdb->db->del)(mdb->db,
844 &key, R_CURSOR)) < 0)
845 break;
846 }
847
848 if (ch < 0) {
849 perror(mdb->dbn);
850 exit((int)MANDOCLEVEL_SYSERR);
851 } else if (1 != ch) {
852 fprintf(stderr, "%s: corrupt database\n",
853 mdb->dbn);
854 exit((int)MANDOCLEVEL_SYSERR);
855 }
856
857 if (verb)
858 printf("%s: deleting from index\n", fn);
859
860 val.size = 0;
861 ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
862
863 if (ch < 0)
864 break;
865 cont:
866 if (recs->cur >= recs->size) {
867 recs->size += MANDOC_SLOP;
868 recs->stack = mandoc_realloc(recs->stack,
869 recs->size * sizeof(recno_t));
870 }
871
872 recs->stack[(int)recs->cur] = recs->last;
873 recs->cur++;
874 }
875
876 if (ch < 0) {
877 perror(mdb->idxn);
878 exit((int)MANDOCLEVEL_SYSERR);
879 } else if (1 != ch) {
880 fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
881 exit((int)MANDOCLEVEL_SYSERR);
882 }
883
884 recs->last++;
885 }
886
887 /*
888 * Grow the buffer (if necessary) and copy in a binary string.
889 */
890 static void
891 buf_appendb(struct buf *buf, const void *cp, size_t sz)
892 {
893
894 /* Overshoot by MANDOC_BUFSZ. */
895
896 while (buf->len + sz >= buf->size) {
897 buf->size = buf->len + sz + MANDOC_BUFSZ;
898 buf->cp = mandoc_realloc(buf->cp, buf->size);
899 }
900
901 memcpy(buf->cp + (int)buf->len, cp, sz);
902 buf->len += sz;
903 }
904
905 /*
906 * Append a nil-terminated string to the buffer.
907 * This can be invoked multiple times.
908 * The buffer string will be nil-terminated.
909 * If invoked multiple times, a space is put between strings.
910 */
911 static void
912 buf_append(struct buf *buf, const char *cp)
913 {
914 size_t sz;
915
916 if (0 == (sz = strlen(cp)))
917 return;
918
919 if (buf->len)
920 buf->cp[(int)buf->len - 1] = ' ';
921
922 buf_appendb(buf, cp, sz + 1);
923 }
924
925 /*
926 * Recursively add all text from a given node.
927 * This is optimised for general mdoc nodes in this context, which do
928 * not consist of subexpressions and having a recursive call for n->next
929 * would be wasteful.
930 * The "f" variable should be 0 unless called from pmdoc_Nd for the
931 * description buffer, which does not start at the beginning of the
932 * buffer.
933 */
934 static void
935 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
936 {
937
938 for ( ; n; n = n->next) {
939 if (n->child)
940 buf_appendmdoc(buf, n->child, f);
941
942 if (MDOC_TEXT == n->type && f) {
943 f = 0;
944 buf_appendb(buf, n->string,
945 strlen(n->string) + 1);
946 } else if (MDOC_TEXT == n->type)
947 buf_append(buf, n->string);
948
949 }
950 }
951
952 static void
953 hash_reset(DB **db)
954 {
955 DB *hash;
956
957 if (NULL != (hash = *db))
958 (*hash->close)(hash);
959
960 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
961 if (NULL == *db) {
962 perror("hash");
963 exit((int)MANDOCLEVEL_SYSERR);
964 }
965 }
966
967 /* ARGSUSED */
968 static int
969 pmdoc_head(MDOC_ARGS)
970 {
971
972 return(MDOC_HEAD == n->type);
973 }
974
975 /* ARGSUSED */
976 static int
977 pmdoc_body(MDOC_ARGS)
978 {
979
980 return(MDOC_BODY == n->type);
981 }
982
983 /* ARGSUSED */
984 static int
985 pmdoc_Fd(MDOC_ARGS)
986 {
987 const char *start, *end;
988 size_t sz;
989
990 if (SEC_SYNOPSIS != n->sec)
991 return(0);
992 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
993 return(0);
994
995 /*
996 * Only consider those `Fd' macro fields that begin with an
997 * "inclusion" token (versus, e.g., #define).
998 */
999 if (strcmp("#include", n->string))
1000 return(0);
1001
1002 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1003 return(0);
1004
1005 /*
1006 * Strip away the enclosing angle brackets and make sure we're
1007 * not zero-length.
1008 */
1009
1010 start = n->string;
1011 if ('<' == *start || '"' == *start)
1012 start++;
1013
1014 if (0 == (sz = strlen(start)))
1015 return(0);
1016
1017 end = &start[(int)sz - 1];
1018 if ('>' == *end || '"' == *end)
1019 end--;
1020
1021 assert(end >= start);
1022
1023 buf_appendb(buf, start, (size_t)(end - start + 1));
1024 buf_appendb(buf, "", 1);
1025 return(1);
1026 }
1027
1028 /* ARGSUSED */
1029 static int
1030 pmdoc_In(MDOC_ARGS)
1031 {
1032
1033 if (NULL == n->child || MDOC_TEXT != n->child->type)
1034 return(0);
1035
1036 buf_append(buf, n->child->string);
1037 return(1);
1038 }
1039
1040 /* ARGSUSED */
1041 static int
1042 pmdoc_Fn(MDOC_ARGS)
1043 {
1044 struct mdoc_node *nn;
1045 const char *cp;
1046
1047 nn = n->child;
1048
1049 if (NULL == nn || MDOC_TEXT != nn->type)
1050 return(0);
1051
1052 /* .Fn "struct type *name" "char *arg" */
1053
1054 cp = strrchr(nn->string, ' ');
1055 if (NULL == cp)
1056 cp = nn->string;
1057
1058 /* Strip away pointer symbol. */
1059
1060 while ('*' == *cp)
1061 cp++;
1062
1063 /* Store the function name. */
1064
1065 buf_append(buf, cp);
1066 hash_put(hash, buf, TYPE_Fn);
1067
1068 /* Store the function type. */
1069
1070 if (nn->string < cp) {
1071 buf->len = 0;
1072 buf_appendb(buf, nn->string, cp - nn->string);
1073 buf_appendb(buf, "", 1);
1074 hash_put(hash, buf, TYPE_Ft);
1075 }
1076
1077 /* Store the arguments. */
1078
1079 for (nn = nn->next; nn; nn = nn->next) {
1080 if (MDOC_TEXT != nn->type)
1081 continue;
1082 buf->len = 0;
1083 buf_append(buf, nn->string);
1084 hash_put(hash, buf, TYPE_Fa);
1085 }
1086
1087 return(0);
1088 }
1089
1090 /* ARGSUSED */
1091 static int
1092 pmdoc_St(MDOC_ARGS)
1093 {
1094
1095 if (NULL == n->child || MDOC_TEXT != n->child->type)
1096 return(0);
1097
1098 buf_append(buf, n->child->string);
1099 return(1);
1100 }
1101
1102 /* ARGSUSED */
1103 static int
1104 pmdoc_Xr(MDOC_ARGS)
1105 {
1106
1107 if (NULL == (n = n->child))
1108 return(0);
1109
1110 buf_appendb(buf, n->string, strlen(n->string));
1111
1112 if (NULL != (n = n->next)) {
1113 buf_appendb(buf, ".", 1);
1114 buf_appendb(buf, n->string, strlen(n->string) + 1);
1115 } else
1116 buf_appendb(buf, ".", 2);
1117
1118 return(1);
1119 }
1120
1121 /* ARGSUSED */
1122 static int
1123 pmdoc_Nd(MDOC_ARGS)
1124 {
1125
1126 if (MDOC_BODY != n->type)
1127 return(0);
1128
1129 buf_appendmdoc(dbuf, n->child, 1);
1130 return(1);
1131 }
1132
1133 /* ARGSUSED */
1134 static int
1135 pmdoc_Nm(MDOC_ARGS)
1136 {
1137
1138 if (SEC_NAME == n->sec)
1139 return(1);
1140 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1141 return(0);
1142
1143 if (NULL == n->child)
1144 buf_append(buf, m->name);
1145
1146 return(1);
1147 }
1148
1149 /* ARGSUSED */
1150 static int
1151 pmdoc_Sh(MDOC_ARGS)
1152 {
1153
1154 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1155 }
1156
1157 static void
1158 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1159 {
1160 uint64_t oldmask;
1161 DBT key, val;
1162 int rc;
1163
1164 if (buf->len < 2)
1165 return;
1166
1167 key.data = buf->cp;
1168 key.size = buf->len;
1169
1170 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1171 perror("hash");
1172 exit((int)MANDOCLEVEL_SYSERR);
1173 } else if (0 == rc) {
1174 assert(sizeof(uint64_t) == val.size);
1175 memcpy(&oldmask, val.data, val.size);
1176 mask |= oldmask;
1177 }
1178
1179 val.data = &mask;
1180 val.size = sizeof(uint64_t);
1181
1182 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1183 perror("hash");
1184 exit((int)MANDOCLEVEL_SYSERR);
1185 }
1186 }
1187
1188 static void
1189 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1190 {
1191
1192 assert(key->size);
1193 assert(val->size);
1194
1195 if (0 == (*db->put)(db, key, val, 0))
1196 return;
1197
1198 perror(dbn);
1199 exit((int)MANDOCLEVEL_SYSERR);
1200 /* NOTREACHED */
1201 }
1202
1203 /*
1204 * Call out to per-macro handlers after clearing the persistent database
1205 * key. If the macro sets the database key, flush it to the database.
1206 */
1207 static void
1208 pmdoc_node(MDOC_ARGS)
1209 {
1210
1211 if (NULL == n)
1212 return;
1213
1214 switch (n->type) {
1215 case (MDOC_HEAD):
1216 /* FALLTHROUGH */
1217 case (MDOC_BODY):
1218 /* FALLTHROUGH */
1219 case (MDOC_TAIL):
1220 /* FALLTHROUGH */
1221 case (MDOC_BLOCK):
1222 /* FALLTHROUGH */
1223 case (MDOC_ELEM):
1224 buf->len = 0;
1225
1226 /*
1227 * Both NULL handlers and handlers returning true
1228 * request using the data. Only skip the element
1229 * when the handler returns false.
1230 */
1231
1232 if (NULL != mdocs[n->tok].fp &&
1233 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1234 break;
1235
1236 /*
1237 * For many macros, use the text from all children.
1238 * Set zero flags for macros not needing this.
1239 * In that case, the handler must fill the buffer.
1240 */
1241
1242 if (MDOCF_CHILD & mdocs[n->tok].flags)
1243 buf_appendmdoc(buf, n->child, 0);
1244
1245 /*
1246 * Cover the most common case:
1247 * Automatically stage one string per element.
1248 * Set a zero mask for macros not needing this.
1249 * Additional staging can be done in the handler.
1250 */
1251
1252 if (mdocs[n->tok].mask)
1253 hash_put(hash, buf, mdocs[n->tok].mask);
1254 break;
1255 default:
1256 break;
1257 }
1258
1259 pmdoc_node(hash, buf, dbuf, n->child, m);
1260 pmdoc_node(hash, buf, dbuf, n->next, m);
1261 }
1262
1263 static int
1264 pman_node(MAN_ARGS)
1265 {
1266 const struct man_node *head, *body;
1267 const char *start, *sv;
1268 size_t sz;
1269
1270 if (NULL == n)
1271 return(0);
1272
1273 /*
1274 * We're only searching for one thing: the first text child in
1275 * the BODY of a NAME section. Since we don't keep track of
1276 * sections in -man, run some hoops to find out whether we're in
1277 * the correct section or not.
1278 */
1279
1280 if (MAN_BODY == n->type && MAN_SH == n->tok) {
1281 body = n;
1282 assert(body->parent);
1283 if (NULL != (head = body->parent->head) &&
1284 1 == head->nchild &&
1285 NULL != (head = (head->child)) &&
1286 MAN_TEXT == head->type &&
1287 0 == strcmp(head->string, "NAME") &&
1288 NULL != (body = body->child) &&
1289 MAN_TEXT == body->type) {
1290
1291 assert(body->string);
1292 start = sv = body->string;
1293
1294 /*
1295 * Go through a special heuristic dance here.
1296 * This is why -man manuals are great!
1297 * (I'm being sarcastic: my eyes are bleeding.)
1298 * Conventionally, one or more manual names are
1299 * comma-specified prior to a whitespace, then a
1300 * dash, then a description. Try to puzzle out
1301 * the name parts here.
1302 */
1303
1304 for ( ;; ) {
1305 sz = strcspn(start, " ,");
1306 if ('\0' == start[(int)sz])
1307 break;
1308
1309 buf->len = 0;
1310 buf_appendb(buf, start, sz);
1311 buf_appendb(buf, "", 1);
1312
1313 hash_put(hash, buf, TYPE_Nm);
1314
1315 if (' ' == start[(int)sz]) {
1316 start += (int)sz + 1;
1317 break;
1318 }
1319
1320 assert(',' == start[(int)sz]);
1321 start += (int)sz + 1;
1322 while (' ' == *start)
1323 start++;
1324 }
1325
1326 buf->len = 0;
1327
1328 if (sv == start) {
1329 buf_append(buf, start);
1330 return(1);
1331 }
1332
1333 while (' ' == *start)
1334 start++;
1335
1336 if (0 == strncmp(start, "-", 1))
1337 start += 1;
1338 else if (0 == strncmp(start, "\\-\\-", 4))
1339 start += 4;
1340 else if (0 == strncmp(start, "\\-", 2))
1341 start += 2;
1342 else if (0 == strncmp(start, "\\(en", 4))
1343 start += 4;
1344 else if (0 == strncmp(start, "\\(em", 4))
1345 start += 4;
1346
1347 while (' ' == *start)
1348 start++;
1349
1350 sz = strlen(start) + 1;
1351 buf_appendb(dbuf, start, sz);
1352 buf_appendb(buf, start, sz);
1353
1354 hash_put(hash, buf, TYPE_Nd);
1355 }
1356 }
1357
1358 for (n = n->child; n; n = n->next)
1359 if (pman_node(hash, buf, dbuf, n))
1360 return(1);
1361
1362 return(0);
1363 }
1364
1365 /*
1366 * Parse a formatted manual page.
1367 * By necessity, this involves rather crude guesswork.
1368 */
1369 static void
1370 pformatted(DB *hash, struct buf *buf,
1371 struct buf *dbuf, const struct of *of)
1372 {
1373 FILE *stream;
1374 char *line, *p, *title;
1375 size_t len, plen, titlesz;
1376
1377 if (NULL == (stream = fopen(of->fname, "r"))) {
1378 if (warnings)
1379 perror(of->fname);
1380 return;
1381 }
1382
1383 /*
1384 * Always use the title derived from the filename up front,
1385 * do not even try to find it in the file. This also makes
1386 * sure we don't end up with an orphan index record, even if
1387 * the file content turns out to be completely unintelligible.
1388 */
1389
1390 buf->len = 0;
1391 buf_append(buf, of->title);
1392 hash_put(hash, buf, TYPE_Nm);
1393
1394 /* Skip to first blank line. */
1395
1396 while (NULL != (line = fgetln(stream, &len)))
1397 if ('\n' == *line)
1398 break;
1399
1400 /*
1401 * Assume the first line that is not indented
1402 * is the first section header. Skip to it.
1403 */
1404
1405 while (NULL != (line = fgetln(stream, &len)))
1406 if ('\n' != *line && ' ' != *line)
1407 break;
1408
1409 /*
1410 * Read up until the next section into a buffer.
1411 * Strip the leading and trailing newline from each read line,
1412 * appending a trailing space.
1413 * Ignore empty (whitespace-only) lines.
1414 */
1415
1416 titlesz = 0;
1417 title = NULL;
1418
1419 while (NULL != (line = fgetln(stream, &len))) {
1420 if (' ' != *line || '\n' != line[(int)len - 1])
1421 break;
1422 while (len > 0 && isspace((unsigned char)*line)) {
1423 line++;
1424 len--;
1425 }
1426 if (1 == len)
1427 continue;
1428 title = mandoc_realloc(title, titlesz + len);
1429 memcpy(title + titlesz, line, len);
1430 titlesz += len;
1431 title[(int)titlesz - 1] = ' ';
1432 }
1433
1434
1435 /*
1436 * If no page content can be found, or the input line
1437 * is already the next section header, or there is no
1438 * trailing newline, reuse the page title as the page
1439 * description.
1440 */
1441
1442 if (NULL == title || '\0' == *title) {
1443 if (warnings)
1444 fprintf(stderr, "%s: cannot find NAME section\n",
1445 of->fname);
1446 buf_appendb(dbuf, buf->cp, buf->size);
1447 hash_put(hash, buf, TYPE_Nd);
1448 fclose(stream);
1449 free(title);
1450 return;
1451 }
1452
1453 title = mandoc_realloc(title, titlesz + 1);
1454 title[(int)titlesz] = '\0';
1455
1456 /*
1457 * Skip to the first dash.
1458 * Use the remaining line as the description (no more than 70
1459 * bytes).
1460 */
1461
1462 if (NULL != (p = strstr(title, "- "))) {
1463 for (p += 2; ' ' == *p || '\b' == *p; p++)
1464 /* Skip to next word. */ ;
1465 } else {
1466 if (warnings)
1467 fprintf(stderr, "%s: no dash in title line\n",
1468 of->fname);
1469 p = title;
1470 }
1471
1472 plen = strlen(p);
1473
1474 /* Strip backspace-encoding from line. */
1475
1476 while (NULL != (line = memchr(p, '\b', plen))) {
1477 len = line - p;
1478 if (0 == len) {
1479 memmove(line, line + 1, plen--);
1480 continue;
1481 }
1482 memmove(line - 1, line + 1, plen - len);
1483 plen -= 2;
1484 }
1485
1486 buf_appendb(dbuf, p, plen + 1);
1487 buf->len = 0;
1488 buf_appendb(buf, p, plen + 1);
1489 hash_put(hash, buf, TYPE_Nd);
1490 fclose(stream);
1491 free(title);
1492 }
1493
1494 static void
1495 ofile_argbuild(int argc, char *argv[], struct of **of)
1496 {
1497 char buf[MAXPATHLEN];
1498 const char *sec, *arch, *title;
1499 char *p;
1500 int i, src_form;
1501 struct of *nof;
1502
1503 for (i = 0; i < argc; i++) {
1504
1505 /*
1506 * Try to infer the manual section, architecture and
1507 * page title from the path, assuming it looks like
1508 * man*[/<arch>]/<title>.<section> or
1509 * cat<section>[/<arch>]/<title>.0
1510 */
1511
1512 if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
1513 fprintf(stderr, "%s: path too long\n", argv[i]);
1514 continue;
1515 }
1516 sec = arch = title = "";
1517 src_form = 0;
1518 p = strrchr(buf, '\0');
1519 while (p-- > buf) {
1520 if ('\0' == *sec && '.' == *p) {
1521 sec = p + 1;
1522 *p = '\0';
1523 if ('0' == *sec)
1524 src_form |= MANDOC_FORM;
1525 else if ('1' <= *sec && '9' >= *sec)
1526 src_form |= MANDOC_SRC;
1527 continue;
1528 }
1529 if ('/' != *p)
1530 continue;
1531 if ('\0' == *title) {
1532 title = p + 1;
1533 *p = '\0';
1534 continue;
1535 }
1536 if (0 == strncmp("man", p + 1, 3))
1537 src_form |= MANDOC_SRC;
1538 else if (0 == strncmp("cat", p + 1, 3))
1539 src_form |= MANDOC_FORM;
1540 else
1541 arch = p + 1;
1542 break;
1543 }
1544 if ('\0' == *title) {
1545 if (warnings)
1546 fprintf(stderr,
1547 "%s: cannot deduce title "
1548 "from filename\n",
1549 argv[i]);
1550 title = buf;
1551 }
1552
1553 /*
1554 * Build the file structure.
1555 */
1556
1557 nof = mandoc_calloc(1, sizeof(struct of));
1558 nof->fname = mandoc_strdup(argv[i]);
1559 nof->sec = mandoc_strdup(sec);
1560 nof->arch = mandoc_strdup(arch);
1561 nof->title = mandoc_strdup(title);
1562 nof->src_form = src_form;
1563
1564 /*
1565 * Add the structure to the list.
1566 */
1567
1568 if (verb > 1)
1569 printf("%s: scheduling\n", argv[i]);
1570 if (NULL == *of) {
1571 *of = nof;
1572 (*of)->first = nof;
1573 } else {
1574 nof->first = (*of)->first;
1575 (*of)->next = nof;
1576 *of = nof;
1577 }
1578 }
1579 }
1580
1581 /*
1582 * Recursively build up a list of files to parse.
1583 * We use this instead of ftw() and so on because I don't want global
1584 * variables hanging around.
1585 * This ignores the whatis.db and whatis.index files, but assumes that
1586 * everything else is a manual.
1587 * Pass in a pointer to a NULL structure for the first invocation.
1588 */
1589 static void
1590 ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1591 int p_src_form, struct of **of)
1592 {
1593 char buf[MAXPATHLEN];
1594 size_t sz;
1595 DIR *d;
1596 const char *fn, *sec, *arch;
1597 char *p, *q, *suffix;
1598 struct of *nof;
1599 struct dirent *dp;
1600 int src_form;
1601
1602 if (NULL == (d = opendir(dir))) {
1603 if (warnings)
1604 perror(dir);
1605 return;
1606 }
1607
1608 while (NULL != (dp = readdir(d))) {
1609 fn = dp->d_name;
1610
1611 if ('.' == *fn)
1612 continue;
1613
1614 src_form = p_src_form;
1615
1616 if (DT_DIR == dp->d_type) {
1617 sec = psec;
1618 arch = parch;
1619
1620 /*
1621 * By default, only use directories called:
1622 * man<section>/[<arch>/] or
1623 * cat<section>/[<arch>/]
1624 */
1625
1626 if ('\0' == *sec) {
1627 if(0 == strncmp("man", fn, 3)) {
1628 src_form |= MANDOC_SRC;
1629 sec = fn + 3;
1630 } else if (0 == strncmp("cat", fn, 3)) {
1631 src_form |= MANDOC_FORM;
1632 sec = fn + 3;
1633 } else {
1634 if (warnings) fprintf(stderr,
1635 "%s/%s: bad section\n",
1636 dir, fn);
1637 if (use_all)
1638 sec = fn;
1639 else
1640 continue;
1641 }
1642 } else if ('\0' == *arch) {
1643 if (NULL != strchr(fn, '.')) {
1644 if (warnings) fprintf(stderr,
1645 "%s/%s: bad architecture\n",
1646 dir, fn);
1647 if (0 == use_all)
1648 continue;
1649 }
1650 arch = fn;
1651 } else {
1652 if (warnings) fprintf(stderr, "%s/%s: "
1653 "excessive subdirectory\n", dir, fn);
1654 if (0 == use_all)
1655 continue;
1656 }
1657
1658 buf[0] = '\0';
1659 strlcat(buf, dir, MAXPATHLEN);
1660 strlcat(buf, "/", MAXPATHLEN);
1661 sz = strlcat(buf, fn, MAXPATHLEN);
1662
1663 if (MAXPATHLEN <= sz) {
1664 if (warnings) fprintf(stderr, "%s/%s: "
1665 "path too long\n", dir, fn);
1666 continue;
1667 }
1668
1669 if (verb > 1)
1670 printf("%s: scanning\n", buf);
1671
1672 ofile_dirbuild(buf, sec, arch, src_form, of);
1673 continue;
1674 }
1675
1676 if (DT_REG != dp->d_type) {
1677 if (warnings)
1678 fprintf(stderr,
1679 "%s/%s: not a regular file\n",
1680 dir, fn);
1681 continue;
1682 }
1683 if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1684 continue;
1685 if ('\0' == *psec) {
1686 if (warnings)
1687 fprintf(stderr,
1688 "%s/%s: file outside section\n",
1689 dir, fn);
1690 if (0 == use_all)
1691 continue;
1692 }
1693
1694 /*
1695 * By default, skip files where the file name suffix
1696 * does not agree with the section directory
1697 * they are located in.
1698 */
1699
1700 suffix = strrchr(fn, '.');
1701 if (NULL == suffix) {
1702 if (warnings)
1703 fprintf(stderr,
1704 "%s/%s: no filename suffix\n",
1705 dir, fn);
1706 if (0 == use_all)
1707 continue;
1708 } else if ((MANDOC_SRC & src_form &&
1709 strcmp(suffix + 1, psec)) ||
1710 (MANDOC_FORM & src_form &&
1711 strcmp(suffix + 1, "0"))) {
1712 if (warnings)
1713 fprintf(stderr,
1714 "%s/%s: wrong filename suffix\n",
1715 dir, fn);
1716 if (0 == use_all)
1717 continue;
1718 if ('0' == suffix[1])
1719 src_form |= MANDOC_FORM;
1720 else if ('1' <= suffix[1] && '9' >= suffix[1])
1721 src_form |= MANDOC_SRC;
1722 }
1723
1724 /*
1725 * Skip formatted manuals if a source version is
1726 * available. Ignore the age: it is very unlikely
1727 * that people install newer formatted base manuals
1728 * when they used to have source manuals before,
1729 * and in ports, old manuals get removed on update.
1730 */
1731 if (0 == use_all && MANDOC_FORM & src_form &&
1732 '\0' != *psec) {
1733 buf[0] = '\0';
1734 strlcat(buf, dir, MAXPATHLEN);
1735 p = strrchr(buf, '/');
1736 if ('\0' != *parch && NULL != p)
1737 for (p--; p > buf; p--)
1738 if ('/' == *p)
1739 break;
1740 if (NULL == p)
1741 p = buf;
1742 else
1743 p++;
1744 if (0 == strncmp("cat", p, 3))
1745 memcpy(p, "man", 3);
1746 strlcat(buf, "/", MAXPATHLEN);
1747 sz = strlcat(buf, fn, MAXPATHLEN);
1748 if (sz >= MAXPATHLEN) {
1749 if (warnings) fprintf(stderr,
1750 "%s/%s: path too long\n",
1751 dir, fn);
1752 continue;
1753 }
1754 q = strrchr(buf, '.');
1755 if (NULL != q && p < q++) {
1756 *q = '\0';
1757 sz = strlcat(buf, psec, MAXPATHLEN);
1758 if (sz >= MAXPATHLEN) {
1759 if (warnings) fprintf(stderr,
1760 "%s/%s: path too long\n",
1761 dir, fn);
1762 continue;
1763 }
1764 if (0 == access(buf, R_OK))
1765 continue;
1766 }
1767 }
1768
1769 buf[0] = '\0';
1770 assert('.' == dir[0]);
1771 if ('/' == dir[1]) {
1772 strlcat(buf, dir + 2, MAXPATHLEN);
1773 strlcat(buf, "/", MAXPATHLEN);
1774 }
1775 sz = strlcat(buf, fn, MAXPATHLEN);
1776 if (sz >= MAXPATHLEN) {
1777 if (warnings) fprintf(stderr,
1778 "%s/%s: path too long\n", dir, fn);
1779 continue;
1780 }
1781
1782 nof = mandoc_calloc(1, sizeof(struct of));
1783 nof->fname = mandoc_strdup(buf);
1784 nof->sec = mandoc_strdup(psec);
1785 nof->arch = mandoc_strdup(parch);
1786 nof->src_form = src_form;
1787
1788 /*
1789 * Remember the file name without the extension,
1790 * to be used as the page title in the database.
1791 */
1792
1793 if (NULL != suffix)
1794 *suffix = '\0';
1795 nof->title = mandoc_strdup(fn);
1796
1797 /*
1798 * Add the structure to the list.
1799 */
1800
1801 if (verb > 1)
1802 printf("%s: scheduling\n", buf);
1803
1804 if (NULL == *of) {
1805 *of = nof;
1806 (*of)->first = nof;
1807 } else {
1808 nof->first = (*of)->first;
1809 (*of)->next = nof;
1810 *of = nof;
1811 }
1812 }
1813
1814 closedir(d);
1815 }
1816
1817 static void
1818 ofile_free(struct of *of)
1819 {
1820 struct of *nof;
1821
1822 if (NULL != of)
1823 of = of->first;
1824
1825 while (NULL != of) {
1826 nof = of->next;
1827 free(of->fname);
1828 free(of->sec);
1829 free(of->arch);
1830 free(of->title);
1831 free(of);
1832 of = nof;
1833 }
1834 }