]>
git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
1 /* $Id: mandocdb.c,v 1.43 2011/12/31 18:47:52 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #include <sys/param.h>
23 #include <sys/types.h>
36 #if defined(__linux__)
39 #elif defined(__APPLE__)
40 # include <libkern/OSByteOrder.h>
52 #define MANDOC_BUFSZ BUFSIZ
53 #define MANDOC_SLOP 1024
55 #define MANDOC_SRC 0x1
56 #define MANDOC_FORM 0x2
58 /* Access to the mandoc database on disk. */
61 char idxn
[MAXPATHLEN
]; /* index db filename */
62 char dbn
[MAXPATHLEN
]; /* keyword db filename */
63 DB
*idx
; /* index recno database */
64 DB
*db
; /* keyword btree database */
67 /* Stack of temporarily unused index records. */
70 recno_t
*stack
; /* pointer to a malloc'ed array */
71 size_t size
; /* number of allocated slots */
72 size_t cur
; /* current number of empty records */
73 recno_t last
; /* last record number in the index */
76 /* Tiny list for files. No need to bring in QUEUE. */
79 char *fname
; /* heap-allocated */
84 struct of
*next
; /* NULL for last one */
85 struct of
*first
; /* first in list */
88 /* Buffer for storing growable data. */
92 size_t len
; /* current length */
93 size_t size
; /* total buffer size */
96 /* Operation we're going to perform. */
99 OP_DEFAULT
= 0, /* new dbs from dir list or default config */
100 OP_CONFFILE
, /* new databases from custom config file */
101 OP_UPDATE
, /* delete/add entries in existing database */
102 OP_DELETE
, /* delete entries from existing database */
103 OP_TEST
/* change no databases, report potential problems */
106 #define MAN_ARGS DB *hash, \
109 const struct man_node *n
110 #define MDOC_ARGS DB *hash, \
113 const struct mdoc_node *n, \
114 const struct mdoc_meta *m
116 static void buf_appendmdoc(struct buf
*,
117 const struct mdoc_node
*, int);
118 static void buf_append(struct buf
*, const char *);
119 static void buf_appendb(struct buf
*,
120 const void *, size_t);
121 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
122 static void hash_put(DB
*, const struct buf
*, uint64_t);
123 static void hash_reset(DB
**);
124 static void index_merge(const struct of
*, struct mparse
*,
125 struct buf
*, struct buf
*, DB
*,
126 struct mdb
*, struct recs
*);
127 static void index_prune(const struct of
*, struct mdb
*,
129 static void ofile_argbuild(int, char *[], struct of
**);
130 static void ofile_dirbuild(const char *, const char *,
131 const char *, int, struct of
**);
132 static void ofile_free(struct of
*);
133 static void pformatted(DB
*, struct buf
*,
134 struct buf
*, const struct of
*);
135 static int pman_node(MAN_ARGS
);
136 static void pmdoc_node(MDOC_ARGS
);
137 static int pmdoc_head(MDOC_ARGS
);
138 static int pmdoc_body(MDOC_ARGS
);
139 static int pmdoc_Fd(MDOC_ARGS
);
140 static int pmdoc_In(MDOC_ARGS
);
141 static int pmdoc_Fn(MDOC_ARGS
);
142 static int pmdoc_Nd(MDOC_ARGS
);
143 static int pmdoc_Nm(MDOC_ARGS
);
144 static int pmdoc_Sh(MDOC_ARGS
);
145 static int pmdoc_St(MDOC_ARGS
);
146 static int pmdoc_Xr(MDOC_ARGS
);
148 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
150 struct mdoc_handler
{
151 int (*fp
)(MDOC_ARGS
); /* Optional handler. */
152 uint64_t mask
; /* Set unless handler returns 0. */
153 int flags
; /* For use by pmdoc_node. */
156 static const struct mdoc_handler mdocs
[MDOC_MAX
] = {
157 { NULL
, 0, 0 }, /* Ap */
158 { NULL
, 0, 0 }, /* Dd */
159 { NULL
, 0, 0 }, /* Dt */
160 { NULL
, 0, 0 }, /* Os */
161 { pmdoc_Sh
, TYPE_Sh
, MDOCF_CHILD
}, /* Sh */
162 { pmdoc_head
, TYPE_Ss
, MDOCF_CHILD
}, /* Ss */
163 { NULL
, 0, 0 }, /* Pp */
164 { NULL
, 0, 0 }, /* D1 */
165 { NULL
, 0, 0 }, /* Dl */
166 { NULL
, 0, 0 }, /* Bd */
167 { NULL
, 0, 0 }, /* Ed */
168 { NULL
, 0, 0 }, /* Bl */
169 { NULL
, 0, 0 }, /* El */
170 { NULL
, 0, 0 }, /* It */
171 { NULL
, 0, 0 }, /* Ad */
172 { NULL
, TYPE_An
, MDOCF_CHILD
}, /* An */
173 { NULL
, TYPE_Ar
, MDOCF_CHILD
}, /* Ar */
174 { NULL
, TYPE_Cd
, MDOCF_CHILD
}, /* Cd */
175 { NULL
, TYPE_Cm
, MDOCF_CHILD
}, /* Cm */
176 { NULL
, TYPE_Dv
, MDOCF_CHILD
}, /* Dv */
177 { NULL
, TYPE_Er
, MDOCF_CHILD
}, /* Er */
178 { NULL
, TYPE_Ev
, MDOCF_CHILD
}, /* Ev */
179 { NULL
, 0, 0 }, /* Ex */
180 { NULL
, TYPE_Fa
, MDOCF_CHILD
}, /* Fa */
181 { pmdoc_Fd
, TYPE_In
, 0 }, /* Fd */
182 { NULL
, TYPE_Fl
, MDOCF_CHILD
}, /* Fl */
183 { pmdoc_Fn
, 0, 0 }, /* Fn */
184 { NULL
, TYPE_Ft
, MDOCF_CHILD
}, /* Ft */
185 { NULL
, TYPE_Ic
, MDOCF_CHILD
}, /* Ic */
186 { pmdoc_In
, TYPE_In
, 0 }, /* In */
187 { NULL
, TYPE_Li
, MDOCF_CHILD
}, /* Li */
188 { pmdoc_Nd
, TYPE_Nd
, MDOCF_CHILD
}, /* Nd */
189 { pmdoc_Nm
, TYPE_Nm
, MDOCF_CHILD
}, /* Nm */
190 { NULL
, 0, 0 }, /* Op */
191 { NULL
, 0, 0 }, /* Ot */
192 { NULL
, TYPE_Pa
, MDOCF_CHILD
}, /* Pa */
193 { NULL
, 0, 0 }, /* Rv */
194 { pmdoc_St
, TYPE_St
, 0 }, /* St */
195 { NULL
, TYPE_Va
, MDOCF_CHILD
}, /* Va */
196 { pmdoc_body
, TYPE_Va
, MDOCF_CHILD
}, /* Vt */
197 { pmdoc_Xr
, TYPE_Xr
, 0 }, /* Xr */
198 { NULL
, 0, 0 }, /* %A */
199 { NULL
, 0, 0 }, /* %B */
200 { NULL
, 0, 0 }, /* %D */
201 { NULL
, 0, 0 }, /* %I */
202 { NULL
, 0, 0 }, /* %J */
203 { NULL
, 0, 0 }, /* %N */
204 { NULL
, 0, 0 }, /* %O */
205 { NULL
, 0, 0 }, /* %P */
206 { NULL
, 0, 0 }, /* %R */
207 { NULL
, 0, 0 }, /* %T */
208 { NULL
, 0, 0 }, /* %V */
209 { NULL
, 0, 0 }, /* Ac */
210 { NULL
, 0, 0 }, /* Ao */
211 { NULL
, 0, 0 }, /* Aq */
212 { NULL
, TYPE_At
, MDOCF_CHILD
}, /* At */
213 { NULL
, 0, 0 }, /* Bc */
214 { NULL
, 0, 0 }, /* Bf */
215 { NULL
, 0, 0 }, /* Bo */
216 { NULL
, 0, 0 }, /* Bq */
217 { NULL
, TYPE_Bsx
, MDOCF_CHILD
}, /* Bsx */
218 { NULL
, TYPE_Bx
, MDOCF_CHILD
}, /* Bx */
219 { NULL
, 0, 0 }, /* Db */
220 { NULL
, 0, 0 }, /* Dc */
221 { NULL
, 0, 0 }, /* Do */
222 { NULL
, 0, 0 }, /* Dq */
223 { NULL
, 0, 0 }, /* Ec */
224 { NULL
, 0, 0 }, /* Ef */
225 { NULL
, TYPE_Em
, MDOCF_CHILD
}, /* Em */
226 { NULL
, 0, 0 }, /* Eo */
227 { NULL
, TYPE_Fx
, MDOCF_CHILD
}, /* Fx */
228 { NULL
, TYPE_Ms
, MDOCF_CHILD
}, /* Ms */
229 { NULL
, 0, 0 }, /* No */
230 { NULL
, 0, 0 }, /* Ns */
231 { NULL
, TYPE_Nx
, MDOCF_CHILD
}, /* Nx */
232 { NULL
, TYPE_Ox
, MDOCF_CHILD
}, /* Ox */
233 { NULL
, 0, 0 }, /* Pc */
234 { NULL
, 0, 0 }, /* Pf */
235 { NULL
, 0, 0 }, /* Po */
236 { NULL
, 0, 0 }, /* Pq */
237 { NULL
, 0, 0 }, /* Qc */
238 { NULL
, 0, 0 }, /* Ql */
239 { NULL
, 0, 0 }, /* Qo */
240 { NULL
, 0, 0 }, /* Qq */
241 { NULL
, 0, 0 }, /* Re */
242 { NULL
, 0, 0 }, /* Rs */
243 { NULL
, 0, 0 }, /* Sc */
244 { NULL
, 0, 0 }, /* So */
245 { NULL
, 0, 0 }, /* Sq */
246 { NULL
, 0, 0 }, /* Sm */
247 { NULL
, 0, 0 }, /* Sx */
248 { NULL
, TYPE_Sy
, MDOCF_CHILD
}, /* Sy */
249 { NULL
, TYPE_Tn
, MDOCF_CHILD
}, /* Tn */
250 { NULL
, 0, 0 }, /* Ux */
251 { NULL
, 0, 0 }, /* Xc */
252 { NULL
, 0, 0 }, /* Xo */
253 { pmdoc_head
, TYPE_Fn
, 0 }, /* Fo */
254 { NULL
, 0, 0 }, /* Fc */
255 { NULL
, 0, 0 }, /* Oo */
256 { NULL
, 0, 0 }, /* Oc */
257 { NULL
, 0, 0 }, /* Bk */
258 { NULL
, 0, 0 }, /* Ek */
259 { NULL
, 0, 0 }, /* Bt */
260 { NULL
, 0, 0 }, /* Hf */
261 { NULL
, 0, 0 }, /* Fr */
262 { NULL
, 0, 0 }, /* Ud */
263 { NULL
, TYPE_Lb
, MDOCF_CHILD
}, /* Lb */
264 { NULL
, 0, 0 }, /* Lp */
265 { NULL
, TYPE_Lk
, MDOCF_CHILD
}, /* Lk */
266 { NULL
, TYPE_Mt
, MDOCF_CHILD
}, /* Mt */
267 { NULL
, 0, 0 }, /* Brq */
268 { NULL
, 0, 0 }, /* Bro */
269 { NULL
, 0, 0 }, /* Brc */
270 { NULL
, 0, 0 }, /* %C */
271 { NULL
, 0, 0 }, /* Es */
272 { NULL
, 0, 0 }, /* En */
273 { NULL
, TYPE_Dx
, MDOCF_CHILD
}, /* Dx */
274 { NULL
, 0, 0 }, /* %Q */
275 { NULL
, 0, 0 }, /* br */
276 { NULL
, 0, 0 }, /* sp */
277 { NULL
, 0, 0 }, /* %U */
278 { NULL
, 0, 0 }, /* Ta */
281 static const char *progname
;
282 static int use_all
; /* Use all directories and files. */
283 static int verb
; /* Output verbosity level. */
284 static int warnings
; /* Potential problems in manuals. */
287 main(int argc
, char *argv
[])
289 struct mparse
*mp
; /* parse sequence */
290 struct manpaths dirs
;
293 enum op op
; /* current operation */
298 DB
*hash
; /* temporary keyword hashtable */
299 BTREEINFO info
; /* btree configuration */
301 struct buf buf
, /* keyword buffer */
302 dbuf
; /* description buffer */
303 struct of
*of
; /* list of files for processing */
307 progname
= strrchr(argv
[0], '/');
308 if (progname
== NULL
)
313 memset(&dirs
, 0, sizeof(struct manpaths
));
314 memset(&mdb
, 0, sizeof(struct mdb
));
315 memset(&recs
, 0, sizeof(struct recs
));
323 while (-1 != (ch
= getopt(argc
, argv
, "aC:d:tu:vW")))
331 "-C: conflicting options\n");
340 "-d: conflicting options\n");
347 dup2(STDOUT_FILENO
, STDERR_FILENO
);
350 "-t: conflicting options\n");
360 "-u: conflicting options\n");
379 if (OP_CONFFILE
== op
&& argc
> 0) {
380 fprintf(stderr
, "-C: too many arguments\n");
384 memset(&info
, 0, sizeof(BTREEINFO
));
387 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
389 memset(&buf
, 0, sizeof(struct buf
));
390 memset(&dbuf
, 0, sizeof(struct buf
));
392 buf
.size
= dbuf
.size
= MANDOC_BUFSZ
;
394 buf
.cp
= mandoc_malloc(buf
.size
);
395 dbuf
.cp
= mandoc_malloc(dbuf
.size
);
397 flags
= O_CREAT
| O_RDWR
;
398 if (OP_DEFAULT
== op
|| OP_CONFFILE
== op
)
402 ofile_argbuild(argc
, argv
, &of
);
405 index_merge(of
, mp
, &dbuf
, &buf
, hash
, &mdb
, &recs
);
409 if (OP_UPDATE
== op
|| OP_DELETE
== op
) {
410 strlcat(mdb
.dbn
, dir
, MAXPATHLEN
);
411 strlcat(mdb
.dbn
, "/", MAXPATHLEN
);
412 sz1
= strlcat(mdb
.dbn
, MANDOC_DB
, MAXPATHLEN
);
414 strlcat(mdb
.idxn
, dir
, MAXPATHLEN
);
415 strlcat(mdb
.idxn
, "/", MAXPATHLEN
);
416 sz2
= strlcat(mdb
.idxn
, MANDOC_IDX
, MAXPATHLEN
);
418 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
419 fprintf(stderr
, "%s: path too long\n", dir
);
420 exit((int)MANDOCLEVEL_BADARG
);
423 mdb
.db
= dbopen(mdb
.dbn
, flags
, 0644, DB_BTREE
, &info
);
424 mdb
.idx
= dbopen(mdb
.idxn
, flags
, 0644, DB_RECNO
, NULL
);
426 if (NULL
== mdb
.db
) {
428 exit((int)MANDOCLEVEL_SYSERR
);
429 } else if (NULL
== mdb
.idx
) {
431 exit((int)MANDOCLEVEL_SYSERR
);
434 ofile_argbuild(argc
, argv
, &of
);
439 index_prune(of
, &mdb
, &recs
);
442 * Go to the root of the respective manual tree.
443 * This must work or no manuals may be found (they're
444 * indexed relative to the root).
447 if (OP_UPDATE
== op
) {
448 if (-1 == chdir(dir
)) {
450 exit((int)MANDOCLEVEL_SYSERR
);
452 index_merge(of
, mp
, &dbuf
, &buf
, hash
,
460 * Configure the directories we're going to scan.
461 * If we have command-line arguments, use them.
462 * If not, we use man(1)'s method (see mandocdb.8).
466 dirs
.paths
= mandoc_calloc(argc
, sizeof(char *));
468 for (i
= 0; i
< argc
; i
++) {
469 if (NULL
== (cp
= realpath(argv
[i
], pbuf
))) {
473 dirs
.paths
[i
] = mandoc_strdup(cp
);
476 manpath_parse(&dirs
, dir
, NULL
, NULL
);
478 for (i
= 0; i
< dirs
.sz
; i
++) {
479 mdb
.idxn
[0] = mdb
.dbn
[0] = '\0';
481 strlcat(mdb
.dbn
, dirs
.paths
[i
], MAXPATHLEN
);
482 strlcat(mdb
.dbn
, "/", MAXPATHLEN
);
483 sz1
= strlcat(mdb
.dbn
, MANDOC_DB
, MAXPATHLEN
);
485 strlcat(mdb
.idxn
, dirs
.paths
[i
], MAXPATHLEN
);
486 strlcat(mdb
.idxn
, "/", MAXPATHLEN
);
487 sz2
= strlcat(mdb
.idxn
, MANDOC_IDX
, MAXPATHLEN
);
489 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
490 fprintf(stderr
, "%s: path too long\n",
492 exit((int)MANDOCLEVEL_BADARG
);
496 (*mdb
.db
->close
)(mdb
.db
);
498 (*mdb
.idx
->close
)(mdb
.idx
);
500 mdb
.db
= dbopen(mdb
.dbn
, flags
, 0644, DB_BTREE
, &info
);
501 mdb
.idx
= dbopen(mdb
.idxn
, flags
, 0644, DB_RECNO
, NULL
);
503 if (NULL
== mdb
.db
) {
505 exit((int)MANDOCLEVEL_SYSERR
);
506 } else if (NULL
== mdb
.idx
) {
508 exit((int)MANDOCLEVEL_SYSERR
);
514 if (-1 == chdir(dirs
.paths
[i
])) {
515 perror(dirs
.paths
[i
]);
516 exit((int)MANDOCLEVEL_SYSERR
);
519 ofile_dirbuild(".", "", "", 0, &of
);
524 * Go to the root of the respective manual tree.
525 * This must work or no manuals may be found (they're
526 * indexed relative to the root).
529 if (-1 == chdir(dirs
.paths
[i
])) {
530 perror(dirs
.paths
[i
]);
531 exit((int)MANDOCLEVEL_SYSERR
);
534 index_merge(of
, mp
, &dbuf
, &buf
, hash
, &mdb
, &recs
);
539 (*mdb
.db
->close
)(mdb
.db
);
541 (*mdb
.idx
->close
)(mdb
.idx
);
543 (*hash
->close
)(hash
);
553 return(MANDOCLEVEL_OK
);
557 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
558 " -d dir [file ...] | "
559 "-u dir [file ...]\n",
562 return((int)MANDOCLEVEL_BADARG
);
566 index_merge(const struct of
*of
, struct mparse
*mp
,
567 struct buf
*dbuf
, struct buf
*buf
, DB
*hash
,
568 struct mdb
*mdb
, struct recs
*recs
)
575 const char *fn
, *msec
, *march
, *mtitle
;
583 for (of
= of
->first
; of
; of
= of
->next
) {
587 * Try interpreting the file as mdoc(7) or man(7)
588 * source code, unless it is already known to be
589 * formatted. Fall back to formatted mode.
596 if ((MANDOC_SRC
& of
->src_form
||
597 ! (MANDOC_FORM
& of
->src_form
)) &&
598 MANDOCLEVEL_FATAL
> mparse_readfd(mp
, -1, fn
))
599 mparse_result(mp
, &mdoc
, &man
);
602 msec
= mdoc_meta(mdoc
)->msec
;
603 march
= mdoc_meta(mdoc
)->arch
;
606 mtitle
= mdoc_meta(mdoc
)->title
;
607 } else if (NULL
!= man
) {
608 msec
= man_meta(man
)->msec
;
610 mtitle
= man_meta(man
)->title
;
618 * By default, skip a file if the manual section
619 * given in the file disagrees with the directory
620 * where the file is located.
626 if (strcasecmp(msec
, of
->sec
)) {
628 fprintf(stderr
, "%s: "
629 "section \"%s\" manual "
630 "in \"%s\" directory\n",
636 * Manual page directories exist for each kernel
637 * architecture as returned by machine(1).
638 * However, many manuals only depend on the
639 * application architecture as returned by arch(1).
640 * For example, some (2/ARM) manuals are shared
641 * across the "armish" and "zaurus" kernel
643 * A few manuals are even shared across completely
644 * different architectures, for example fdformat(1)
645 * on amd64, i386, sparc, and sparc64.
646 * Thus, warn about architecture mismatches,
647 * but don't skip manuals for this reason.
652 if (strcasecmp(march
, of
->arch
)) {
654 fprintf(stderr
, "%s: "
655 "architecture \"%s\" manual "
656 "in \"%s\" directory\n",
657 fn
, march
, of
->arch
);
662 * By default, skip a file if the title given
663 * in the file disagrees with the file name.
664 * If both agree, use the file name as the title,
665 * because the one in the file usually is all caps.
670 if (strcasecmp(mtitle
, of
->title
)) {
672 fprintf(stderr
, "%s: "
673 "title \"%s\" in file "
674 "but \"%s\" in filename\n",
675 fn
, mtitle
, of
->title
);
680 if (skip
&& !use_all
)
684 * The index record value consists of a nil-terminated
685 * filename, a nil-terminated manual section, and a
686 * nil-terminated description. Since the description
687 * may not be set, we set a sentinel to see if we're
688 * going to write a nil byte in its place.
692 type
= mdoc
? 'd' : (man
? 'a' : 'c');
693 buf_appendb(dbuf
, &type
, 1);
694 buf_appendb(dbuf
, fn
, strlen(fn
) + 1);
695 buf_appendb(dbuf
, msec
, strlen(msec
) + 1);
696 buf_appendb(dbuf
, mtitle
, strlen(mtitle
) + 1);
697 buf_appendb(dbuf
, march
, strlen(march
) + 1);
702 * Collect keyword/mask pairs.
703 * Each pair will become a new btree node.
708 pmdoc_node(hash
, buf
, dbuf
,
709 mdoc_node(mdoc
), mdoc_meta(mdoc
));
711 pman_node(hash
, buf
, dbuf
, man_node(man
));
713 pformatted(hash
, buf
, dbuf
, of
);
715 /* Test mode, do not access any database. */
717 if (NULL
== mdb
->db
|| NULL
== mdb
->idx
)
721 * Reclaim an empty index record, if available.
722 * Use its record number for all new btree nodes.
727 rec
= recs
->stack
[(int)recs
->cur
];
728 } else if (recs
->last
> 0) {
733 vbuf
[1] = htobe64(rec
);
736 * Copy from the in-memory hashtable of pending
737 * keyword/mask pairs into the database.
741 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
743 assert(sizeof(uint64_t) == val
.size
);
744 memcpy(&mask
, val
.data
, val
.size
);
745 vbuf
[0] = htobe64(mask
);
746 val
.size
= sizeof(vbuf
);
748 dbt_put(mdb
->db
, mdb
->dbn
, &key
, &val
);
752 exit((int)MANDOCLEVEL_SYSERR
);
756 * Apply to the index. If we haven't had a description
757 * set, put an empty one in now.
761 buf_appendb(dbuf
, "", 1);
764 key
.size
= sizeof(recno_t
);
767 val
.size
= dbuf
->len
;
770 printf("%s: adding to index\n", fn
);
772 dbt_put(mdb
->idx
, mdb
->idxn
, &key
, &val
);
777 * Scan through all entries in the index file `idx' and prune those
778 * entries in `ofile'.
779 * Pruning consists of removing from `db', then invalidating the entry
780 * in `idx' (zeroing its value size).
783 index_prune(const struct of
*ofile
, struct mdb
*mdb
, struct recs
*recs
)
794 while (0 == (ch
= (*mdb
->idx
->seq
)(mdb
->idx
, &key
, &val
, seq
))) {
796 assert(sizeof(recno_t
) == key
.size
);
797 memcpy(&recs
->last
, key
.data
, key
.size
);
799 /* Deleted records are zero-sized. Skip them. */
805 * Make sure we're sane.
806 * Read past our mdoc/man/cat type to the next string,
807 * then make sure it's bounded by a NUL.
808 * Failing any of these, we go into our error handler.
811 fn
= (char *)val
.data
+ 1;
812 if (NULL
== memchr(fn
, '\0', val
.size
- 1))
816 * Search for the file in those we care about.
817 * XXX: build this into a tree. Too slow.
820 for (of
= ofile
->first
; of
; of
= of
->next
)
821 if (0 == strcmp(fn
, of
->fname
))
828 * Search through the keyword database, throwing out all
829 * references to our file.
833 while (0 == (ch
= (*mdb
->db
->seq
)(mdb
->db
,
834 &key
, &val
, sseq
))) {
836 if (sizeof(vbuf
) != val
.size
)
839 memcpy(vbuf
, val
.data
, val
.size
);
840 if (recs
->last
!= betoh64(vbuf
[1]))
843 if ((ch
= (*mdb
->db
->del
)(mdb
->db
,
844 &key
, R_CURSOR
)) < 0)
850 exit((int)MANDOCLEVEL_SYSERR
);
851 } else if (1 != ch
) {
852 fprintf(stderr
, "%s: corrupt database\n",
854 exit((int)MANDOCLEVEL_SYSERR
);
858 printf("%s: deleting from index\n", fn
);
861 ch
= (*mdb
->idx
->put
)(mdb
->idx
, &key
, &val
, R_CURSOR
);
866 if (recs
->cur
>= recs
->size
) {
867 recs
->size
+= MANDOC_SLOP
;
868 recs
->stack
= mandoc_realloc(recs
->stack
,
869 recs
->size
* sizeof(recno_t
));
872 recs
->stack
[(int)recs
->cur
] = recs
->last
;
878 exit((int)MANDOCLEVEL_SYSERR
);
879 } else if (1 != ch
) {
880 fprintf(stderr
, "%s: corrupt index\n", mdb
->idxn
);
881 exit((int)MANDOCLEVEL_SYSERR
);
888 * Grow the buffer (if necessary) and copy in a binary string.
891 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
894 /* Overshoot by MANDOC_BUFSZ. */
896 while (buf
->len
+ sz
>= buf
->size
) {
897 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
898 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
901 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
906 * Append a nil-terminated string to the buffer.
907 * This can be invoked multiple times.
908 * The buffer string will be nil-terminated.
909 * If invoked multiple times, a space is put between strings.
912 buf_append(struct buf
*buf
, const char *cp
)
916 if (0 == (sz
= strlen(cp
)))
920 buf
->cp
[(int)buf
->len
- 1] = ' ';
922 buf_appendb(buf
, cp
, sz
+ 1);
926 * Recursively add all text from a given node.
927 * This is optimised for general mdoc nodes in this context, which do
928 * not consist of subexpressions and having a recursive call for n->next
930 * The "f" variable should be 0 unless called from pmdoc_Nd for the
931 * description buffer, which does not start at the beginning of the
935 buf_appendmdoc(struct buf
*buf
, const struct mdoc_node
*n
, int f
)
938 for ( ; n
; n
= n
->next
) {
940 buf_appendmdoc(buf
, n
->child
, f
);
942 if (MDOC_TEXT
== n
->type
&& f
) {
944 buf_appendb(buf
, n
->string
,
945 strlen(n
->string
) + 1);
946 } else if (MDOC_TEXT
== n
->type
)
947 buf_append(buf
, n
->string
);
957 if (NULL
!= (hash
= *db
))
958 (*hash
->close
)(hash
);
960 *db
= dbopen(NULL
, O_CREAT
|O_RDWR
, 0644, DB_HASH
, NULL
);
963 exit((int)MANDOCLEVEL_SYSERR
);
969 pmdoc_head(MDOC_ARGS
)
972 return(MDOC_HEAD
== n
->type
);
977 pmdoc_body(MDOC_ARGS
)
980 return(MDOC_BODY
== n
->type
);
987 const char *start
, *end
;
990 if (SEC_SYNOPSIS
!= n
->sec
)
992 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
996 * Only consider those `Fd' macro fields that begin with an
997 * "inclusion" token (versus, e.g., #define).
999 if (strcmp("#include", n
->string
))
1002 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
1006 * Strip away the enclosing angle brackets and make sure we're
1011 if ('<' == *start
|| '"' == *start
)
1014 if (0 == (sz
= strlen(start
)))
1017 end
= &start
[(int)sz
- 1];
1018 if ('>' == *end
|| '"' == *end
)
1021 assert(end
>= start
);
1023 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
1024 buf_appendb(buf
, "", 1);
1033 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
1036 buf_append(buf
, n
->child
->string
);
1044 struct mdoc_node
*nn
;
1049 if (NULL
== nn
|| MDOC_TEXT
!= nn
->type
)
1052 /* .Fn "struct type *name" "char *arg" */
1054 cp
= strrchr(nn
->string
, ' ');
1058 /* Strip away pointer symbol. */
1063 /* Store the function name. */
1065 buf_append(buf
, cp
);
1066 hash_put(hash
, buf
, TYPE_Fn
);
1068 /* Store the function type. */
1070 if (nn
->string
< cp
) {
1072 buf_appendb(buf
, nn
->string
, cp
- nn
->string
);
1073 buf_appendb(buf
, "", 1);
1074 hash_put(hash
, buf
, TYPE_Ft
);
1077 /* Store the arguments. */
1079 for (nn
= nn
->next
; nn
; nn
= nn
->next
) {
1080 if (MDOC_TEXT
!= nn
->type
)
1083 buf_append(buf
, nn
->string
);
1084 hash_put(hash
, buf
, TYPE_Fa
);
1095 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
1098 buf_append(buf
, n
->child
->string
);
1107 if (NULL
== (n
= n
->child
))
1110 buf_appendb(buf
, n
->string
, strlen(n
->string
));
1112 if (NULL
!= (n
= n
->next
)) {
1113 buf_appendb(buf
, ".", 1);
1114 buf_appendb(buf
, n
->string
, strlen(n
->string
) + 1);
1116 buf_appendb(buf
, ".", 2);
1126 if (MDOC_BODY
!= n
->type
)
1129 buf_appendmdoc(dbuf
, n
->child
, 1);
1138 if (SEC_NAME
== n
->sec
)
1140 else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
1143 if (NULL
== n
->child
)
1144 buf_append(buf
, m
->name
);
1154 return(SEC_CUSTOM
== n
->sec
&& MDOC_HEAD
== n
->type
);
1158 hash_put(DB
*db
, const struct buf
*buf
, uint64_t mask
)
1168 key
.size
= buf
->len
;
1170 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
1172 exit((int)MANDOCLEVEL_SYSERR
);
1173 } else if (0 == rc
) {
1174 assert(sizeof(uint64_t) == val
.size
);
1175 memcpy(&oldmask
, val
.data
, val
.size
);
1180 val
.size
= sizeof(uint64_t);
1182 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
1184 exit((int)MANDOCLEVEL_SYSERR
);
1189 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
1195 if (0 == (*db
->put
)(db
, key
, val
, 0))
1199 exit((int)MANDOCLEVEL_SYSERR
);
1204 * Call out to per-macro handlers after clearing the persistent database
1205 * key. If the macro sets the database key, flush it to the database.
1208 pmdoc_node(MDOC_ARGS
)
1227 * Both NULL handlers and handlers returning true
1228 * request using the data. Only skip the element
1229 * when the handler returns false.
1232 if (NULL
!= mdocs
[n
->tok
].fp
&&
1233 0 == (*mdocs
[n
->tok
].fp
)(hash
, buf
, dbuf
, n
, m
))
1237 * For many macros, use the text from all children.
1238 * Set zero flags for macros not needing this.
1239 * In that case, the handler must fill the buffer.
1242 if (MDOCF_CHILD
& mdocs
[n
->tok
].flags
)
1243 buf_appendmdoc(buf
, n
->child
, 0);
1246 * Cover the most common case:
1247 * Automatically stage one string per element.
1248 * Set a zero mask for macros not needing this.
1249 * Additional staging can be done in the handler.
1252 if (mdocs
[n
->tok
].mask
)
1253 hash_put(hash
, buf
, mdocs
[n
->tok
].mask
);
1259 pmdoc_node(hash
, buf
, dbuf
, n
->child
, m
);
1260 pmdoc_node(hash
, buf
, dbuf
, n
->next
, m
);
1266 const struct man_node
*head
, *body
;
1267 const char *start
, *sv
;
1274 * We're only searching for one thing: the first text child in
1275 * the BODY of a NAME section. Since we don't keep track of
1276 * sections in -man, run some hoops to find out whether we're in
1277 * the correct section or not.
1280 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
1282 assert(body
->parent
);
1283 if (NULL
!= (head
= body
->parent
->head
) &&
1284 1 == head
->nchild
&&
1285 NULL
!= (head
= (head
->child
)) &&
1286 MAN_TEXT
== head
->type
&&
1287 0 == strcmp(head
->string
, "NAME") &&
1288 NULL
!= (body
= body
->child
) &&
1289 MAN_TEXT
== body
->type
) {
1291 assert(body
->string
);
1292 start
= sv
= body
->string
;
1295 * Go through a special heuristic dance here.
1296 * This is why -man manuals are great!
1297 * (I'm being sarcastic: my eyes are bleeding.)
1298 * Conventionally, one or more manual names are
1299 * comma-specified prior to a whitespace, then a
1300 * dash, then a description. Try to puzzle out
1301 * the name parts here.
1305 sz
= strcspn(start
, " ,");
1306 if ('\0' == start
[(int)sz
])
1310 buf_appendb(buf
, start
, sz
);
1311 buf_appendb(buf
, "", 1);
1313 hash_put(hash
, buf
, TYPE_Nm
);
1315 if (' ' == start
[(int)sz
]) {
1316 start
+= (int)sz
+ 1;
1320 assert(',' == start
[(int)sz
]);
1321 start
+= (int)sz
+ 1;
1322 while (' ' == *start
)
1329 buf_append(buf
, start
);
1333 while (' ' == *start
)
1336 if (0 == strncmp(start
, "-", 1))
1338 else if (0 == strncmp(start
, "\\-\\-", 4))
1340 else if (0 == strncmp(start
, "\\-", 2))
1342 else if (0 == strncmp(start
, "\\(en", 4))
1344 else if (0 == strncmp(start
, "\\(em", 4))
1347 while (' ' == *start
)
1350 sz
= strlen(start
) + 1;
1351 buf_appendb(dbuf
, start
, sz
);
1352 buf_appendb(buf
, start
, sz
);
1354 hash_put(hash
, buf
, TYPE_Nd
);
1358 for (n
= n
->child
; n
; n
= n
->next
)
1359 if (pman_node(hash
, buf
, dbuf
, n
))
1366 * Parse a formatted manual page.
1367 * By necessity, this involves rather crude guesswork.
1370 pformatted(DB
*hash
, struct buf
*buf
,
1371 struct buf
*dbuf
, const struct of
*of
)
1374 char *line
, *p
, *title
;
1375 size_t len
, plen
, titlesz
;
1377 if (NULL
== (stream
= fopen(of
->fname
, "r"))) {
1384 * Always use the title derived from the filename up front,
1385 * do not even try to find it in the file. This also makes
1386 * sure we don't end up with an orphan index record, even if
1387 * the file content turns out to be completely unintelligible.
1391 buf_append(buf
, of
->title
);
1392 hash_put(hash
, buf
, TYPE_Nm
);
1394 /* Skip to first blank line. */
1396 while (NULL
!= (line
= fgetln(stream
, &len
)))
1401 * Assume the first line that is not indented
1402 * is the first section header. Skip to it.
1405 while (NULL
!= (line
= fgetln(stream
, &len
)))
1406 if ('\n' != *line
&& ' ' != *line
)
1410 * Read up until the next section into a buffer.
1411 * Strip the leading and trailing newline from each read line,
1412 * appending a trailing space.
1413 * Ignore empty (whitespace-only) lines.
1419 while (NULL
!= (line
= fgetln(stream
, &len
))) {
1420 if (' ' != *line
|| '\n' != line
[(int)len
- 1])
1422 while (len
> 0 && isspace((unsigned char)*line
)) {
1428 title
= mandoc_realloc(title
, titlesz
+ len
);
1429 memcpy(title
+ titlesz
, line
, len
);
1431 title
[(int)titlesz
- 1] = ' ';
1436 * If no page content can be found, or the input line
1437 * is already the next section header, or there is no
1438 * trailing newline, reuse the page title as the page
1442 if (NULL
== title
|| '\0' == *title
) {
1444 fprintf(stderr
, "%s: cannot find NAME section\n",
1446 buf_appendb(dbuf
, buf
->cp
, buf
->size
);
1447 hash_put(hash
, buf
, TYPE_Nd
);
1453 title
= mandoc_realloc(title
, titlesz
+ 1);
1454 title
[(int)titlesz
] = '\0';
1457 * Skip to the first dash.
1458 * Use the remaining line as the description (no more than 70
1462 if (NULL
!= (p
= strstr(title
, "- "))) {
1463 for (p
+= 2; ' ' == *p
|| '\b' == *p
; p
++)
1464 /* Skip to next word. */ ;
1467 fprintf(stderr
, "%s: no dash in title line\n",
1474 /* Strip backspace-encoding from line. */
1476 while (NULL
!= (line
= memchr(p
, '\b', plen
))) {
1479 memmove(line
, line
+ 1, plen
--);
1482 memmove(line
- 1, line
+ 1, plen
- len
);
1486 buf_appendb(dbuf
, p
, plen
+ 1);
1488 buf_appendb(buf
, p
, plen
+ 1);
1489 hash_put(hash
, buf
, TYPE_Nd
);
1495 ofile_argbuild(int argc
, char *argv
[], struct of
**of
)
1497 char buf
[MAXPATHLEN
];
1498 const char *sec
, *arch
, *title
;
1503 for (i
= 0; i
< argc
; i
++) {
1506 * Try to infer the manual section, architecture and
1507 * page title from the path, assuming it looks like
1508 * man*[/<arch>]/<title>.<section> or
1509 * cat<section>[/<arch>]/<title>.0
1512 if (strlcpy(buf
, argv
[i
], sizeof(buf
)) >= sizeof(buf
)) {
1513 fprintf(stderr
, "%s: path too long\n", argv
[i
]);
1516 sec
= arch
= title
= "";
1518 p
= strrchr(buf
, '\0');
1520 if ('\0' == *sec
&& '.' == *p
) {
1524 src_form
|= MANDOC_FORM
;
1525 else if ('1' <= *sec
&& '9' >= *sec
)
1526 src_form
|= MANDOC_SRC
;
1531 if ('\0' == *title
) {
1536 if (0 == strncmp("man", p
+ 1, 3))
1537 src_form
|= MANDOC_SRC
;
1538 else if (0 == strncmp("cat", p
+ 1, 3))
1539 src_form
|= MANDOC_FORM
;
1544 if ('\0' == *title
) {
1547 "%s: cannot deduce title "
1554 * Build the file structure.
1557 nof
= mandoc_calloc(1, sizeof(struct of
));
1558 nof
->fname
= mandoc_strdup(argv
[i
]);
1559 nof
->sec
= mandoc_strdup(sec
);
1560 nof
->arch
= mandoc_strdup(arch
);
1561 nof
->title
= mandoc_strdup(title
);
1562 nof
->src_form
= src_form
;
1565 * Add the structure to the list.
1569 printf("%s: scheduling\n", argv
[i
]);
1574 nof
->first
= (*of
)->first
;
1582 * Recursively build up a list of files to parse.
1583 * We use this instead of ftw() and so on because I don't want global
1584 * variables hanging around.
1585 * This ignores the whatis.db and whatis.index files, but assumes that
1586 * everything else is a manual.
1587 * Pass in a pointer to a NULL structure for the first invocation.
1590 ofile_dirbuild(const char *dir
, const char* psec
, const char *parch
,
1591 int p_src_form
, struct of
**of
)
1593 char buf
[MAXPATHLEN
];
1596 const char *fn
, *sec
, *arch
;
1597 char *p
, *q
, *suffix
;
1602 if (NULL
== (d
= opendir(dir
))) {
1608 while (NULL
!= (dp
= readdir(d
))) {
1614 src_form
= p_src_form
;
1616 if (DT_DIR
== dp
->d_type
) {
1621 * By default, only use directories called:
1622 * man<section>/[<arch>/] or
1623 * cat<section>/[<arch>/]
1627 if(0 == strncmp("man", fn
, 3)) {
1628 src_form
|= MANDOC_SRC
;
1630 } else if (0 == strncmp("cat", fn
, 3)) {
1631 src_form
|= MANDOC_FORM
;
1634 if (warnings
) fprintf(stderr
,
1635 "%s/%s: bad section\n",
1642 } else if ('\0' == *arch
) {
1643 if (NULL
!= strchr(fn
, '.')) {
1644 if (warnings
) fprintf(stderr
,
1645 "%s/%s: bad architecture\n",
1652 if (warnings
) fprintf(stderr
, "%s/%s: "
1653 "excessive subdirectory\n", dir
, fn
);
1659 strlcat(buf
, dir
, MAXPATHLEN
);
1660 strlcat(buf
, "/", MAXPATHLEN
);
1661 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1663 if (MAXPATHLEN
<= sz
) {
1664 if (warnings
) fprintf(stderr
, "%s/%s: "
1665 "path too long\n", dir
, fn
);
1670 printf("%s: scanning\n", buf
);
1672 ofile_dirbuild(buf
, sec
, arch
, src_form
, of
);
1676 if (DT_REG
!= dp
->d_type
) {
1679 "%s/%s: not a regular file\n",
1683 if (!strcmp(MANDOC_DB
, fn
) || !strcmp(MANDOC_IDX
, fn
))
1685 if ('\0' == *psec
) {
1688 "%s/%s: file outside section\n",
1695 * By default, skip files where the file name suffix
1696 * does not agree with the section directory
1697 * they are located in.
1700 suffix
= strrchr(fn
, '.');
1701 if (NULL
== suffix
) {
1704 "%s/%s: no filename suffix\n",
1708 } else if ((MANDOC_SRC
& src_form
&&
1709 strcmp(suffix
+ 1, psec
)) ||
1710 (MANDOC_FORM
& src_form
&&
1711 strcmp(suffix
+ 1, "0"))) {
1714 "%s/%s: wrong filename suffix\n",
1718 if ('0' == suffix
[1])
1719 src_form
|= MANDOC_FORM
;
1720 else if ('1' <= suffix
[1] && '9' >= suffix
[1])
1721 src_form
|= MANDOC_SRC
;
1725 * Skip formatted manuals if a source version is
1726 * available. Ignore the age: it is very unlikely
1727 * that people install newer formatted base manuals
1728 * when they used to have source manuals before,
1729 * and in ports, old manuals get removed on update.
1731 if (0 == use_all
&& MANDOC_FORM
& src_form
&&
1734 strlcat(buf
, dir
, MAXPATHLEN
);
1735 p
= strrchr(buf
, '/');
1736 if ('\0' != *parch
&& NULL
!= p
)
1737 for (p
--; p
> buf
; p
--)
1744 if (0 == strncmp("cat", p
, 3))
1745 memcpy(p
, "man", 3);
1746 strlcat(buf
, "/", MAXPATHLEN
);
1747 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1748 if (sz
>= MAXPATHLEN
) {
1749 if (warnings
) fprintf(stderr
,
1750 "%s/%s: path too long\n",
1754 q
= strrchr(buf
, '.');
1755 if (NULL
!= q
&& p
< q
++) {
1757 sz
= strlcat(buf
, psec
, MAXPATHLEN
);
1758 if (sz
>= MAXPATHLEN
) {
1759 if (warnings
) fprintf(stderr
,
1760 "%s/%s: path too long\n",
1764 if (0 == access(buf
, R_OK
))
1770 assert('.' == dir
[0]);
1771 if ('/' == dir
[1]) {
1772 strlcat(buf
, dir
+ 2, MAXPATHLEN
);
1773 strlcat(buf
, "/", MAXPATHLEN
);
1775 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1776 if (sz
>= MAXPATHLEN
) {
1777 if (warnings
) fprintf(stderr
,
1778 "%s/%s: path too long\n", dir
, fn
);
1782 nof
= mandoc_calloc(1, sizeof(struct of
));
1783 nof
->fname
= mandoc_strdup(buf
);
1784 nof
->sec
= mandoc_strdup(psec
);
1785 nof
->arch
= mandoc_strdup(parch
);
1786 nof
->src_form
= src_form
;
1789 * Remember the file name without the extension,
1790 * to be used as the page title in the database.
1795 nof
->title
= mandoc_strdup(fn
);
1798 * Add the structure to the list.
1802 printf("%s: scheduling\n", buf
);
1808 nof
->first
= (*of
)->first
;
1818 ofile_free(struct of
*of
)
1825 while (NULL
!= of
) {