]>
git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
1 /* $Id: mandocdb.c,v 1.28 2011/12/08 09:19:13 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #include <sys/param.h>
23 #include <sys/types.h>
36 #if defined(__linux__)
39 #elif defined(__APPLE__)
40 # include <libkern/OSByteOrder.h>
52 #define MANDOC_BUFSZ BUFSIZ
53 #define MANDOC_SLOP 1024
55 #define MANDOC_SRC 0x1
56 #define MANDOC_FORM 0x2
58 /* Tiny list for files. No need to bring in QUEUE. */
61 char *fname
; /* heap-allocated */
66 struct of
*next
; /* NULL for last one */
67 struct of
*first
; /* first in list */
70 /* Buffer for storing growable data. */
74 size_t len
; /* current length */
75 size_t size
; /* total buffer size */
78 /* Operation we're going to perform. */
81 OP_NEW
= 0, /* new database */
82 OP_UPDATE
, /* delete/add entries in existing database */
83 OP_DELETE
/* delete entries from existing database */
86 #define MAN_ARGS DB *hash, \
89 const struct man_node *n
90 #define MDOC_ARGS DB *hash, \
93 const struct mdoc_node *n, \
94 const struct mdoc_meta *m
96 static void buf_appendmdoc(struct buf
*,
97 const struct mdoc_node
*, int);
98 static void buf_append(struct buf
*, const char *);
99 static void buf_appendb(struct buf
*,
100 const void *, size_t);
101 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
102 static void hash_put(DB
*, const struct buf
*, uint64_t);
103 static void hash_reset(DB
**);
104 static void index_merge(const struct of
*, struct mparse
*,
105 struct buf
*, struct buf
*, DB
*,
106 DB
*, const char *, DB
*, const char *,
107 recno_t
, const recno_t
*, size_t);
108 static void index_prune(const struct of
*, DB
*,
109 const char *, DB
*, const char *,
110 recno_t
*, recno_t
**, size_t *,
112 static void ofile_argbuild(int, char *[], struct of
**);
113 static int ofile_dirbuild(const char *, const char *,
114 const char *, int, struct of
**);
115 static void ofile_free(struct of
*);
116 static void pformatted(DB
*, struct buf
*, struct buf
*,
118 static int pman_node(MAN_ARGS
);
119 static void pmdoc_node(MDOC_ARGS
);
120 static int pmdoc_head(MDOC_ARGS
);
121 static int pmdoc_body(MDOC_ARGS
);
122 static int pmdoc_Fd(MDOC_ARGS
);
123 static int pmdoc_In(MDOC_ARGS
);
124 static int pmdoc_Fn(MDOC_ARGS
);
125 static int pmdoc_Nd(MDOC_ARGS
);
126 static int pmdoc_Nm(MDOC_ARGS
);
127 static int pmdoc_Sh(MDOC_ARGS
);
128 static int pmdoc_St(MDOC_ARGS
);
129 static int pmdoc_Xr(MDOC_ARGS
);
130 static void usage(void);
132 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
134 struct mdoc_handler
{
135 int (*fp
)(MDOC_ARGS
); /* Optional handler. */
136 uint64_t mask
; /* Set unless handler returns 0. */
137 int flags
; /* For use by pmdoc_node. */
140 static const struct mdoc_handler mdocs
[MDOC_MAX
] = {
141 { NULL
, 0, 0 }, /* Ap */
142 { NULL
, 0, 0 }, /* Dd */
143 { NULL
, 0, 0 }, /* Dt */
144 { NULL
, 0, 0 }, /* Os */
145 { pmdoc_Sh
, TYPE_Sh
, MDOCF_CHILD
}, /* Sh */
146 { pmdoc_head
, TYPE_Ss
, MDOCF_CHILD
}, /* Ss */
147 { NULL
, 0, 0 }, /* Pp */
148 { NULL
, 0, 0 }, /* D1 */
149 { NULL
, 0, 0 }, /* Dl */
150 { NULL
, 0, 0 }, /* Bd */
151 { NULL
, 0, 0 }, /* Ed */
152 { NULL
, 0, 0 }, /* Bl */
153 { NULL
, 0, 0 }, /* El */
154 { NULL
, 0, 0 }, /* It */
155 { NULL
, 0, 0 }, /* Ad */
156 { NULL
, TYPE_An
, MDOCF_CHILD
}, /* An */
157 { NULL
, TYPE_Ar
, MDOCF_CHILD
}, /* Ar */
158 { NULL
, TYPE_Cd
, MDOCF_CHILD
}, /* Cd */
159 { NULL
, TYPE_Cm
, MDOCF_CHILD
}, /* Cm */
160 { NULL
, TYPE_Dv
, MDOCF_CHILD
}, /* Dv */
161 { NULL
, TYPE_Er
, MDOCF_CHILD
}, /* Er */
162 { NULL
, TYPE_Ev
, MDOCF_CHILD
}, /* Ev */
163 { NULL
, 0, 0 }, /* Ex */
164 { NULL
, TYPE_Fa
, MDOCF_CHILD
}, /* Fa */
165 { pmdoc_Fd
, TYPE_In
, 0 }, /* Fd */
166 { NULL
, TYPE_Fl
, MDOCF_CHILD
}, /* Fl */
167 { pmdoc_Fn
, 0, 0 }, /* Fn */
168 { NULL
, TYPE_Ft
, MDOCF_CHILD
}, /* Ft */
169 { NULL
, TYPE_Ic
, MDOCF_CHILD
}, /* Ic */
170 { pmdoc_In
, TYPE_In
, 0 }, /* In */
171 { NULL
, TYPE_Li
, MDOCF_CHILD
}, /* Li */
172 { pmdoc_Nd
, TYPE_Nd
, MDOCF_CHILD
}, /* Nd */
173 { pmdoc_Nm
, TYPE_Nm
, MDOCF_CHILD
}, /* Nm */
174 { NULL
, 0, 0 }, /* Op */
175 { NULL
, 0, 0 }, /* Ot */
176 { NULL
, TYPE_Pa
, MDOCF_CHILD
}, /* Pa */
177 { NULL
, 0, 0 }, /* Rv */
178 { pmdoc_St
, TYPE_St
, 0 }, /* St */
179 { NULL
, TYPE_Va
, MDOCF_CHILD
}, /* Va */
180 { pmdoc_body
, TYPE_Va
, MDOCF_CHILD
}, /* Vt */
181 { pmdoc_Xr
, TYPE_Xr
, 0 }, /* Xr */
182 { NULL
, 0, 0 }, /* %A */
183 { NULL
, 0, 0 }, /* %B */
184 { NULL
, 0, 0 }, /* %D */
185 { NULL
, 0, 0 }, /* %I */
186 { NULL
, 0, 0 }, /* %J */
187 { NULL
, 0, 0 }, /* %N */
188 { NULL
, 0, 0 }, /* %O */
189 { NULL
, 0, 0 }, /* %P */
190 { NULL
, 0, 0 }, /* %R */
191 { NULL
, 0, 0 }, /* %T */
192 { NULL
, 0, 0 }, /* %V */
193 { NULL
, 0, 0 }, /* Ac */
194 { NULL
, 0, 0 }, /* Ao */
195 { NULL
, 0, 0 }, /* Aq */
196 { NULL
, TYPE_At
, MDOCF_CHILD
}, /* At */
197 { NULL
, 0, 0 }, /* Bc */
198 { NULL
, 0, 0 }, /* Bf */
199 { NULL
, 0, 0 }, /* Bo */
200 { NULL
, 0, 0 }, /* Bq */
201 { NULL
, TYPE_Bsx
, MDOCF_CHILD
}, /* Bsx */
202 { NULL
, TYPE_Bx
, MDOCF_CHILD
}, /* Bx */
203 { NULL
, 0, 0 }, /* Db */
204 { NULL
, 0, 0 }, /* Dc */
205 { NULL
, 0, 0 }, /* Do */
206 { NULL
, 0, 0 }, /* Dq */
207 { NULL
, 0, 0 }, /* Ec */
208 { NULL
, 0, 0 }, /* Ef */
209 { NULL
, TYPE_Em
, MDOCF_CHILD
}, /* Em */
210 { NULL
, 0, 0 }, /* Eo */
211 { NULL
, TYPE_Fx
, MDOCF_CHILD
}, /* Fx */
212 { NULL
, TYPE_Ms
, MDOCF_CHILD
}, /* Ms */
213 { NULL
, 0, 0 }, /* No */
214 { NULL
, 0, 0 }, /* Ns */
215 { NULL
, TYPE_Nx
, MDOCF_CHILD
}, /* Nx */
216 { NULL
, TYPE_Ox
, MDOCF_CHILD
}, /* Ox */
217 { NULL
, 0, 0 }, /* Pc */
218 { NULL
, 0, 0 }, /* Pf */
219 { NULL
, 0, 0 }, /* Po */
220 { NULL
, 0, 0 }, /* Pq */
221 { NULL
, 0, 0 }, /* Qc */
222 { NULL
, 0, 0 }, /* Ql */
223 { NULL
, 0, 0 }, /* Qo */
224 { NULL
, 0, 0 }, /* Qq */
225 { NULL
, 0, 0 }, /* Re */
226 { NULL
, 0, 0 }, /* Rs */
227 { NULL
, 0, 0 }, /* Sc */
228 { NULL
, 0, 0 }, /* So */
229 { NULL
, 0, 0 }, /* Sq */
230 { NULL
, 0, 0 }, /* Sm */
231 { NULL
, 0, 0 }, /* Sx */
232 { NULL
, TYPE_Sy
, MDOCF_CHILD
}, /* Sy */
233 { NULL
, TYPE_Tn
, MDOCF_CHILD
}, /* Tn */
234 { NULL
, 0, 0 }, /* Ux */
235 { NULL
, 0, 0 }, /* Xc */
236 { NULL
, 0, 0 }, /* Xo */
237 { pmdoc_head
, TYPE_Fn
, 0 }, /* Fo */
238 { NULL
, 0, 0 }, /* Fc */
239 { NULL
, 0, 0 }, /* Oo */
240 { NULL
, 0, 0 }, /* Oc */
241 { NULL
, 0, 0 }, /* Bk */
242 { NULL
, 0, 0 }, /* Ek */
243 { NULL
, 0, 0 }, /* Bt */
244 { NULL
, 0, 0 }, /* Hf */
245 { NULL
, 0, 0 }, /* Fr */
246 { NULL
, 0, 0 }, /* Ud */
247 { NULL
, TYPE_Lb
, MDOCF_CHILD
}, /* Lb */
248 { NULL
, 0, 0 }, /* Lp */
249 { NULL
, TYPE_Lk
, MDOCF_CHILD
}, /* Lk */
250 { NULL
, TYPE_Mt
, MDOCF_CHILD
}, /* Mt */
251 { NULL
, 0, 0 }, /* Brq */
252 { NULL
, 0, 0 }, /* Bro */
253 { NULL
, 0, 0 }, /* Brc */
254 { NULL
, 0, 0 }, /* %C */
255 { NULL
, 0, 0 }, /* Es */
256 { NULL
, 0, 0 }, /* En */
257 { NULL
, TYPE_Dx
, MDOCF_CHILD
}, /* Dx */
258 { NULL
, 0, 0 }, /* %Q */
259 { NULL
, 0, 0 }, /* br */
260 { NULL
, 0, 0 }, /* sp */
261 { NULL
, 0, 0 }, /* %U */
262 { NULL
, 0, 0 }, /* Ta */
265 static const char *progname
;
266 static int use_all
; /* Use all directories and files. */
267 static int verb
; /* Output verbosity level. */
270 main(int argc
, char *argv
[])
272 struct mparse
*mp
; /* parse sequence */
273 struct manpaths dirs
;
274 enum op op
; /* current operation */
278 ibuf
[MAXPATHLEN
], /* index fname */
279 fbuf
[MAXPATHLEN
]; /* btree fname */
281 DB
*idx
, /* index database */
282 *db
, /* keyword database */
283 *hash
; /* temporary keyword hashtable */
284 BTREEINFO info
; /* btree configuration */
285 recno_t maxrec
; /* last record number in the index */
286 recno_t
*recs
; /* the numbers of all empty records */
288 recsz
, /* number of allocated slots in recs */
289 reccur
; /* current number of empty records */
290 struct buf buf
, /* keyword buffer */
291 dbuf
; /* description buffer */
292 struct of
*of
; /* list of files for processing */
296 progname
= strrchr(argv
[0], '/');
297 if (progname
== NULL
)
302 memset(&dirs
, 0, sizeof(struct manpaths
));
316 while (-1 != (ch
= getopt(argc
, argv
, "ad:u:v")))
334 return((int)MANDOCLEVEL_BADARG
);
340 memset(&info
, 0, sizeof(BTREEINFO
));
343 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
345 memset(&buf
, 0, sizeof(struct buf
));
346 memset(&dbuf
, 0, sizeof(struct buf
));
348 buf
.size
= dbuf
.size
= MANDOC_BUFSZ
;
350 buf
.cp
= mandoc_malloc(buf
.size
);
351 dbuf
.cp
= mandoc_malloc(dbuf
.size
);
353 flags
= OP_NEW
== op
? O_CREAT
|O_TRUNC
|O_RDWR
: O_CREAT
|O_RDWR
;
355 if (OP_UPDATE
== op
|| OP_DELETE
== op
) {
356 ibuf
[0] = fbuf
[0] = '\0';
358 strlcat(fbuf
, dir
, MAXPATHLEN
);
359 strlcat(fbuf
, "/", MAXPATHLEN
);
360 sz1
= strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
362 strlcat(ibuf
, dir
, MAXPATHLEN
);
363 strlcat(ibuf
, "/", MAXPATHLEN
);
364 sz2
= strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
366 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
367 fprintf(stderr
, "%s: Path too long\n", dir
);
368 exit((int)MANDOCLEVEL_BADARG
);
371 db
= dbopen(fbuf
, flags
, 0644, DB_BTREE
, &info
);
372 idx
= dbopen(ibuf
, flags
, 0644, DB_RECNO
, NULL
);
376 exit((int)MANDOCLEVEL_SYSERR
);
377 } else if (NULL
== idx
) {
379 exit((int)MANDOCLEVEL_SYSERR
);
383 printf("%s: Opened\n", fbuf
);
384 printf("%s: Opened\n", ibuf
);
387 ofile_argbuild(argc
, argv
, &of
);
393 index_prune(of
, db
, fbuf
, idx
, ibuf
,
394 &maxrec
, &recs
, &recsz
, &reccur
);
397 * Go to the root of the respective manual tree
398 * such that .so links work. In case of failure,
399 * just prod on, even though .so links won't work.
402 if (OP_UPDATE
== op
) {
404 index_merge(of
, mp
, &dbuf
, &buf
, hash
,
406 maxrec
, recs
, reccur
);
413 * Configure the directories we're going to scan.
414 * If we have command-line arguments, use them.
415 * If not, we use man(1)'s method (see mandocdb.8).
419 dirs
.paths
= mandoc_calloc(argc
, sizeof(char *));
421 for (i
= 0; i
< argc
; i
++) {
422 if (NULL
== (cp
= realpath(argv
[i
], pbuf
))) {
426 dirs
.paths
[i
] = mandoc_strdup(cp
);
429 manpath_parse(&dirs
, NULL
, NULL
);
431 for (i
= 0; i
< dirs
.sz
; i
++) {
432 ibuf
[0] = fbuf
[0] = '\0';
434 strlcat(fbuf
, dirs
.paths
[i
], MAXPATHLEN
);
435 strlcat(fbuf
, "/", MAXPATHLEN
);
436 sz1
= strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
438 strlcat(ibuf
, dirs
.paths
[i
], MAXPATHLEN
);
439 strlcat(ibuf
, "/", MAXPATHLEN
);
440 sz2
= strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
442 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
443 fprintf(stderr
, "%s: Path too long\n",
445 exit((int)MANDOCLEVEL_BADARG
);
453 db
= dbopen(fbuf
, flags
, 0644, DB_BTREE
, &info
);
454 idx
= dbopen(ibuf
, flags
, 0644, DB_RECNO
, NULL
);
458 exit((int)MANDOCLEVEL_SYSERR
);
459 } else if (NULL
== idx
) {
461 exit((int)MANDOCLEVEL_SYSERR
);
465 printf("%s: Truncated\n", fbuf
);
466 printf("%s: Truncated\n", ibuf
);
472 if ( ! ofile_dirbuild(dirs
.paths
[i
], NULL
, NULL
,
474 exit((int)MANDOCLEVEL_SYSERR
);
482 * Go to the root of the respective manual tree
483 * such that .so links work. In case of failure,
484 * just prod on, even though .so links won't work.
487 chdir(dirs
.paths
[i
]);
488 index_merge(of
, mp
, &dbuf
, &buf
, hash
, db
, fbuf
,
489 idx
, ibuf
, maxrec
, recs
, reccur
);
498 (*hash
->close
)(hash
);
508 return(MANDOCLEVEL_OK
);
512 index_merge(const struct of
*of
, struct mparse
*mp
,
513 struct buf
*dbuf
, struct buf
*buf
, DB
*hash
,
514 DB
*db
, const char *dbf
, DB
*idx
, const char *idxf
,
515 recno_t maxrec
, const recno_t
*recs
, size_t reccur
)
522 const char *fn
, *msec
, *mtitle
, *arch
;
527 for (rec
= 0; of
; of
= of
->next
) {
531 * Reclaim an empty index record, if available.
536 rec
= recs
[(int)reccur
];
537 } else if (maxrec
> 0) {
549 * Try interpreting the file as mdoc(7) or man(7)
550 * source code, unless it is already known to be
551 * formatted. Fall back to formatted mode.
554 if ((MANDOC_SRC
& of
->src_form
||
555 ! (MANDOC_FORM
& of
->src_form
)) &&
556 MANDOCLEVEL_FATAL
> mparse_readfd(mp
, -1, fn
))
557 mparse_result(mp
, &mdoc
, &man
);
560 msec
= mdoc_meta(mdoc
)->msec
;
561 arch
= mdoc_meta(mdoc
)->arch
;
562 mtitle
= mdoc_meta(mdoc
)->title
;
563 } else if (NULL
!= man
) {
564 msec
= man_meta(man
)->msec
;
566 mtitle
= man_meta(man
)->title
;
574 * By default, skip a file if the manual section
575 * and architecture given in the file disagree
576 * with the directory where the file is located.
582 if (strcmp(msec
, of
->sec
))
586 if (NULL
!= of
->arch
)
588 } else if (NULL
== of
->arch
||
589 strcmp(arch
, of
->arch
))
597 * By default, skip a file if the title given
598 * in the file disagrees with the file name.
599 * If both agree, use the file name as the title,
600 * because the one in the file usually is all caps.
606 if (0 == strcasecmp(mtitle
, of
->title
))
608 else if (0 == use_all
)
612 * The index record value consists of a nil-terminated
613 * filename, a nil-terminated manual section, and a
614 * nil-terminated description. Since the description
615 * may not be set, we set a sentinel to see if we're
616 * going to write a nil byte in its place.
620 buf_append(dbuf
, mdoc
? "mdoc" : (man
? "man" : "cat"));
621 buf_appendb(dbuf
, fn
, strlen(fn
) + 1);
622 buf_appendb(dbuf
, msec
, strlen(msec
) + 1);
623 buf_appendb(dbuf
, mtitle
, strlen(mtitle
) + 1);
624 buf_appendb(dbuf
, arch
, strlen(arch
) + 1);
628 /* Fix the record number in the btree value. */
631 pmdoc_node(hash
, buf
, dbuf
,
632 mdoc_node(mdoc
), mdoc_meta(mdoc
));
634 pman_node(hash
, buf
, dbuf
, man_node(man
));
636 pformatted(hash
, buf
, dbuf
, of
);
639 * Copy from the in-memory hashtable of pending keywords
643 vbuf
.rec
= htobe32(rec
);
645 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
647 vbuf
.mask
= htobe64(*(uint64_t *)val
.data
);
648 val
.size
= sizeof(struct db_val
);
650 dbt_put(db
, dbf
, &key
, &val
);
654 exit((int)MANDOCLEVEL_SYSERR
);
658 * Apply to the index. If we haven't had a description
659 * set, put an empty one in now.
663 buf_appendb(dbuf
, "", 1);
666 key
.size
= sizeof(recno_t
);
669 val
.size
= dbuf
->len
;
672 printf("%s: Added index\n", fn
);
674 dbt_put(idx
, idxf
, &key
, &val
);
679 * Scan through all entries in the index file `idx' and prune those
680 * entries in `ofile'.
681 * Pruning consists of removing from `db', then invalidating the entry
682 * in `idx' (zeroing its value size).
685 index_prune(const struct of
*ofile
, DB
*db
, const char *dbf
,
686 DB
*idx
, const char *idxf
, recno_t
*maxrec
,
687 recno_t
**recs
, size_t *recsz
, size_t *reccur
)
698 while (0 == (ch
= (*idx
->seq
)(idx
, &key
, &val
, seq
))) {
700 *maxrec
= *(recno_t
*)key
.data
;
703 /* Deleted records are zero-sized. Skip them. */
709 * Make sure we're sane.
710 * Read past our mdoc/man/cat type to the next string,
711 * then make sure it's bounded by a NUL.
712 * Failing any of these, we go into our error handler.
715 if (NULL
== (fn
= memchr(cp
, '\0', val
.size
)))
717 if (++fn
- cp
>= (int)val
.size
)
719 if (NULL
== memchr(fn
, '\0', val
.size
- (fn
- cp
)))
723 * Search for the file in those we care about.
724 * XXX: build this into a tree. Too slow.
727 for (of
= ofile
; of
; of
= of
->next
)
728 if (0 == strcmp(fn
, of
->fname
))
735 * Search through the keyword database, throwing out all
736 * references to our file.
740 while (0 == (ch
= (*db
->seq
)(db
, &key
, &val
, sseq
))) {
742 if (sizeof(struct db_val
) != val
.size
)
746 if (*maxrec
!= betoh32(vbuf
->rec
))
749 if ((ch
= (*db
->del
)(db
, &key
, R_CURSOR
)) < 0)
755 exit((int)MANDOCLEVEL_SYSERR
);
756 } else if (1 != ch
) {
757 fprintf(stderr
, "%s: Corrupt database\n", dbf
);
758 exit((int)MANDOCLEVEL_SYSERR
);
762 printf("%s: Deleted index\n", fn
);
765 ch
= (*idx
->put
)(idx
, &key
, &val
, R_CURSOR
);
770 if (*reccur
>= *recsz
) {
771 *recsz
+= MANDOC_SLOP
;
772 *recs
= mandoc_realloc
773 (*recs
, *recsz
* sizeof(recno_t
));
776 (*recs
)[(int)*reccur
] = *maxrec
;
782 exit((int)MANDOCLEVEL_SYSERR
);
783 } else if (1 != ch
) {
784 fprintf(stderr
, "%s: Corrupt index\n", idxf
);
785 exit((int)MANDOCLEVEL_SYSERR
);
792 * Grow the buffer (if necessary) and copy in a binary string.
795 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
798 /* Overshoot by MANDOC_BUFSZ. */
800 while (buf
->len
+ sz
>= buf
->size
) {
801 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
802 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
805 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
810 * Append a nil-terminated string to the buffer.
811 * This can be invoked multiple times.
812 * The buffer string will be nil-terminated.
813 * If invoked multiple times, a space is put between strings.
816 buf_append(struct buf
*buf
, const char *cp
)
820 if (0 == (sz
= strlen(cp
)))
824 buf
->cp
[(int)buf
->len
- 1] = ' ';
826 buf_appendb(buf
, cp
, sz
+ 1);
830 * Recursively add all text from a given node.
831 * This is optimised for general mdoc nodes in this context, which do
832 * not consist of subexpressions and having a recursive call for n->next
834 * The "f" variable should be 0 unless called from pmdoc_Nd for the
835 * description buffer, which does not start at the beginning of the
839 buf_appendmdoc(struct buf
*buf
, const struct mdoc_node
*n
, int f
)
842 for ( ; n
; n
= n
->next
) {
844 buf_appendmdoc(buf
, n
->child
, f
);
846 if (MDOC_TEXT
== n
->type
&& f
) {
848 buf_appendb(buf
, n
->string
,
849 strlen(n
->string
) + 1);
850 } else if (MDOC_TEXT
== n
->type
)
851 buf_append(buf
, n
->string
);
861 if (NULL
!= (hash
= *db
))
862 (*hash
->close
)(hash
);
864 *db
= dbopen(NULL
, O_CREAT
|O_RDWR
, 0644, DB_HASH
, NULL
);
867 exit((int)MANDOCLEVEL_SYSERR
);
873 pmdoc_head(MDOC_ARGS
)
876 return(MDOC_HEAD
== n
->type
);
881 pmdoc_body(MDOC_ARGS
)
884 return(MDOC_BODY
== n
->type
);
891 const char *start
, *end
;
894 if (SEC_SYNOPSIS
!= n
->sec
)
896 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
900 * Only consider those `Fd' macro fields that begin with an
901 * "inclusion" token (versus, e.g., #define).
903 if (strcmp("#include", n
->string
))
906 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
910 * Strip away the enclosing angle brackets and make sure we're
915 if ('<' == *start
|| '"' == *start
)
918 if (0 == (sz
= strlen(start
)))
921 end
= &start
[(int)sz
- 1];
922 if ('>' == *end
|| '"' == *end
)
925 assert(end
>= start
);
927 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
928 buf_appendb(buf
, "", 1);
937 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
940 buf_append(buf
, n
->child
->string
);
948 struct mdoc_node
*nn
;
953 if (NULL
== nn
|| MDOC_TEXT
!= nn
->type
)
956 /* .Fn "struct type *name" "char *arg" */
958 cp
= strrchr(nn
->string
, ' ');
962 /* Strip away pointer symbol. */
967 /* Store the function name. */
970 hash_put(hash
, buf
, TYPE_Fn
);
972 /* Store the function type. */
974 if (nn
->string
< cp
) {
976 buf_appendb(buf
, nn
->string
, cp
- nn
->string
);
977 buf_appendb(buf
, "", 1);
978 hash_put(hash
, buf
, TYPE_Ft
);
981 /* Store the arguments. */
983 for (nn
= nn
->next
; nn
; nn
= nn
->next
) {
984 if (MDOC_TEXT
!= nn
->type
)
987 buf_append(buf
, nn
->string
);
988 hash_put(hash
, buf
, TYPE_Fa
);
999 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
1002 buf_append(buf
, n
->child
->string
);
1011 if (NULL
== (n
= n
->child
))
1014 buf_appendb(buf
, n
->string
, strlen(n
->string
));
1016 if (NULL
!= (n
= n
->next
)) {
1017 buf_appendb(buf
, ".", 1);
1018 buf_appendb(buf
, n
->string
, strlen(n
->string
) + 1);
1020 buf_appendb(buf
, ".", 2);
1030 if (MDOC_BODY
!= n
->type
)
1033 buf_appendmdoc(dbuf
, n
->child
, 1);
1042 if (SEC_NAME
== n
->sec
)
1044 else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
1047 if (NULL
== n
->child
)
1048 buf_append(buf
, m
->name
);
1058 return(SEC_CUSTOM
== n
->sec
&& MDOC_HEAD
== n
->type
);
1062 hash_put(DB
*db
, const struct buf
*buf
, uint64_t mask
)
1071 key
.size
= buf
->len
;
1073 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
1075 exit((int)MANDOCLEVEL_SYSERR
);
1077 mask
|= *(uint64_t *)val
.data
;
1080 val
.size
= sizeof(uint64_t);
1082 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
1084 exit((int)MANDOCLEVEL_SYSERR
);
1089 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
1095 if (0 == (*db
->put
)(db
, key
, val
, 0))
1099 exit((int)MANDOCLEVEL_SYSERR
);
1104 * Call out to per-macro handlers after clearing the persistent database
1105 * key. If the macro sets the database key, flush it to the database.
1108 pmdoc_node(MDOC_ARGS
)
1127 * Both NULL handlers and handlers returning true
1128 * request using the data. Only skip the element
1129 * when the handler returns false.
1132 if (NULL
!= mdocs
[n
->tok
].fp
&&
1133 0 == (*mdocs
[n
->tok
].fp
)(hash
, buf
, dbuf
, n
, m
))
1137 * For many macros, use the text from all children.
1138 * Set zero flags for macros not needing this.
1139 * In that case, the handler must fill the buffer.
1142 if (MDOCF_CHILD
& mdocs
[n
->tok
].flags
)
1143 buf_appendmdoc(buf
, n
->child
, 0);
1146 * Cover the most common case:
1147 * Automatically stage one string per element.
1148 * Set a zero mask for macros not needing this.
1149 * Additional staging can be done in the handler.
1152 if (mdocs
[n
->tok
].mask
)
1153 hash_put(hash
, buf
, mdocs
[n
->tok
].mask
);
1159 pmdoc_node(hash
, buf
, dbuf
, n
->child
, m
);
1160 pmdoc_node(hash
, buf
, dbuf
, n
->next
, m
);
1166 const struct man_node
*head
, *body
;
1167 const char *start
, *sv
;
1174 * We're only searching for one thing: the first text child in
1175 * the BODY of a NAME section. Since we don't keep track of
1176 * sections in -man, run some hoops to find out whether we're in
1177 * the correct section or not.
1180 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
1182 assert(body
->parent
);
1183 if (NULL
!= (head
= body
->parent
->head
) &&
1184 1 == head
->nchild
&&
1185 NULL
!= (head
= (head
->child
)) &&
1186 MAN_TEXT
== head
->type
&&
1187 0 == strcmp(head
->string
, "NAME") &&
1188 NULL
!= (body
= body
->child
) &&
1189 MAN_TEXT
== body
->type
) {
1191 assert(body
->string
);
1192 start
= sv
= body
->string
;
1195 * Go through a special heuristic dance here.
1196 * This is why -man manuals are great!
1197 * (I'm being sarcastic: my eyes are bleeding.)
1198 * Conventionally, one or more manual names are
1199 * comma-specified prior to a whitespace, then a
1200 * dash, then a description. Try to puzzle out
1201 * the name parts here.
1205 sz
= strcspn(start
, " ,");
1206 if ('\0' == start
[(int)sz
])
1210 buf_appendb(buf
, start
, sz
);
1211 buf_appendb(buf
, "", 1);
1213 hash_put(hash
, buf
, TYPE_Nm
);
1215 if (' ' == start
[(int)sz
]) {
1216 start
+= (int)sz
+ 1;
1220 assert(',' == start
[(int)sz
]);
1221 start
+= (int)sz
+ 1;
1222 while (' ' == *start
)
1229 buf_append(buf
, start
);
1233 while (' ' == *start
)
1236 if (0 == strncmp(start
, "-", 1))
1238 else if (0 == strncmp(start
, "\\-", 2))
1240 else if (0 == strncmp(start
, "\\(en", 4))
1242 else if (0 == strncmp(start
, "\\(em", 4))
1245 while (' ' == *start
)
1248 sz
= strlen(start
) + 1;
1249 buf_appendb(dbuf
, start
, sz
);
1250 buf_appendb(buf
, start
, sz
);
1252 hash_put(hash
, buf
, TYPE_Nd
);
1256 for (n
= n
->child
; n
; n
= n
->next
)
1257 if (pman_node(hash
, buf
, dbuf
, n
))
1264 * Parse a formatted manual page.
1265 * By necessity, this involves rather crude guesswork.
1268 pformatted(DB
*hash
, struct buf
*buf
, struct buf
*dbuf
,
1269 const struct of
*of
)
1275 if (NULL
== (stream
= fopen(of
->fname
, "r"))) {
1281 * Always use the title derived from the filename up front,
1282 * do not even try to find it in the file. This also makes
1283 * sure we don't end up with an orphan index record, even if
1284 * the file content turns out to be completely unintelligible.
1288 buf_append(buf
, of
->title
);
1289 hash_put(hash
, buf
, TYPE_Nm
);
1291 /* Skip to first blank line. */
1293 while (NULL
!= (line
= fgetln(stream
, &len
)))
1294 if (len
&& '\n' == *line
)
1298 * Skip to first section header.
1299 * This happens when text is flush-left.
1302 while (NULL
!= (line
= fgetln(stream
, &len
)))
1303 if (len
&& '\n' != *line
&& ' ' != *line
)
1307 * If no page content can be found or the input line is
1308 * malformed (zer-length or has no trailing newline), reuse the
1309 * page title as the page description.
1312 line
= fgetln(stream
, &len
);
1313 if (NULL
== line
|| len
== 0 || '\n' != line
[(int)len
- 1]) {
1314 buf_appendb(dbuf
, buf
->cp
, buf
->size
);
1315 hash_put(hash
, buf
, TYPE_Nd
);
1320 line
[(int)--len
] = '\0';
1323 * Skip to the last dash.
1324 * Use the remaining line as the description (no more than 70
1328 if (NULL
!= (p
= strrchr(line
, '-'))) {
1329 for (++p
; ' ' == *p
|| '\b' == *p
; p
++)
1330 /* Skip to next word. */ ;
1334 if ((plen
= strlen(p
)) > 70) {
1339 buf_appendb(dbuf
, p
, plen
+ 1);
1341 buf_appendb(buf
, p
, plen
+ 1);
1342 hash_put(hash
, buf
, TYPE_Nd
);
1347 ofile_argbuild(int argc
, char *argv
[], struct of
**of
)
1349 char buf
[MAXPATHLEN
];
1350 char *sec
, *arch
, *title
, *p
;
1354 for (i
= 0; i
< argc
; i
++) {
1357 * Try to infer the manual section, architecture and
1358 * page title from the path, assuming it looks like
1359 * man*[/<arch>]/<title>.<section> or
1360 * cat<section>[/<arch>]/<title>.0
1363 if (strlcpy(buf
, argv
[i
], sizeof(buf
)) >= sizeof(buf
)) {
1364 fprintf(stderr
, "%s: Path too long\n", argv
[i
]);
1367 sec
= arch
= title
= NULL
;
1369 p
= strrchr(buf
, '\0');
1371 if (NULL
== sec
&& '.' == *p
) {
1375 src_form
|= MANDOC_FORM
;
1376 else if ('1' <= *sec
&& '9' >= *sec
)
1377 src_form
|= MANDOC_SRC
;
1382 if (NULL
== title
) {
1387 if (0 == strncmp("man", p
+ 1, 3))
1388 src_form
|= MANDOC_SRC
;
1389 else if (0 == strncmp("cat", p
+ 1, 3))
1390 src_form
|= MANDOC_FORM
;
1399 * Build the file structure.
1402 nof
= mandoc_calloc(1, sizeof(struct of
));
1403 nof
->fname
= mandoc_strdup(argv
[i
]);
1405 nof
->sec
= mandoc_strdup(sec
);
1407 nof
->arch
= mandoc_strdup(arch
);
1408 nof
->title
= mandoc_strdup(title
);
1409 nof
->src_form
= src_form
;
1412 * Add the structure to the list.
1416 printf("%s: Scheduling\n", argv
[i
]);
1421 nof
->first
= (*of
)->first
;
1429 * Recursively build up a list of files to parse.
1430 * We use this instead of ftw() and so on because I don't want global
1431 * variables hanging around.
1432 * This ignores the mandoc.db and mandoc.index files, but assumes that
1433 * everything else is a manual.
1434 * Pass in a pointer to a NULL structure for the first invocation.
1437 ofile_dirbuild(const char *dir
, const char* psec
, const char *parch
,
1438 int p_src_form
, struct of
**of
)
1440 char buf
[MAXPATHLEN
];
1444 const char *fn
, *sec
, *arch
;
1445 char *p
, *q
, *suffix
;
1450 if (NULL
== (d
= opendir(dir
))) {
1455 while (NULL
!= (dp
= readdir(d
))) {
1461 src_form
= p_src_form
;
1463 if (DT_DIR
== dp
->d_type
) {
1468 * By default, only use directories called:
1469 * man<section>/[<arch>/] or
1470 * cat<section>/[<arch>/]
1474 if(0 == strncmp("man", fn
, 3)) {
1475 src_form
|= MANDOC_SRC
;
1477 } else if (0 == strncmp("cat", fn
, 3)) {
1478 src_form
|= MANDOC_FORM
;
1484 } else if (NULL
== arch
&& (use_all
||
1485 NULL
== strchr(fn
, '.')))
1487 else if (0 == use_all
)
1491 strlcat(buf
, dir
, MAXPATHLEN
);
1492 strlcat(buf
, "/", MAXPATHLEN
);
1493 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1495 if (MAXPATHLEN
<= sz
) {
1496 fprintf(stderr
, "%s: Path too long\n", dir
);
1501 printf("%s: Scanning\n", buf
);
1503 if ( ! ofile_dirbuild(buf
, sec
, arch
,
1507 if (DT_REG
!= dp
->d_type
||
1508 (NULL
== psec
&& !use_all
) ||
1509 !strcmp(MANDOC_DB
, fn
) ||
1510 !strcmp(MANDOC_IDX
, fn
))
1514 * By default, skip files where the file name suffix
1515 * does not agree with the section directory
1516 * they are located in.
1519 suffix
= strrchr(fn
, '.');
1523 if ((MANDOC_SRC
& src_form
&&
1524 strcmp(suffix
+ 1, psec
)) ||
1525 (MANDOC_FORM
& src_form
&&
1526 strcmp(suffix
+ 1, "0")))
1529 if (NULL
!= suffix
) {
1530 if ('0' == suffix
[1])
1531 src_form
|= MANDOC_FORM
;
1532 else if ('1' <= suffix
[1] && '9' >= suffix
[1])
1533 src_form
|= MANDOC_SRC
;
1538 * Skip formatted manuals if a source version is
1539 * available. Ignore the age: it is very unlikely
1540 * that people install newer formatted base manuals
1541 * when they used to have source manuals before,
1542 * and in ports, old manuals get removed on update.
1544 if (0 == use_all
&& MANDOC_FORM
& src_form
&&
1547 strlcat(buf
, dir
, MAXPATHLEN
);
1548 p
= strrchr(buf
, '/');
1553 if (0 == strncmp("cat", p
, 3))
1554 memcpy(p
, "man", 3);
1555 strlcat(buf
, "/", MAXPATHLEN
);
1556 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1557 if (sz
>= MAXPATHLEN
) {
1558 fprintf(stderr
, "%s: Path too long\n", buf
);
1561 q
= strrchr(buf
, '.');
1562 if (NULL
!= q
&& p
< q
++) {
1564 sz
= strlcat(buf
, psec
, MAXPATHLEN
);
1565 if (sz
>= MAXPATHLEN
) {
1567 "%s: Path too long\n", buf
);
1570 if (0 == stat(buf
, &sb
))
1576 strlcat(buf
, dir
, MAXPATHLEN
);
1577 strlcat(buf
, "/", MAXPATHLEN
);
1578 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1579 if (sz
>= MAXPATHLEN
) {
1580 fprintf(stderr
, "%s: Path too long\n", dir
);
1584 nof
= mandoc_calloc(1, sizeof(struct of
));
1585 nof
->fname
= mandoc_strdup(buf
);
1587 nof
->sec
= mandoc_strdup(psec
);
1589 nof
->arch
= mandoc_strdup(parch
);
1590 nof
->src_form
= src_form
;
1593 * Remember the file name without the extension,
1594 * to be used as the page title in the database.
1599 nof
->title
= mandoc_strdup(fn
);
1602 * Add the structure to the list.
1606 printf("%s: Scheduling\n", buf
);
1611 nof
->first
= (*of
)->first
;
1622 ofile_free(struct of
*of
)
1641 fprintf(stderr
, "usage: %s [-v] "
1642 "[-d dir [files...] |"
1643 " -u dir [files...] |"
1644 " dir...]\n", progname
);