]>
git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
1 /* $Id: mandocdb.c,v 1.39 2011/12/25 14:58:39 schwarze Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #include <sys/param.h>
23 #include <sys/types.h>
35 #if defined(__linux__)
38 #elif defined(__APPLE__)
39 # include <libkern/OSByteOrder.h>
51 #define MANDOC_BUFSZ BUFSIZ
52 #define MANDOC_SLOP 1024
54 #define MANDOC_SRC 0x1
55 #define MANDOC_FORM 0x2
57 /* Access to the mandoc database on disk. */
60 char idxn
[MAXPATHLEN
]; /* index db filename */
61 char dbn
[MAXPATHLEN
]; /* keyword db filename */
62 DB
*idx
; /* index recno database */
63 DB
*db
; /* keyword btree database */
66 /* Stack of temporarily unused index records. */
69 recno_t
*stack
; /* pointer to a malloc'ed array */
70 size_t size
; /* number of allocated slots */
71 size_t cur
; /* current number of empty records */
72 recno_t last
; /* last record number in the index */
75 /* Tiny list for files. No need to bring in QUEUE. */
78 char *fname
; /* heap-allocated */
83 struct of
*next
; /* NULL for last one */
84 struct of
*first
; /* first in list */
87 /* Buffer for storing growable data. */
91 size_t len
; /* current length */
92 size_t size
; /* total buffer size */
95 /* Operation we're going to perform. */
98 OP_DEFAULT
= 0, /* new dbs from dir list or default config */
99 OP_CONFFILE
, /* new databases from custom config file */
100 OP_UPDATE
, /* delete/add entries in existing database */
101 OP_DELETE
, /* delete entries from existing database */
102 OP_TEST
/* change no databases, report potential problems */
105 #define MAN_ARGS DB *hash, \
108 const struct man_node *n
109 #define MDOC_ARGS DB *hash, \
112 const struct mdoc_node *n, \
113 const struct mdoc_meta *m
115 static void buf_appendmdoc(struct buf
*,
116 const struct mdoc_node
*, int);
117 static void buf_append(struct buf
*, const char *);
118 static void buf_appendb(struct buf
*,
119 const void *, size_t);
120 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
121 static void hash_put(DB
*, const struct buf
*, uint64_t);
122 static void hash_reset(DB
**);
123 static void index_merge(const struct of
*, struct mparse
*,
124 struct buf
*, struct buf
*, DB
*,
125 struct mdb
*, struct recs
*);
126 static void index_prune(const struct of
*, struct mdb
*,
128 static void ofile_argbuild(int, char *[], struct of
**);
129 static void ofile_dirbuild(const char *, const char *,
130 const char *, int, struct of
**);
131 static void ofile_free(struct of
*);
132 static void pformatted(DB
*, struct buf
*, struct buf
*,
134 static int pman_node(MAN_ARGS
);
135 static void pmdoc_node(MDOC_ARGS
);
136 static int pmdoc_head(MDOC_ARGS
);
137 static int pmdoc_body(MDOC_ARGS
);
138 static int pmdoc_Fd(MDOC_ARGS
);
139 static int pmdoc_In(MDOC_ARGS
);
140 static int pmdoc_Fn(MDOC_ARGS
);
141 static int pmdoc_Nd(MDOC_ARGS
);
142 static int pmdoc_Nm(MDOC_ARGS
);
143 static int pmdoc_Sh(MDOC_ARGS
);
144 static int pmdoc_St(MDOC_ARGS
);
145 static int pmdoc_Xr(MDOC_ARGS
);
147 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
149 struct mdoc_handler
{
150 int (*fp
)(MDOC_ARGS
); /* Optional handler. */
151 uint64_t mask
; /* Set unless handler returns 0. */
152 int flags
; /* For use by pmdoc_node. */
155 static const struct mdoc_handler mdocs
[MDOC_MAX
] = {
156 { NULL
, 0, 0 }, /* Ap */
157 { NULL
, 0, 0 }, /* Dd */
158 { NULL
, 0, 0 }, /* Dt */
159 { NULL
, 0, 0 }, /* Os */
160 { pmdoc_Sh
, TYPE_Sh
, MDOCF_CHILD
}, /* Sh */
161 { pmdoc_head
, TYPE_Ss
, MDOCF_CHILD
}, /* Ss */
162 { NULL
, 0, 0 }, /* Pp */
163 { NULL
, 0, 0 }, /* D1 */
164 { NULL
, 0, 0 }, /* Dl */
165 { NULL
, 0, 0 }, /* Bd */
166 { NULL
, 0, 0 }, /* Ed */
167 { NULL
, 0, 0 }, /* Bl */
168 { NULL
, 0, 0 }, /* El */
169 { NULL
, 0, 0 }, /* It */
170 { NULL
, 0, 0 }, /* Ad */
171 { NULL
, TYPE_An
, MDOCF_CHILD
}, /* An */
172 { NULL
, TYPE_Ar
, MDOCF_CHILD
}, /* Ar */
173 { NULL
, TYPE_Cd
, MDOCF_CHILD
}, /* Cd */
174 { NULL
, TYPE_Cm
, MDOCF_CHILD
}, /* Cm */
175 { NULL
, TYPE_Dv
, MDOCF_CHILD
}, /* Dv */
176 { NULL
, TYPE_Er
, MDOCF_CHILD
}, /* Er */
177 { NULL
, TYPE_Ev
, MDOCF_CHILD
}, /* Ev */
178 { NULL
, 0, 0 }, /* Ex */
179 { NULL
, TYPE_Fa
, MDOCF_CHILD
}, /* Fa */
180 { pmdoc_Fd
, TYPE_In
, 0 }, /* Fd */
181 { NULL
, TYPE_Fl
, MDOCF_CHILD
}, /* Fl */
182 { pmdoc_Fn
, 0, 0 }, /* Fn */
183 { NULL
, TYPE_Ft
, MDOCF_CHILD
}, /* Ft */
184 { NULL
, TYPE_Ic
, MDOCF_CHILD
}, /* Ic */
185 { pmdoc_In
, TYPE_In
, 0 }, /* In */
186 { NULL
, TYPE_Li
, MDOCF_CHILD
}, /* Li */
187 { pmdoc_Nd
, TYPE_Nd
, MDOCF_CHILD
}, /* Nd */
188 { pmdoc_Nm
, TYPE_Nm
, MDOCF_CHILD
}, /* Nm */
189 { NULL
, 0, 0 }, /* Op */
190 { NULL
, 0, 0 }, /* Ot */
191 { NULL
, TYPE_Pa
, MDOCF_CHILD
}, /* Pa */
192 { NULL
, 0, 0 }, /* Rv */
193 { pmdoc_St
, TYPE_St
, 0 }, /* St */
194 { NULL
, TYPE_Va
, MDOCF_CHILD
}, /* Va */
195 { pmdoc_body
, TYPE_Va
, MDOCF_CHILD
}, /* Vt */
196 { pmdoc_Xr
, TYPE_Xr
, 0 }, /* Xr */
197 { NULL
, 0, 0 }, /* %A */
198 { NULL
, 0, 0 }, /* %B */
199 { NULL
, 0, 0 }, /* %D */
200 { NULL
, 0, 0 }, /* %I */
201 { NULL
, 0, 0 }, /* %J */
202 { NULL
, 0, 0 }, /* %N */
203 { NULL
, 0, 0 }, /* %O */
204 { NULL
, 0, 0 }, /* %P */
205 { NULL
, 0, 0 }, /* %R */
206 { NULL
, 0, 0 }, /* %T */
207 { NULL
, 0, 0 }, /* %V */
208 { NULL
, 0, 0 }, /* Ac */
209 { NULL
, 0, 0 }, /* Ao */
210 { NULL
, 0, 0 }, /* Aq */
211 { NULL
, TYPE_At
, MDOCF_CHILD
}, /* At */
212 { NULL
, 0, 0 }, /* Bc */
213 { NULL
, 0, 0 }, /* Bf */
214 { NULL
, 0, 0 }, /* Bo */
215 { NULL
, 0, 0 }, /* Bq */
216 { NULL
, TYPE_Bsx
, MDOCF_CHILD
}, /* Bsx */
217 { NULL
, TYPE_Bx
, MDOCF_CHILD
}, /* Bx */
218 { NULL
, 0, 0 }, /* Db */
219 { NULL
, 0, 0 }, /* Dc */
220 { NULL
, 0, 0 }, /* Do */
221 { NULL
, 0, 0 }, /* Dq */
222 { NULL
, 0, 0 }, /* Ec */
223 { NULL
, 0, 0 }, /* Ef */
224 { NULL
, TYPE_Em
, MDOCF_CHILD
}, /* Em */
225 { NULL
, 0, 0 }, /* Eo */
226 { NULL
, TYPE_Fx
, MDOCF_CHILD
}, /* Fx */
227 { NULL
, TYPE_Ms
, MDOCF_CHILD
}, /* Ms */
228 { NULL
, 0, 0 }, /* No */
229 { NULL
, 0, 0 }, /* Ns */
230 { NULL
, TYPE_Nx
, MDOCF_CHILD
}, /* Nx */
231 { NULL
, TYPE_Ox
, MDOCF_CHILD
}, /* Ox */
232 { NULL
, 0, 0 }, /* Pc */
233 { NULL
, 0, 0 }, /* Pf */
234 { NULL
, 0, 0 }, /* Po */
235 { NULL
, 0, 0 }, /* Pq */
236 { NULL
, 0, 0 }, /* Qc */
237 { NULL
, 0, 0 }, /* Ql */
238 { NULL
, 0, 0 }, /* Qo */
239 { NULL
, 0, 0 }, /* Qq */
240 { NULL
, 0, 0 }, /* Re */
241 { NULL
, 0, 0 }, /* Rs */
242 { NULL
, 0, 0 }, /* Sc */
243 { NULL
, 0, 0 }, /* So */
244 { NULL
, 0, 0 }, /* Sq */
245 { NULL
, 0, 0 }, /* Sm */
246 { NULL
, 0, 0 }, /* Sx */
247 { NULL
, TYPE_Sy
, MDOCF_CHILD
}, /* Sy */
248 { NULL
, TYPE_Tn
, MDOCF_CHILD
}, /* Tn */
249 { NULL
, 0, 0 }, /* Ux */
250 { NULL
, 0, 0 }, /* Xc */
251 { NULL
, 0, 0 }, /* Xo */
252 { pmdoc_head
, TYPE_Fn
, 0 }, /* Fo */
253 { NULL
, 0, 0 }, /* Fc */
254 { NULL
, 0, 0 }, /* Oo */
255 { NULL
, 0, 0 }, /* Oc */
256 { NULL
, 0, 0 }, /* Bk */
257 { NULL
, 0, 0 }, /* Ek */
258 { NULL
, 0, 0 }, /* Bt */
259 { NULL
, 0, 0 }, /* Hf */
260 { NULL
, 0, 0 }, /* Fr */
261 { NULL
, 0, 0 }, /* Ud */
262 { NULL
, TYPE_Lb
, MDOCF_CHILD
}, /* Lb */
263 { NULL
, 0, 0 }, /* Lp */
264 { NULL
, TYPE_Lk
, MDOCF_CHILD
}, /* Lk */
265 { NULL
, TYPE_Mt
, MDOCF_CHILD
}, /* Mt */
266 { NULL
, 0, 0 }, /* Brq */
267 { NULL
, 0, 0 }, /* Bro */
268 { NULL
, 0, 0 }, /* Brc */
269 { NULL
, 0, 0 }, /* %C */
270 { NULL
, 0, 0 }, /* Es */
271 { NULL
, 0, 0 }, /* En */
272 { NULL
, TYPE_Dx
, MDOCF_CHILD
}, /* Dx */
273 { NULL
, 0, 0 }, /* %Q */
274 { NULL
, 0, 0 }, /* br */
275 { NULL
, 0, 0 }, /* sp */
276 { NULL
, 0, 0 }, /* %U */
277 { NULL
, 0, 0 }, /* Ta */
280 static const char *progname
;
281 static int use_all
; /* Use all directories and files. */
282 static int verb
; /* Output verbosity level. */
283 static int warnings
; /* Potential problems in manuals. */
286 main(int argc
, char *argv
[])
288 struct mparse
*mp
; /* parse sequence */
289 struct manpaths dirs
;
292 enum op op
; /* current operation */
297 DB
*hash
; /* temporary keyword hashtable */
298 BTREEINFO info
; /* btree configuration */
300 struct buf buf
, /* keyword buffer */
301 dbuf
; /* description buffer */
302 struct of
*of
; /* list of files for processing */
306 progname
= strrchr(argv
[0], '/');
307 if (progname
== NULL
)
312 memset(&dirs
, 0, sizeof(struct manpaths
));
313 memset(&mdb
, 0, sizeof(struct mdb
));
314 memset(&recs
, 0, sizeof(struct recs
));
322 while (-1 != (ch
= getopt(argc
, argv
, "aC:d:tu:vW")))
330 "-C: conflicting options\n");
339 "-d: conflicting options\n");
346 dup2(STDOUT_FILENO
, STDERR_FILENO
);
349 "-t: conflicting options\n");
359 "-u: conflicting options\n");
378 if (OP_CONFFILE
== op
&& argc
> 0) {
379 fprintf(stderr
, "-C: too many arguments\n");
383 memset(&info
, 0, sizeof(BTREEINFO
));
386 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
388 memset(&buf
, 0, sizeof(struct buf
));
389 memset(&dbuf
, 0, sizeof(struct buf
));
391 buf
.size
= dbuf
.size
= MANDOC_BUFSZ
;
393 buf
.cp
= mandoc_malloc(buf
.size
);
394 dbuf
.cp
= mandoc_malloc(dbuf
.size
);
396 flags
= O_CREAT
| O_RDWR
;
397 if (OP_DEFAULT
== op
|| OP_CONFFILE
== op
)
401 ofile_argbuild(argc
, argv
, &of
);
404 index_merge(of
, mp
, &dbuf
, &buf
, hash
, &mdb
, &recs
);
408 if (OP_UPDATE
== op
|| OP_DELETE
== op
) {
409 strlcat(mdb
.dbn
, dir
, MAXPATHLEN
);
410 strlcat(mdb
.dbn
, "/", MAXPATHLEN
);
411 sz1
= strlcat(mdb
.dbn
, MANDOC_DB
, MAXPATHLEN
);
413 strlcat(mdb
.idxn
, dir
, MAXPATHLEN
);
414 strlcat(mdb
.idxn
, "/", MAXPATHLEN
);
415 sz2
= strlcat(mdb
.idxn
, MANDOC_IDX
, MAXPATHLEN
);
417 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
418 fprintf(stderr
, "%s: path too long\n", dir
);
419 exit((int)MANDOCLEVEL_BADARG
);
422 mdb
.db
= dbopen(mdb
.dbn
, flags
, 0644, DB_BTREE
, &info
);
423 mdb
.idx
= dbopen(mdb
.idxn
, flags
, 0644, DB_RECNO
, NULL
);
425 if (NULL
== mdb
.db
) {
427 exit((int)MANDOCLEVEL_SYSERR
);
428 } else if (NULL
== mdb
.idx
) {
430 exit((int)MANDOCLEVEL_SYSERR
);
433 ofile_argbuild(argc
, argv
, &of
);
438 index_prune(of
, &mdb
, &recs
);
441 * Go to the root of the respective manual tree.
442 * This must work or no manuals may be found (they're
443 * indexed relative to the root).
446 if (OP_UPDATE
== op
) {
447 if (-1 == chdir(dir
)) {
449 exit((int)MANDOCLEVEL_SYSERR
);
451 index_merge(of
, mp
, &dbuf
, &buf
, hash
,
459 * Configure the directories we're going to scan.
460 * If we have command-line arguments, use them.
461 * If not, we use man(1)'s method (see mandocdb.8).
465 dirs
.paths
= mandoc_calloc(argc
, sizeof(char *));
467 for (i
= 0; i
< argc
; i
++) {
468 if (NULL
== (cp
= realpath(argv
[i
], pbuf
))) {
472 dirs
.paths
[i
] = mandoc_strdup(cp
);
475 manpath_parse(&dirs
, dir
, NULL
, NULL
);
477 for (i
= 0; i
< dirs
.sz
; i
++) {
478 mdb
.idxn
[0] = mdb
.dbn
[0] = '\0';
480 strlcat(mdb
.dbn
, dirs
.paths
[i
], MAXPATHLEN
);
481 strlcat(mdb
.dbn
, "/", MAXPATHLEN
);
482 sz1
= strlcat(mdb
.dbn
, MANDOC_DB
, MAXPATHLEN
);
484 strlcat(mdb
.idxn
, dirs
.paths
[i
], MAXPATHLEN
);
485 strlcat(mdb
.idxn
, "/", MAXPATHLEN
);
486 sz2
= strlcat(mdb
.idxn
, MANDOC_IDX
, MAXPATHLEN
);
488 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
489 fprintf(stderr
, "%s: path too long\n",
491 exit((int)MANDOCLEVEL_BADARG
);
495 (*mdb
.db
->close
)(mdb
.db
);
497 (*mdb
.idx
->close
)(mdb
.idx
);
499 mdb
.db
= dbopen(mdb
.dbn
, flags
, 0644, DB_BTREE
, &info
);
500 mdb
.idx
= dbopen(mdb
.idxn
, flags
, 0644, DB_RECNO
, NULL
);
502 if (NULL
== mdb
.db
) {
504 exit((int)MANDOCLEVEL_SYSERR
);
505 } else if (NULL
== mdb
.idx
) {
507 exit((int)MANDOCLEVEL_SYSERR
);
513 if (-1 == chdir(dirs
.paths
[i
])) {
514 perror(dirs
.paths
[i
]);
515 exit((int)MANDOCLEVEL_SYSERR
);
518 ofile_dirbuild(".", "", "", 0, &of
);
523 * Go to the root of the respective manual tree.
524 * This must work or no manuals may be found (they're
525 * indexed relative to the root).
528 if (-1 == chdir(dirs
.paths
[i
])) {
529 perror(dirs
.paths
[i
]);
530 exit((int)MANDOCLEVEL_SYSERR
);
533 index_merge(of
, mp
, &dbuf
, &buf
, hash
, &mdb
, &recs
);
538 (*mdb
.db
->close
)(mdb
.db
);
540 (*mdb
.idx
->close
)(mdb
.idx
);
542 (*hash
->close
)(hash
);
552 return(MANDOCLEVEL_OK
);
556 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
557 " -d dir [file ...] | "
558 "-u dir [file ...]\n",
561 return((int)MANDOCLEVEL_BADARG
);
565 index_merge(const struct of
*of
, struct mparse
*mp
,
566 struct buf
*dbuf
, struct buf
*buf
, DB
*hash
,
567 struct mdb
*mdb
, struct recs
*recs
)
574 const char *fn
, *msec
, *march
, *mtitle
;
582 for (of
= of
->first
; of
; of
= of
->next
) {
586 * Try interpreting the file as mdoc(7) or man(7)
587 * source code, unless it is already known to be
588 * formatted. Fall back to formatted mode.
595 if ((MANDOC_SRC
& of
->src_form
||
596 ! (MANDOC_FORM
& of
->src_form
)) &&
597 MANDOCLEVEL_FATAL
> mparse_readfd(mp
, -1, fn
))
598 mparse_result(mp
, &mdoc
, &man
);
601 msec
= mdoc_meta(mdoc
)->msec
;
602 march
= mdoc_meta(mdoc
)->arch
;
605 mtitle
= mdoc_meta(mdoc
)->title
;
606 } else if (NULL
!= man
) {
607 msec
= man_meta(man
)->msec
;
609 mtitle
= man_meta(man
)->title
;
617 * By default, skip a file if the manual section
618 * and architecture given in the file disagree
619 * with the directory where the file is located.
625 if (strcasecmp(msec
, of
->sec
)) {
627 fprintf(stderr
, "%s: "
628 "section \"%s\" manual "
629 "in \"%s\" directory\n",
636 if (strcasecmp(march
, of
->arch
)) {
638 fprintf(stderr
, "%s: "
639 "architecture \"%s\" manual "
640 "in \"%s\" directory\n",
641 fn
, march
, of
->arch
);
646 * By default, skip a file if the title given
647 * in the file disagrees with the file name.
648 * If both agree, use the file name as the title,
649 * because the one in the file usually is all caps.
654 if (strcasecmp(mtitle
, of
->title
)) {
656 fprintf(stderr
, "%s: "
657 "title \"%s\" in file "
658 "but \"%s\" in filename\n",
659 fn
, mtitle
, of
->title
);
664 if (skip
&& !use_all
)
668 * The index record value consists of a nil-terminated
669 * filename, a nil-terminated manual section, and a
670 * nil-terminated description. Since the description
671 * may not be set, we set a sentinel to see if we're
672 * going to write a nil byte in its place.
676 type
= mdoc
? 'd' : (man
? 'a' : 'c');
677 buf_appendb(dbuf
, &type
, 1);
678 buf_appendb(dbuf
, fn
, strlen(fn
) + 1);
679 buf_appendb(dbuf
, msec
, strlen(msec
) + 1);
680 buf_appendb(dbuf
, mtitle
, strlen(mtitle
) + 1);
681 buf_appendb(dbuf
, march
, strlen(march
) + 1);
686 * Collect keyword/mask pairs.
687 * Each pair will become a new btree node.
692 pmdoc_node(hash
, buf
, dbuf
,
693 mdoc_node(mdoc
), mdoc_meta(mdoc
));
695 pman_node(hash
, buf
, dbuf
, man_node(man
));
697 pformatted(hash
, buf
, dbuf
, of
);
699 /* Test mode, do not access any database. */
701 if (NULL
== mdb
->db
|| NULL
== mdb
->idx
)
705 * Reclaim an empty index record, if available.
706 * Use its record number for all new btree nodes.
711 rec
= recs
->stack
[(int)recs
->cur
];
712 } else if (recs
->last
> 0) {
717 vbuf
[1] = htobe64(rec
);
720 * Copy from the in-memory hashtable of pending
721 * keyword/mask pairs into the database.
725 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
727 assert(sizeof(uint64_t) == val
.size
);
728 memcpy(&mask
, val
.data
, val
.size
);
729 vbuf
[0] = htobe64(mask
);
730 val
.size
= sizeof(vbuf
);
732 dbt_put(mdb
->db
, mdb
->dbn
, &key
, &val
);
736 exit((int)MANDOCLEVEL_SYSERR
);
740 * Apply to the index. If we haven't had a description
741 * set, put an empty one in now.
745 buf_appendb(dbuf
, "", 1);
748 key
.size
= sizeof(recno_t
);
751 val
.size
= dbuf
->len
;
754 printf("%s: adding to index\n", fn
);
756 dbt_put(mdb
->idx
, mdb
->idxn
, &key
, &val
);
761 * Scan through all entries in the index file `idx' and prune those
762 * entries in `ofile'.
763 * Pruning consists of removing from `db', then invalidating the entry
764 * in `idx' (zeroing its value size).
767 index_prune(const struct of
*ofile
, struct mdb
*mdb
, struct recs
*recs
)
778 while (0 == (ch
= (*mdb
->idx
->seq
)(mdb
->idx
, &key
, &val
, seq
))) {
780 assert(sizeof(recno_t
) == key
.size
);
781 memcpy(&recs
->last
, key
.data
, key
.size
);
783 /* Deleted records are zero-sized. Skip them. */
789 * Make sure we're sane.
790 * Read past our mdoc/man/cat type to the next string,
791 * then make sure it's bounded by a NUL.
792 * Failing any of these, we go into our error handler.
795 fn
= (char *)val
.data
+ 1;
796 if (NULL
== memchr(fn
, '\0', val
.size
- 1))
800 * Search for the file in those we care about.
801 * XXX: build this into a tree. Too slow.
804 for (of
= ofile
->first
; of
; of
= of
->next
)
805 if (0 == strcmp(fn
, of
->fname
))
812 * Search through the keyword database, throwing out all
813 * references to our file.
817 while (0 == (ch
= (*mdb
->db
->seq
)(mdb
->db
,
818 &key
, &val
, sseq
))) {
820 if (sizeof(vbuf
) != val
.size
)
823 memcpy(vbuf
, val
.data
, val
.size
);
824 if (recs
->last
!= betoh64(vbuf
[1]))
827 if ((ch
= (*mdb
->db
->del
)(mdb
->db
,
828 &key
, R_CURSOR
)) < 0)
834 exit((int)MANDOCLEVEL_SYSERR
);
835 } else if (1 != ch
) {
836 fprintf(stderr
, "%s: corrupt database\n",
838 exit((int)MANDOCLEVEL_SYSERR
);
842 printf("%s: deleting from index\n", fn
);
845 ch
= (*mdb
->idx
->put
)(mdb
->idx
, &key
, &val
, R_CURSOR
);
850 if (recs
->cur
>= recs
->size
) {
851 recs
->size
+= MANDOC_SLOP
;
852 recs
->stack
= mandoc_realloc(recs
->stack
,
853 recs
->size
* sizeof(recno_t
));
856 recs
->stack
[(int)recs
->cur
] = recs
->last
;
862 exit((int)MANDOCLEVEL_SYSERR
);
863 } else if (1 != ch
) {
864 fprintf(stderr
, "%s: corrupt index\n", mdb
->idxn
);
865 exit((int)MANDOCLEVEL_SYSERR
);
872 * Grow the buffer (if necessary) and copy in a binary string.
/*
 * buf_appendb: copy sz raw bytes from cp into the growable buffer buf,
 * writing them at offset buf->len inside buf->cp.
 *
 *   buf - destination buffer (fields cp, len and size are used here)
 *   cp  - source bytes; not required to be NUL-terminated
 *   sz  - number of bytes to copy
 *
 * While len + sz would reach or exceed the current capacity, the capacity
 * is raised to len + sz + MANDOC_BUFSZ and the storage is resized through
 * mandoc_realloc, so the copy below always has room.
 *
 * NOTE(review): the statement that advances buf->len after the memcpy is
 * not visible in this excerpt - presumably it follows; confirm against the
 * full file.
 */
875 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
878 /* Overshoot by MANDOC_BUFSZ. */
880 while (buf
->len
+ sz
>= buf
->size
) {
881 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
882 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
885 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
890 * Append a nil-terminated string to the buffer.
891 * This can be invoked multiple times.
892 * The buffer string will be nil-terminated.
893 * If invoked multiple times, a space is put between strings.
/*
 * buf_append: append the NUL-terminated string cp to buf as text.
 *
 *   buf - destination buffer
 *   cp  - NUL-terminated string to add
 *
 * The length of cp is measured first and a zero length is tested for.
 * The byte at buf->len - 1 (the terminator left by an earlier call) is
 * overwritten with a space so consecutive strings are space-separated,
 * then cp is appended together with its terminating NUL (sz + 1 bytes)
 * through buf_appendb.
 *
 * NOTE(review): the action taken for an empty cp and the condition
 * guarding the space overwrite are not visible in this excerpt -
 * presumably an early return and a check that the buffer is non-empty;
 * confirm against the full file.
 */
896 buf_append(struct buf
*buf
, const char *cp
)
900 if (0 == (sz
= strlen(cp
)))
904 buf
->cp
[(int)buf
->len
- 1] = ' ';
906 buf_appendb(buf
, cp
, sz
+ 1);
910 * Recursively add all text from a given node.
911 * This is optimised for general mdoc nodes in this context, which do
912 * not consist of subexpressions and having a recursive call for n->next
914 * The "f" variable should be 0 unless called from pmdoc_Nd for the
915 * description buffer, which does not start at the beginning of the
/*
 * buf_appendmdoc: collect the text found under an mdoc node list into buf.
 *
 *   buf - destination buffer
 *   n   - first node to visit; the loop follows n->next until it is NULL
 *   f   - passed unchanged to the recursive call; selects how text nodes
 *         are appended (see below)
 *
 * For every node in the sibling chain the function recurses into
 * n->child.  A node whose type is MDOC_TEXT is then appended:
 *   - when f is non-zero, as raw bytes including the terminating NUL
 *     via buf_appendb (no space joining);
 *   - otherwise via buf_append, which space-separates successive strings.
 *
 * NOTE(review): several lines between the visible statements are missing
 * from this excerpt (for example any guard in front of the recursive
 * call), so this describes only what is shown.
 */
919 buf_appendmdoc(struct buf
*buf
, const struct mdoc_node
*n
, int f
)
922 for ( ; n
; n
= n
->next
) {
924 buf_appendmdoc(buf
, n
->child
, f
);
926 if (MDOC_TEXT
== n
->type
&& f
) {
928 buf_appendb(buf
, n
->string
,
929 strlen(n
->string
) + 1);
930 } else if (MDOC_TEXT
== n
->type
)
931 buf_append(buf
, n
->string
);
941 if (NULL
!= (hash
= *db
))
942 (*hash
->close
)(hash
);
944 *db
= dbopen(NULL
, O_CREAT
|O_RDWR
, 0644, DB_HASH
, NULL
);
947 exit((int)MANDOCLEVEL_SYSERR
);
/*
 * pmdoc_head: per-macro handler that accepts only HEAD nodes.
 *
 * Returns non-zero when n->type is MDOC_HEAD and zero otherwise.  The
 * mdocs table installs it for the Ss and Fo entries; a zero return from a
 * handler makes pmdoc_node skip the element.
 */
953 pmdoc_head(MDOC_ARGS
)
956 return(MDOC_HEAD
== n
->type
);
/*
 * pmdoc_body: per-macro handler that accepts only BODY nodes.
 *
 * Returns non-zero when n->type is MDOC_BODY and zero otherwise.  The
 * mdocs table installs it for the Vt entry; a zero return from a handler
 * makes pmdoc_node skip the element.
 */
961 pmdoc_body(MDOC_ARGS
)
964 return(MDOC_BODY
== n
->type
);
971 const char *start
, *end
;
974 if (SEC_SYNOPSIS
!= n
->sec
)
976 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
980 * Only consider those `Fd' macro fields that begin with an
981 * "inclusion" token (versus, e.g., #define).
983 if (strcmp("#include", n
->string
))
986 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
990 * Strip away the enclosing angle brackets and make sure we're
995 if ('<' == *start
|| '"' == *start
)
998 if (0 == (sz
= strlen(start
)))
1001 end
= &start
[(int)sz
- 1];
1002 if ('>' == *end
|| '"' == *end
)
1005 assert(end
>= start
);
1007 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
1008 buf_appendb(buf
, "", 1);
1017 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
1020 buf_append(buf
, n
->child
->string
);
1028 struct mdoc_node
*nn
;
1033 if (NULL
== nn
|| MDOC_TEXT
!= nn
->type
)
1036 /* .Fn "struct type *name" "char *arg" */
1038 cp
= strrchr(nn
->string
, ' ');
1042 /* Strip away pointer symbol. */
1047 /* Store the function name. */
1049 buf_append(buf
, cp
);
1050 hash_put(hash
, buf
, TYPE_Fn
);
1052 /* Store the function type. */
1054 if (nn
->string
< cp
) {
1056 buf_appendb(buf
, nn
->string
, cp
- nn
->string
);
1057 buf_appendb(buf
, "", 1);
1058 hash_put(hash
, buf
, TYPE_Ft
);
1061 /* Store the arguments. */
1063 for (nn
= nn
->next
; nn
; nn
= nn
->next
) {
1064 if (MDOC_TEXT
!= nn
->type
)
1067 buf_append(buf
, nn
->string
);
1068 hash_put(hash
, buf
, TYPE_Fa
);
1079 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
1082 buf_append(buf
, n
->child
->string
);
1091 if (NULL
== (n
= n
->child
))
1094 buf_appendb(buf
, n
->string
, strlen(n
->string
));
1096 if (NULL
!= (n
= n
->next
)) {
1097 buf_appendb(buf
, ".", 1);
1098 buf_appendb(buf
, n
->string
, strlen(n
->string
) + 1);
1100 buf_appendb(buf
, ".", 2);
1110 if (MDOC_BODY
!= n
->type
)
1113 buf_appendmdoc(dbuf
, n
->child
, 1);
1122 if (SEC_NAME
== n
->sec
)
1124 else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
1127 if (NULL
== n
->child
)
1128 buf_append(buf
, m
->name
);
1138 return(SEC_CUSTOM
== n
->sec
&& MDOC_HEAD
== n
->type
);
/*
 * hash_put: record a keyword/mask pair in the database handle db.
 *
 *   db   - database handle whose get and put methods are called
 *   buf  - keyword buffer; buf->len bytes form the key
 *   mask - 64-bit type mask to associate with the keyword
 *
 * The key is looked up first.  A negative result from get terminates the
 * program with MANDOCLEVEL_SYSERR.  When the key already exists (get
 * returns 0) the stored value must be exactly sizeof(uint64_t) bytes and
 * is copied into oldmask.  A value of sizeof(uint64_t) bytes is then
 * written back with put; a negative result from put also terminates the
 * program with MANDOCLEVEL_SYSERR.
 *
 * NOTE(review): the lines that set key.data / val.data and that combine
 * mask with oldmask are not visible in this excerpt - presumably the two
 * masks are ORed together; confirm against the full file.
 */
1142 hash_put(DB
*db
, const struct buf
*buf
, uint64_t mask
)
1152 key
.size
= buf
->len
;
1154 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
1156 exit((int)MANDOCLEVEL_SYSERR
);
1157 } else if (0 == rc
) {
1158 assert(sizeof(uint64_t) == val
.size
);
1159 memcpy(&oldmask
, val
.data
, val
.size
);
1164 val
.size
= sizeof(uint64_t);
1166 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
1168 exit((int)MANDOCLEVEL_SYSERR
);
/*
 * dbt_put: store one key/value pair through db->put, treating failure as
 * fatal.
 *
 *   db  - database handle to write to
 *   dbn - name of that database
 *   key - key to store
 *   val - value to store
 *
 * A zero result from put is the success case; the failure path ends in
 * exit(MANDOCLEVEL_SYSERR).
 *
 * NOTE(review): the statement executed on success and the use of dbn are
 * not visible in this excerpt - presumably an early return and an error
 * message naming the database; confirm against the full file.
 */
1173 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
1179 if (0 == (*db
->put
)(db
, key
, val
, 0))
1183 exit((int)MANDOCLEVEL_SYSERR
);
1188 * Call out to per-macro handlers after clearing the persistent database
1189 * key. If the macro sets the database key, flush it to the database.
1192 pmdoc_node(MDOC_ARGS
)
1211 * Both NULL handlers and handlers returning true
1212 * request using the data. Only skip the element
1213 * when the handler returns false.
1216 if (NULL
!= mdocs
[n
->tok
].fp
&&
1217 0 == (*mdocs
[n
->tok
].fp
)(hash
, buf
, dbuf
, n
, m
))
1221 * For many macros, use the text from all children.
1222 * Set zero flags for macros not needing this.
1223 * In that case, the handler must fill the buffer.
1226 if (MDOCF_CHILD
& mdocs
[n
->tok
].flags
)
1227 buf_appendmdoc(buf
, n
->child
, 0);
1230 * Cover the most common case:
1231 * Automatically stage one string per element.
1232 * Set a zero mask for macros not needing this.
1233 * Additional staging can be done in the handler.
1236 if (mdocs
[n
->tok
].mask
)
1237 hash_put(hash
, buf
, mdocs
[n
->tok
].mask
);
1243 pmdoc_node(hash
, buf
, dbuf
, n
->child
, m
);
1244 pmdoc_node(hash
, buf
, dbuf
, n
->next
, m
);
1250 const struct man_node
*head
, *body
;
1251 const char *start
, *sv
;
1258 * We're only searching for one thing: the first text child in
1259 * the BODY of a NAME section. Since we don't keep track of
1260 * sections in -man, run some hoops to find out whether we're in
1261 * the correct section or not.
1264 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
1266 assert(body
->parent
);
1267 if (NULL
!= (head
= body
->parent
->head
) &&
1268 1 == head
->nchild
&&
1269 NULL
!= (head
= (head
->child
)) &&
1270 MAN_TEXT
== head
->type
&&
1271 0 == strcmp(head
->string
, "NAME") &&
1272 NULL
!= (body
= body
->child
) &&
1273 MAN_TEXT
== body
->type
) {
1275 assert(body
->string
);
1276 start
= sv
= body
->string
;
1279 * Go through a special heuristic dance here.
1280 * This is why -man manuals are great!
1281 * (I'm being sarcastic: my eyes are bleeding.)
1282 * Conventionally, one or more manual names are
1283 * comma-specified prior to a whitespace, then a
1284 * dash, then a description. Try to puzzle out
1285 * the name parts here.
1289 sz
= strcspn(start
, " ,");
1290 if ('\0' == start
[(int)sz
])
1294 buf_appendb(buf
, start
, sz
);
1295 buf_appendb(buf
, "", 1);
1297 hash_put(hash
, buf
, TYPE_Nm
);
1299 if (' ' == start
[(int)sz
]) {
1300 start
+= (int)sz
+ 1;
1304 assert(',' == start
[(int)sz
]);
1305 start
+= (int)sz
+ 1;
1306 while (' ' == *start
)
1313 buf_append(buf
, start
);
1317 while (' ' == *start
)
1320 if (0 == strncmp(start
, "-", 1))
1322 else if (0 == strncmp(start
, "\\-", 2))
1324 else if (0 == strncmp(start
, "\\(en", 4))
1326 else if (0 == strncmp(start
, "\\(em", 4))
1329 while (' ' == *start
)
1332 sz
= strlen(start
) + 1;
1333 buf_appendb(dbuf
, start
, sz
);
1334 buf_appendb(buf
, start
, sz
);
1336 hash_put(hash
, buf
, TYPE_Nd
);
1340 for (n
= n
->child
; n
; n
= n
->next
)
1341 if (pman_node(hash
, buf
, dbuf
, n
))
1348 * Parse a formatted manual page.
1349 * By necessity, this involves rather crude guesswork.
/*
 * pformatted: index a preformatted manual page.
 *
 *   hash - keyword hashtable receiving keyword/mask pairs via hash_put
 *   buf  - keyword buffer
 *   dbuf - description buffer
 *   of   - file entry; of->fname is opened for reading and of->title is
 *          used as the page name
 *
 * Visible steps, in order:
 *   1. Open of->fname with fopen in "r" mode.
 *   2. Stage of->title in buf and record it as TYPE_Nm.
 *   3. Read lines with fgetln up to the first blank line, then up to the
 *      first line that starts with neither a newline nor a space, then
 *      read one more line.
 *   4. Fallback: if that line is missing, does not start with a space, or
 *      lacks a trailing newline, print "cannot find NAME section", copy
 *      the keyword buffer into dbuf and record it as TYPE_Nd.
 *   5. Otherwise replace the trailing newline with NUL, look for "- " and
 *      skip the spaces and backspaces after it (a "no dash in title line"
 *      diagnostic exists for the other case), test for a description
 *      longer than 70 bytes, strip backspace encoding with memmove, and
 *      append the description plus NUL to both dbuf and buf before
 *      recording it as TYPE_Nd.
 *
 * NOTE(review): the fallback in step 4 calls
 * buf_appendb(dbuf, buf->cp, buf->size).  In struct buf, size is documented
 * as the total buffer size and len as the current length, so this copies
 * the entire allocation rather than just the staged title.  That looks
 * unintended (buf->len seems to be what is wanted) - confirm and fix in
 * the full file.
 *
 * NOTE(review): many lines of this function are missing from this excerpt
 * (declarations, error handling after fopen, loop bodies, fclose and
 * return statements), so the steps above cover only what is shown.
 */
1352 pformatted(DB
*hash
, struct buf
*buf
, struct buf
*dbuf
,
1353 const struct of
*of
)
1359 if (NULL
== (stream
= fopen(of
->fname
, "r"))) {
1366 * Always use the title derived from the filename up front,
1367 * do not even try to find it in the file. This also makes
1368 * sure we don't end up with an orphan index record, even if
1369 * the file content turns out to be completely unintelligible.
1373 buf_append(buf
, of
->title
);
1374 hash_put(hash
, buf
, TYPE_Nm
);
1376 /* Skip to first blank line. */
1378 while (NULL
!= (line
= fgetln(stream
, &len
)))
1383 * Assume the first line that is not indented
1384 * is the first section header. Skip to it.
1387 while (NULL
!= (line
= fgetln(stream
, &len
)))
1388 if ('\n' != *line
&& ' ' != *line
)
1392 * If no page content can be found, or the input line
1393 * is already the next section header, or there is no
1394 * trailing newline, reuse the page title as the page
1398 line
= fgetln(stream
, &len
);
1399 if (NULL
== line
|| ' ' != *line
|| '\n' != line
[(int)len
- 1]) {
1401 fprintf(stderr
, "%s: cannot find NAME section\n",
1403 buf_appendb(dbuf
, buf
->cp
, buf
->size
);
1404 hash_put(hash
, buf
, TYPE_Nd
);
1409 line
[(int)--len
] = '\0';
1412 * Skip to the first dash.
1413 * Use the remaining line as the description (no more than 70
1417 if (NULL
!= (p
= strstr(line
, "- "))) {
1418 for (p
+= 2; ' ' == *p
|| '\b' == *p
; p
++)
1419 /* Skip to next word. */ ;
1422 fprintf(stderr
, "%s: no dash in title line\n",
1427 if ((plen
= strlen(p
)) > 70) {
1432 /* Strip backspace-encoding from line. */
1434 while (NULL
!= (line
= memchr(p
, '\b', plen
))) {
1437 memmove(line
, line
+ 1, plen
--);
1440 memmove(line
- 1, line
+ 1, plen
- len
);
1444 buf_appendb(dbuf
, p
, plen
+ 1);
1446 buf_appendb(buf
, p
, plen
+ 1);
1447 hash_put(hash
, buf
, TYPE_Nd
);
1452 ofile_argbuild(int argc
, char *argv
[], struct of
**of
)
1454 char buf
[MAXPATHLEN
];
1455 char *sec
, *arch
, *title
, *p
;
1459 for (i
= 0; i
< argc
; i
++) {
1462 * Try to infer the manual section, architecture and
1463 * page title from the path, assuming it looks like
1464 * man*[/<arch>]/<title>.<section> or
1465 * cat<section>[/<arch>]/<title>.0
1468 if (strlcpy(buf
, argv
[i
], sizeof(buf
)) >= sizeof(buf
)) {
1469 fprintf(stderr
, "%s: path too long\n", argv
[i
]);
1472 sec
= arch
= title
= "";
1474 p
= strrchr(buf
, '\0');
1476 if ('\0' == *sec
&& '.' == *p
) {
1480 src_form
|= MANDOC_FORM
;
1481 else if ('1' <= *sec
&& '9' >= *sec
)
1482 src_form
|= MANDOC_SRC
;
1487 if ('\0' == *title
) {
1492 if (0 == strncmp("man", p
+ 1, 3))
1493 src_form
|= MANDOC_SRC
;
1494 else if (0 == strncmp("cat", p
+ 1, 3))
1495 src_form
|= MANDOC_FORM
;
1500 if ('\0' == *title
) {
1503 "%s: cannot deduce title "
1510 * Build the file structure.
1513 nof
= mandoc_calloc(1, sizeof(struct of
));
1514 nof
->fname
= mandoc_strdup(argv
[i
]);
1515 nof
->sec
= mandoc_strdup(sec
);
1516 nof
->arch
= mandoc_strdup(arch
);
1517 nof
->title
= mandoc_strdup(title
);
1518 nof
->src_form
= src_form
;
1521 * Add the structure to the list.
1525 printf("%s: scheduling\n", argv
[i
]);
1530 nof
->first
= (*of
)->first
;
1538 * Recursively build up a list of files to parse.
1539 * We use this instead of ftw() and so on because I don't want global
1540 * variables hanging around.
1541 * This ignores the mandoc.db and mandoc.index files, but assumes that
1542 * everything else is a manual.
1543 * Pass in a pointer to a NULL structure for the first invocation.
1546 ofile_dirbuild(const char *dir
, const char* psec
, const char *parch
,
1547 int p_src_form
, struct of
**of
)
1549 char buf
[MAXPATHLEN
];
1552 const char *fn
, *sec
, *arch
;
1553 char *p
, *q
, *suffix
;
1558 if (NULL
== (d
= opendir(dir
))) {
1564 while (NULL
!= (dp
= readdir(d
))) {
1570 src_form
= p_src_form
;
1572 if (DT_DIR
== dp
->d_type
) {
1577 * By default, only use directories called:
1578 * man<section>/[<arch>/] or
1579 * cat<section>/[<arch>/]
1583 if(0 == strncmp("man", fn
, 3)) {
1584 src_form
|= MANDOC_SRC
;
1586 } else if (0 == strncmp("cat", fn
, 3)) {
1587 src_form
|= MANDOC_FORM
;
1590 if (warnings
) fprintf(stderr
,
1591 "%s/%s: bad section\n",
1598 } else if ('\0' == *arch
) {
1599 if (NULL
!= strchr(fn
, '.')) {
1600 if (warnings
) fprintf(stderr
,
1601 "%s/%s: bad architecture\n",
1608 if (warnings
) fprintf(stderr
, "%s/%s: "
1609 "excessive subdirectory\n", dir
, fn
);
1615 strlcat(buf
, dir
, MAXPATHLEN
);
1616 strlcat(buf
, "/", MAXPATHLEN
);
1617 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1619 if (MAXPATHLEN
<= sz
) {
1620 if (warnings
) fprintf(stderr
, "%s/%s: "
1621 "path too long\n", dir
, fn
);
1626 printf("%s: scanning\n", buf
);
1628 ofile_dirbuild(buf
, sec
, arch
, src_form
, of
);
1632 if (DT_REG
!= dp
->d_type
) {
1635 "%s/%s: not a regular file\n",
1639 if (!strcmp(MANDOC_DB
, fn
) || !strcmp(MANDOC_IDX
, fn
))
1641 if ('\0' == *psec
) {
1644 "%s/%s: file outside section\n",
1651 * By default, skip files where the file name suffix
1652 * does not agree with the section directory
1653 * they are located in.
1656 suffix
= strrchr(fn
, '.');
1657 if (NULL
== suffix
) {
1660 "%s/%s: no filename suffix\n",
1664 } else if ((MANDOC_SRC
& src_form
&&
1665 strcmp(suffix
+ 1, psec
)) ||
1666 (MANDOC_FORM
& src_form
&&
1667 strcmp(suffix
+ 1, "0"))) {
1670 "%s/%s: wrong filename suffix\n",
1674 if ('0' == suffix
[1])
1675 src_form
|= MANDOC_FORM
;
1676 else if ('1' <= suffix
[1] && '9' >= suffix
[1])
1677 src_form
|= MANDOC_SRC
;
1681 * Skip formatted manuals if a source version is
1682 * available. Ignore the age: it is very unlikely
1683 * that people install newer formatted base manuals
1684 * when they used to have source manuals before,
1685 * and in ports, old manuals get removed on update.
1687 if (0 == use_all
&& MANDOC_FORM
& src_form
&&
1690 strlcat(buf
, dir
, MAXPATHLEN
);
1691 p
= strrchr(buf
, '/');
1692 if ('\0' != *parch
&& NULL
!= p
)
1693 for (p
--; p
> buf
; p
--)
1700 if (0 == strncmp("cat", p
, 3))
1701 memcpy(p
, "man", 3);
1702 strlcat(buf
, "/", MAXPATHLEN
);
1703 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1704 if (sz
>= MAXPATHLEN
) {
1705 if (warnings
) fprintf(stderr
,
1706 "%s/%s: path too long\n",
1710 q
= strrchr(buf
, '.');
1711 if (NULL
!= q
&& p
< q
++) {
1713 sz
= strlcat(buf
, psec
, MAXPATHLEN
);
1714 if (sz
>= MAXPATHLEN
) {
1715 if (warnings
) fprintf(stderr
,
1716 "%s/%s: path too long\n",
1720 if (0 == access(buf
, R_OK
))
1726 assert('.' == dir
[0]);
1727 if ('/' == dir
[1]) {
1728 strlcat(buf
, dir
+ 2, MAXPATHLEN
);
1729 strlcat(buf
, "/", MAXPATHLEN
);
1731 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1732 if (sz
>= MAXPATHLEN
) {
1733 if (warnings
) fprintf(stderr
,
1734 "%s/%s: path too long\n", dir
, fn
);
1738 nof
= mandoc_calloc(1, sizeof(struct of
));
1739 nof
->fname
= mandoc_strdup(buf
);
1740 nof
->sec
= mandoc_strdup(psec
);
1741 nof
->arch
= mandoc_strdup(parch
);
1742 nof
->src_form
= src_form
;
1745 * Remember the file name without the extension,
1746 * to be used as the page title in the database.
1751 nof
->title
= mandoc_strdup(fn
);
1754 * Add the structure to the list.
1758 printf("%s: scheduling\n", buf
);
1763 nof
->first
= (*of
)->first
;
1773 ofile_free(struct of
*of
)