]>
git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
1 /* $Id: mandocdb.c,v 1.22 2011/12/03 12:09:07 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #include <sys/param.h>
23 #include <sys/types.h>
36 #if defined(__linux__)
39 #elif defined(__APPLE__)
40 # include <libkern/OSByteOrder.h>
52 #define MANDOC_BUFSZ BUFSIZ
53 #define MANDOC_SLOP 1024
55 #define MANDOC_SRC 0x1
56 #define MANDOC_FORM 0x2
58 /* Tiny list for files. No need to bring in QUEUE. */
61 char *fname
; /* heap-allocated */
66 struct of
*next
; /* NULL for last one */
67 struct of
*first
; /* first in list */
70 /* Buffer for storing growable data. */
74 size_t len
; /* current length */
75 size_t size
; /* total buffer size */
78 /* Operation we're going to perform. */
81 OP_NEW
= 0, /* new database */
82 OP_UPDATE
, /* delete/add entries in existing database */
83 OP_DELETE
/* delete entries from existing database */
86 #define MAN_ARGS DB *hash, \
89 const struct man_node *n
90 #define MDOC_ARGS DB *hash, \
93 const struct mdoc_node *n, \
94 const struct mdoc_meta *m
96 static void buf_appendmdoc(struct buf
*,
97 const struct mdoc_node
*, int);
98 static void buf_append(struct buf
*, const char *);
99 static void buf_appendb(struct buf
*,
100 const void *, size_t);
101 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
102 static void hash_put(DB
*, const struct buf
*, uint64_t);
103 static void hash_reset(DB
**);
104 static void index_merge(const struct of
*, struct mparse
*,
105 struct buf
*, struct buf
*, DB
*,
106 DB
*, const char *, DB
*, const char *,
107 recno_t
, const recno_t
*, size_t);
108 static void index_prune(const struct of
*, DB
*,
109 const char *, DB
*, const char *,
110 recno_t
*, recno_t
**, size_t *);
111 static void ofile_argbuild(int, char *[], struct of
**);
112 static int ofile_dirbuild(const char *, const char *,
113 const char *, int, struct of
**);
114 static void ofile_free(struct of
*);
115 static void pformatted(DB
*, struct buf
*, struct buf
*,
117 static int pman_node(MAN_ARGS
);
118 static void pmdoc_node(MDOC_ARGS
);
119 static void pmdoc_An(MDOC_ARGS
);
120 static void pmdoc_Cd(MDOC_ARGS
);
121 static void pmdoc_Er(MDOC_ARGS
);
122 static void pmdoc_Ev(MDOC_ARGS
);
123 static void pmdoc_Fd(MDOC_ARGS
);
124 static void pmdoc_In(MDOC_ARGS
);
125 static void pmdoc_Fn(MDOC_ARGS
);
126 static void pmdoc_Fo(MDOC_ARGS
);
127 static void pmdoc_Nd(MDOC_ARGS
);
128 static void pmdoc_Nm(MDOC_ARGS
);
129 static void pmdoc_Pa(MDOC_ARGS
);
130 static void pmdoc_St(MDOC_ARGS
);
131 static void pmdoc_Vt(MDOC_ARGS
);
132 static void pmdoc_Xr(MDOC_ARGS
);
133 static void usage(void);
135 typedef void (*pmdoc_nf
)(MDOC_ARGS
);
137 static const pmdoc_nf mdocs
[MDOC_MAX
] = {
262 static const char *progname
;
263 static int use_all
; /* Use all directories and files. */
264 static int verb
; /* Output verbosity level. */
267 main(int argc
, char *argv
[])
269 struct mparse
*mp
; /* parse sequence */
270 struct manpaths dirs
;
271 enum op op
; /* current operation */
273 char ibuf
[MAXPATHLEN
], /* index fname */
274 fbuf
[MAXPATHLEN
]; /* btree fname */
276 DB
*idx
, /* index database */
277 *db
, /* keyword database */
278 *hash
; /* temporary keyword hashtable */
279 BTREEINFO info
; /* btree configuration */
280 recno_t maxrec
; /* last record number in the index */
281 recno_t
*recs
; /* the numbers of all empty records */
283 recsz
, /* number of allocated slots in recs */
284 reccur
; /* current number of empty records */
285 struct buf buf
, /* keyword buffer */
286 dbuf
; /* description buffer */
287 struct of
*of
; /* list of files for processing */
291 progname
= strrchr(argv
[0], '/');
292 if (progname
== NULL
)
297 memset(&dirs
, 0, sizeof(struct manpaths
));
311 while (-1 != (ch
= getopt(argc
, argv
, "ad:u:v")))
329 return((int)MANDOCLEVEL_BADARG
);
335 memset(&info
, 0, sizeof(BTREEINFO
));
339 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
341 memset(&buf
, 0, sizeof(struct buf
));
342 memset(&dbuf
, 0, sizeof(struct buf
));
344 buf
.size
= dbuf
.size
= MANDOC_BUFSZ
;
346 buf
.cp
= mandoc_malloc(buf
.size
);
347 dbuf
.cp
= mandoc_malloc(dbuf
.size
);
349 flags
= OP_NEW
== op
? O_CREAT
|O_TRUNC
|O_RDWR
: O_CREAT
|O_RDWR
;
351 if (OP_UPDATE
== op
|| OP_DELETE
== op
) {
352 ibuf
[0] = fbuf
[0] = '\0';
354 strlcat(fbuf
, dir
, MAXPATHLEN
);
355 strlcat(fbuf
, "/", MAXPATHLEN
);
356 sz1
= strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
358 strlcat(ibuf
, dir
, MAXPATHLEN
);
359 strlcat(ibuf
, "/", MAXPATHLEN
);
360 sz2
= strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
362 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
363 fprintf(stderr
, "%s: Path too long\n", dir
);
364 exit((int)MANDOCLEVEL_BADARG
);
367 db
= dbopen(fbuf
, flags
, 0644, DB_BTREE
, &info
);
368 idx
= dbopen(ibuf
, flags
, 0644, DB_RECNO
, NULL
);
372 exit((int)MANDOCLEVEL_SYSERR
);
373 } else if (NULL
== idx
) {
375 exit((int)MANDOCLEVEL_SYSERR
);
379 printf("%s: Opened\n", fbuf
);
380 printf("%s: Opened\n", ibuf
);
383 ofile_argbuild(argc
, argv
, &of
);
389 index_prune(of
, db
, fbuf
, idx
, ibuf
,
390 &maxrec
, &recs
, &recsz
);
393 * Go to the root of the respective manual tree
394 * such that .so links work. In case of failure,
395 * just prod on, even though .so links won't work.
398 if (OP_UPDATE
== op
) {
400 index_merge(of
, mp
, &dbuf
, &buf
, hash
,
402 maxrec
, recs
, reccur
);
409 * Configure the directories we're going to scan.
410 * If we have command-line arguments, use them.
411 * If not, we use man(1)'s method (see mandocdb.8).
415 dirs
.paths
= mandoc_malloc(argc
* sizeof(char *));
417 for (i
= 0; i
< argc
; i
++)
418 dirs
.paths
[i
] = mandoc_strdup(argv
[i
]);
420 manpath_parse(&dirs
, NULL
, NULL
);
422 for (i
= 0; i
< dirs
.sz
; i
++) {
423 ibuf
[0] = fbuf
[0] = '\0';
425 strlcat(fbuf
, dirs
.paths
[i
], MAXPATHLEN
);
426 strlcat(fbuf
, "/", MAXPATHLEN
);
427 sz1
= strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
429 strlcat(ibuf
, dirs
.paths
[i
], MAXPATHLEN
);
430 strlcat(ibuf
, "/", MAXPATHLEN
);
431 sz2
= strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
433 if (sz1
>= MAXPATHLEN
|| sz2
>= MAXPATHLEN
) {
434 fprintf(stderr
, "%s: Path too long\n",
436 exit((int)MANDOCLEVEL_BADARG
);
444 db
= dbopen(fbuf
, flags
, 0644, DB_BTREE
, &info
);
445 idx
= dbopen(ibuf
, flags
, 0644, DB_RECNO
, NULL
);
449 exit((int)MANDOCLEVEL_SYSERR
);
450 } else if (NULL
== idx
) {
452 exit((int)MANDOCLEVEL_SYSERR
);
456 printf("%s: Truncated\n", fbuf
);
457 printf("%s: Truncated\n", ibuf
);
463 if ( ! ofile_dirbuild(dirs
.paths
[i
], NULL
, NULL
,
465 exit((int)MANDOCLEVEL_SYSERR
);
473 * Go to the root of the respective manual tree
474 * such that .so links work. In case of failure,
475 * just prod on, even though .so links won't work.
478 chdir(dirs
.paths
[i
]);
479 index_merge(of
, mp
, &dbuf
, &buf
, hash
, db
, fbuf
,
480 idx
, ibuf
, maxrec
, recs
, reccur
);
489 (*hash
->close
)(hash
);
499 return(MANDOCLEVEL_OK
);
503 index_merge(const struct of
*of
, struct mparse
*mp
,
504 struct buf
*dbuf
, struct buf
*buf
, DB
*hash
,
505 DB
*db
, const char *dbf
, DB
*idx
, const char *idxf
,
506 recno_t maxrec
, const recno_t
*recs
, size_t reccur
)
513 const char *fn
, *msec
, *mtitle
, *arch
;
518 for (rec
= 0; of
; of
= of
->next
) {
522 * Reclaim an empty index record, if available.
527 rec
= recs
[(int)reccur
];
528 } else if (maxrec
> 0) {
540 * Try interpreting the file as mdoc(7) or man(7)
541 * source code, unless it is already known to be
542 * formatted. Fall back to formatted mode.
545 if ((MANDOC_SRC
& of
->src_form
||
546 ! (MANDOC_FORM
& of
->src_form
)) &&
547 MANDOCLEVEL_FATAL
> mparse_readfd(mp
, -1, fn
))
548 mparse_result(mp
, &mdoc
, &man
);
551 msec
= mdoc_meta(mdoc
)->msec
;
552 arch
= mdoc_meta(mdoc
)->arch
;
553 mtitle
= mdoc_meta(mdoc
)->title
;
554 } else if (NULL
!= man
) {
555 msec
= man_meta(man
)->msec
;
557 mtitle
= man_meta(man
)->title
;
565 * By default, skip a file if the manual section
566 * and architecture given in the file disagree
567 * with the directory where the file is located.
573 if (strcmp(msec
, of
->sec
))
577 if (NULL
!= of
->arch
)
579 } else if (NULL
== of
->arch
||
580 strcmp(arch
, of
->arch
))
588 * By default, skip a file if the title given
589 * in the file disagrees with the file name.
590 * If both agree, use the file name as the title,
591 * because the one in the file usually is all caps.
597 if (0 == strcasecmp(mtitle
, of
->title
))
599 else if (0 == use_all
)
603 * The index record value consists of a nil-terminated
604 * filename, a nil-terminated manual section, and a
605 * nil-terminated description. Since the description
606 * may not be set, we set a sentinel to see if we're
607 * going to write a nil byte in its place.
611 buf_append(dbuf
, mdoc
? "mdoc" : (man
? "man" : "cat"));
612 buf_appendb(dbuf
, fn
, strlen(fn
) + 1);
613 buf_appendb(dbuf
, msec
, strlen(msec
) + 1);
614 buf_appendb(dbuf
, mtitle
, strlen(mtitle
) + 1);
615 buf_appendb(dbuf
, arch
, strlen(arch
) + 1);
619 /* Fix the record number in the btree value. */
622 pmdoc_node(hash
, buf
, dbuf
,
623 mdoc_node(mdoc
), mdoc_meta(mdoc
));
625 pman_node(hash
, buf
, dbuf
, man_node(man
));
627 pformatted(hash
, buf
, dbuf
, of
);
630 * Copy from the in-memory hashtable of pending keywords
634 vbuf
.rec
= htobe32(rec
);
636 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
638 vbuf
.mask
= htobe64(*(uint64_t *)val
.data
);
639 val
.size
= sizeof(struct db_val
);
641 dbt_put(db
, dbf
, &key
, &val
);
645 exit((int)MANDOCLEVEL_SYSERR
);
649 * Apply to the index. If we haven't had a description
650 * set, put an empty one in now.
654 buf_appendb(dbuf
, "", 1);
657 key
.size
= sizeof(recno_t
);
660 val
.size
= dbuf
->len
;
663 printf("%s: Added index\n", fn
);
665 dbt_put(idx
, idxf
, &key
, &val
);
670 * Scan through all entries in the index file `idx' and prune those
671 * entries in `ofile'.
672 * Pruning consists of removing from `db', then invalidating the entry
673 * in `idx' (zeroing its value size).
676 index_prune(const struct of
*ofile
, DB
*db
, const char *dbf
,
677 DB
*idx
, const char *idxf
,
678 recno_t
*maxrec
, recno_t
**recs
, size_t *recsz
)
690 while (0 == (ch
= (*idx
->seq
)(idx
, &key
, &val
, seq
))) {
692 *maxrec
= *(recno_t
*)key
.data
;
695 /* Deleted records are zero-sized. Skip them. */
701 * Make sure we're sane.
702 * Read past our mdoc/man/cat type to the next string,
703 * then make sure it's bounded by a NUL.
704 * Failing any of these, we go into our error handler.
707 if (NULL
== (fn
= memchr(cp
, '\0', val
.size
)))
709 if (++fn
- cp
>= (int)val
.size
)
711 if (NULL
== memchr(fn
, '\0', val
.size
- (fn
- cp
)))
715 * Search for the file in those we care about.
716 * XXX: build this into a tree. Too slow.
719 for (of
= ofile
; of
; of
= of
->next
)
720 if (0 == strcmp(fn
, of
->fname
))
727 * Search through the keyword database, throwing out all
728 * references to our file.
732 while (0 == (ch
= (*db
->seq
)(db
, &key
, &val
, sseq
))) {
734 if (sizeof(struct db_val
) != val
.size
)
738 if (*maxrec
!= betoh32(vbuf
->rec
))
741 if ((ch
= (*db
->del
)(db
, &key
, R_CURSOR
)) < 0)
747 exit((int)MANDOCLEVEL_SYSERR
);
748 } else if (1 != ch
) {
749 fprintf(stderr
, "%s: Corrupt database\n", dbf
);
750 exit((int)MANDOCLEVEL_SYSERR
);
754 printf("%s: Deleted index\n", fn
);
757 ch
= (*idx
->put
)(idx
, &key
, &val
, R_CURSOR
);
762 if (reccur
>= *recsz
) {
763 *recsz
+= MANDOC_SLOP
;
764 *recs
= mandoc_realloc
765 (*recs
, *recsz
* sizeof(recno_t
));
768 (*recs
)[(int)reccur
] = *maxrec
;
774 exit((int)MANDOCLEVEL_SYSERR
);
775 } else if (1 != ch
) {
776 fprintf(stderr
, "%s: Corrupt index\n", idxf
);
777 exit((int)MANDOCLEVEL_SYSERR
);
784 * Grow the buffer (if necessary) and copy in a binary string.
787 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
790 /* Overshoot by MANDOC_BUFSZ. */
792 while (buf
->len
+ sz
>= buf
->size
) {
793 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
794 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
797 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
802 * Append a nil-terminated string to the buffer.
803 * This can be invoked multiple times.
804 * The buffer string will be nil-terminated.
805 * If invoked multiple times, a space is put between strings.
808 buf_append(struct buf
*buf
, const char *cp
)
812 if (0 == (sz
= strlen(cp
)))
816 buf
->cp
[(int)buf
->len
- 1] = ' ';
818 buf_appendb(buf
, cp
, sz
+ 1);
822 * Recursively add all text from a given node.
823 * This is optimised for general mdoc nodes in this context, which do
824 * not consist of subexpressions and having a recursive call for n->next
826 * The "f" variable should be 0 unless called from pmdoc_Nd for the
827 * description buffer, which does not start at the beginning of the
831 buf_appendmdoc(struct buf
*buf
, const struct mdoc_node
*n
, int f
)
834 for ( ; n
; n
= n
->next
) {
836 buf_appendmdoc(buf
, n
->child
, f
);
838 if (MDOC_TEXT
== n
->type
&& f
) {
840 buf_appendb(buf
, n
->string
,
841 strlen(n
->string
) + 1);
842 } else if (MDOC_TEXT
== n
->type
)
843 buf_append(buf
, n
->string
);
853 if (SEC_AUTHORS
!= n
->sec
)
856 buf_appendmdoc(buf
, n
->child
, 0);
857 hash_put(hash
, buf
, TYPE_An
);
865 if (NULL
!= (hash
= *db
))
866 (*hash
->close
)(hash
);
868 *db
= dbopen(NULL
, O_CREAT
|O_RDWR
, 0644, DB_HASH
, NULL
);
871 exit((int)MANDOCLEVEL_SYSERR
);
879 const char *start
, *end
;
882 if (SEC_SYNOPSIS
!= n
->sec
)
884 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
888 * Only consider those `Fd' macro fields that begin with an
889 * "inclusion" token (versus, e.g., #define).
891 if (strcmp("#include", n
->string
))
894 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
898 * Strip away the enclosing angle brackets and make sure we're
903 if ('<' == *start
|| '"' == *start
)
906 if (0 == (sz
= strlen(start
)))
909 end
= &start
[(int)sz
- 1];
910 if ('>' == *end
|| '"' == *end
)
913 assert(end
>= start
);
915 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
916 buf_appendb(buf
, "", 1);
918 hash_put(hash
, buf
, TYPE_In
);
926 if (SEC_SYNOPSIS
!= n
->sec
)
929 buf_appendmdoc(buf
, n
->child
, 0);
930 hash_put(hash
, buf
, TYPE_Cd
);
938 if (SEC_SYNOPSIS
!= n
->sec
)
940 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
943 buf_append(buf
, n
->child
->string
);
944 hash_put(hash
, buf
, TYPE_In
);
953 if (SEC_SYNOPSIS
!= n
->sec
)
955 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
958 /* .Fn "struct type *arg" "foo" */
960 cp
= strrchr(n
->child
->string
, ' ');
962 cp
= n
->child
->string
;
964 /* Strip away pointer symbol. */
970 hash_put(hash
, buf
, TYPE_Fn
);
978 if (SEC_STANDARDS
!= n
->sec
)
980 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
983 buf_append(buf
, n
->child
->string
);
984 hash_put(hash
, buf
, TYPE_St
);
992 if (NULL
== (n
= n
->child
))
995 buf_appendb(buf
, n
->string
, strlen(n
->string
));
997 if (NULL
!= (n
= n
->next
)) {
998 buf_appendb(buf
, ".", 1);
999 buf_appendb(buf
, n
->string
, strlen(n
->string
) + 1);
1001 buf_appendb(buf
, ".", 2);
1003 hash_put(hash
, buf
, TYPE_Xr
);
1013 if (SEC_SYNOPSIS
!= n
->sec
)
1015 if (MDOC_Vt
== n
->tok
&& MDOC_BODY
!= n
->type
)
1017 if (NULL
== n
->last
|| MDOC_TEXT
!= n
->last
->type
)
1021 * Strip away leading pointer symbol '*' and trailing ';'.
1024 start
= n
->last
->string
;
1026 while ('*' == *start
)
1029 if (0 == (sz
= strlen(start
)))
1032 if (';' == start
[(int)sz
- 1])
1038 buf_appendb(buf
, start
, sz
);
1039 buf_appendb(buf
, "", 1);
1040 hash_put(hash
, buf
, TYPE_Va
);
1048 if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
1050 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
1053 buf_append(buf
, n
->child
->string
);
1054 hash_put(hash
, buf
, TYPE_Fn
);
1063 if (MDOC_BODY
!= n
->type
)
1066 buf_appendmdoc(dbuf
, n
->child
, 1);
1067 buf_appendmdoc(buf
, n
->child
, 0);
1069 hash_put(hash
, buf
, TYPE_Nd
);
1077 if (SEC_ERRORS
!= n
->sec
)
1080 buf_appendmdoc(buf
, n
->child
, 0);
1081 hash_put(hash
, buf
, TYPE_Er
);
1089 if (SEC_ENVIRONMENT
!= n
->sec
)
1092 buf_appendmdoc(buf
, n
->child
, 0);
1093 hash_put(hash
, buf
, TYPE_Ev
);
1101 if (SEC_FILES
!= n
->sec
)
1104 buf_appendmdoc(buf
, n
->child
, 0);
1105 hash_put(hash
, buf
, TYPE_Pa
);
1113 if (SEC_NAME
== n
->sec
) {
1114 buf_appendmdoc(buf
, n
->child
, 0);
1115 hash_put(hash
, buf
, TYPE_Nm
);
1117 } else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
1120 if (NULL
== n
->child
)
1121 buf_append(buf
, m
->name
);
1123 buf_appendmdoc(buf
, n
->child
, 0);
1124 hash_put(hash
, buf
, TYPE_Nm
);
1128 hash_put(DB
*db
, const struct buf
*buf
, uint64_t mask
)
1137 key
.size
= buf
->len
;
1139 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
1141 exit((int)MANDOCLEVEL_SYSERR
);
1143 mask
|= *(uint64_t *)val
.data
;
1146 val
.size
= sizeof(uint64_t);
1148 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
1150 exit((int)MANDOCLEVEL_SYSERR
);
1155 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
1161 if (0 == (*db
->put
)(db
, key
, val
, 0))
1165 exit((int)MANDOCLEVEL_SYSERR
);
1170 * Call out to per-macro handlers after clearing the persistent database
1171 * key. If the macro sets the database key, flush it to the database.
1174 pmdoc_node(MDOC_ARGS
)
1190 if (NULL
== mdocs
[n
->tok
])
1194 (*mdocs
[n
->tok
])(hash
, buf
, dbuf
, n
, m
);
1200 pmdoc_node(hash
, buf
, dbuf
, n
->child
, m
);
1201 pmdoc_node(hash
, buf
, dbuf
, n
->next
, m
);
1207 const struct man_node
*head
, *body
;
1208 const char *start
, *sv
;
1215 * We're only searching for one thing: the first text child in
1216 * the BODY of a NAME section. Since we don't keep track of
1217 * sections in -man, run some hoops to find out whether we're in
1218 * the correct section or not.
1221 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
1223 assert(body
->parent
);
1224 if (NULL
!= (head
= body
->parent
->head
) &&
1225 1 == head
->nchild
&&
1226 NULL
!= (head
= (head
->child
)) &&
1227 MAN_TEXT
== head
->type
&&
1228 0 == strcmp(head
->string
, "NAME") &&
1229 NULL
!= (body
= body
->child
) &&
1230 MAN_TEXT
== body
->type
) {
1232 assert(body
->string
);
1233 start
= sv
= body
->string
;
1236 * Go through a special heuristic dance here.
1237 * This is why -man manuals are great!
1238 * (I'm being sarcastic: my eyes are bleeding.)
1239 * Conventionally, one or more manual names are
1240 * comma-specified prior to a whitespace, then a
1241 * dash, then a description. Try to puzzle out
1242 * the name parts here.
1246 sz
= strcspn(start
, " ,");
1247 if ('\0' == start
[(int)sz
])
1251 buf_appendb(buf
, start
, sz
);
1252 buf_appendb(buf
, "", 1);
1254 hash_put(hash
, buf
, TYPE_Nm
);
1256 if (' ' == start
[(int)sz
]) {
1257 start
+= (int)sz
+ 1;
1261 assert(',' == start
[(int)sz
]);
1262 start
+= (int)sz
+ 1;
1263 while (' ' == *start
)
1270 buf_append(buf
, start
);
1274 while (' ' == *start
)
1277 if (0 == strncmp(start
, "-", 1))
1279 else if (0 == strncmp(start
, "\\-", 2))
1281 else if (0 == strncmp(start
, "\\(en", 4))
1283 else if (0 == strncmp(start
, "\\(em", 4))
1286 while (' ' == *start
)
1289 sz
= strlen(start
) + 1;
1290 buf_appendb(dbuf
, start
, sz
);
1291 buf_appendb(buf
, start
, sz
);
1293 hash_put(hash
, buf
, TYPE_Nd
);
1297 for (n
= n
->child
; n
; n
= n
->next
)
1298 if (pman_node(hash
, buf
, dbuf
, n
))
1305 * Parse a formatted manual page.
1306 * By necessity, this involves rather crude guesswork.
1309 pformatted(DB
*hash
, struct buf
*buf
, struct buf
*dbuf
,
1310 const struct of
*of
)
1316 if (NULL
== (stream
= fopen(of
->fname
, "r"))) {
1322 * Always use the title derived from the filename up front,
1323 * do not even try to find it in the file. This also makes
1324 * sure we don't end up with an orphan index record, even if
1325 * the file content turns out to be completely unintelligible.
1329 buf_append(buf
, of
->title
);
1330 hash_put(hash
, buf
, TYPE_Nm
);
1332 while (NULL
!= (line
= fgetln(stream
, &len
)) && '\n' != *line
)
1333 /* Skip to first blank line. */ ;
1335 while (NULL
!= (line
= fgetln(stream
, &len
)) &&
1336 ('\n' == *line
|| ' ' == *line
))
1337 /* Skip to first section header. */ ;
1340 * If no page content can be found,
1341 * reuse the page title as the page description.
1344 if (NULL
== (line
= fgetln(stream
, &len
))) {
1345 buf_appendb(dbuf
, buf
->cp
, buf
->size
);
1346 hash_put(hash
, buf
, TYPE_Nd
);
1353 * If there is a dash, skip to the text following it.
1356 for (p
= line
, plen
= len
; plen
; p
++, plen
--)
1359 for ( ; plen
; p
++, plen
--)
1360 if ('-' != *p
&& ' ' != *p
&& 8 != *p
)
1368 * Copy the rest of the line, but no more than 70 bytes.
1374 buf_appendb(dbuf
, p
, plen
);
1376 buf_appendb(buf
, p
, plen
);
1377 hash_put(hash
, buf
, TYPE_Nd
);
1381 ofile_argbuild(int argc
, char *argv
[], struct of
**of
)
1383 char buf
[MAXPATHLEN
];
1384 char *sec
, *arch
, *title
, *p
;
1388 for (i
= 0; i
< argc
; i
++) {
1391 * Try to infer the manual section, architecture and
1392 * page title from the path, assuming it looks like
1393 * man*[/<arch>]/<title>.<section> or
1394 * cat<section>[/<arch>]/<title>.0
1397 if (strlcpy(buf
, argv
[i
], sizeof(buf
)) >= sizeof(buf
)) {
1398 fprintf(stderr
, "%s: Path too long\n", argv
[i
]);
1401 sec
= arch
= title
= NULL
;
1403 p
= strrchr(buf
, '\0');
1405 if (NULL
== sec
&& '.' == *p
) {
1409 src_form
|= MANDOC_FORM
;
1410 else if ('1' <= *sec
&& '9' >= *sec
)
1411 src_form
|= MANDOC_SRC
;
1416 if (NULL
== title
) {
1421 if (strncmp("man", p
+ 1, 3)) {
1422 src_form
|= MANDOC_SRC
;
1424 } else if (strncmp("cat", p
+ 1, 3)) {
1425 src_form
|= MANDOC_FORM
;
1434 * Build the file structure.
1437 nof
= mandoc_calloc(1, sizeof(struct of
));
1438 nof
->fname
= mandoc_strdup(argv
[i
]);
1440 nof
->sec
= mandoc_strdup(sec
);
1442 nof
->arch
= mandoc_strdup(arch
);
1443 nof
->title
= mandoc_strdup(title
);
1444 nof
->src_form
= src_form
;
1447 * Add the structure to the list.
1451 printf("%s: Scheduling\n", argv
[i
]);
1456 nof
->first
= (*of
)->first
;
1464 * Recursively build up a list of files to parse.
1465 * We use this instead of ftw() and so on because I don't want global
1466 * variables hanging around.
1467 * This ignores the mandoc.db and mandoc.index files, but assumes that
1468 * everything else is a manual.
1469 * Pass in a pointer to a NULL structure for the first invocation.
1472 ofile_dirbuild(const char *dir
, const char* psec
, const char *parch
,
1473 int p_src_form
, struct of
**of
)
1475 char buf
[MAXPATHLEN
];
1479 const char *fn
, *sec
, *arch
;
1480 char *p
, *q
, *suffix
;
1485 if (NULL
== (d
= opendir(dir
))) {
1490 while (NULL
!= (dp
= readdir(d
))) {
1496 src_form
= p_src_form
;
1498 if (DT_DIR
== dp
->d_type
) {
1503 * By default, only use directories called:
1504 * man<section>/[<arch>/] or
1505 * cat<section>/[<arch>/]
1509 if(0 == strncmp("man", fn
, 3)) {
1510 src_form
|= MANDOC_SRC
;
1512 } else if (0 == strncmp("cat", fn
, 3)) {
1513 src_form
|= MANDOC_FORM
;
1519 } else if (NULL
== arch
&& (use_all
||
1520 NULL
== strchr(fn
, '.')))
1522 else if (0 == use_all
)
1526 strlcat(buf
, dir
, MAXPATHLEN
);
1527 strlcat(buf
, "/", MAXPATHLEN
);
1528 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1530 if (MAXPATHLEN
<= sz
) {
1531 fprintf(stderr
, "%s: Path too long\n", dir
);
1536 printf("%s: Scanning\n", buf
);
1538 if ( ! ofile_dirbuild(buf
, sec
, arch
,
1542 if (DT_REG
!= dp
->d_type
||
1543 (NULL
== psec
&& !use_all
) ||
1544 !strcmp(MANDOC_DB
, fn
) ||
1545 !strcmp(MANDOC_IDX
, fn
))
1549 * By default, skip files where the file name suffix
1550 * does not agree with the section directory
1551 * they are located in.
1554 suffix
= strrchr(fn
, '.');
1558 if ((MANDOC_SRC
& src_form
&&
1559 strcmp(suffix
+ 1, psec
)) ||
1560 (MANDOC_FORM
& src_form
&&
1561 strcmp(suffix
+ 1, "0")))
1564 if (NULL
!= suffix
) {
1565 if ('0' == suffix
[1])
1566 src_form
|= MANDOC_FORM
;
1567 else if ('1' <= suffix
[1] && '9' >= suffix
[1])
1568 src_form
|= MANDOC_SRC
;
1573 * Skip formatted manuals if a source version is
1574 * available. Ignore the age: it is very unlikely
1575 * that people install newer formatted base manuals
1576 * when they used to have source manuals before,
1577 * and in ports, old manuals get removed on update.
1579 if (0 == use_all
&& MANDOC_FORM
& src_form
&&
1582 strlcat(buf
, dir
, MAXPATHLEN
);
1583 p
= strrchr(buf
, '/');
1588 if (0 == strncmp("cat", p
, 3))
1589 memcpy(p
, "man", 3);
1590 strlcat(buf
, "/", MAXPATHLEN
);
1591 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1592 if (sz
>= MAXPATHLEN
) {
1593 fprintf(stderr
, "%s: Path too long\n", buf
);
1596 q
= strrchr(buf
, '.');
1597 if (NULL
!= q
&& p
< q
++) {
1599 sz
= strlcat(buf
, psec
, MAXPATHLEN
);
1600 if (sz
>= MAXPATHLEN
) {
1602 "%s: Path too long\n", buf
);
1605 if (0 == stat(buf
, &sb
))
1611 strlcat(buf
, dir
, MAXPATHLEN
);
1612 strlcat(buf
, "/", MAXPATHLEN
);
1613 sz
= strlcat(buf
, fn
, MAXPATHLEN
);
1614 if (sz
>= MAXPATHLEN
) {
1615 fprintf(stderr
, "%s: Path too long\n", dir
);
1619 nof
= mandoc_calloc(1, sizeof(struct of
));
1620 nof
->fname
= mandoc_strdup(buf
);
1622 nof
->sec
= mandoc_strdup(psec
);
1624 nof
->arch
= mandoc_strdup(parch
);
1625 nof
->src_form
= src_form
;
1628 * Remember the file name without the extension,
1629 * to be used as the page title in the database.
1634 nof
->title
= mandoc_strdup(fn
);
1637 * Add the structure to the list.
1641 printf("%s: Scheduling\n", buf
);
1646 nof
->first
= (*of
)->first
;
1657 ofile_free(struct of
*of
)
1676 fprintf(stderr
, "usage: %s [-v] "
1677 "[-d dir [files...] |"
1678 " -u dir [files...] |"
1679 " dir...]\n", progname
);