]>
git.cameronkatri.com Git - mandoc.git/blob - makewhatis.c
1 /* $Id: makewhatis.c,v 1.4 2011/06/21 13:13:15 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
40 #define MANDOC_DB "mandoc.db"
41 #define MANDOC_IDX "mandoc.index"
42 #define MANDOC_BUFSZ BUFSIZ
43 #define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR
45 #define TYPE_NAME 0x01
46 #define TYPE_FUNCTION 0x02
47 #define TYPE_UTILITY 0x04
48 #define TYPE_INCLUDES 0x08
49 #define TYPE_VARIABLE 0x10
50 #define TYPE_STANDARD 0x20
51 #define TYPE_AUTHOR 0x40
52 #define TYPE_CONFIG 0x80
53 #define TYPE__MAX TYPE_CONFIG
61 #define MAN_ARGS DB *hash, \
63 DBT *rval, size_t *rsz, \
64 const struct man_node *n
65 #define MDOC_ARGS DB *hash, \
67 DBT *rval, size_t *rsz, \
68 const struct mdoc_node *n, \
69 const struct mdoc_meta *m
71 static void dbt_append(DBT
*, size_t *, const char *);
72 static void dbt_appendb(DBT
*, size_t *,
73 const void *, size_t);
74 static void dbt_init(DBT
*, size_t *);
75 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
76 static void hash_put(DB
*, const struct buf
*, int);
77 static void usage(void);
78 static int pman_node(MAN_ARGS
);
79 static void pmdoc_node(MDOC_ARGS
);
80 static void pmdoc_An(MDOC_ARGS
);
81 static void pmdoc_Cd(MDOC_ARGS
);
82 static void pmdoc_Fd(MDOC_ARGS
);
83 static void pmdoc_In(MDOC_ARGS
);
84 static void pmdoc_Fn(MDOC_ARGS
);
85 static void pmdoc_Fo(MDOC_ARGS
);
86 static void pmdoc_Nd(MDOC_ARGS
);
87 static void pmdoc_Nm(MDOC_ARGS
);
88 static void pmdoc_St(MDOC_ARGS
);
89 static void pmdoc_Vt(MDOC_ARGS
);
91 typedef void (*pmdoc_nf
)(MDOC_ARGS
);
93 static const char *progname
;
95 static const pmdoc_nf mdocs
[MDOC_MAX
] = {
221 main(int argc
, char *argv
[])
223 struct mparse
*mp
; /* parse sequence */
224 struct mdoc
*mdoc
; /* resulting mdoc */
225 struct man
*man
; /* resulting man */
226 char *fn
; /* current file being parsed */
227 const char *msec
, /* manual section */
228 *mtitle
, /* manual title */
229 *arch
, /* manual architecture */
230 *dir
; /* result dir (default: cwd) */
231 char ibuf
[MAXPATHLEN
], /* index fname */
232 ibbuf
[MAXPATHLEN
], /* index backup fname */
233 fbuf
[MAXPATHLEN
], /* btree fname */
234 fbbuf
[MAXPATHLEN
], /* btree backup fname */
235 vbuf
[8]; /* stringified record number */
237 DB
*idx
, /* index database */
238 *db
, /* keyword database */
239 *hash
; /* temporary keyword hashtable */
240 DBT rkey
, rval
, /* recno entries */
241 key
, val
; /* persistent keyword entries */
243 BTREEINFO info
; /* btree configuration */
244 recno_t rec
; /* current record number */
245 struct buf buf
; /* keyword buffer */
249 progname
= strrchr(argv
[0], '/');
250 if (progname
== NULL
)
257 while (-1 != (ch
= getopt(argc
, argv
, "d:")))
264 return((int)MANDOCLEVEL_BADARG
);
271 * Set up temporary file-names into which we're going to write
272 * all of our data (both for the index and database). These
273 * will be securely renamed to the real file-names after we've
274 * written all of our data.
277 ibuf
[0] = ibuf
[MAXPATHLEN
- 2] =
278 ibbuf
[0] = ibbuf
[MAXPATHLEN
- 2] =
279 fbuf
[0] = fbuf
[MAXPATHLEN
- 2] =
280 fbbuf
[0] = fbbuf
[MAXPATHLEN
- 2] = '\0';
282 strlcat(fbuf
, dir
, MAXPATHLEN
);
283 strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
285 strlcat(fbbuf
, fbuf
, MAXPATHLEN
);
286 strlcat(fbbuf
, "~", MAXPATHLEN
);
288 strlcat(ibuf
, dir
, MAXPATHLEN
);
289 strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
291 strlcat(ibbuf
, ibuf
, MAXPATHLEN
);
292 strlcat(ibbuf
, "~", MAXPATHLEN
);
294 if ('\0' != fbuf
[MAXPATHLEN
- 2] ||
295 '\0' != fbbuf
[MAXPATHLEN
- 2] ||
296 '\0' != ibuf
[MAXPATHLEN
- 2] ||
297 '\0' != ibbuf
[MAXPATHLEN
- 2]) {
298 fprintf(stderr
, "%s: Path too long\n", progname
);
299 exit((int)MANDOCLEVEL_SYSERR
);
303 * For the keyword database, open a BTREE database that allows
305 * For the index database, use a standard RECNO database type.
306 * For the temporary keyword hashtable, use the HASH database
310 hash
= dbopen(NULL
, MANDOC_FLAGS
, 0644, DB_HASH
, NULL
);
313 exit((int)MANDOCLEVEL_SYSERR
);
316 memset(&info
, 0, sizeof(BTREEINFO
));
318 db
= dbopen(fbbuf
, MANDOC_FLAGS
, 0644, DB_BTREE
, &info
);
322 (*hash
->close
)(hash
);
323 exit((int)MANDOCLEVEL_SYSERR
);
326 idx
= dbopen(ibbuf
, MANDOC_FLAGS
, 0644, DB_RECNO
, NULL
);
331 (*hash
->close
)(hash
);
332 exit((int)MANDOCLEVEL_SYSERR
);
336 * Try parsing the manuals given on the command line. If we
337 * totally fail, then just keep on going. Take resulting trees
338 * and push them down into the database code.
339 * Use the auto-parser and don't report any errors.
342 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
344 memset(&key
, 0, sizeof(DBT
));
345 memset(&val
, 0, sizeof(DBT
));
346 memset(&rkey
, 0, sizeof(DBT
));
347 memset(&rval
, 0, sizeof(DBT
));
349 rkey
.size
= sizeof(recno_t
);
354 memset(&buf
, 0, sizeof(struct buf
));
356 buf
.size
= MANDOC_BUFSZ
;
357 buf
.cp
= mandoc_malloc(buf
.size
);
359 while (NULL
!= (fn
= *argv
++)) {
362 /* Parse and get (non-empty) AST. */
364 if (mparse_readfd(mp
, -1, fn
) >= MANDOCLEVEL_FATAL
) {
365 fprintf(stderr
, "%s: Parse failure\n", fn
);
368 mparse_result(mp
, &mdoc
, &man
);
369 if (NULL
== mdoc
&& NULL
== man
)
372 /* Manual section: can be empty string. */
374 msec
= NULL
!= mdoc
?
375 mdoc_meta(mdoc
)->msec
:
377 mtitle
= NULL
!= mdoc
?
378 mdoc_meta(mdoc
)->title
:
379 man_meta(man
)->title
;
380 arch
= NULL
!= mdoc
? mdoc_meta(mdoc
)->arch
: NULL
;
386 * The index record value consists of a nil-terminated
387 * filename, a nil-terminated manual section, and a
388 * nil-terminated description. Since the description
389 * may not be set, we set a sentinel to see if we're
390 * going to write a nil byte in its place.
393 dbt_init(&rval
, &rsz
);
394 dbt_appendb(&rval
, &rsz
, fn
, strlen(fn
) + 1);
395 dbt_appendb(&rval
, &rsz
, msec
, strlen(msec
) + 1);
396 dbt_appendb(&rval
, &rsz
, mtitle
, strlen(mtitle
) + 1);
397 dbt_appendb(&rval
, &rsz
, arch
? arch
: "",
398 arch
? strlen(arch
) + 1 : 1);
402 /* Fix the record number in the btree value. */
405 pmdoc_node(hash
, &buf
, &rval
,
406 &rsz
, mdoc_node(mdoc
),
409 pman_node(hash
, &buf
, &rval
,
410 &rsz
, man_node(man
));
413 * Copy from the in-memory hashtable of pending keywords
417 memset(vbuf
, 0, sizeof(uint32_t));
418 memcpy(vbuf
+ 4, &rec
, sizeof(uint32_t));
421 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
422 memcpy(vbuf
, val
.data
, sizeof(uint32_t));
423 val
.size
= sizeof(vbuf
);
425 dbt_put(db
, fbbuf
, &key
, &val
);
426 /*fprintf(stderr, "Recording: %s (0x%x)\n",
429 if ((*hash
->del
)(hash
, &key
, 0) < 0) {
431 exit((int)MANDOCLEVEL_SYSERR
);
438 exit((int)MANDOCLEVEL_SYSERR
);
442 * Apply to the index. If we haven't had a description
443 * set, put an empty one in now.
447 dbt_appendb(&rval
, &rsz
, "", 1);
450 dbt_put(idx
, ibbuf
, &rkey
, &rval
);
452 printf("Indexed: %s\n", fn
);
458 (*hash
->close
)(hash
);
465 /* Atomically replace the file with our temporary one. */
467 if (-1 == rename(fbbuf
, fbuf
))
469 if (-1 == rename(ibbuf
, ibuf
))
472 return((int)MANDOCLEVEL_OK
);
476 * Initialise the stored database key whose data buffer is shared
477 * between uses (as the key must sometimes be constructed from an array
481 dbt_init(DBT
*key
, size_t *ksz
)
485 assert(0 == key
->size
);
486 assert(NULL
== key
->data
);
487 key
->data
= mandoc_malloc(MANDOC_BUFSZ
);
495 * Append a binary value to a database entry. This can be invoked
496 * multiple times; the buffer is automatically resized.
499 dbt_appendb(DBT
*key
, size_t *ksz
, const void *cp
, size_t sz
)
504 /* Overshoot by MANDOC_BUFSZ. */
506 while (key
->size
+ sz
>= *ksz
) {
507 *ksz
= key
->size
+ sz
+ MANDOC_BUFSZ
;
508 key
->data
= mandoc_realloc(key
->data
, *ksz
);
511 memcpy(key
->data
+ (int)key
->size
, cp
, sz
);
516 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
519 /* Overshoot by MANDOC_BUFSZ. */
521 while (buf
->len
+ sz
>= buf
->size
) {
522 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
523 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
526 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
531 * Append a nil-terminated string to the database entry. This can be
532 * invoked multiple times. The database entry will be nil-terminated as
533 * well; if invoked multiple times, a space is put between strings.
536 dbt_append(DBT
*key
, size_t *ksz
, const char *cp
)
540 if (0 == (sz
= strlen(cp
)))
546 ((char *)key
->data
)[(int)key
->size
- 1] = ' ';
548 dbt_appendb(key
, ksz
, cp
, sz
+ 1);
552 buf_append(struct buf
*buf
, const char *cp
)
556 if (0 == (sz
= strlen(cp
)))
560 buf
->cp
[(int)buf
->len
- 1] = ' ';
562 buf_appendb(buf
, cp
, sz
+ 1);
570 if (SEC_AUTHORS
!= n
->sec
)
573 for (n
= n
->child
; n
; n
= n
->next
)
574 if (MDOC_TEXT
== n
->type
)
575 buf_append(buf
, n
->string
);
577 hash_put(hash
, buf
, TYPE_AUTHOR
);
584 const char *start
, *end
;
587 if (SEC_SYNOPSIS
!= n
->sec
)
589 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
593 * Only consider those `Fd' macro fields that begin with an
594 * "inclusion" token (versus, e.g., #define).
596 if (strcmp("#include", n
->string
))
599 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
603 * Strip away the enclosing angle brackets and make sure we're
608 if ('<' == *start
|| '"' == *start
)
611 if (0 == (sz
= strlen(start
)))
614 end
= &start
[(int)sz
- 1];
615 if ('>' == *end
|| '"' == *end
)
618 assert(end
>= start
);
620 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
621 buf_appendb(buf
, "", 1);
623 hash_put(hash
, buf
, TYPE_INCLUDES
);
631 if (SEC_SYNOPSIS
!= n
->sec
)
634 for (n
= n
->child
; n
; n
= n
->next
)
635 if (MDOC_TEXT
== n
->type
)
636 buf_append(buf
, n
->string
);
638 hash_put(hash
, buf
, TYPE_CONFIG
);
646 if (SEC_SYNOPSIS
!= n
->sec
)
648 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
651 buf_append(buf
, n
->child
->string
);
652 hash_put(hash
, buf
, TYPE_INCLUDES
);
661 if (SEC_SYNOPSIS
!= n
->sec
)
663 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
666 /* .Fn "struct type *arg" "foo" */
668 cp
= strrchr(n
->child
->string
, ' ');
670 cp
= n
->child
->string
;
672 /* Strip away pointer symbol. */
678 hash_put(hash
, buf
, TYPE_FUNCTION
);
686 if (SEC_STANDARDS
!= n
->sec
)
688 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
691 buf_append(buf
, n
->child
->string
);
692 hash_put(hash
, buf
, TYPE_STANDARD
);
702 if (SEC_SYNOPSIS
!= n
->sec
)
704 if (MDOC_Vt
== n
->tok
&& MDOC_BODY
!= n
->type
)
706 if (NULL
== n
->last
|| MDOC_TEXT
!= n
->last
->type
)
710 * Strip away leading pointer symbol '*' and trailing ';'.
713 start
= n
->last
->string
;
715 while ('*' == *start
)
718 if (0 == (sz
= strlen(start
)))
721 if (';' == start
[(int)sz
- 1])
727 buf_appendb(buf
, start
, sz
);
728 buf_appendb(buf
, "", 1);
729 hash_put(hash
, buf
, TYPE_VARIABLE
);
737 if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
739 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
742 buf_append(buf
, n
->child
->string
);
743 hash_put(hash
, buf
, TYPE_FUNCTION
);
753 for (first
= 1, n
= n
->child
; n
; n
= n
->next
) {
754 if (MDOC_TEXT
!= n
->type
)
757 dbt_appendb(rval
, rsz
, n
->string
, strlen(n
->string
) + 1);
759 dbt_append(rval
, rsz
, n
->string
);
769 if (SEC_NAME
== n
->sec
) {
770 for (n
= n
->child
; n
; n
= n
->next
)
771 if (MDOC_TEXT
== n
->type
)
772 buf_append(buf
, n
->string
);
773 hash_put(hash
, buf
, TYPE_NAME
);
775 } else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
778 if (NULL
== n
->child
)
779 buf_append(buf
, m
->name
);
781 for (n
= n
->child
; n
; n
= n
->next
)
782 if (MDOC_TEXT
== n
->type
)
783 buf_append(buf
, n
->string
);
785 hash_put(hash
, buf
, TYPE_UTILITY
);
789 hash_put(DB
*db
, const struct buf
*buf
, int mask
)
796 if ((key
.size
= buf
->len
) < 2)
799 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
801 exit((int)MANDOCLEVEL_SYSERR
);
803 mask
|= *(int *)val
.data
;
806 val
.size
= sizeof(int);
808 /*fprintf(stderr, "Hashing: [%s] (0x%x)\n",
809 (char *)key.data, mask);*/
811 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
813 exit((int)MANDOCLEVEL_SYSERR
);
818 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
828 if (0 == (*db
->put
)(db
, key
, val
, 0))
832 exit((int)MANDOCLEVEL_SYSERR
);
837 * Call out to per-macro handlers after clearing the persistent database
838 * key. If the macro sets the database key, flush it to the database.
841 pmdoc_node(MDOC_ARGS
)
857 if (NULL
== mdocs
[n
->tok
])
861 (*mdocs
[n
->tok
])(hash
, buf
, rval
, rsz
, n
, m
);
867 pmdoc_node(hash
, buf
, rval
, rsz
, n
->child
, m
);
868 pmdoc_node(hash
, buf
, rval
, rsz
, n
->next
, m
);
874 const struct man_node
*head
, *body
;
875 const char *start
, *sv
;
882 * We're only searching for one thing: the first text child in
883 * the BODY of a NAME section. Since we don't keep track of
884 * sections in -man, run some hoops to find out whether we're in
885 * the correct section or not.
888 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
890 assert(body
->parent
);
891 if (NULL
!= (head
= body
->parent
->head
) &&
893 NULL
!= (head
= (head
->child
)) &&
894 MAN_TEXT
== head
->type
&&
895 0 == strcmp(head
->string
, "NAME") &&
896 NULL
!= (body
= body
->child
) &&
897 MAN_TEXT
== body
->type
) {
899 assert(body
->string
);
900 start
= sv
= body
->string
;
903 * Go through a special heuristic dance here.
904 * This is why -man manuals are great!
905 * (I'm being sarcastic: my eyes are bleeding.)
906 * Conventionally, one or more manual names are
907 * comma-specified prior to a whitespace, then a
908 * dash, then a description. Try to puzzle out
909 * the name parts here.
913 sz
= strcspn(start
, " ,");
914 if ('\0' == start
[(int)sz
])
918 buf_appendb(buf
, start
, sz
);
919 buf_appendb(buf
, "", 1);
921 hash_put(hash
, buf
, TYPE_NAME
);
923 if (' ' == start
[(int)sz
]) {
924 start
+= (int)sz
+ 1;
928 assert(',' == start
[(int)sz
]);
929 start
+= (int)sz
+ 1;
930 while (' ' == *start
)
936 buf_append(buf
, start
);
940 while (' ' == *start
)
943 if (0 == strncmp(start
, "-", 1))
945 else if (0 == strncmp(start
, "\\-", 2))
947 else if (0 == strncmp(start
, "\\(en", 4))
949 else if (0 == strncmp(start
, "\\(em", 4))
952 while (' ' == *start
)
955 dbt_appendb(rval
, rsz
, start
, strlen(start
) + 1);
959 if (pman_node(hash
, buf
, rval
, rsz
, n
->child
))
961 if (pman_node(hash
, buf
, rval
, rsz
, n
->next
))
971 fprintf(stderr
, "usage: %s "