]>
git.cameronkatri.com Git - mandoc.git/blob - makewhatis.c
1 /* $Id: makewhatis.c,v 1.17 2011/07/10 13:03:31 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
41 #define MANDOC_DB "mandoc.db"
42 #define MANDOC_IDX "mandoc.index"
43 #define MANDOC_BUFSZ BUFSIZ
44 #define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR
46 /* Bit-fields. See makewhatis.1. */
48 #define TYPE_NAME 0x01
49 #define TYPE_FUNCTION 0x02
50 #define TYPE_UTILITY 0x04
51 #define TYPE_INCLUDES 0x08
52 #define TYPE_VARIABLE 0x10
53 #define TYPE_STANDARD 0x20
54 #define TYPE_AUTHOR 0x40
55 #define TYPE_CONFIG 0x80
56 #define TYPE_DESC 0x100
57 #define TYPE_XREF 0x200
58 #define TYPE_PATH 0x400
59 #define TYPE_ENV 0x800
60 #define TYPE_ERR 0x1000
62 /* Buffer for storing growable data. */
70 #define MAN_ARGS DB *hash, \
73 const struct man_node *n
74 #define MDOC_ARGS DB *hash, \
77 const struct mdoc_node *n, \
78 const struct mdoc_meta *m
80 static void buf_appendmdoc(struct buf
*,
81 const struct mdoc_node
*, int);
82 static void buf_append(struct buf
*, const char *);
83 static void buf_appendb(struct buf
*,
84 const void *, size_t);
85 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
86 static void hash_put(DB
*, const struct buf
*, int);
87 static int pman_node(MAN_ARGS
);
88 static void pmdoc_node(MDOC_ARGS
);
89 static void pmdoc_An(MDOC_ARGS
);
90 static void pmdoc_Cd(MDOC_ARGS
);
91 static void pmdoc_Er(MDOC_ARGS
);
92 static void pmdoc_Ev(MDOC_ARGS
);
93 static void pmdoc_Fd(MDOC_ARGS
);
94 static void pmdoc_In(MDOC_ARGS
);
95 static void pmdoc_Fn(MDOC_ARGS
);
96 static void pmdoc_Fo(MDOC_ARGS
);
97 static void pmdoc_Nd(MDOC_ARGS
);
98 static void pmdoc_Nm(MDOC_ARGS
);
99 static void pmdoc_Pa(MDOC_ARGS
);
100 static void pmdoc_St(MDOC_ARGS
);
101 static void pmdoc_Vt(MDOC_ARGS
);
102 static void pmdoc_Xr(MDOC_ARGS
);
103 static void usage(void);
105 typedef void (*pmdoc_nf
)(MDOC_ARGS
);
107 static const pmdoc_nf mdocs
[MDOC_MAX
] = {
232 static const char *progname
;
235 main(int argc
, char *argv
[])
237 struct mparse
*mp
; /* parse sequence */
238 struct mdoc
*mdoc
; /* resulting mdoc */
239 struct man
*man
; /* resulting man */
240 char *fn
; /* current file being parsed */
241 const char *msec
, /* manual section */
242 *mtitle
, /* manual title */
243 *arch
, /* manual architecture */
244 *dir
; /* result dir (default: cwd) */
245 char ibuf
[MAXPATHLEN
], /* index fname */
246 ibbuf
[MAXPATHLEN
], /* index backup fname */
247 fbuf
[MAXPATHLEN
], /* btree fname */
248 fbbuf
[MAXPATHLEN
], /* btree backup fname */
249 vbuf
[8]; /* stringified record number */
251 DB
*idx
, /* index database */
252 *db
, /* keyword database */
253 *hash
; /* temporary keyword hashtable */
256 BTREEINFO info
; /* btree configuration */
257 recno_t rec
; /* current record number */
258 struct buf buf
, /* keyword buffer */
259 dbuf
; /* description buffer */
263 progname
= strrchr(argv
[0], '/');
264 if (progname
== NULL
)
272 while (-1 != (ch
= getopt(argc
, argv
, "d:v")))
282 return((int)MANDOCLEVEL_BADARG
);
289 * Set up temporary file-names into which we're going to write
290 * all of our data (both for the index and database). These
291 * will be securely renamed to the real file-names after we've
292 * written all of our data.
295 ibuf
[0] = ibuf
[MAXPATHLEN
- 2] =
296 ibbuf
[0] = ibbuf
[MAXPATHLEN
- 2] =
297 fbuf
[0] = fbuf
[MAXPATHLEN
- 2] =
298 fbbuf
[0] = fbbuf
[MAXPATHLEN
- 2] = '\0';
300 strlcat(fbuf
, dir
, MAXPATHLEN
);
301 strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
303 strlcat(fbbuf
, fbuf
, MAXPATHLEN
);
304 strlcat(fbbuf
, "~", MAXPATHLEN
);
306 strlcat(ibuf
, dir
, MAXPATHLEN
);
307 strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
309 strlcat(ibbuf
, ibuf
, MAXPATHLEN
);
310 strlcat(ibbuf
, "~", MAXPATHLEN
);
312 if ('\0' != fbuf
[MAXPATHLEN
- 2] ||
313 '\0' != fbbuf
[MAXPATHLEN
- 2] ||
314 '\0' != ibuf
[MAXPATHLEN
- 2] ||
315 '\0' != ibbuf
[MAXPATHLEN
- 2]) {
316 fprintf(stderr
, "%s: Path too long\n", dir
);
317 exit((int)MANDOCLEVEL_SYSERR
);
321 * For the keyword database, open a BTREE database that allows
323 * For the index database, use a standard RECNO database type.
326 memset(&info
, 0, sizeof(BTREEINFO
));
328 db
= dbopen(fbbuf
, MANDOC_FLAGS
, 0644, DB_BTREE
, &info
);
332 exit((int)MANDOCLEVEL_SYSERR
);
335 idx
= dbopen(ibbuf
, MANDOC_FLAGS
, 0644, DB_RECNO
, NULL
);
340 exit((int)MANDOCLEVEL_SYSERR
);
344 * Try parsing each manual given on the command line.
345 * If we fail, then emit an error and keep on going.
346 * Take resulting trees and push them down into the database code.
347 * Use the auto-parser and don't report any errors.
350 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
355 memset(&buf
, 0, sizeof(struct buf
));
356 memset(&dbuf
, 0, sizeof(struct buf
));
358 buf
.size
= dbuf
.size
= MANDOC_BUFSZ
;
360 buf
.cp
= mandoc_malloc(buf
.size
);
361 dbuf
.cp
= mandoc_malloc(dbuf
.size
);
363 while (NULL
!= (fn
= *argv
++)) {
366 /* Initialise the in-memory hash of keywords. */
369 (*hash
->close
)(hash
);
371 hash
= dbopen(NULL
, MANDOC_FLAGS
, 0644, DB_HASH
, NULL
);
375 exit((int)MANDOCLEVEL_SYSERR
);
378 /* Parse and get (non-empty) AST. */
380 if (mparse_readfd(mp
, -1, fn
) >= MANDOCLEVEL_FATAL
) {
381 fprintf(stderr
, "%s: Parse failure\n", fn
);
385 mparse_result(mp
, &mdoc
, &man
);
387 if (NULL
== mdoc
&& NULL
== man
)
390 msec
= NULL
!= mdoc
?
391 mdoc_meta(mdoc
)->msec
:
393 mtitle
= NULL
!= mdoc
?
394 mdoc_meta(mdoc
)->title
:
395 man_meta(man
)->title
;
396 arch
= NULL
!= mdoc
? mdoc_meta(mdoc
)->arch
: NULL
;
399 * The index record value consists of a nil-terminated
400 * filename, a nil-terminated manual section, and a
401 * nil-terminated description. Since the description
402 * may not be set, we set a sentinel to see if we're
403 * going to write a nil byte in its place.
407 buf_appendb(&dbuf
, fn
, strlen(fn
) + 1);
408 buf_appendb(&dbuf
, msec
, strlen(msec
) + 1);
409 buf_appendb(&dbuf
, mtitle
, strlen(mtitle
) + 1);
410 buf_appendb(&dbuf
, arch
? arch
: "",
411 arch
? strlen(arch
) + 1 : 1);
415 /* Fix the record number in the btree value. */
418 pmdoc_node(hash
, &buf
, &dbuf
,
419 mdoc_node(mdoc
), mdoc_meta(mdoc
));
421 pman_node(hash
, &buf
, &dbuf
, man_node(man
));
424 * Copy from the in-memory hashtable of pending keywords
428 memset(vbuf
, 0, sizeof(uint32_t));
429 memcpy(vbuf
+ 4, &rec
, sizeof(uint32_t));
432 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
435 memcpy(vbuf
, val
.data
, sizeof(uint32_t));
436 val
.size
= sizeof(vbuf
);
440 printf("%s: Keyword %s (%zu): 0x%x\n",
441 fn
, (char *)key
.data
, key
.size
,
444 dbt_put(db
, fbbuf
, &key
, &val
);
450 exit((int)MANDOCLEVEL_SYSERR
);
454 * Apply to the index. If we haven't had a description
455 * set, put an empty one in now.
459 buf_appendb(&dbuf
, "", 1);
462 key
.size
= sizeof(recno_t
);
468 printf("%s: Indexed\n", fn
);
470 dbt_put(idx
, ibbuf
, &key
, &val
);
478 (*hash
->close
)(hash
);
485 /* Atomically replace the file with our temporary one. */
487 if (-1 == rename(fbbuf
, fbuf
))
489 if (-1 == rename(ibbuf
, ibuf
))
492 return((int)MANDOCLEVEL_OK
);
496 * Grow the buffer (if necessary) and copy in a binary string.
499 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
502 /* Overshoot by MANDOC_BUFSZ. */
504 while (buf
->len
+ sz
>= buf
->size
) {
505 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
506 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
509 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
514 * Append a nil-terminated string to the buffer.
515 * This can be invoked multiple times.
516 * The buffer string will be nil-terminated.
517 * If invoked multiple times, a space is put between strings.
520 buf_append(struct buf
*buf
, const char *cp
)
524 if (0 == (sz
= strlen(cp
)))
528 buf
->cp
[(int)buf
->len
- 1] = ' ';
530 buf_appendb(buf
, cp
, sz
+ 1);
534 * Recursively add all text from a given node.
535 * This is optimised for general mdoc nodes in this context, which do
536 * not consist of subexpressions and having a recursive call for n->next
538 * The "f" variable should be 0 unless called from pmdoc_Nd for the
539 * description buffer, which does not start at the beginning of the
543 buf_appendmdoc(struct buf
*buf
, const struct mdoc_node
*n
, int f
)
546 for ( ; n
; n
= n
->next
) {
548 buf_appendmdoc(buf
, n
->child
, f
);
550 if (MDOC_TEXT
== n
->type
&& f
) {
552 buf_appendb(buf
, n
->string
,
553 strlen(n
->string
) + 1);
554 } else if (MDOC_TEXT
== n
->type
)
555 buf_append(buf
, n
->string
);
565 if (SEC_AUTHORS
!= n
->sec
)
568 buf_appendmdoc(buf
, n
->child
, 0);
569 hash_put(hash
, buf
, TYPE_AUTHOR
);
576 const char *start
, *end
;
579 if (SEC_SYNOPSIS
!= n
->sec
)
581 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
585 * Only consider those `Fd' macro fields that begin with an
586 * "inclusion" token (versus, e.g., #define).
588 if (strcmp("#include", n
->string
))
591 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
595 * Strip away the enclosing angle brackets and make sure we're
600 if ('<' == *start
|| '"' == *start
)
603 if (0 == (sz
= strlen(start
)))
606 end
= &start
[(int)sz
- 1];
607 if ('>' == *end
|| '"' == *end
)
610 assert(end
>= start
);
612 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
613 buf_appendb(buf
, "", 1);
615 hash_put(hash
, buf
, TYPE_INCLUDES
);
623 if (SEC_SYNOPSIS
!= n
->sec
)
626 buf_appendmdoc(buf
, n
->child
, 0);
627 hash_put(hash
, buf
, TYPE_CONFIG
);
635 if (SEC_SYNOPSIS
!= n
->sec
)
637 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
640 buf_append(buf
, n
->child
->string
);
641 hash_put(hash
, buf
, TYPE_INCLUDES
);
650 if (SEC_SYNOPSIS
!= n
->sec
)
652 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
655 /* .Fn "struct type *arg" "foo" */
657 cp
= strrchr(n
->child
->string
, ' ');
659 cp
= n
->child
->string
;
661 /* Strip away pointer symbol. */
667 hash_put(hash
, buf
, TYPE_FUNCTION
);
675 if (SEC_STANDARDS
!= n
->sec
)
677 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
680 buf_append(buf
, n
->child
->string
);
681 hash_put(hash
, buf
, TYPE_STANDARD
);
689 if (NULL
== (n
= n
->child
))
692 buf_appendb(buf
, n
->string
, strlen(n
->string
));
694 if (NULL
!= (n
= n
->next
)) {
695 buf_appendb(buf
, ".", 1);
696 buf_appendb(buf
, n
->string
, strlen(n
->string
) + 1);
698 buf_appendb(buf
, ".", 2);
700 hash_put(hash
, buf
, TYPE_XREF
);
710 if (SEC_SYNOPSIS
!= n
->sec
)
712 if (MDOC_Vt
== n
->tok
&& MDOC_BODY
!= n
->type
)
714 if (NULL
== n
->last
|| MDOC_TEXT
!= n
->last
->type
)
718 * Strip away leading pointer symbol '*' and trailing ';'.
721 start
= n
->last
->string
;
723 while ('*' == *start
)
726 if (0 == (sz
= strlen(start
)))
729 if (';' == start
[(int)sz
- 1])
735 buf_appendb(buf
, start
, sz
);
736 buf_appendb(buf
, "", 1);
737 hash_put(hash
, buf
, TYPE_VARIABLE
);
745 if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
747 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
750 buf_append(buf
, n
->child
->string
);
751 hash_put(hash
, buf
, TYPE_FUNCTION
);
760 if (MDOC_BODY
!= n
->type
)
763 buf_appendmdoc(dbuf
, n
->child
, 1);
764 buf_appendmdoc(buf
, n
->child
, 0);
766 hash_put(hash
, buf
, TYPE_DESC
);
774 if (SEC_ERRORS
!= n
->sec
)
777 buf_appendmdoc(buf
, n
->child
, 0);
778 hash_put(hash
, buf
, TYPE_ERR
);
786 if (SEC_ENVIRONMENT
!= n
->sec
)
789 buf_appendmdoc(buf
, n
->child
, 0);
790 hash_put(hash
, buf
, TYPE_ENV
);
798 if (SEC_FILES
!= n
->sec
)
801 buf_appendmdoc(buf
, n
->child
, 0);
802 hash_put(hash
, buf
, TYPE_PATH
);
810 if (SEC_NAME
== n
->sec
) {
811 buf_appendmdoc(buf
, n
->child
, 0);
812 hash_put(hash
, buf
, TYPE_NAME
);
814 } else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
817 if (NULL
== n
->child
)
818 buf_append(buf
, m
->name
);
820 buf_appendmdoc(buf
, n
->child
, 0);
821 hash_put(hash
, buf
, TYPE_UTILITY
);
825 hash_put(DB
*db
, const struct buf
*buf
, int mask
)
836 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
838 exit((int)MANDOCLEVEL_SYSERR
);
840 mask
|= *(int *)val
.data
;
843 val
.size
= sizeof(int);
845 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
847 exit((int)MANDOCLEVEL_SYSERR
);
852 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
858 if (0 == (*db
->put
)(db
, key
, val
, 0))
862 exit((int)MANDOCLEVEL_SYSERR
);
867 * Call out to per-macro handlers after clearing the persistent database
868 * key. If the macro sets the database key, flush it to the database.
871 pmdoc_node(MDOC_ARGS
)
887 if (NULL
== mdocs
[n
->tok
])
891 (*mdocs
[n
->tok
])(hash
, buf
, dbuf
, n
, m
);
897 pmdoc_node(hash
, buf
, dbuf
, n
->child
, m
);
898 pmdoc_node(hash
, buf
, dbuf
, n
->next
, m
);
904 const struct man_node
*head
, *body
;
905 const char *start
, *sv
;
912 * We're only searching for one thing: the first text child in
913 * the BODY of a NAME section. Since we don't keep track of
914 * sections in -man, run some hoops to find out whether we're in
915 * the correct section or not.
918 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
920 assert(body
->parent
);
921 if (NULL
!= (head
= body
->parent
->head
) &&
923 NULL
!= (head
= (head
->child
)) &&
924 MAN_TEXT
== head
->type
&&
925 0 == strcmp(head
->string
, "NAME") &&
926 NULL
!= (body
= body
->child
) &&
927 MAN_TEXT
== body
->type
) {
929 assert(body
->string
);
930 start
= sv
= body
->string
;
933 * Go through a special heuristic dance here.
934 * This is why -man manuals are great!
935 * (I'm being sarcastic: my eyes are bleeding.)
936 * Conventionally, one or more manual names are
937 * comma-specified prior to a whitespace, then a
938 * dash, then a description. Try to puzzle out
939 * the name parts here.
943 sz
= strcspn(start
, " ,");
944 if ('\0' == start
[(int)sz
])
948 buf_appendb(buf
, start
, sz
);
949 buf_appendb(buf
, "", 1);
951 hash_put(hash
, buf
, TYPE_NAME
);
953 if (' ' == start
[(int)sz
]) {
954 start
+= (int)sz
+ 1;
958 assert(',' == start
[(int)sz
]);
959 start
+= (int)sz
+ 1;
960 while (' ' == *start
)
967 buf_append(buf
, start
);
971 while (' ' == *start
)
974 if (0 == strncmp(start
, "-", 1))
976 else if (0 == strncmp(start
, "\\-", 2))
978 else if (0 == strncmp(start
, "\\(en", 4))
980 else if (0 == strncmp(start
, "\\(em", 4))
983 while (' ' == *start
)
986 sz
= strlen(start
) + 1;
987 buf_appendb(dbuf
, start
, sz
);
988 buf_appendb(buf
, start
, sz
);
990 hash_put(hash
, buf
, TYPE_DESC
);
994 if (pman_node(hash
, buf
, dbuf
, n
->child
))
996 if (pman_node(hash
, buf
, dbuf
, n
->next
))
1006 fprintf(stderr
, "usage: %s [-v] [-d path] [file...]\n",