]>
git.cameronkatri.com Git - mandoc.git/blob - makewhatis.c
1 /* $Id: makewhatis.c,v 1.1 2011/05/13 00:42:26 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
40 #define MANDOC_DB "mandoc.db"
41 #define MANDOC_IDX "mandoc.index"
42 #define MANDOC_BUFSZ BUFSIZ
43 #define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR
57 #define MAN_ARGS DB *db, \
59 DBT *key, size_t *ksz, \
61 DBT *rval, size_t *rsz, \
62 const struct man_node *n
63 #define MDOC_ARGS DB *db, \
65 DBT *key, size_t *ksz, \
67 DBT *rval, size_t *rsz, \
68 const struct mdoc_node *n
70 static void dbt_append(DBT
*, size_t *, const char *);
71 static void dbt_appendb(DBT
*, size_t *,
72 const void *, size_t);
73 static void dbt_init(DBT
*, size_t *);
74 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
75 static void usage(void);
76 static void pman(DB
*, const char *, DBT
*, size_t *,
77 DBT
*, DBT
*, size_t *, struct man
*);
78 static int pman_node(MAN_ARGS
);
79 static void pmdoc(DB
*, const char *, DBT
*, size_t *,
80 DBT
*, DBT
*, size_t *, struct mdoc
*);
81 static void pmdoc_node(MDOC_ARGS
);
82 static void pmdoc_An(MDOC_ARGS
);
83 static void pmdoc_Cd(MDOC_ARGS
);
84 static void pmdoc_Fd(MDOC_ARGS
);
85 static void pmdoc_In(MDOC_ARGS
);
86 static void pmdoc_Fn(MDOC_ARGS
);
87 static void pmdoc_Fo(MDOC_ARGS
);
88 static void pmdoc_Nd(MDOC_ARGS
);
89 static void pmdoc_Nm(MDOC_ARGS
);
90 static void pmdoc_St(MDOC_ARGS
);
91 static void pmdoc_Vt(MDOC_ARGS
);
93 typedef void (*pmdoc_nf
)(MDOC_ARGS
);
95 static const char *progname
;
97 static const pmdoc_nf mdocs
[MDOC_MAX
] = {
223 main(int argc
, char *argv
[])
225 struct mparse
*mp
; /* parse sequence */
226 struct mdoc
*mdoc
; /* resulting mdoc */
227 struct man
*man
; /* resulting man */
228 char *fn
; /* current file being parsed */
229 const char *msec
, /* manual section */
230 *mtitle
, /* manual title */
231 *arch
, /* manual architecture */
232 *dir
; /* result dir (default: cwd) */
233 char ibuf
[MAXPATHLEN
], /* index fname */
234 ibbuf
[MAXPATHLEN
], /* index backup fname */
235 fbuf
[MAXPATHLEN
], /* btree fname */
236 fbbuf
[MAXPATHLEN
]; /* btree backup fname */
238 DB
*idx
, /* index database */
239 *db
; /* keyword database */
240 DBT rkey
, rval
, /* recno entries */
241 key
, val
; /* persistent keyword entries */
243 ksz
, rsz
; /* entry buffer size */
244 char vbuf
[8]; /* stringified record number */
245 BTREEINFO info
; /* btree configuration */
246 recno_t rec
; /* current record number */
250 progname
= strrchr(argv
[0], '/');
251 if (progname
== NULL
)
258 while (-1 != (ch
= getopt(argc
, argv
, "d:")))
265 return((int)MANDOCLEVEL_BADARG
);
272 * Set up temporary file-names into which we're going to write
273 * all of our data (both for the index and database). These
274 * will be securely renamed to the real file-names after we've
275 * written all of our data.
278 ibuf
[0] = ibuf
[MAXPATHLEN
- 2] =
279 ibbuf
[0] = ibbuf
[MAXPATHLEN
- 2] =
280 fbuf
[0] = fbuf
[MAXPATHLEN
- 2] =
281 fbbuf
[0] = fbbuf
[MAXPATHLEN
- 2] = '\0';
283 strlcat(fbuf
, dir
, MAXPATHLEN
);
284 strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
286 strlcat(fbbuf
, fbuf
, MAXPATHLEN
);
287 strlcat(fbbuf
, "~", MAXPATHLEN
);
289 strlcat(ibuf
, dir
, MAXPATHLEN
);
290 strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
292 strlcat(ibbuf
, ibuf
, MAXPATHLEN
);
293 strlcat(ibbuf
, "~", MAXPATHLEN
);
295 if ('\0' != fbuf
[MAXPATHLEN
- 2] ||
296 '\0' != fbbuf
[MAXPATHLEN
- 2] ||
297 '\0' != ibuf
[MAXPATHLEN
- 2] ||
298 '\0' != ibbuf
[MAXPATHLEN
- 2]) {
299 fprintf(stderr
, "%s: Path too long\n", progname
);
300 exit((int)MANDOCLEVEL_SYSERR
);
304 * For the keyword database, open a BTREE database that allows
305 * duplicates. For the index database, use a standard RECNO
309 memset(&info
, 0, sizeof(BTREEINFO
));
311 db
= dbopen(fbbuf
, MANDOC_FLAGS
, 0644, DB_BTREE
, &info
);
315 exit((int)MANDOCLEVEL_SYSERR
);
318 idx
= dbopen(ibbuf
, MANDOC_FLAGS
, 0644, DB_RECNO
, NULL
);
323 exit((int)MANDOCLEVEL_SYSERR
);
327 * Try parsing the manuals given on the command line. If we
328 * totally fail, then just keep on going. Take resulting trees
329 * and push them down into the database code.
330 * Use the auto-parser and don't report any errors.
333 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
335 memset(&key
, 0, sizeof(DBT
));
336 memset(&val
, 0, sizeof(DBT
));
337 memset(&rkey
, 0, sizeof(DBT
));
338 memset(&rval
, 0, sizeof(DBT
));
340 val
.size
= sizeof(vbuf
);
342 rkey
.size
= sizeof(recno_t
);
347 while (NULL
!= (fn
= *argv
++)) {
350 /* Parse and get (non-empty) AST. */
352 if (mparse_readfd(mp
, -1, fn
) >= MANDOCLEVEL_FATAL
) {
353 fprintf(stderr
, "%s: Parse failure\n", fn
);
356 mparse_result(mp
, &mdoc
, &man
);
357 if (NULL
== mdoc
&& NULL
== man
)
360 /* Manual section: can be empty string. */
362 msec
= NULL
!= mdoc
?
363 mdoc_meta(mdoc
)->msec
:
365 mtitle
= NULL
!= mdoc
?
366 mdoc_meta(mdoc
)->title
:
367 man_meta(man
)->title
;
368 arch
= NULL
!= mdoc
? mdoc_meta(mdoc
)->arch
: NULL
;
374 * The index record value consists of a nil-terminated
375 * filename, a nil-terminated manual section, and a
376 * nil-terminated description. Since the description
377 * may not be set, we set a sentinel to see if we're
378 * going to write a nil byte in its place.
381 dbt_init(&rval
, &rsz
);
382 dbt_appendb(&rval
, &rsz
, fn
, strlen(fn
) + 1);
383 dbt_appendb(&rval
, &rsz
, msec
, strlen(msec
) + 1);
384 dbt_appendb(&rval
, &rsz
, mtitle
, strlen(mtitle
) + 1);
385 dbt_appendb(&rval
, &rsz
, arch
? arch
: "",
386 arch
? strlen(arch
) + 1 : 1);
390 /* Fix the record number in the btree value. */
392 memset(val
.data
, 0, sizeof(uint32_t));
393 memcpy(val
.data
+ 4, &rec
, sizeof(uint32_t));
396 pmdoc(db
, fbbuf
, &key
, &ksz
,
397 &val
, &rval
, &rsz
, mdoc
);
399 pman(db
, fbbuf
, &key
, &ksz
,
400 &val
, &rval
, &rsz
, man
);
403 * Apply this to the index. If we haven't had a
404 * description set, put an empty one in now.
408 dbt_appendb(&rval
, &rsz
, "", 1);
411 dbt_put(idx
, ibbuf
, &rkey
, &rval
);
413 printf("Indexed: %s\n", fn
);
425 /* Atomically replace the file with our temporary one. */
427 if (-1 == rename(fbbuf
, fbuf
))
429 if (-1 == rename(ibbuf
, ibuf
))
432 return((int)MANDOCLEVEL_OK
);
436 * Initialise the stored database key whose data buffer is shared
437 * between uses (as the key must sometimes be constructed from an array
441 dbt_init(DBT
*key
, size_t *ksz
)
445 assert(0 == key
->size
);
446 assert(NULL
== key
->data
);
447 key
->data
= mandoc_malloc(MANDOC_BUFSZ
);
455 * Append a binary value to a database entry. This can be invoked
456 * multiple times; the buffer is automatically resized.
459 dbt_appendb(DBT
*key
, size_t *ksz
, const void *cp
, size_t sz
)
464 /* Overshoot by MANDOC_BUFSZ. */
466 while (key
->size
+ sz
>= *ksz
) {
467 *ksz
= key
->size
+ sz
+ MANDOC_BUFSZ
;
468 key
->data
= mandoc_realloc(key
->data
, *ksz
);
472 dstp
= key
->data
+ (int)key
->size
;
474 while (NULL
!= (endp
= memchr(cp
, '\\', sz
))) {
476 memcpy(dstp
, cp
, ssz
);
483 /* FIXME: expects nil-terminated string! */
484 esc
= mandoc_escape((const char **)&endp
, NULL
, NULL
);
488 /* Nil-terminate this point. */
492 case (ESCAPE_PREDEF
):
494 case (ESCAPE_SPECIAL
):
503 memcpy(dstp
, cp
, ssz
);
513 memcpy(key
->data
+ (int)key
->size
, cp
, sz
);
518 * Append a nil-terminated string to the database entry. This can be
519 * invoked multiple times. The database entry will be nil-terminated as
520 * well; if invoked multiple times, a space is put between strings.
523 dbt_append(DBT
*key
, size_t *ksz
, const char *cp
)
527 if (0 == (sz
= strlen(cp
)))
533 ((char *)key
->data
)[(int)key
->size
- 1] = ' ';
535 dbt_appendb(key
, ksz
, cp
, sz
+ 1);
544 if (SEC_AUTHORS
!= n
->sec
)
547 for (n
= n
->child
; n
; n
= n
->next
)
548 if (MDOC_TEXT
== n
->type
)
549 dbt_append(key
, ksz
, n
->string
);
551 fl
= (uint32_t)MANDOC_AUTHOR
;
552 memcpy(val
->data
, &fl
, 4);
560 const char *start
, *end
;
563 if (SEC_SYNOPSIS
!= n
->sec
)
565 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
569 * Only consider those `Fd' macro fields that begin with an
570 * "inclusion" token (versus, e.g., #define).
572 if (strcmp("#include", n
->string
))
575 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
579 * Strip away the enclosing angle brackets and make sure we're
584 if ('<' == *start
|| '"' == *start
)
587 if (0 == (sz
= strlen(start
)))
590 end
= &start
[(int)sz
- 1];
591 if ('>' == *end
|| '"' == *end
)
594 assert(end
>= start
);
595 dbt_appendb(key
, ksz
, start
, (size_t)(end
- start
+ 1));
596 dbt_appendb(key
, ksz
, "", 1);
598 fl
= (uint32_t)MANDOC_INCLUDES
;
599 memcpy(val
->data
, &fl
, 4);
608 if (SEC_SYNOPSIS
!= n
->sec
)
611 for (n
= n
->child
; n
; n
= n
->next
)
612 if (MDOC_TEXT
== n
->type
)
613 dbt_append(key
, ksz
, n
->string
);
615 fl
= (uint32_t)MANDOC_CONFIG
;
616 memcpy(val
->data
, &fl
, 4);
625 if (SEC_SYNOPSIS
!= n
->sec
)
627 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
630 dbt_append(key
, ksz
, n
->child
->string
);
631 fl
= (uint32_t)MANDOC_INCLUDES
;
632 memcpy(val
->data
, &fl
, 4);
642 if (SEC_SYNOPSIS
!= n
->sec
)
644 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
647 /* .Fn "struct type *arg" "foo" */
649 cp
= strrchr(n
->child
->string
, ' ');
651 cp
= n
->child
->string
;
653 /* Strip away pointer symbol. */
658 dbt_append(key
, ksz
, cp
);
659 fl
= (uint32_t)MANDOC_FUNCTION
;
660 memcpy(val
->data
, &fl
, 4);
669 if (SEC_STANDARDS
!= n
->sec
)
671 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
674 dbt_append(key
, ksz
, n
->child
->string
);
675 fl
= (uint32_t)MANDOC_STANDARD
;
676 memcpy(val
->data
, &fl
, 4);
687 if (SEC_SYNOPSIS
!= n
->sec
)
689 if (MDOC_Vt
== n
->tok
&& MDOC_BODY
!= n
->type
)
691 if (NULL
== n
->last
|| MDOC_TEXT
!= n
->last
->type
)
695 * Strip away leading pointer symbol '*' and trailing ';'.
698 start
= n
->last
->string
;
700 while ('*' == *start
)
703 if (0 == (sz
= strlen(start
)))
706 if (';' == start
[(int)sz
- 1])
712 dbt_appendb(key
, ksz
, start
, sz
);
713 dbt_appendb(key
, ksz
, "", 1);
715 fl
= (uint32_t)MANDOC_VARIABLE
;
716 memcpy(val
->data
, &fl
, 4);
725 if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
727 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
730 dbt_append(key
, ksz
, n
->child
->string
);
731 fl
= (uint32_t)MANDOC_FUNCTION
;
732 memcpy(val
->data
, &fl
, 4);
742 for (first
= 1, n
= n
->child
; n
; n
= n
->next
) {
743 if (MDOC_TEXT
!= n
->type
)
746 dbt_appendb(rval
, rsz
, n
->string
, strlen(n
->string
) + 1);
748 dbt_append(rval
, rsz
, n
->string
);
759 if (SEC_NAME
== n
->sec
) {
760 for (n
= n
->child
; n
; n
= n
->next
) {
761 if (MDOC_TEXT
!= n
->type
)
763 dbt_append(key
, ksz
, n
->string
);
765 fl
= (uint32_t)MANDOC_NAME
;
766 memcpy(val
->data
, &fl
, 4);
768 } else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
771 for (n
= n
->child
; n
; n
= n
->next
) {
772 if (MDOC_TEXT
!= n
->type
)
774 dbt_append(key
, ksz
, n
->string
);
777 fl
= (uint32_t)MANDOC_UTILITY
;
778 memcpy(val
->data
, &fl
, 4);
782 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
792 if (0 == (*db
->put
)(db
, key
, val
, 0))
796 exit((int)MANDOCLEVEL_SYSERR
);
801 * Call out to per-macro handlers after clearing the persistent database
802 * key. If the macro sets the database key, flush it to the database.
805 pmdoc_node(MDOC_ARGS
)
821 if (NULL
== mdocs
[n
->tok
])
826 (*mdocs
[n
->tok
])(db
, dbn
, key
, ksz
, val
, rval
, rsz
, n
);
827 dbt_put(db
, dbn
, key
, val
);
833 pmdoc_node(db
, dbn
, key
, ksz
, val
, rval
, rsz
, n
->child
);
834 pmdoc_node(db
, dbn
, key
, ksz
, val
, rval
, rsz
, n
->next
);
840 const struct man_node
*head
, *body
;
841 const char *start
, *sv
;
849 * We're only searching for one thing: the first text child in
850 * the BODY of a NAME section. Since we don't keep track of
851 * sections in -man, run some hoops to find out whether we're in
852 * the correct section or not.
855 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
857 assert(body
->parent
);
858 if (NULL
!= (head
= body
->parent
->head
) &&
860 NULL
!= (head
= (head
->child
)) &&
861 MAN_TEXT
== head
->type
&&
862 0 == strcmp(head
->string
, "NAME") &&
863 NULL
!= (body
= body
->child
) &&
864 MAN_TEXT
== body
->type
) {
866 fl
= (uint32_t)MANDOC_NAME
;
867 memcpy(val
->data
, &fl
, 4);
869 assert(body
->string
);
870 start
= sv
= body
->string
;
873 * Go through a special heuristic dance here.
874 * This is why -man manuals are great!
875 * (I'm being sarcastic: my eyes are bleeding.)
876 * Conventionally, one or more manual names are
877 * comma-specified prior to a whitespace, then a
878 * dash, then a description. Try to puzzle out
879 * the name parts here.
883 sz
= strcspn(start
, " ,");
884 if ('\0' == start
[(int)sz
])
888 dbt_appendb(key
, ksz
, start
, sz
);
889 dbt_appendb(key
, ksz
, "", 1);
891 dbt_put(db
, dbn
, key
, val
);
893 if (' ' == start
[(int)sz
]) {
894 start
+= (int)sz
+ 1;
898 assert(',' == start
[(int)sz
]);
899 start
+= (int)sz
+ 1;
900 while (' ' == *start
)
906 dbt_append(key
, ksz
, start
);
910 while (' ' == *start
)
913 if (0 == strncmp(start
, "-", 1))
915 else if (0 == strncmp(start
, "\\-", 2))
917 else if (0 == strncmp(start
, "\\(en", 4))
919 else if (0 == strncmp(start
, "\\(em", 4))
922 while (' ' == *start
)
925 dbt_appendb(rval
, rsz
, start
, strlen(start
) + 1);
929 if (pman_node(db
, dbn
, key
, ksz
, val
, rval
, rsz
, n
->child
))
931 if (pman_node(db
, dbn
, key
, ksz
, val
, rval
, rsz
, n
->next
))
938 pman(DB
*db
, const char *dbn
, DBT
*key
, size_t *ksz
,
939 DBT
*val
, DBT
*rval
, size_t *rsz
, struct man
*m
)
942 pman_node(db
, dbn
, key
, ksz
, val
, rval
, rsz
, man_node(m
));
947 pmdoc(DB
*db
, const char *dbn
, DBT
*key
, size_t *ksz
,
948 DBT
*val
, DBT
*rval
, size_t *rsz
, struct mdoc
*m
)
951 pmdoc_node(db
, dbn
, key
, ksz
, val
, rval
, rsz
, mdoc_node(m
));
958 fprintf(stderr
, "usage: %s "