]>
git.cameronkatri.com Git - mandoc.git/blob - mandocdb.c
1 /* $Id: mandocdb.c,v 1.4 2011/07/15 10:15:24 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
/* File name, appended to each directory argument, of the keyword btree database. */
42 #define MANDOC_DB "mandoc.db"
/* File name, appended to each directory argument, of the recno index database. */
43 #define MANDOC_IDX "mandoc.index"
/* Initial size of the growable buffers, and the overshoot added whenever one grows. */
44 #define MANDOC_BUFSZ BUFSIZ
/* Open flags used for the temporary in-memory keyword hash table. */
45 #define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR
/* Number of entries by which the array of reusable record numbers grows. */
46 #define MANDOC_SLOP 1024
48 /* Bit-fields. See mandocdb.8. */
/*
 * Each keyword is stored together with a mask built from the bits
 * below; hash_put() ORs the new bit into a mask previously stored for
 * the same keyword.  Going by the handlers that pass each bit:
 *   TYPE_NAME      text from the NAME section
 *   TYPE_FUNCTION  function names from the SYNOPSIS section
 *   TYPE_UTILITY   names from the SYNOPSIS section (presumably utilities)
 *   TYPE_INCLUDES  included file names from the SYNOPSIS section
 *   TYPE_VARIABLE  SYNOPSIS text with leading '*' and trailing ';' stripped
 *   TYPE_STANDARD  text from the STANDARDS section
 *   TYPE_AUTHOR    text from the AUTHORS section
 *   TYPE_CONFIG    other text from the SYNOPSIS section (presumably
 *                  kernel configuration -- TODO confirm in mandocdb.8)
 *   TYPE_DESC      the manual's description text
 *   TYPE_XREF      two strings joined by '.' (presumably name.section)
 *   TYPE_PATH      text from the FILES section
 *   TYPE_ENV       text from the ENVIRONMENT section
 *   TYPE_ERR       text from the ERRORS section
 */
50 #define TYPE_NAME 0x01
51 #define TYPE_FUNCTION 0x02
52 #define TYPE_UTILITY 0x04
53 #define TYPE_INCLUDES 0x08
54 #define TYPE_VARIABLE 0x10
55 #define TYPE_STANDARD 0x20
56 #define TYPE_AUTHOR 0x40
57 #define TYPE_CONFIG 0x80
58 #define TYPE_DESC 0x100
59 #define TYPE_XREF 0x200
60 #define TYPE_PATH 0x400
61 #define TYPE_ENV 0x800
62 #define TYPE_ERR 0x1000
70 /* Buffer for storing growable data. */
78 /* Operation we're going to perform. */
81 OP_NEW
= 0, /* new database */
82 OP_UPDATE
, /* update entries in existing database */
83 OP_DELETE
/* delete entries from existing database */
86 #define MAN_ARGS DB *hash, \
89 const struct man_node *n
90 #define MDOC_ARGS DB *hash, \
93 const struct mdoc_node *n, \
94 const struct mdoc_meta *m
96 static void buf_appendmdoc(struct buf
*,
97 const struct mdoc_node
*, int);
98 static void buf_append(struct buf
*, const char *);
99 static void buf_appendb(struct buf
*,
100 const void *, size_t);
101 static void dbt_put(DB
*, const char *, DBT
*, DBT
*);
102 static void hash_put(DB
*, const struct buf
*, int);
103 static void hash_reset(DB
**);
104 static void index_merge(const struct of
*, struct mparse
*,
105 struct buf
*, struct buf
*,
106 DB
*, DB
*, const char *,
108 recno_t
, const recno_t
*, size_t);
109 static void index_prune(const struct of
*, DB
*,
110 const char *, DB
*, const char *,
111 recno_t
*, recno_t
**, size_t *);
112 static int ofile_build(const char *, struct of
**);
113 static void ofile_free(struct of
*);
114 static int pman_node(MAN_ARGS
);
115 static void pmdoc_node(MDOC_ARGS
);
116 static void pmdoc_An(MDOC_ARGS
);
117 static void pmdoc_Cd(MDOC_ARGS
);
118 static void pmdoc_Er(MDOC_ARGS
);
119 static void pmdoc_Ev(MDOC_ARGS
);
120 static void pmdoc_Fd(MDOC_ARGS
);
121 static void pmdoc_In(MDOC_ARGS
);
122 static void pmdoc_Fn(MDOC_ARGS
);
123 static void pmdoc_Fo(MDOC_ARGS
);
124 static void pmdoc_Nd(MDOC_ARGS
);
125 static void pmdoc_Nm(MDOC_ARGS
);
126 static void pmdoc_Pa(MDOC_ARGS
);
127 static void pmdoc_St(MDOC_ARGS
);
128 static void pmdoc_Vt(MDOC_ARGS
);
129 static void pmdoc_Xr(MDOC_ARGS
);
130 static void usage(void);
132 typedef void (*pmdoc_nf
)(MDOC_ARGS
);
134 static const pmdoc_nf mdocs
[MDOC_MAX
] = {
259 static const char *progname
;
262 main(int argc
, char *argv
[])
264 struct mparse
*mp
; /* parse sequence */
265 enum op op
; /* current operation */
266 char ibuf
[MAXPATHLEN
], /* index fname */
267 fbuf
[MAXPATHLEN
]; /* btree fname */
269 DB
*idx
, /* index database */
270 *db
, /* keyword database */
271 *hash
; /* temporary keyword hashtable */
272 BTREEINFO info
; /* btree configuration */
273 recno_t maxrec
; /* supremum of all records */
274 recno_t
*recs
; /* buffer of empty records */
275 size_t recsz
, /* buffer size of recs */
276 reccur
; /* valid number of recs */
277 struct buf buf
, /* keyword buffer */
278 dbuf
; /* description buffer */
283 progname
= strrchr(argv
[0], '/');
284 if (progname
== NULL
)
298 memset(&buf
, 0, sizeof(struct buf
));
299 memset(&dbuf
, 0, sizeof(struct buf
));
301 while (-1 != (ch
= getopt(argc
, argv
, "")))
305 return((int)MANDOCLEVEL_BADARG
);
311 memset(&info
, 0, sizeof(BTREEINFO
));
314 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
316 flags
= OP_NEW
== op
? O_CREAT
|O_TRUNC
|O_RDWR
: O_CREAT
|O_RDWR
;
318 buf
.size
= dbuf
.size
= MANDOC_BUFSZ
;
320 buf
.cp
= mandoc_malloc(buf
.size
);
321 dbuf
.cp
= mandoc_malloc(dbuf
.size
);
323 for (i
= 0; i
< argc
; i
++) {
324 ibuf
[0] = ibuf
[MAXPATHLEN
- 2] =
325 fbuf
[0] = fbuf
[MAXPATHLEN
- 2] = '\0';
327 strlcat(fbuf
, argv
[i
], MAXPATHLEN
);
328 strlcat(fbuf
, MANDOC_DB
, MAXPATHLEN
);
330 strlcat(ibuf
, argv
[i
], MAXPATHLEN
);
331 strlcat(ibuf
, MANDOC_IDX
, MAXPATHLEN
);
333 if ('\0' != fbuf
[MAXPATHLEN
- 2] ||
334 '\0' != ibuf
[MAXPATHLEN
- 2]) {
335 fprintf(stderr
, "%s: Path too long\n", argv
[i
]);
339 db
= dbopen(fbuf
, flags
, 0644, DB_BTREE
, &info
);
340 idx
= dbopen(ibuf
, flags
, 0644, DB_RECNO
, NULL
);
345 } else if (NULL
== db
) {
353 if ( ! ofile_build(argv
[i
], &of
))
357 if (OP_DELETE
== op
|| OP_UPDATE
== op
)
358 index_prune(of
, db
, fbuf
, idx
, ibuf
,
359 &maxrec
, &recs
, &recsz
);
364 index_merge(of
, mp
, &dbuf
, &buf
, hash
, db
,
365 fbuf
, idx
, ibuf
, maxrec
, recs
, reccur
);
373 (*hash
->close
)(hash
);
382 return(i
< argc
? MANDOCLEVEL_SYSERR
: MANDOCLEVEL_OK
);
386 index_merge(const struct of
*of
, struct mparse
*mp
,
387 struct buf
*dbuf
, struct buf
*buf
,
388 DB
*hash
, DB
*db
, const char *dbf
,
389 DB
*idx
, const char *idxf
,
390 recno_t maxrec
, const recno_t
*recs
, size_t reccur
)
397 const char *fn
, *msec
, *mtitle
, *arch
;
402 for (rec
= 0; of
; of
= of
->next
) {
406 rec
= recs
[(int)reccur
];
407 } else if (maxrec
> 0) {
416 if (mparse_readfd(mp
, -1, fn
) >= MANDOCLEVEL_FATAL
) {
417 fprintf(stderr
, "%s: Parse failure\n", fn
);
421 mparse_result(mp
, &mdoc
, &man
);
422 if (NULL
== mdoc
&& NULL
== man
)
425 msec
= NULL
!= mdoc
?
426 mdoc_meta(mdoc
)->msec
: man_meta(man
)->msec
;
427 mtitle
= NULL
!= mdoc
?
428 mdoc_meta(mdoc
)->title
: man_meta(man
)->title
;
429 arch
= NULL
!= mdoc
?
430 mdoc_meta(mdoc
)->arch
: NULL
;
436 * The index record value consists of a nil-terminated
437 * filename, a nil-terminated manual section, a nil-terminated
438 * title, a nil-terminated architecture, and a nil-terminated
439 * description. Since the description may not be set, we set a
440 * sentinel to see if we're going to write a nil byte in its place.
444 buf_appendb(dbuf
, fn
, strlen(fn
) + 1);
445 buf_appendb(dbuf
, msec
, strlen(msec
) + 1);
446 buf_appendb(dbuf
, mtitle
, strlen(mtitle
) + 1);
447 buf_appendb(dbuf
, arch
, strlen(arch
) + 1);
451 /* Fix the record number in the btree value. */
454 pmdoc_node(hash
, buf
, dbuf
,
455 mdoc_node(mdoc
), mdoc_meta(mdoc
));
457 pman_node(hash
, buf
, dbuf
, man_node(man
));
460 * Copy from the in-memory hashtable of pending keywords
464 memset(vbuf
, 0, sizeof(uint32_t));
465 memcpy(vbuf
+ 4, &rec
, sizeof(uint32_t));
468 while (0 == (ch
= (*hash
->seq
)(hash
, &key
, &val
, seq
))) {
471 memcpy(vbuf
, val
.data
, sizeof(uint32_t));
472 val
.size
= sizeof(vbuf
);
475 printf("%s: Added keyword: %s\n",
476 fn
, (char *)key
.data
);
477 dbt_put(db
, dbf
, &key
, &val
);
481 exit((int)MANDOCLEVEL_SYSERR
);
485 * Apply to the index. If we haven't had a description
486 * set, put an empty one in now.
490 buf_appendb(dbuf
, "", 1);
493 key
.size
= sizeof(recno_t
);
496 val
.size
= dbuf
->len
;
498 printf("%s: Added index\n", fn
);
499 dbt_put(idx
, idxf
, &key
, &val
);
504 * Scan through all entries in the index file `idx' and prune those
505 * entries in `ofile'.
506 * Pruning consists of removing from `db', then invalidating the entry
507 * in `idx' (zeroing its value size).
510 index_prune(const struct of
*ofile
, DB
*db
, const char *dbf
,
511 DB
*idx
, const char *idxf
,
512 recno_t
*maxrec
, recno_t
**recs
, size_t *recsz
)
523 while (0 == (ch
= (*idx
->seq
)(idx
, &key
, &val
, seq
))) {
525 *maxrec
= *(recno_t
*)key
.data
;
527 if (reccur
>= *recsz
) {
528 *recsz
+= MANDOC_SLOP
;
529 *recs
= mandoc_realloc(*recs
,
530 *recsz
* sizeof(recno_t
));
532 (*recs
)[(int)reccur
] = *maxrec
;
537 fn
= (char *)val
.data
;
538 for (of
= ofile
; of
; of
= of
->next
)
539 if (0 == strcmp(fn
, of
->fname
))
546 while (0 == (ch
= (*db
->seq
)(db
, &key
, &val
, sseq
))) {
548 assert(8 == val
.size
);
549 if (*maxrec
!= *(recno_t
*)(val
.data
+ 4))
551 printf("%s: Deleted keyword: %s\n",
552 fn
, (char *)key
.data
);
553 ch
= (*db
->del
)(db
, &key
, R_CURSOR
);
559 exit((int)MANDOCLEVEL_SYSERR
);
562 printf("%s: Deleted index\n", fn
);
565 ch
= (*idx
->put
)(idx
, &key
, &val
, R_CURSOR
);
568 exit((int)MANDOCLEVEL_SYSERR
);
571 if (reccur
>= *recsz
) {
572 *recsz
+= MANDOC_SLOP
;
573 *recs
= mandoc_realloc
574 (*recs
, *recsz
* sizeof(recno_t
));
577 (*recs
)[(int)reccur
] = *maxrec
;
584 * Grow the buffer (if necessary) and copy in a binary string.
587 buf_appendb(struct buf
*buf
, const void *cp
, size_t sz
)
590 /* Overshoot by MANDOC_BUFSZ. */
592 while (buf
->len
+ sz
>= buf
->size
) {
593 buf
->size
= buf
->len
+ sz
+ MANDOC_BUFSZ
;
594 buf
->cp
= mandoc_realloc(buf
->cp
, buf
->size
);
597 memcpy(buf
->cp
+ (int)buf
->len
, cp
, sz
);
602 * Append a nil-terminated string to the buffer.
603 * This can be invoked multiple times.
604 * The buffer string will be nil-terminated.
605 * If invoked multiple times, a space is put between strings.
608 buf_append(struct buf
*buf
, const char *cp
)
612 if (0 == (sz
= strlen(cp
)))
616 buf
->cp
[(int)buf
->len
- 1] = ' ';
618 buf_appendb(buf
, cp
, sz
+ 1);
622 * Recursively add all text from a given node.
623 * This is optimised for general mdoc nodes in this context, which do
624 * not consist of subexpressions and having a recursive call for n->next
626 * The "f" variable should be 0 unless called from pmdoc_Nd for the
627 * description buffer, which does not start at the beginning of the
631 buf_appendmdoc(struct buf
*buf
, const struct mdoc_node
*n
, int f
)
634 for ( ; n
; n
= n
->next
) {
636 buf_appendmdoc(buf
, n
->child
, f
);
638 if (MDOC_TEXT
== n
->type
&& f
) {
640 buf_appendb(buf
, n
->string
,
641 strlen(n
->string
) + 1);
642 } else if (MDOC_TEXT
== n
->type
)
643 buf_append(buf
, n
->string
);
653 if (SEC_AUTHORS
!= n
->sec
)
656 buf_appendmdoc(buf
, n
->child
, 0);
657 hash_put(hash
, buf
, TYPE_AUTHOR
);
665 if (NULL
!= (hash
= *db
))
666 (*hash
->close
)(hash
);
668 *db
= dbopen(NULL
, MANDOC_FLAGS
, 0644, DB_HASH
, NULL
);
671 exit((int)MANDOCLEVEL_SYSERR
);
679 const char *start
, *end
;
682 if (SEC_SYNOPSIS
!= n
->sec
)
684 if (NULL
== (n
= n
->child
) || MDOC_TEXT
!= n
->type
)
688 * Only consider those `Fd' macro fields that begin with an
689 * "inclusion" token (versus, e.g., #define).
691 if (strcmp("#include", n
->string
))
694 if (NULL
== (n
= n
->next
) || MDOC_TEXT
!= n
->type
)
698 * Strip away the enclosing angle brackets and make sure we're
703 if ('<' == *start
|| '"' == *start
)
706 if (0 == (sz
= strlen(start
)))
709 end
= &start
[(int)sz
- 1];
710 if ('>' == *end
|| '"' == *end
)
713 assert(end
>= start
);
715 buf_appendb(buf
, start
, (size_t)(end
- start
+ 1));
716 buf_appendb(buf
, "", 1);
718 hash_put(hash
, buf
, TYPE_INCLUDES
);
726 if (SEC_SYNOPSIS
!= n
->sec
)
729 buf_appendmdoc(buf
, n
->child
, 0);
730 hash_put(hash
, buf
, TYPE_CONFIG
);
738 if (SEC_SYNOPSIS
!= n
->sec
)
740 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
743 buf_append(buf
, n
->child
->string
);
744 hash_put(hash
, buf
, TYPE_INCLUDES
);
753 if (SEC_SYNOPSIS
!= n
->sec
)
755 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
758 /* .Fn "struct type *arg" "foo" */
760 cp
= strrchr(n
->child
->string
, ' ');
762 cp
= n
->child
->string
;
764 /* Strip away pointer symbol. */
770 hash_put(hash
, buf
, TYPE_FUNCTION
);
778 if (SEC_STANDARDS
!= n
->sec
)
780 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
783 buf_append(buf
, n
->child
->string
);
784 hash_put(hash
, buf
, TYPE_STANDARD
);
792 if (NULL
== (n
= n
->child
))
795 buf_appendb(buf
, n
->string
, strlen(n
->string
));
797 if (NULL
!= (n
= n
->next
)) {
798 buf_appendb(buf
, ".", 1);
799 buf_appendb(buf
, n
->string
, strlen(n
->string
) + 1);
801 buf_appendb(buf
, ".", 2);
803 hash_put(hash
, buf
, TYPE_XREF
);
813 if (SEC_SYNOPSIS
!= n
->sec
)
815 if (MDOC_Vt
== n
->tok
&& MDOC_BODY
!= n
->type
)
817 if (NULL
== n
->last
|| MDOC_TEXT
!= n
->last
->type
)
821 * Strip away leading pointer symbol '*' and trailing ';'.
824 start
= n
->last
->string
;
826 while ('*' == *start
)
829 if (0 == (sz
= strlen(start
)))
832 if (';' == start
[(int)sz
- 1])
838 buf_appendb(buf
, start
, sz
);
839 buf_appendb(buf
, "", 1);
840 hash_put(hash
, buf
, TYPE_VARIABLE
);
848 if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
850 if (NULL
== n
->child
|| MDOC_TEXT
!= n
->child
->type
)
853 buf_append(buf
, n
->child
->string
);
854 hash_put(hash
, buf
, TYPE_FUNCTION
);
863 if (MDOC_BODY
!= n
->type
)
866 buf_appendmdoc(dbuf
, n
->child
, 1);
867 buf_appendmdoc(buf
, n
->child
, 0);
869 hash_put(hash
, buf
, TYPE_DESC
);
877 if (SEC_ERRORS
!= n
->sec
)
880 buf_appendmdoc(buf
, n
->child
, 0);
881 hash_put(hash
, buf
, TYPE_ERR
);
889 if (SEC_ENVIRONMENT
!= n
->sec
)
892 buf_appendmdoc(buf
, n
->child
, 0);
893 hash_put(hash
, buf
, TYPE_ENV
);
901 if (SEC_FILES
!= n
->sec
)
904 buf_appendmdoc(buf
, n
->child
, 0);
905 hash_put(hash
, buf
, TYPE_PATH
);
913 if (SEC_NAME
== n
->sec
) {
914 buf_appendmdoc(buf
, n
->child
, 0);
915 hash_put(hash
, buf
, TYPE_NAME
);
917 } else if (SEC_SYNOPSIS
!= n
->sec
|| MDOC_HEAD
!= n
->type
)
920 if (NULL
== n
->child
)
921 buf_append(buf
, m
->name
);
923 buf_appendmdoc(buf
, n
->child
, 0);
924 hash_put(hash
, buf
, TYPE_UTILITY
);
928 hash_put(DB
*db
, const struct buf
*buf
, int mask
)
939 if ((rc
= (*db
->get
)(db
, &key
, &val
, 0)) < 0) {
941 exit((int)MANDOCLEVEL_SYSERR
);
943 mask
|= *(int *)val
.data
;
946 val
.size
= sizeof(int);
948 if ((rc
= (*db
->put
)(db
, &key
, &val
, 0)) < 0) {
950 exit((int)MANDOCLEVEL_SYSERR
);
955 dbt_put(DB
*db
, const char *dbn
, DBT
*key
, DBT
*val
)
961 if (0 == (*db
->put
)(db
, key
, val
, 0))
965 exit((int)MANDOCLEVEL_SYSERR
);
970 * Call out to per-macro handlers after clearing the persistent database
971 * key. If the macro sets the database key, flush it to the database.
974 pmdoc_node(MDOC_ARGS
)
990 if (NULL
== mdocs
[n
->tok
])
994 (*mdocs
[n
->tok
])(hash
, buf
, dbuf
, n
, m
);
1000 pmdoc_node(hash
, buf
, dbuf
, n
->child
, m
);
1001 pmdoc_node(hash
, buf
, dbuf
, n
->next
, m
);
1007 const struct man_node
*head
, *body
;
1008 const char *start
, *sv
;
1015 * We're only searching for one thing: the first text child in
1016 * the BODY of a NAME section. Since we don't keep track of
1017 * sections in -man, jump through some hoops to find out whether we're in
1018 * the correct section or not.
1021 if (MAN_BODY
== n
->type
&& MAN_SH
== n
->tok
) {
1023 assert(body
->parent
);
1024 if (NULL
!= (head
= body
->parent
->head
) &&
1025 1 == head
->nchild
&&
1026 NULL
!= (head
= (head
->child
)) &&
1027 MAN_TEXT
== head
->type
&&
1028 0 == strcmp(head
->string
, "NAME") &&
1029 NULL
!= (body
= body
->child
) &&
1030 MAN_TEXT
== body
->type
) {
1032 assert(body
->string
);
1033 start
= sv
= body
->string
;
1036 * Go through a special heuristic dance here.
1037 * This is why -man manuals are great!
1038 * (I'm being sarcastic: my eyes are bleeding.)
1039 * Conventionally, one or more manual names are
1040 * comma-specified prior to a whitespace, then a
1041 * dash, then a description. Try to puzzle out
1042 * the name parts here.
1046 sz
= strcspn(start
, " ,");
1047 if ('\0' == start
[(int)sz
])
1051 buf_appendb(buf
, start
, sz
);
1052 buf_appendb(buf
, "", 1);
1054 hash_put(hash
, buf
, TYPE_NAME
);
1056 if (' ' == start
[(int)sz
]) {
1057 start
+= (int)sz
+ 1;
1061 assert(',' == start
[(int)sz
]);
1062 start
+= (int)sz
+ 1;
1063 while (' ' == *start
)
1070 buf_append(buf
, start
);
1074 while (' ' == *start
)
1077 if (0 == strncmp(start
, "-", 1))
1079 else if (0 == strncmp(start
, "\\-", 2))
1081 else if (0 == strncmp(start
, "\\(en", 4))
1083 else if (0 == strncmp(start
, "\\(em", 4))
1086 while (' ' == *start
)
1089 sz
= strlen(start
) + 1;
1090 buf_appendb(dbuf
, start
, sz
);
1091 buf_appendb(buf
, start
, sz
);
1093 hash_put(hash
, buf
, TYPE_DESC
);
1097 if (pman_node(hash
, buf
, dbuf
, n
->child
))
1099 if (pman_node(hash
, buf
, dbuf
, n
->next
))
1106 * Recursively build up a list of files to parse.
1107 * We use this instead of ftw() and so on because I don't want global
1108 * variables hanging around.
1109 * This ignores the mandoc.db and mandoc.index files, but assumes that
1110 * everything else is a manual.
1111 * Pass in a pointer to a NULL structure for the first invocation.
1114 ofile_build(const char *dir
, struct of
**of
)
1121 if (NULL
== (d
= opendir(dir
))) {
1126 while (NULL
!= (dp
= readdir(d
))) {
1128 if (DT_DIR
== dp
->d_type
) {
1129 if (strcmp(".", fn
) && strcmp("..", fn
))
1130 if ( ! ofile_build(dp
->d_name
, of
))
1133 } else if (DT_REG
!= dp
->d_type
)
1136 if (0 == strcmp(MANDOC_DB
, fn
) ||
1137 0 == strcmp(MANDOC_IDX
, fn
))
1140 nof
= mandoc_calloc(1, sizeof(struct of
));
1141 nof
->fname
= mandoc_strdup(fn
);
1156 ofile_free(struct of
*of
)
1172 fprintf(stderr
, "usage: %s [dir...]\n", progname
);