]>
git.cameronkatri.com Git - mandoc.git/blob - apropos.c
02c5d0bad542e4f0e43f4d97d37314e4786a2c02
1 /* $Id: apropos.c,v 1.7 2011/10/09 10:46:38 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/types.h>
44 #define MAXRESULTS 256
46 /* Bit-fields. See mandocdb.8. */
48 #define TYPE_NAME 0x01
49 #define TYPE_FUNCTION 0x02
50 #define TYPE_UTILITY 0x04
51 #define TYPE_INCLUDES 0x08
52 #define TYPE_VARIABLE 0x10
53 #define TYPE_STANDARD 0x20
54 #define TYPE_AUTHOR 0x40
55 #define TYPE_CONFIG 0x80
56 #define TYPE_DESC 0x100
57 #define TYPE_XREF 0x200
58 #define TYPE_PATH 0x400
59 #define TYPE_ENV 0x800
60 #define TYPE_ERR 0x1000
75 enum sort sort
; /* output sorting */
76 const char *arch
; /* restrict to architecture */
77 const char *cat
; /* restrict to category */
78 int types
; /* only types in bitmask */
79 int insens
; /* case-insensitive match */
80 enum match match
; /* match type */
85 const char *name
; /* command-line type name */
89 char *file
; /* file in file-system */
90 char *cat
; /* category (3p, 3, etc.) */
91 char *title
; /* title (FOO, etc.) */
92 char *arch
; /* arch (or empty string) */
93 char *desc
; /* description (from Nd) */
94 recno_t rec
; /* record in index */
98 char *arch
; /* architecture */
99 char *desc
; /* free-form description */
100 char *keyword
; /* matched keyword */
101 int types
; /* bitmask of field selectors */
102 char *cat
; /* manual section */
103 char *title
; /* manual title */
104 char *uri
; /* formatted uri of file */
105 recno_t rec
; /* unique id of underlying manual */
107 * Maintain a binary tree for checking the uniqueness of `rec'
108 * when adding elements to the results array.
109 * Since the results array is dynamic, use offset in the array
110 * instead of a pointer to the structure.
117 DB
*db
; /* database */
119 const char *dbf
; /* database name */
120 const char *idxf
; /* index name */
123 static const char * const sorts
[SORT__MAX
] = {
124 "cat", /* SORT_CAT */
125 "title", /* SORT_TITLE */
128 static const struct type types
[] = {
129 { TYPE_NAME
, "name" },
130 { TYPE_FUNCTION
, "func" },
131 { TYPE_UTILITY
, "utility" },
132 { TYPE_INCLUDES
, "incl" },
133 { TYPE_VARIABLE
, "var" },
134 { TYPE_STANDARD
, "stand" },
135 { TYPE_AUTHOR
, "auth" },
136 { TYPE_CONFIG
, "conf" },
137 { TYPE_DESC
, "desc" },
138 { TYPE_XREF
, "xref" },
139 { TYPE_PATH
, "path" },
146 static void buf_alloc(char **, size_t *, size_t);
147 static void buf_dup(struct mchars
*, char **, const char *);
148 static void buf_redup(struct mchars
*, char **,
149 size_t *, const char *);
150 static int sort_cat(const void *, const void *);
151 static int sort_title(const void *, const void *);
152 static int state_getrecord(struct state
*,
153 recno_t
, struct rec
*);
154 static void state_output(const struct res
*, int);
155 static int state_search(struct state
*,
156 const struct opts
*, char *);
157 static void usage(void);
159 static char *progname
;
162 main(int argc
, char *argv
[])
166 const char *dbf
, *idxf
;
173 memset(&opts
, 0, sizeof(struct opts
));
174 memset(&state
, 0, sizeof(struct state
));
177 idxf
= "mandoc.index";
181 progname
= strrchr(argv
[0], '/');
182 if (progname
== NULL
)
187 opts
.match
= MATCH_SUBSTR
;
189 while (-1 != (ch
= getopt(argc
, argv
, "a:c:eIrs:t:")))
198 opts
.match
= MATCH_EXACT
;
204 opts
.match
= MATCH_REGEX
;
207 for (i
= 0; i
< SORT__MAX
; i
++) {
208 if (strcmp(optarg
, sorts
[i
]))
210 opts
.sort
= (enum sort
)i
;
217 fprintf(stderr
, "%s: Bad sort\n", optarg
);
218 return(EXIT_FAILURE
);
220 while (NULL
!= (v
= strsep(&optarg
, ","))) {
223 for (i
= 0; types
[i
].mask
; i
++) {
224 if (strcmp(types
[i
].name
, v
))
228 if (0 == types
[i
].mask
)
230 opts
.types
|= types
[i
].mask
;
235 fprintf(stderr
, "%s: Bad type\n", v
);
236 return(EXIT_FAILURE
);
239 return(EXIT_FAILURE
);
245 if (0 == argc
|| '\0' == **argv
) {
252 opts
.types
= TYPE_NAME
| TYPE_DESC
;
255 * Configure databases.
256 * The keyword database is a btree that allows for duplicate
258 * The index database is a recno.
261 memset(&info
, 0, sizeof(BTREEINFO
));
264 state
.db
= dbopen(dbf
, O_RDONLY
, 0, DB_BTREE
, &info
);
265 if (NULL
== state
.db
) {
270 state
.idx
= dbopen(idxf
, O_RDONLY
, 0, DB_RECNO
, NULL
);
271 if (NULL
== state
.idx
) {
276 /* Main search function. */
278 rc
= state_search(&state
, &opts
, q
) ?
279 EXIT_SUCCESS
: EXIT_FAILURE
;
282 (*state
.db
->close
)(state
.db
);
284 (*state
.idx
->close
)(state
.idx
);
290 state_search(struct state
*p
, const struct opts
*opts
, char *q
)
292 int leaf
, root
, len
, ch
, dflag
, rc
;
314 * Configure how we scan through results to see if we match:
315 * whether by regexp or exact matches.
318 switch (opts
->match
) {
320 ch
= REG_EXTENDED
| REG_NOSUB
|
321 (opts
->insens
? REG_ICASE
: 0);
323 if (0 != regcomp(&reg
, q
, ch
)) {
324 fprintf(stderr
, "%s: Bad pattern\n", q
);
333 key
.size
= strlen(q
) + 1;
344 * Iterate over the entire keyword database.
345 * For each record, we must first translate the key into UTF-8.
346 * Following that, make sure it's acceptable.
347 * Lastly, add it to the available records.
350 while (0 == (ch
= (*p
->db
->seq
)(p
->db
, &key
, &val
, dflag
))) {
354 * Keys must be sized as such: the keyword must be
355 * non-empty (nil terminator plus one character) and the
356 * value must be 8 bytes (a uint32_t flag field followed by the
357 * recno_t---uint32_t---index reference).
360 if (key
.size
< 2 || 8 != val
.size
) {
361 fprintf(stderr
, "%s: Bad database\n", p
->dbf
);
365 buf_redup(mc
, &buf
, &bufsz
, (char *)key
.data
);
367 fl
= *(uint32_t *)val
.data
;
369 if ( ! (fl
& opts
->types
))
372 switch (opts
->match
) {
374 if (regexec(regp
, buf
, 0, NULL
, 0))
378 if (opts
->insens
&& strcasecmp(buf
, q
))
380 if ( ! opts
->insens
&& strcmp(buf
, q
))
384 if (opts
->insens
&& NULL
== strcasestr(buf
, q
))
386 if ( ! opts
->insens
&& NULL
== strstr(buf
, q
))
392 * Now look up the file itself in our index. The file's
393 * indexed by its recno for fast lookups.
396 memcpy(&rec
, val
.data
+ 4, sizeof(recno_t
));
398 if ( ! state_getrecord(p
, rec
, &record
))
401 /* If we're in a different section, skip... */
403 if (opts
->cat
&& strcasecmp(opts
->cat
, record
.cat
))
405 if (opts
->arch
&& strcasecmp(opts
->arch
, record
.arch
))
409 * Do a binary search to dedupe the results tree of the
410 * same record: we don't print the same file.
413 for (leaf
= root
; leaf
>= 0; )
414 if (rec
> res
[leaf
].rec
&& res
[leaf
].rhs
>= 0)
415 leaf
= res
[leaf
].rhs
;
416 else if (rec
< res
[leaf
].rec
&& res
[leaf
].lhs
>= 0)
417 leaf
= res
[leaf
].lhs
;
421 if (leaf
>= 0 && res
[leaf
].rec
== rec
)
425 (res
, (len
+ 1) * sizeof(struct res
));
428 * Now we have our filename, keywords, types, and all
429 * other necessary information.
430 * Process it and add it to our list of results.
434 snprintf(filebuf
, 10, "%u", record
.rec
);
435 assert('\0' == filebuf
[9]);
437 res
[len
].rec
= record
.rec
;
439 res
[len
].lhs
= res
[len
].rhs
= -1;
441 buf_dup(mc
, &res
[len
].keyword
, buf
);
442 buf_dup(mc
, &res
[len
].uri
, filebuf
);
443 buf_dup(mc
, &res
[len
].cat
, record
.cat
);
444 buf_dup(mc
, &res
[len
].arch
, record
.arch
);
445 buf_dup(mc
, &res
[len
].title
, record
.title
);
446 buf_dup(mc
, &res
[len
].desc
, record
.desc
);
449 if (record
.rec
> res
[leaf
].rec
)
464 /* Sort our results. */
466 if (SORT_CAT
== opts
->sort
)
467 qsort(res
, len
, sizeof(struct res
), sort_cat
);
469 qsort(res
, len
, sizeof(struct res
), sort_title
);
471 state_output(res
, len
);
474 for (len
-- ; len
>= 0; len
--) {
475 free(res
[len
].keyword
);
476 free(res
[len
].title
);
494 * Track allocated buffer size for buf_redup().
497 buf_alloc(char **buf
, size_t *bufsz
, size_t sz
)
504 *buf
= mandoc_realloc(*buf
, *bufsz
);
508 * Like buf_redup() but throwing away the buffer size.
511 buf_dup(struct mchars
*mc
, char **buf
, const char *val
)
517 buf_redup(mc
, buf
, &bufsz
, val
);
521 * Normalise strings from the index and database.
522 * These strings are escaped as defined by mandoc_char(7) along with
523 * other goop in mandoc.h (e.g., soft hyphens).
526 buf_redup(struct mchars
*mc
, char **buf
,
527 size_t *bufsz
, const char *val
)
530 const char *seq
, *cpp
;
533 const char rsv
[] = { '\\', ASCII_NBRSP
, ASCII_HYPH
, '\0' };
535 /* Pre-allocate by the length of the input */
537 buf_alloc(buf
, bufsz
, strlen(val
) + 1);
541 while ('\0' != *val
) {
543 * Halt on the first escape sequence.
544 * This also halts on the end of string, in which case
545 * we just copy, fallthrough, and exit the loop.
547 if ((sz
= strcspn(val
, rsv
)) > 0) {
548 memcpy(&(*buf
)[pos
], val
, sz
);
553 if (ASCII_HYPH
== *val
) {
557 } else if (ASCII_NBRSP
== *val
) {
561 } else if ('\\' != *val
)
564 /* Read past the slash. */
569 * Parse the escape sequence and see if it's a
570 * predefined character or special character.
573 esc
= mandoc_escape(&val
, &seq
, &len
);
574 if (ESCAPE_ERROR
== esc
)
577 cpp
= ESCAPE_SPECIAL
== esc
?
578 mchars_spec2str(mc
, seq
, len
, &sz
) : NULL
;
583 /* Copy the rendered glyph into the stream. */
585 buf_alloc(buf
, bufsz
, sz
);
587 memcpy(&(*buf
)[pos
], cpp
, sz
);
595 state_output(const struct res
*res
, int sz
)
599 for (i
= 0; i
< sz
; i
++)
600 printf("%s(%s%s%s) - %s\n", res
[i
].title
,
602 *res
[i
].arch
? "/" : "",
603 *res
[i
].arch
? res
[i
].arch
: "",
611 fprintf(stderr
, "usage: %s "
621 state_getrecord(struct state
*p
, recno_t rec
, struct rec
*rp
)
628 key
.size
= sizeof(recno_t
);
630 rc
= (*p
->idx
->get
)(p
->idx
, &key
, &val
, 0);
637 rp
->file
= (char *)val
.data
;
638 if ((sz
= strlen(rp
->file
) + 1) >= val
.size
)
641 rp
->cat
= (char *)val
.data
+ (int)sz
;
642 if ((sz
+= strlen(rp
->cat
) + 1) >= val
.size
)
645 rp
->title
= (char *)val
.data
+ (int)sz
;
646 if ((sz
+= strlen(rp
->title
) + 1) >= val
.size
)
649 rp
->arch
= (char *)val
.data
+ (int)sz
;
650 if ((sz
+= strlen(rp
->arch
) + 1) >= val
.size
)
653 rp
->desc
= (char *)val
.data
+ (int)sz
;
657 fprintf(stderr
, "%s: Corrupt index\n", p
->idxf
);
662 sort_title(const void *p1
, const void *p2
)
665 return(strcmp(((const struct res
*)p1
)->title
,
666 ((const struct res
*)p2
)->title
));
670 sort_cat(const void *p1
, const void *p2
)
674 rc
= strcmp(((const struct res
*)p1
)->cat
,
675 ((const struct res
*)p2
)->cat
);
677 return(0 == rc
? sort_title(p1
, p2
) : rc
);