]> git.cameronkatri.com Git - mandoc.git/blob - apropos.c
02c5d0bad542e4f0e43f4d97d37314e4786a2c02
[mandoc.git] / apropos.c
1 /* $Id: apropos.c,v 1.7 2011/10/09 10:46:38 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <limits.h>
28 #include <regex.h>
29 #include <stdarg.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #ifdef __linux__
37 # include <db_185.h>
38 #else
39 # include <db.h>
40 #endif
41
42 #include "mandoc.h"
43
44 #define MAXRESULTS 256
45
/*
 * Keyword type bits (see mandocdb.8).
 * Each keyword in the database carries a mask of these; the -t option
 * selects which ones a search considers.
 */

#define	TYPE_NAME	(1 << 0)	/* -t name */
#define	TYPE_FUNCTION	(1 << 1)	/* -t func */
#define	TYPE_UTILITY	(1 << 2)	/* -t utility */
#define	TYPE_INCLUDES	(1 << 3)	/* -t incl */
#define	TYPE_VARIABLE	(1 << 4)	/* -t var */
#define	TYPE_STANDARD	(1 << 5)	/* -t stand */
#define	TYPE_AUTHOR	(1 << 6)	/* -t auth */
#define	TYPE_CONFIG	(1 << 7)	/* -t conf */
#define	TYPE_DESC	(1 << 8)	/* -t desc */
#define	TYPE_XREF	(1 << 9)	/* -t xref */
#define	TYPE_PATH	(1 << 10)	/* -t path */
#define	TYPE_ENV	(1 << 11)	/* -t env */
#define	TYPE_ERR	(1 << 12)	/* -t err */
61
/* How the query string is compared against each keyword. */
enum	match {
	MATCH_SUBSTR = 0,	/* query occurs within the keyword (default) */
	MATCH_REGEX,		/* query is an extended regular expression (-r) */
	MATCH_EXACT		/* query equals the whole keyword (-e) */
};
67
/* Output orderings selectable with -s. */
enum	sort {
	SORT_TITLE = 0,		/* order by manual title (default) */
	SORT_CAT,		/* order by category, then by title */
	SORT__MAX		/* number of orderings; not a real value */
};
73
74 struct opts {
75 enum sort sort; /* output sorting */
76 const char *arch; /* restrict to architecture */
77 const char *cat; /* restrict to category */
78 int types; /* only types in bitmask */
79 int insens; /* case-insensitive match */
80 enum match match; /* match type */
81 };
82
/* Maps a -t argument to the keyword type bits it enables. */
struct	type {
	int		 mask;  /* TYPE_* bits; zero terminates a table */
	const char	*name;  /* spelling accepted on the command line */
};
87
88 struct rec {
89 char *file; /* file in file-system */
90 char *cat; /* category (3p, 3, etc.) */
91 char *title; /* title (FOO, etc.) */
92 char *arch; /* arch (or empty string) */
93 char *desc; /* description (from Nd) */
94 recno_t rec; /* record in index */
95 };
96
97 struct res {
98 char *arch; /* architecture */
99 char *desc; /* free-form description */
100 char *keyword; /* matched keyword */
101 int types; /* bitmask of field selectors */
102 char *cat; /* manual section */
103 char *title; /* manual section */
104 char *uri; /* formatted uri of file */
105 recno_t rec; /* unique id of underlying manual */
106 /*
107 * Maintain a binary tree for checking the uniqueness of `rec'
108 * when adding elements to the results array.
109 * Since the results array is dynamic, use offset in the array
110 * instead of a pointer to the structure.
111 */
112 int lhs;
113 int rhs;
114 };
115
116 struct state {
117 DB *db; /* database */
118 DB *idx; /* index */
119 const char *dbf; /* database name */
120 const char *idxf; /* index name */
121 };
122
123 static const char * const sorts[SORT__MAX] = {
124 "cat", /* SORT_CAT */
125 "title", /* SORT_TITLE */
126 };
127
128 static const struct type types[] = {
129 { TYPE_NAME, "name" },
130 { TYPE_FUNCTION, "func" },
131 { TYPE_UTILITY, "utility" },
132 { TYPE_INCLUDES, "incl" },
133 { TYPE_VARIABLE, "var" },
134 { TYPE_STANDARD, "stand" },
135 { TYPE_AUTHOR, "auth" },
136 { TYPE_CONFIG, "conf" },
137 { TYPE_DESC, "desc" },
138 { TYPE_XREF, "xref" },
139 { TYPE_PATH, "path" },
140 { TYPE_ENV, "env" },
141 { TYPE_ERR, "err" },
142 { INT_MAX, "all" },
143 { 0, NULL }
144 };
145
146 static void buf_alloc(char **, size_t *, size_t);
147 static void buf_dup(struct mchars *, char **, const char *);
148 static void buf_redup(struct mchars *, char **,
149 size_t *, const char *);
150 static int sort_cat(const void *, const void *);
151 static int sort_title(const void *, const void *);
152 static int state_getrecord(struct state *,
153 recno_t, struct rec *);
154 static void state_output(const struct res *, int);
155 static int state_search(struct state *,
156 const struct opts *, char *);
157 static void usage(void);
158
159 static char *progname;
160
161 int
162 main(int argc, char *argv[])
163 {
164 BTREEINFO info;
165 int ch, i, rc;
166 const char *dbf, *idxf;
167 struct state state;
168 char *q, *v;
169 struct opts opts;
170 extern int optind;
171 extern char *optarg;
172
173 memset(&opts, 0, sizeof(struct opts));
174 memset(&state, 0, sizeof(struct state));
175
176 dbf = "mandoc.db";
177 idxf = "mandoc.index";
178 q = NULL;
179 rc = EXIT_FAILURE;
180
181 progname = strrchr(argv[0], '/');
182 if (progname == NULL)
183 progname = argv[0];
184 else
185 ++progname;
186
187 opts.match = MATCH_SUBSTR;
188
189 while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:")))
190 switch (ch) {
191 case ('a'):
192 opts.arch = optarg;
193 break;
194 case ('c'):
195 opts.cat = optarg;
196 break;
197 case ('e'):
198 opts.match = MATCH_EXACT;
199 break;
200 case ('I'):
201 opts.insens = 1;
202 break;
203 case ('r'):
204 opts.match = MATCH_REGEX;
205 break;
206 case ('s'):
207 for (i = 0; i < SORT__MAX; i++) {
208 if (strcmp(optarg, sorts[i]))
209 continue;
210 opts.sort = (enum sort)i;
211 break;
212 }
213
214 if (i < SORT__MAX)
215 break;
216
217 fprintf(stderr, "%s: Bad sort\n", optarg);
218 return(EXIT_FAILURE);
219 case ('t'):
220 while (NULL != (v = strsep(&optarg, ","))) {
221 if ('\0' == *v)
222 continue;
223 for (i = 0; types[i].mask; i++) {
224 if (strcmp(types[i].name, v))
225 continue;
226 break;
227 }
228 if (0 == types[i].mask)
229 break;
230 opts.types |= types[i].mask;
231 }
232 if (NULL == v)
233 break;
234
235 fprintf(stderr, "%s: Bad type\n", v);
236 return(EXIT_FAILURE);
237 default:
238 usage();
239 return(EXIT_FAILURE);
240 }
241
242 argc -= optind;
243 argv += optind;
244
245 if (0 == argc || '\0' == **argv) {
246 usage();
247 goto out;
248 } else
249 q = *argv;
250
251 if (0 == opts.types)
252 opts.types = TYPE_NAME | TYPE_DESC;
253
254 /*
255 * Configure databases.
256 * The keyword database is a btree that allows for duplicate
257 * entries.
258 * The index database is a recno.
259 */
260
261 memset(&info, 0, sizeof(BTREEINFO));
262 info.flags = R_DUP;
263
264 state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info);
265 if (NULL == state.db) {
266 perror(dbf);
267 goto out;
268 }
269
270 state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL);
271 if (NULL == state.idx) {
272 perror(idxf);
273 goto out;
274 }
275
276 /* Main search function. */
277
278 rc = state_search(&state, &opts, q) ?
279 EXIT_SUCCESS : EXIT_FAILURE;
280 out:
281 if (state.db)
282 (*state.db->close)(state.db);
283 if (state.idx)
284 (*state.idx->close)(state.idx);
285
286 return(rc);
287 }
288
289 static int
290 state_search(struct state *p, const struct opts *opts, char *q)
291 {
292 int leaf, root, len, ch, dflag, rc;
293 struct mchars *mc;
294 char *buf;
295 size_t bufsz;
296 recno_t rec;
297 uint32_t fl;
298 DBT key, val;
299 struct res *res;
300 regex_t reg;
301 regex_t *regp;
302 char filebuf[10];
303 struct rec record;
304
305 rc = 0;
306 root = leaf = -1;
307 res = NULL;
308 len = 0;
309 buf = NULL;
310 bufsz = 0;
311 regp = NULL;
312
313 /*
314 * Configure how we scan through results to see if we match:
315 * whether by regexp or exact matches.
316 */
317
318 switch (opts->match) {
319 case (MATCH_REGEX):
320 ch = REG_EXTENDED | REG_NOSUB |
321 (opts->insens ? REG_ICASE : 0);
322
323 if (0 != regcomp(&reg, q, ch)) {
324 fprintf(stderr, "%s: Bad pattern\n", q);
325 return(0);
326 }
327
328 regp = &reg;
329 dflag = R_FIRST;
330 break;
331 case (MATCH_EXACT):
332 key.data = q;
333 key.size = strlen(q) + 1;
334 dflag = R_CURSOR;
335 break;
336 default:
337 dflag = R_FIRST;
338 break;
339 }
340
341 mc = mchars_alloc();
342
343 /*
344 * Iterate over the entire keyword database.
345 * For each record, we must first translate the key into UTF-8.
346 * Following that, make sure it's acceptable.
347 * Lastly, add it to the available records.
348 */
349
350 while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) {
351 dflag = R_NEXT;
352
353 /*
354 * Keys must be sized as such: the keyword must be
355 * non-empty (nil terminator plus one character) and the
356 * value must be 8 (recno_t---uint32_t---index reference
357 * and a uint32_t flag field).
358 */
359
360 if (key.size < 2 || 8 != val.size) {
361 fprintf(stderr, "%s: Bad database\n", p->dbf);
362 goto out;
363 }
364
365 buf_redup(mc, &buf, &bufsz, (char *)key.data);
366
367 fl = *(uint32_t *)val.data;
368
369 if ( ! (fl & opts->types))
370 continue;
371
372 switch (opts->match) {
373 case (MATCH_REGEX):
374 if (regexec(regp, buf, 0, NULL, 0))
375 continue;
376 break;
377 case (MATCH_EXACT):
378 if (opts->insens && strcasecmp(buf, q))
379 goto send;
380 if ( ! opts->insens && strcmp(buf, q))
381 goto send;
382 break;
383 default:
384 if (opts->insens && NULL == strcasestr(buf, q))
385 continue;
386 if ( ! opts->insens && NULL == strstr(buf, q))
387 continue;
388 break;
389 }
390
391 /*
392 * Now look up the file itself in our index. The file's
393 * indexed by its recno for fast lookups.
394 */
395
396 memcpy(&rec, val.data + 4, sizeof(recno_t));
397
398 if ( ! state_getrecord(p, rec, &record))
399 goto out;
400
401 /* If we're in a different section, skip... */
402
403 if (opts->cat && strcasecmp(opts->cat, record.cat))
404 continue;
405 if (opts->arch && strcasecmp(opts->arch, record.arch))
406 continue;
407
408 /*
409 * Do a binary search to dedupe the results tree of the
410 * same record: we don't print the same file.
411 */
412
413 for (leaf = root; leaf >= 0; )
414 if (rec > res[leaf].rec && res[leaf].rhs >= 0)
415 leaf = res[leaf].rhs;
416 else if (rec < res[leaf].rec && res[leaf].lhs >= 0)
417 leaf = res[leaf].lhs;
418 else
419 break;
420
421 if (leaf >= 0 && res[leaf].rec == rec)
422 continue;
423
424 res = mandoc_realloc
425 (res, (len + 1) * sizeof(struct res));
426
427 /*
428 * Now we have our filename, keywords, types, and all
429 * other necessary information.
430 * Process it and add it to our list of results.
431 */
432
433 filebuf[9] = '\0';
434 snprintf(filebuf, 10, "%u", record.rec);
435 assert('\0' == filebuf[9]);
436
437 res[len].rec = record.rec;
438 res[len].types = fl;
439 res[len].lhs = res[len].rhs = -1;
440
441 buf_dup(mc, &res[len].keyword, buf);
442 buf_dup(mc, &res[len].uri, filebuf);
443 buf_dup(mc, &res[len].cat, record.cat);
444 buf_dup(mc, &res[len].arch, record.arch);
445 buf_dup(mc, &res[len].title, record.title);
446 buf_dup(mc, &res[len].desc, record.desc);
447
448 if (leaf >= 0) {
449 if (record.rec > res[leaf].rec)
450 res[leaf].rhs = len;
451 else
452 res[leaf].lhs = len;
453 } else
454 root = len;
455
456 len++;
457 }
458
459 if (ch < 0) {
460 perror(p->dbf);
461 goto out;
462 }
463 send:
464 /* Sort our results. */
465
466 if (SORT_CAT == opts->sort)
467 qsort(res, len, sizeof(struct res), sort_cat);
468 else
469 qsort(res, len, sizeof(struct res), sort_title);
470
471 state_output(res, len);
472 rc = 1;
473 out:
474 for (len-- ; len >= 0; len--) {
475 free(res[len].keyword);
476 free(res[len].title);
477 free(res[len].cat);
478 free(res[len].arch);
479 free(res[len].desc);
480 free(res[len].uri);
481 }
482
483 free(res);
484 free(buf);
485 mchars_free(mc);
486
487 if (regp)
488 regfree(regp);
489
490 return(rc);
491 }
492
/*
 * Make sure `*buf' can hold more than `sz' bytes.
 * When the recorded size `*bufsz' is not already larger than `sz',
 * the buffer is reallocated to `sz' plus 1024 bytes of slack and the
 * recorded size is updated; otherwise nothing happens.
 */
static inline void
buf_alloc(char **buf, size_t *bufsz, size_t sz)
{

	if (sz >= *bufsz) {
		*bufsz = sz + 1024;
		*buf = mandoc_realloc(*buf, *bufsz);
	}
}
506
/*
 * Store a freshly allocated, normalised copy of `val' in `*buf'.
 * Whatever `*buf' held before is overwritten without being freed.
 * This is buf_redup() starting from an empty buffer, with the
 * resulting buffer size discarded.
 */
static void
buf_dup(struct mchars *mc, char **buf, const char *val)
{
	size_t		 ignored = 0;

	*buf = NULL;
	buf_redup(mc, buf, &ignored, val);
}
519
520 /*
521 * Normalise strings from the index and database.
522 * These strings are escaped as defined by mandoc_char(7) along with
523 * other goop in mandoc.h (e.g., soft hyphens).
524 */
525 static void
526 buf_redup(struct mchars *mc, char **buf,
527 size_t *bufsz, const char *val)
528 {
529 size_t sz;
530 const char *seq, *cpp;
531 int len, pos;
532 enum mandoc_esc esc;
533 const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
534
535 /* Pre-allocate by the length of the input */
536
537 buf_alloc(buf, bufsz, strlen(val) + 1);
538
539 pos = 0;
540
541 while ('\0' != *val) {
542 /*
543 * Halt on the first escape sequence.
544 * This also halts on the end of string, in which case
545 * we just copy, fallthrough, and exit the loop.
546 */
547 if ((sz = strcspn(val, rsv)) > 0) {
548 memcpy(&(*buf)[pos], val, sz);
549 pos += (int)sz;
550 val += (int)sz;
551 }
552
553 if (ASCII_HYPH == *val) {
554 (*buf)[pos++] = '-';
555 val++;
556 continue;
557 } else if (ASCII_NBRSP == *val) {
558 (*buf)[pos++] = ' ';
559 val++;
560 continue;
561 } else if ('\\' != *val)
562 break;
563
564 /* Read past the slash. */
565
566 val++;
567
568 /*
569 * Parse the escape sequence and see if it's a
570 * predefined character or special character.
571 */
572
573 esc = mandoc_escape(&val, &seq, &len);
574 if (ESCAPE_ERROR == esc)
575 break;
576
577 cpp = ESCAPE_SPECIAL == esc ?
578 mchars_spec2str(mc, seq, len, &sz) : NULL;
579
580 if (NULL == cpp)
581 continue;
582
583 /* Copy the rendered glyph into the stream. */
584
585 buf_alloc(buf, bufsz, sz);
586
587 memcpy(&(*buf)[pos], cpp, sz);
588 pos += (int)sz;
589 }
590
591 (*buf)[pos] = '\0';
592 }
593
594 static void
595 state_output(const struct res *res, int sz)
596 {
597 int i;
598
599 for (i = 0; i < sz; i++)
600 printf("%s(%s%s%s) - %s\n", res[i].title,
601 res[i].cat,
602 *res[i].arch ? "/" : "",
603 *res[i].arch ? res[i].arch : "",
604 res[i].desc);
605 }
606
607 static void
608 usage(void)
609 {
610
611 fprintf(stderr, "usage: %s "
612 "[-eIr] "
613 "[-a arch] "
614 "[-c cat] "
615 "[-s sort] "
616 "[-t type[,...]] "
617 "key\n", progname);
618 }
619
620 static int
621 state_getrecord(struct state *p, recno_t rec, struct rec *rp)
622 {
623 DBT key, val;
624 size_t sz;
625 int rc;
626
627 key.data = &rec;
628 key.size = sizeof(recno_t);
629
630 rc = (*p->idx->get)(p->idx, &key, &val, 0);
631 if (rc < 0) {
632 perror(p->idxf);
633 return(0);
634 } else if (rc > 0)
635 goto err;
636
637 rp->file = (char *)val.data;
638 if ((sz = strlen(rp->file) + 1) >= val.size)
639 goto err;
640
641 rp->cat = (char *)val.data + (int)sz;
642 if ((sz += strlen(rp->cat) + 1) >= val.size)
643 goto err;
644
645 rp->title = (char *)val.data + (int)sz;
646 if ((sz += strlen(rp->title) + 1) >= val.size)
647 goto err;
648
649 rp->arch = (char *)val.data + (int)sz;
650 if ((sz += strlen(rp->arch) + 1) >= val.size)
651 goto err;
652
653 rp->desc = (char *)val.data + (int)sz;
654 rp->rec = rec;
655 return(1);
656 err:
657 fprintf(stderr, "%s: Corrupt index\n", p->idxf);
658 return(0);
659 }
660
661 static int
662 sort_title(const void *p1, const void *p2)
663 {
664
665 return(strcmp(((const struct res *)p1)->title,
666 ((const struct res *)p2)->title));
667 }
668
669 static int
670 sort_cat(const void *p1, const void *p2)
671 {
672 int rc;
673
674 rc = strcmp(((const struct res *)p1)->cat,
675 ((const struct res *)p2)->cat);
676
677 return(0 == rc ? sort_title(p1, p2) : rc);
678 }