]> git.cameronkatri.com Git - mandoc.git/blob - apropos.c
Remove some unnecessary variables and note that mchars_alloc never returns
[mandoc.git] / apropos.c
1 /* $Id: apropos.c,v 1.6 2011/10/09 10:37:52 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <limits.h>
28 #include <regex.h>
29 #include <stdarg.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #ifdef __linux__
37 # include <db_185.h>
38 #else
39 # include <db.h>
40 #endif
41
42 #include "mandoc.h"
43
#define	MAXRESULTS	 256

/*
 * Keyword type bits, one bit per kind of keyword.
 * They are OR'ed together to form a selection mask.
 * See mandocdb.8.
 */

#define	TYPE_NAME	 (1 << 0)
#define	TYPE_FUNCTION	 (1 << 1)
#define	TYPE_UTILITY	 (1 << 2)
#define	TYPE_INCLUDES	 (1 << 3)
#define	TYPE_VARIABLE	 (1 << 4)
#define	TYPE_STANDARD	 (1 << 5)
#define	TYPE_AUTHOR	 (1 << 6)
#define	TYPE_CONFIG	 (1 << 7)
#define	TYPE_DESC	 (1 << 8)
#define	TYPE_XREF	 (1 << 9)
#define	TYPE_PATH	 (1 << 10)
#define	TYPE_ENV	 (1 << 11)
#define	TYPE_ERR	 (1 << 12)
61
/* How the query string is compared against each keyword. */
enum	match {
	MATCH_SUBSTR,	/* substring search (the default) */
	MATCH_REGEX,	/* extended regular expression (-r) */
	MATCH_EXACT	/* whole-keyword comparison (-e) */
};
67
/* Output ordering selected with -s. */
enum	sort {
	SORT_TITLE,	/* by title (the default) */
	SORT_CAT,	/* by category, ties broken by title */
	SORT__MAX	/* number of orderings; not itself an ordering */
};
73
74 struct opts {
75 enum sort sort; /* output sorting */
76 const char *arch; /* restrict to architecture */
77 const char *cat; /* restrict to category */
78 int types; /* only types in bitmask */
79 int insens; /* case-insensitive match */
80 enum match match; /* match type */
81 };
82
/* Maps a -t argument to the TYPE_* bits it selects. */
struct	type {
	int		 mask; /* bits OR'ed into the search mask */
	const char	*name; /* name as typed on the command line */
};
87
88 struct rec {
89 char *file; /* file in file-system */
90 char *cat; /* category (3p, 3, etc.) */
91 char *title; /* title (FOO, etc.) */
92 char *arch; /* arch (or empty string) */
93 char *desc; /* description (from Nd) */
94 recno_t rec; /* record in index */
95 };
96
97 struct res {
98 char *arch; /* architecture */
99 char *desc; /* free-form description */
100 char *keyword; /* matched keyword */
101 int types; /* bitmask of field selectors */
102 char *cat; /* manual section */
103 char *title; /* manual section */
104 char *uri; /* formatted uri of file */
105 recno_t rec; /* unique id of underlying manual */
106 /*
107 * Maintain a binary tree for checking the uniqueness of `rec'
108 * when adding elements to the results array.
109 * Since the results array is dynamic, use offset in the array
110 * instead of a pointer to the structure.
111 */
112 int lhs;
113 int rhs;
114 };
115
116 struct state {
117 DB *db; /* database */
118 DB *idx; /* index */
119 const char *dbf; /* database name */
120 const char *idxf; /* index name */
121 };
122
123 static const char * const sorts[SORT__MAX] = {
124 "cat", /* SORT_CAT */
125 "title", /* SORT_TITLE */
126 };
127
128 static const struct type types[] = {
129 { TYPE_NAME, "name" },
130 { TYPE_FUNCTION, "func" },
131 { TYPE_UTILITY, "utility" },
132 { TYPE_INCLUDES, "incl" },
133 { TYPE_VARIABLE, "var" },
134 { TYPE_STANDARD, "stand" },
135 { TYPE_AUTHOR, "auth" },
136 { TYPE_CONFIG, "conf" },
137 { TYPE_DESC, "desc" },
138 { TYPE_XREF, "xref" },
139 { TYPE_PATH, "path" },
140 { TYPE_ENV, "env" },
141 { TYPE_ERR, "err" },
142 { INT_MAX, "all" },
143 { 0, NULL }
144 };
145
146 static void buf_alloc(char **, size_t *, size_t);
147 static void buf_dup(struct mchars *, char **, const char *);
148 static void buf_redup(struct mchars *, char **,
149 size_t *, const char *);
150 static int sort_cat(const void *, const void *);
151 static int sort_title(const void *, const void *);
152 static int state_getrecord(struct state *,
153 recno_t, struct rec *);
154 static void state_output(const struct res *, int);
155 static void state_search(struct state *,
156 const struct opts *, char *);
157 static void usage(void);
158
159 static char *progname;
160
161 int
162 main(int argc, char *argv[])
163 {
164 BTREEINFO info;
165 int ch, i, rc;
166 const char *dbf, *idxf;
167 struct state state;
168 char *q, *v;
169 struct opts opts;
170 extern int optind;
171 extern char *optarg;
172
173 memset(&opts, 0, sizeof(struct opts));
174 memset(&state, 0, sizeof(struct state));
175
176 dbf = "mandoc.db";
177 idxf = "mandoc.index";
178 q = NULL;
179 rc = EXIT_FAILURE;
180
181 progname = strrchr(argv[0], '/');
182 if (progname == NULL)
183 progname = argv[0];
184 else
185 ++progname;
186
187 opts.match = MATCH_SUBSTR;
188
189 while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:")))
190 switch (ch) {
191 case ('a'):
192 opts.arch = optarg;
193 break;
194 case ('c'):
195 opts.cat = optarg;
196 break;
197 case ('e'):
198 opts.match = MATCH_EXACT;
199 break;
200 case ('I'):
201 opts.insens = 1;
202 break;
203 case ('r'):
204 opts.match = MATCH_REGEX;
205 break;
206 case ('s'):
207 for (i = 0; i < SORT__MAX; i++) {
208 if (strcmp(optarg, sorts[i]))
209 continue;
210 opts.sort = (enum sort)i;
211 break;
212 }
213
214 if (i < SORT__MAX)
215 break;
216
217 fprintf(stderr, "%s: Bad sort\n", optarg);
218 return(EXIT_FAILURE);
219 case ('t'):
220 while (NULL != (v = strsep(&optarg, ","))) {
221 if ('\0' == *v)
222 continue;
223 for (i = 0; types[i].mask; i++) {
224 if (strcmp(types[i].name, v))
225 continue;
226 break;
227 }
228 if (0 == types[i].mask)
229 break;
230 opts.types |= types[i].mask;
231 }
232 if (NULL == v)
233 break;
234
235 fprintf(stderr, "%s: Bad type\n", v);
236 return(EXIT_FAILURE);
237 default:
238 usage();
239 return(EXIT_FAILURE);
240 }
241
242 argc -= optind;
243 argv += optind;
244
245 if (0 == argc || '\0' == **argv) {
246 usage();
247 goto out;
248 } else
249 q = *argv;
250
251 if (0 == opts.types)
252 opts.types = TYPE_NAME | TYPE_DESC;
253
254 /*
255 * Configure databases.
256 * The keyword database is a btree that allows for duplicate
257 * entries.
258 * The index database is a recno.
259 */
260
261 memset(&info, 0, sizeof(BTREEINFO));
262 info.flags = R_DUP;
263
264 state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info);
265 if (NULL == state.db) {
266 perror(dbf);
267 goto out;
268 }
269
270 state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL);
271 if (NULL == state.idx) {
272 perror(idxf);
273 goto out;
274 }
275
276 /* Main search function. */
277
278 state_search(&state, &opts, q);
279
280 rc = EXIT_SUCCESS;
281 out:
282 if (state.db)
283 (*state.db->close)(state.db);
284 if (state.idx)
285 (*state.idx->close)(state.idx);
286
287 return(rc);
288 }
289
290 static void
291 state_search(struct state *p, const struct opts *opts, char *q)
292 {
293 int leaf, root, len, ch, dflag;
294 struct mchars *mc;
295 char *buf;
296 size_t bufsz;
297 recno_t rec;
298 uint32_t fl;
299 DBT key, val;
300 struct res *res;
301 regex_t reg;
302 regex_t *regp;
303 char filebuf[10];
304 struct rec record;
305
306 root = leaf = -1;
307 res = NULL;
308 len = 0;
309 buf = NULL;
310 bufsz = 0;
311 regp = NULL;
312
313 /*
314 * Configure how we scan through results to see if we match:
315 * whether by regexp or exact matches.
316 */
317
318 switch (opts->match) {
319 case (MATCH_REGEX):
320 ch = REG_EXTENDED | REG_NOSUB |
321 (opts->insens ? REG_ICASE : 0);
322
323 if (0 != regcomp(&reg, q, ch)) {
324 fprintf(stderr, "%s: Bad pattern\n", q);
325 return;
326 }
327
328 regp = &reg;
329 dflag = R_FIRST;
330 break;
331 case (MATCH_EXACT):
332 key.data = q;
333 key.size = strlen(q) + 1;
334 dflag = R_CURSOR;
335 break;
336 default:
337 dflag = R_FIRST;
338 break;
339 }
340
341 mc = mchars_alloc();
342
343 /*
344 * Iterate over the entire keyword database.
345 * For each record, we must first translate the key into UTF-8.
346 * Following that, make sure it's acceptable.
347 * Lastly, add it to the available records.
348 */
349
350 while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) {
351 dflag = R_NEXT;
352
353 /*
354 * Keys must be sized as such: the keyword must be
355 * non-empty (nil terminator plus one character) and the
356 * value must be 8 (recno_t---uint32_t---index reference
357 * and a uint32_t flag field).
358 */
359
360 if (key.size < 2 || 8 != val.size) {
361 fprintf(stderr, "%s: Corrupt database\n", p->dbf);
362 exit(EXIT_FAILURE);
363 }
364
365 buf_redup(mc, &buf, &bufsz, (char *)key.data);
366
367 fl = *(uint32_t *)val.data;
368
369 if ( ! (fl & opts->types))
370 continue;
371
372 switch (opts->match) {
373 case (MATCH_REGEX):
374 if (regexec(regp, buf, 0, NULL, 0))
375 continue;
376 break;
377 case (MATCH_EXACT):
378 if (opts->insens && strcasecmp(buf, q))
379 goto send;
380 if ( ! opts->insens && strcmp(buf, q))
381 goto send;
382 break;
383 default:
384 if (opts->insens && NULL == strcasestr(buf, q))
385 continue;
386 if ( ! opts->insens && NULL == strstr(buf, q))
387 continue;
388 break;
389 }
390
391 /*
392 * Now look up the file itself in our index. The file's
393 * indexed by its recno for fast lookups.
394 */
395
396 memcpy(&rec, val.data + 4, sizeof(recno_t));
397
398 if ( ! state_getrecord(p, rec, &record))
399 exit(EXIT_FAILURE);
400
401 /* If we're in a different section, skip... */
402
403 if (opts->cat && strcasecmp(opts->cat, record.cat))
404 continue;
405 if (opts->arch && strcasecmp(opts->arch, record.arch))
406 continue;
407
408 /*
409 * Do a binary search to dedupe the results tree of the
410 * same record: we don't print the same file.
411 */
412
413 for (leaf = root; leaf >= 0; )
414 if (rec > res[leaf].rec && res[leaf].rhs >= 0)
415 leaf = res[leaf].rhs;
416 else if (rec < res[leaf].rec && res[leaf].lhs >= 0)
417 leaf = res[leaf].lhs;
418 else
419 break;
420
421 if (leaf >= 0 && res[leaf].rec == rec)
422 continue;
423
424 res = mandoc_realloc
425 (res, (len + 1) * sizeof(struct res));
426
427 /*
428 * Now we have our filename, keywords, types, and all
429 * other necessary information.
430 * Process it and add it to our list of results.
431 */
432
433 filebuf[9] = '\0';
434 snprintf(filebuf, 10, "%u", record.rec);
435 assert('\0' == filebuf[9]);
436
437 res[len].rec = record.rec;
438 res[len].types = fl;
439 res[len].lhs = res[len].rhs = -1;
440
441 buf_dup(mc, &res[len].keyword, buf);
442 buf_dup(mc, &res[len].uri, filebuf);
443 buf_dup(mc, &res[len].cat, record.cat);
444 buf_dup(mc, &res[len].arch, record.arch);
445 buf_dup(mc, &res[len].title, record.title);
446 buf_dup(mc, &res[len].desc, record.desc);
447
448 if (leaf >= 0) {
449 if (record.rec > res[leaf].rec)
450 res[leaf].rhs = len;
451 else
452 res[leaf].lhs = len;
453 } else
454 root = len;
455
456 len++;
457 }
458
459 send:
460 if (ch < 0) {
461 perror(p->dbf);
462 exit(EXIT_FAILURE);
463 }
464
465 /* Sort our results. */
466
467 if (SORT_CAT == opts->sort)
468 qsort(res, len, sizeof(struct res), sort_cat);
469 else
470 qsort(res, len, sizeof(struct res), sort_title);
471
472 state_output(res, len);
473
474 for (len-- ; len >= 0; len--) {
475 free(res[len].keyword);
476 free(res[len].title);
477 free(res[len].cat);
478 free(res[len].arch);
479 free(res[len].desc);
480 free(res[len].uri);
481 }
482
483 free(res);
484 free(buf);
485 mchars_free(mc);
486
487 if (regp)
488 regfree(regp);
489 }
490
/*
 * Make sure `*buf' has room for more than `sz' bytes.
 * Nothing happens while `sz' is below the recorded size `*bufsz';
 * otherwise the buffer is reallocated to `sz' plus 1024 bytes of
 * slack and `*bufsz' is updated to match.
 */
static inline void
buf_alloc(char **buf, size_t *bufsz, size_t sz)
{

	if (sz >= *bufsz) {
		*bufsz = sz + 1024;
		*buf = mandoc_realloc(*buf, *bufsz);
	}
}
504
/*
 * Store a freshly allocated, normalised copy of `val' in `*buf'.
 * Any previous value of `*buf' is overwritten without being freed.
 * Same as buf_redup() starting from an empty buffer, with the
 * resulting buffer size discarded.
 */
static void
buf_dup(struct mchars *mc, char **buf, const char *val)
{
	size_t		 cap = 0;

	*buf = NULL;
	buf_redup(mc, buf, &cap, val);
}
517
518 /*
519 * Normalise strings from the index and database.
520 * These strings are escaped as defined by mandoc_char(7) along with
521 * other goop in mandoc.h (e.g., soft hyphens).
522 */
523 static void
524 buf_redup(struct mchars *mc, char **buf,
525 size_t *bufsz, const char *val)
526 {
527 size_t sz;
528 const char *seq, *cpp;
529 int len, pos;
530 enum mandoc_esc esc;
531 const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
532
533 /* Pre-allocate by the length of the input */
534
535 buf_alloc(buf, bufsz, strlen(val) + 1);
536
537 pos = 0;
538
539 while ('\0' != *val) {
540 /*
541 * Halt on the first escape sequence.
542 * This also halts on the end of string, in which case
543 * we just copy, fallthrough, and exit the loop.
544 */
545 if ((sz = strcspn(val, rsv)) > 0) {
546 memcpy(&(*buf)[pos], val, sz);
547 pos += (int)sz;
548 val += (int)sz;
549 }
550
551 if (ASCII_HYPH == *val) {
552 (*buf)[pos++] = '-';
553 val++;
554 continue;
555 } else if (ASCII_NBRSP == *val) {
556 (*buf)[pos++] = ' ';
557 val++;
558 continue;
559 } else if ('\\' != *val)
560 break;
561
562 /* Read past the slash. */
563
564 val++;
565
566 /*
567 * Parse the escape sequence and see if it's a
568 * predefined character or special character.
569 */
570
571 esc = mandoc_escape(&val, &seq, &len);
572 if (ESCAPE_ERROR == esc)
573 break;
574
575 cpp = ESCAPE_SPECIAL == esc ?
576 mchars_spec2str(mc, seq, len, &sz) : NULL;
577
578 if (NULL == cpp)
579 continue;
580
581 /* Copy the rendered glyph into the stream. */
582
583 buf_alloc(buf, bufsz, sz);
584
585 memcpy(&(*buf)[pos], cpp, sz);
586 pos += (int)sz;
587 }
588
589 (*buf)[pos] = '\0';
590 }
591
592 static void
593 state_output(const struct res *res, int sz)
594 {
595 int i;
596
597 for (i = 0; i < sz; i++)
598 printf("%s(%s%s%s) - %s\n", res[i].title,
599 res[i].cat,
600 *res[i].arch ? "/" : "",
601 *res[i].arch ? res[i].arch : "",
602 res[i].desc);
603 }
604
605 static void
606 usage(void)
607 {
608
609 fprintf(stderr, "usage: %s "
610 "[-eIr] "
611 "[-a arch] "
612 "[-c cat] "
613 "[-s sort] "
614 "[-t type[,...]] "
615 "key\n", progname);
616 }
617
618 static int
619 state_getrecord(struct state *p, recno_t rec, struct rec *rp)
620 {
621 DBT key, val;
622 size_t sz;
623 int rc;
624
625 key.data = &rec;
626 key.size = sizeof(recno_t);
627
628 rc = (*p->idx->get)(p->idx, &key, &val, 0);
629 if (rc < 0) {
630 perror(p->idxf);
631 return(0);
632 } else if (rc > 0)
633 goto err;
634
635 rp->file = (char *)val.data;
636 if ((sz = strlen(rp->file) + 1) >= val.size)
637 goto err;
638
639 rp->cat = (char *)val.data + (int)sz;
640 if ((sz += strlen(rp->cat) + 1) >= val.size)
641 goto err;
642
643 rp->title = (char *)val.data + (int)sz;
644 if ((sz += strlen(rp->title) + 1) >= val.size)
645 goto err;
646
647 rp->arch = (char *)val.data + (int)sz;
648 if ((sz += strlen(rp->arch) + 1) >= val.size)
649 goto err;
650
651 rp->desc = (char *)val.data + (int)sz;
652 rp->rec = rec;
653 return(1);
654 err:
655 fprintf(stderr, "%s: Corrupt index\n", p->idxf);
656 return(0);
657 }
658
659 static int
660 sort_title(const void *p1, const void *p2)
661 {
662
663 return(strcmp(((const struct res *)p1)->title,
664 ((const struct res *)p2)->title));
665 }
666
667 static int
668 sort_cat(const void *p1, const void *p2)
669 {
670 int rc;
671
672 rc = strcmp(((const struct res *)p1)->cat,
673 ((const struct res *)p2)->cat);
674
675 return(0 == rc ? sort_title(p1, p2) : rc);
676 }