]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
fix a typo that prevented names from .Dt from getting priority
[mandoc.git] / mansearch.c
1 /* $OpenBSD: mansearch.c,v 1.50 2016/07/09 15:23:36 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/mman.h>
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <glob.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdint.h>
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "mandoc.h"
38 #include "mandoc_aux.h"
39 #include "mandoc_ohash.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "dbm.h"
43
44 struct expr {
45 /* Used for terms: */
46 struct dbm_match match; /* Match type and expression. */
47 uint64_t bits; /* Type mask. */
48 /* Used for OR and AND groups: */
49 struct expr *next; /* Next child in the parent group. */
50 struct expr *child; /* First child in this group. */
51 enum { EXPR_TERM, EXPR_OR, EXPR_AND } type;
52 };
53
54 const char *const mansearch_keynames[KEY_MAX] = {
55 "arch", "sec", "Xr", "Ar", "Fa", "Fl", "Dv", "Fn",
56 "Ic", "Pa", "Cm", "Li", "Em", "Cd", "Va", "Ft",
57 "Tn", "Er", "Ev", "Sy", "Sh", "In", "Ss", "Ox",
58 "An", "Mt", "St", "Bx", "At", "Nx", "Fx", "Lk",
59 "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb", "Nm", "Nd"
60 };
61
62
63 static struct ohash *manmerge(struct expr *, struct ohash *);
64 static struct ohash *manmerge_term(struct expr *, struct ohash *);
65 static struct ohash *manmerge_or(struct expr *, struct ohash *);
66 static struct ohash *manmerge_and(struct expr *, struct ohash *);
67 static char *buildnames(const struct dbm_page *);
68 static char *buildoutput(size_t, int32_t);
69 static size_t lstlen(const char *);
70 static void lstcat(char *, size_t *, const char *);
71 static int lstmatch(const char *, const char *);
72 static struct expr *exprcomp(const struct mansearch *,
73 int, char *[], int *);
74 static struct expr *expr_and(const struct mansearch *,
75 int, char *[], int *);
76 static struct expr *exprterm(const struct mansearch *,
77 int, char *[], int *);
78 static void exprfree(struct expr *);
79 static int manpage_compare(const void *, const void *);
80
81
82 int
83 mansearch(const struct mansearch *search,
84 const struct manpaths *paths,
85 int argc, char *argv[],
86 struct manpage **res, size_t *sz)
87 {
88 char buf[PATH_MAX];
89 struct dbm_res *rp;
90 struct expr *e;
91 struct dbm_page *page;
92 struct manpage *mpage;
93 struct ohash *htab;
94 size_t cur, i, maxres, outkey;
95 unsigned int slot;
96 int argi, chdir_status, getcwd_status, im;
97
98 argi = 0;
99 if ((e = exprcomp(search, argc, argv, &argi)) == NULL) {
100 *sz = 0;
101 return 0;
102 }
103
104 cur = maxres = 0;
105 *res = NULL;
106
107 outkey = KEY_Nd;
108 if (search->outkey != NULL)
109 for (im = 0; im < KEY_MAX; im++)
110 if (0 == strcasecmp(search->outkey,
111 mansearch_keynames[im])) {
112 outkey = im;
113 break;
114 }
115
116 /*
117 * Remember the original working directory, if possible.
118 * This will be needed if the second or a later directory
119 * is given as a relative path.
120 * Do not error out if the current directory is not
121 * searchable: Maybe it won't be needed after all.
122 */
123
124 if (getcwd(buf, PATH_MAX) == NULL) {
125 getcwd_status = 0;
126 (void)strlcpy(buf, strerror(errno), sizeof(buf));
127 } else
128 getcwd_status = 1;
129
130 /*
131 * Loop over the directories (containing databases) for us to
132 * search.
133 * Don't let missing/bad databases/directories phase us.
134 * In each, try to open the resident database and, if it opens,
135 * scan it for our match expression.
136 */
137
138 chdir_status = 0;
139 for (i = 0; i < paths->sz; i++) {
140 if (chdir_status && paths->paths[i][0] != '/') {
141 if ( ! getcwd_status) {
142 warnx("%s: getcwd: %s", paths->paths[i], buf);
143 continue;
144 } else if (chdir(buf) == -1) {
145 warn("%s", buf);
146 continue;
147 }
148 }
149 if (chdir(paths->paths[i]) == -1) {
150 warn("%s", paths->paths[i]);
151 continue;
152 }
153 chdir_status = 1;
154
155 if (dbm_open(MANDOC_DB) == -1) {
156 warn("%s/%s", paths->paths[i], MANDOC_DB);
157 continue;
158 }
159
160 if ((htab = manmerge(e, NULL)) == NULL) {
161 dbm_close();
162 continue;
163 }
164
165 for (rp = ohash_first(htab, &slot); rp != NULL;
166 rp = ohash_next(htab, &slot)) {
167 page = dbm_page_get(rp->page);
168
169 if (lstmatch(search->sec, page->sect) == 0 ||
170 lstmatch(search->arch, page->arch) == 0)
171 continue;
172
173 if (cur + 1 > maxres) {
174 maxres += 1024;
175 *res = mandoc_reallocarray(*res,
176 maxres, sizeof(**res));
177 }
178 mpage = *res + cur;
179 mandoc_asprintf(&mpage->file, "%s/%s",
180 paths->paths[i], page->file + 1);
181 mpage->names = buildnames(page);
182 mpage->output = (int)outkey == KEY_Nd ?
183 mandoc_strdup(page->desc) :
184 buildoutput(outkey, page->addr);
185 mpage->ipath = i;
186 mpage->bits = rp->bits;
187 mpage->sec = *page->sect - '0';
188 if (mpage->sec < 0 || mpage->sec > 9)
189 mpage->sec = 10;
190 mpage->form = *page->file;
191 free(rp);
192 cur++;
193 }
194 ohash_delete(htab);
195 free(htab);
196 dbm_close();
197
198 /*
199 * In man(1) mode, prefer matches in earlier trees
200 * over matches in later trees.
201 */
202
203 if (cur && search->firstmatch)
204 break;
205 }
206 qsort(*res, cur, sizeof(struct manpage), manpage_compare);
207 if (chdir_status && getcwd_status && chdir(buf) == -1)
208 warn("%s", buf);
209 exprfree(e);
210 *sz = cur;
211 return 1;
212 }
213
214 /*
215 * Merge the results for the expression tree rooted at e
216 * into the the result list htab.
217 */
218 static struct ohash *
219 manmerge(struct expr *e, struct ohash *htab)
220 {
221 switch (e->type) {
222 case EXPR_TERM:
223 return manmerge_term(e, htab);
224 case EXPR_OR:
225 return manmerge_or(e->child, htab);
226 case EXPR_AND:
227 return manmerge_and(e->child, htab);
228 default:
229 abort();
230 }
231 }
232
233 static struct ohash *
234 manmerge_term(struct expr *e, struct ohash *htab)
235 {
236 struct dbm_res res, *rp;
237 uint64_t ib;
238 unsigned int slot;
239 int im;
240
241 if (htab == NULL) {
242 htab = mandoc_malloc(sizeof(*htab));
243 mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page));
244 }
245
246 for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) {
247 if ((e->bits & ib) == 0)
248 continue;
249
250 switch (ib) {
251 case TYPE_arch:
252 dbm_page_byarch(&e->match);
253 break;
254 case TYPE_sec:
255 dbm_page_bysect(&e->match);
256 break;
257 case TYPE_Nm:
258 dbm_page_byname(&e->match);
259 break;
260 case TYPE_Nd:
261 dbm_page_bydesc(&e->match);
262 break;
263 default:
264 dbm_page_bymacro(im - 2, &e->match);
265 break;
266 }
267
268 /*
269 * When hashing for deduplication, use the unique
270 * page ID itself instead of a hash function;
271 * that is quite efficient.
272 */
273
274 for (;;) {
275 res = dbm_page_next();
276 if (res.page == -1)
277 break;
278 slot = ohash_lookup_memory(htab,
279 (char *)&res, sizeof(res.page), res.page);
280 if ((rp = ohash_find(htab, slot)) != NULL) {
281 rp->bits |= res.bits;
282 continue;
283 }
284 rp = mandoc_malloc(sizeof(*rp));
285 *rp = res;
286 ohash_insert(htab, slot, rp);
287 }
288 }
289 return htab;
290 }
291
292 static struct ohash *
293 manmerge_or(struct expr *e, struct ohash *htab)
294 {
295 while (e != NULL) {
296 htab = manmerge(e, htab);
297 e = e->next;
298 }
299 return htab;
300 }
301
302 static struct ohash *
303 manmerge_and(struct expr *e, struct ohash *htab)
304 {
305 struct ohash *hand, *h1, *h2;
306 struct dbm_res *res;
307 unsigned int slot1, slot2;
308
309 /* Evaluate the first term of the AND clause. */
310
311 hand = manmerge(e, NULL);
312
313 while ((e = e->next) != NULL) {
314
315 /* Evaluate the next term and prepare for ANDing. */
316
317 h2 = manmerge(e, NULL);
318 if (ohash_entries(h2) < ohash_entries(hand)) {
319 h1 = h2;
320 h2 = hand;
321 } else
322 h1 = hand;
323 hand = mandoc_malloc(sizeof(*hand));
324 mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page));
325
326 /* Keep all pages that are in both result sets. */
327
328 for (res = ohash_first(h1, &slot1); res != NULL;
329 res = ohash_next(h1, &slot1)) {
330 if (ohash_find(h2, ohash_lookup_memory(h2,
331 (char *)res, sizeof(res->page),
332 res->page)) == NULL)
333 free(res);
334 else
335 ohash_insert(hand, ohash_lookup_memory(hand,
336 (char *)res, sizeof(res->page),
337 res->page), res);
338 }
339
340 /* Discard the merged results. */
341
342 for (res = ohash_first(h2, &slot2); res != NULL;
343 res = ohash_next(h2, &slot2))
344 free(res);
345 ohash_delete(h2);
346 free(h2);
347 ohash_delete(h1);
348 free(h1);
349 }
350
351 /* Merge the result of the AND into htab. */
352
353 if (htab == NULL)
354 return hand;
355
356 for (res = ohash_first(hand, &slot1); res != NULL;
357 res = ohash_next(hand, &slot1)) {
358 slot2 = ohash_lookup_memory(htab,
359 (char *)res, sizeof(res->page), res->page);
360 if (ohash_find(htab, slot2) == NULL)
361 ohash_insert(htab, slot2, res);
362 else
363 free(res);
364 }
365
366 /* Discard the merged result. */
367
368 ohash_delete(hand);
369 free(hand);
370 return htab;
371 }
372
373 void
374 mansearch_free(struct manpage *res, size_t sz)
375 {
376 size_t i;
377
378 for (i = 0; i < sz; i++) {
379 free(res[i].file);
380 free(res[i].names);
381 free(res[i].output);
382 }
383 free(res);
384 }
385
386 static int
387 manpage_compare(const void *vp1, const void *vp2)
388 {
389 const struct manpage *mp1, *mp2;
390 int diff;
391
392 mp1 = vp1;
393 mp2 = vp2;
394 return (diff = mp2->bits - mp1->bits) ? diff :
395 (diff = mp1->sec - mp2->sec) ? diff :
396 strcasecmp(mp1->names, mp2->names);
397 }
398
399 static char *
400 buildnames(const struct dbm_page *page)
401 {
402 char *buf;
403 size_t i, sz;
404
405 sz = lstlen(page->name) + 1 + lstlen(page->sect) +
406 (page->arch == NULL ? 0 : 1 + lstlen(page->arch)) + 2;
407 buf = mandoc_malloc(sz);
408 i = 0;
409 lstcat(buf, &i, page->name);
410 buf[i++] = '(';
411 lstcat(buf, &i, page->sect);
412 if (page->arch != NULL) {
413 buf[i++] = '/';
414 lstcat(buf, &i, page->arch);
415 }
416 buf[i++] = ')';
417 buf[i++] = '\0';
418 assert(i == sz);
419 return buf;
420 }
421
422 /*
423 * Count the buffer space needed to print the NUL-terminated
424 * list of NUL-terminated strings, when printing two separator
425 * characters between strings.
426 */
427 static size_t
428 lstlen(const char *cp)
429 {
430 size_t sz;
431
432 for (sz = 0;; sz++) {
433 if (cp[0] == '\0') {
434 if (cp[1] == '\0')
435 break;
436 sz++;
437 } else if (cp[0] < ' ')
438 sz--;
439 cp++;
440 }
441 return sz;
442 }
443
444 /*
445 * Print the NUL-terminated list of NUL-terminated strings
446 * into the buffer, seperating strings with a comma and a blank.
447 */
448 static void
449 lstcat(char *buf, size_t *i, const char *cp)
450 {
451 for (;;) {
452 if (cp[0] == '\0') {
453 if (cp[1] == '\0')
454 break;
455 buf[(*i)++] = ',';
456 buf[(*i)++] = ' ';
457 } else if (cp[0] >= ' ')
458 buf[(*i)++] = cp[0];
459 cp++;
460 }
461 }
462
463 /*
464 * Return 1 if the string *want occurs in any of the strings
465 * in the NUL-terminated string list *have, or 0 otherwise.
466 * If either argument is NULL or empty, assume no filtering
467 * is desired and return 1.
468 */
469 static int
470 lstmatch(const char *want, const char *have)
471 {
472 if (want == NULL || have == NULL || *have == '\0')
473 return 1;
474 while (*have != '\0') {
475 if (strcasestr(have, want) != NULL)
476 return 1;
477 have = strchr(have, '\0') + 1;
478 }
479 return 0;
480 }
481
482 /*
483 * Build a list of values taken by the macro im
484 * in the manual page with big-endian address addr.
485 */
486 static char *
487 buildoutput(size_t im, int32_t addr)
488 {
489 const char *oldoutput, *sep;
490 char *output, *newoutput, *value;
491
492 output = NULL;
493 dbm_macro_bypage(im - 2, addr);
494 while ((value = dbm_macro_next()) != NULL) {
495 if (output == NULL) {
496 oldoutput = "";
497 sep = "";
498 } else {
499 oldoutput = output;
500 sep = " # ";
501 }
502 mandoc_asprintf(&newoutput, "%s%s%s", oldoutput, sep, value);
503 free(output);
504 output = newoutput;
505 }
506 return output;
507 }
508
509 /*
510 * Compile a set of string tokens into an expression.
511 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
512 * "(", "foo=bar", etc.).
513 */
514 static struct expr *
515 exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi)
516 {
517 struct expr *parent, *child;
518 int needterm, nested;
519
520 if ((nested = *argi) == argc)
521 return NULL;
522 needterm = 1;
523 parent = child = NULL;
524 while (*argi < argc) {
525 if (strcmp(")", argv[*argi]) == 0) {
526 if (needterm)
527 warnx("missing term "
528 "before closing parenthesis");
529 needterm = 0;
530 if (nested)
531 break;
532 warnx("ignoring unmatched right parenthesis");
533 ++*argi;
534 continue;
535 }
536 if (strcmp("-o", argv[*argi]) == 0) {
537 if (needterm) {
538 if (*argi > 0)
539 warnx("ignoring -o after %s",
540 argv[*argi - 1]);
541 else
542 warnx("ignoring initial -o");
543 }
544 needterm = 1;
545 ++*argi;
546 continue;
547 }
548 needterm = 0;
549 if (child == NULL) {
550 child = expr_and(search, argc, argv, argi);
551 continue;
552 }
553 if (parent == NULL) {
554 parent = mandoc_calloc(1, sizeof(*parent));
555 parent->type = EXPR_OR;
556 parent->next = NULL;
557 parent->child = child;
558 }
559 child->next = expr_and(search, argc, argv, argi);
560 child = child->next;
561 }
562 if (needterm && *argi)
563 warnx("ignoring trailing %s", argv[*argi - 1]);
564 return parent == NULL ? child : parent;
565 }
566
567 static struct expr *
568 expr_and(const struct mansearch *search, int argc, char *argv[], int *argi)
569 {
570 struct expr *parent, *child;
571 int needterm;
572
573 needterm = 1;
574 parent = child = NULL;
575 while (*argi < argc) {
576 if (strcmp(")", argv[*argi]) == 0) {
577 if (needterm)
578 warnx("missing term "
579 "before closing parenthesis");
580 needterm = 0;
581 break;
582 }
583 if (strcmp("-o", argv[*argi]) == 0)
584 break;
585 if (strcmp("-a", argv[*argi]) == 0) {
586 if (needterm) {
587 if (*argi > 0)
588 warnx("ignoring -a after %s",
589 argv[*argi - 1]);
590 else
591 warnx("ignoring initial -a");
592 }
593 needterm = 1;
594 ++*argi;
595 continue;
596 }
597 if (needterm == 0)
598 break;
599 if (child == NULL) {
600 child = exprterm(search, argc, argv, argi);
601 if (child != NULL)
602 needterm = 0;
603 continue;
604 }
605 needterm = 0;
606 if (parent == NULL) {
607 parent = mandoc_calloc(1, sizeof(*parent));
608 parent->type = EXPR_AND;
609 parent->next = NULL;
610 parent->child = child;
611 }
612 child->next = exprterm(search, argc, argv, argi);
613 if (child->next != NULL) {
614 child = child->next;
615 needterm = 0;
616 }
617 }
618 if (needterm && *argi)
619 warnx("ignoring trailing %s", argv[*argi - 1]);
620 return parent == NULL ? child : parent;
621 }
622
623 static struct expr *
624 exprterm(const struct mansearch *search, int argc, char *argv[], int *argi)
625 {
626 char errbuf[BUFSIZ];
627 struct expr *e;
628 char *key, *val;
629 uint64_t iterbit;
630 int cs, i, irc;
631
632 if (strcmp("(", argv[*argi]) == 0) {
633 ++*argi;
634 e = exprcomp(search, argc, argv, argi);
635 if (*argi < argc) {
636 assert(strcmp(")", argv[*argi]) == 0);
637 ++*argi;
638 } else
639 warnx("unclosed parenthesis");
640 return e;
641 }
642
643 e = mandoc_calloc(1, sizeof(*e));
644 e->type = EXPR_TERM;
645 e->bits = 0;
646 e->next = NULL;
647 e->child = NULL;
648
649 if (search->argmode == ARG_NAME) {
650 e->bits = TYPE_Nm;
651 e->match.type = DBM_EXACT;
652 e->match.str = argv[(*argi)++];
653 return e;
654 }
655
656 /*
657 * Separate macro keys from search string.
658 * If needed, request regular expression handling.
659 */
660
661 if (search->argmode == ARG_WORD) {
662 e->bits = TYPE_Nm;
663 e->match.type = DBM_REGEX;
664 #if HAVE_REWB_BSD
665 mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]);
666 #elif HAVE_REWB_SYSV
667 mandoc_asprintf(&val, "\\<%s\\>", argv[*argi]);
668 #else
669 mandoc_asprintf(&val,
670 "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", argv[*argi]);
671 #endif
672 cs = 0;
673 } else if ((val = strpbrk(argv[*argi], "=~")) == NULL) {
674 e->bits = TYPE_Nm | TYPE_Nd;
675 e->match.type = DBM_SUB;
676 e->match.str = argv[*argi];
677 } else {
678 if (val == argv[*argi])
679 e->bits = TYPE_Nm | TYPE_Nd;
680 if (*val == '=') {
681 e->match.type = DBM_SUB;
682 e->match.str = val + 1;
683 } else
684 e->match.type = DBM_REGEX;
685 *val++ = '\0';
686 if (strstr(argv[*argi], "arch") != NULL)
687 cs = 0;
688 }
689
690 /* Compile regular expressions. */
691
692 if (e->match.type == DBM_REGEX) {
693 e->match.re = mandoc_malloc(sizeof(*e->match.re));
694 irc = regcomp(e->match.re, val,
695 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
696 if (irc) {
697 regerror(irc, e->match.re, errbuf, sizeof(errbuf));
698 warnx("regcomp /%s/: %s", val, errbuf);
699 }
700 if (search->argmode == ARG_WORD)
701 free(val);
702 if (irc) {
703 free(e->match.re);
704 free(e);
705 ++*argi;
706 return NULL;
707 }
708 }
709
710 if (e->bits) {
711 ++*argi;
712 return e;
713 }
714
715 /*
716 * Parse out all possible fields.
717 * If the field doesn't resolve, bail.
718 */
719
720 while (NULL != (key = strsep(&argv[*argi], ","))) {
721 if ('\0' == *key)
722 continue;
723 for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) {
724 if (0 == strcasecmp(key, mansearch_keynames[i])) {
725 e->bits |= iterbit;
726 break;
727 }
728 }
729 if (i == KEY_MAX) {
730 if (strcasecmp(key, "any"))
731 warnx("treating unknown key "
732 "\"%s\" as \"any\"", key);
733 e->bits |= ~0ULL;
734 }
735 }
736
737 ++*argi;
738 return e;
739 }
740
741 static void
742 exprfree(struct expr *e)
743 {
744 if (e->next != NULL)
745 exprfree(e->next);
746 if (e->child != NULL)
747 exprfree(e->child);
748 free(e);
749 }