]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
Move .sp to the roff modules. Enough infrastructure is in place
[mandoc.git] / mansearch.c
1 /* $OpenBSD: mansearch.c,v 1.50 2016/07/09 15:23:36 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013-2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/mman.h>
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <glob.h>
30 #include <limits.h>
31 #include <regex.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stddef.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "mandoc_ohash.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "dbm.h"
45
46 struct expr {
47 /* Used for terms: */
48 struct dbm_match match; /* Match type and expression. */
49 uint64_t bits; /* Type mask. */
50 /* Used for OR and AND groups: */
51 struct expr *next; /* Next child in the parent group. */
52 struct expr *child; /* First child in this group. */
53 enum { EXPR_TERM, EXPR_OR, EXPR_AND } type;
54 };
55
56 const char *const mansearch_keynames[KEY_MAX] = {
57 "arch", "sec", "Xr", "Ar", "Fa", "Fl", "Dv", "Fn",
58 "Ic", "Pa", "Cm", "Li", "Em", "Cd", "Va", "Ft",
59 "Tn", "Er", "Ev", "Sy", "Sh", "In", "Ss", "Ox",
60 "An", "Mt", "St", "Bx", "At", "Nx", "Fx", "Lk",
61 "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb", "Nm", "Nd"
62 };
63
64
/* Evaluation of a compiled expression against the open database. */
static struct ohash	*manmerge(struct expr *e, struct ohash *htab);
static struct ohash	*manmerge_term(struct expr *e, struct ohash *htab);
static struct ohash	*manmerge_or(struct expr *e, struct ohash *htab);
static struct ohash	*manmerge_and(struct expr *e, struct ohash *htab);

/* Formatting of result fields. */
static char		*buildnames(const struct dbm_page *page);
static char		*buildoutput(size_t im, struct dbm_page *page);
static size_t		 lstlen(const char *cp, size_t sep);
static void		 lstcat(char *buf, size_t *i, const char *cp,
				const char *sep);
static int		 lstmatch(const char *want, const char *have);

/* Compilation and disposal of search expressions. */
static struct expr	*exprcomp(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static struct expr	*expr_and(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static struct expr	*exprterm(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static void		 exprfree(struct expr *e);

/* Ordering of results for qsort(3). */
static int		 manpage_compare(const void *vp1, const void *vp2);
82
83
84 int
85 mansearch(const struct mansearch *search,
86 const struct manpaths *paths,
87 int argc, char *argv[],
88 struct manpage **res, size_t *sz)
89 {
90 char buf[PATH_MAX];
91 struct dbm_res *rp;
92 struct expr *e;
93 struct dbm_page *page;
94 struct manpage *mpage;
95 struct ohash *htab;
96 size_t cur, i, maxres, outkey;
97 unsigned int slot;
98 int argi, chdir_status, getcwd_status, im;
99
100 argi = 0;
101 if ((e = exprcomp(search, argc, argv, &argi)) == NULL) {
102 *sz = 0;
103 return 0;
104 }
105
106 cur = maxres = 0;
107 *res = NULL;
108
109 outkey = KEY_Nd;
110 if (search->outkey != NULL)
111 for (im = 0; im < KEY_MAX; im++)
112 if (0 == strcasecmp(search->outkey,
113 mansearch_keynames[im])) {
114 outkey = im;
115 break;
116 }
117
118 /*
119 * Remember the original working directory, if possible.
120 * This will be needed if the second or a later directory
121 * is given as a relative path.
122 * Do not error out if the current directory is not
123 * searchable: Maybe it won't be needed after all.
124 */
125
126 if (getcwd(buf, PATH_MAX) == NULL) {
127 getcwd_status = 0;
128 (void)strlcpy(buf, strerror(errno), sizeof(buf));
129 } else
130 getcwd_status = 1;
131
132 /*
133 * Loop over the directories (containing databases) for us to
134 * search.
135 * Don't let missing/bad databases/directories phase us.
136 * In each, try to open the resident database and, if it opens,
137 * scan it for our match expression.
138 */
139
140 chdir_status = 0;
141 for (i = 0; i < paths->sz; i++) {
142 if (chdir_status && paths->paths[i][0] != '/') {
143 if ( ! getcwd_status) {
144 warnx("%s: getcwd: %s", paths->paths[i], buf);
145 continue;
146 } else if (chdir(buf) == -1) {
147 warn("%s", buf);
148 continue;
149 }
150 }
151 if (chdir(paths->paths[i]) == -1) {
152 warn("%s", paths->paths[i]);
153 continue;
154 }
155 chdir_status = 1;
156
157 if (dbm_open(MANDOC_DB) == -1) {
158 warn("%s/%s", paths->paths[i], MANDOC_DB);
159 continue;
160 }
161
162 if ((htab = manmerge(e, NULL)) == NULL) {
163 dbm_close();
164 continue;
165 }
166
167 for (rp = ohash_first(htab, &slot); rp != NULL;
168 rp = ohash_next(htab, &slot)) {
169 page = dbm_page_get(rp->page);
170
171 if (lstmatch(search->sec, page->sect) == 0 ||
172 lstmatch(search->arch, page->arch) == 0)
173 continue;
174
175 if (cur + 1 > maxres) {
176 maxres += 1024;
177 *res = mandoc_reallocarray(*res,
178 maxres, sizeof(**res));
179 }
180 mpage = *res + cur;
181 mandoc_asprintf(&mpage->file, "%s/%s",
182 paths->paths[i], page->file + 1);
183 mpage->names = buildnames(page);
184 mpage->output = buildoutput(outkey, page);
185 mpage->ipath = i;
186 mpage->bits = rp->bits;
187 mpage->sec = *page->sect - '0';
188 if (mpage->sec < 0 || mpage->sec > 9)
189 mpage->sec = 10;
190 mpage->form = *page->file;
191 free(rp);
192 cur++;
193 }
194 ohash_delete(htab);
195 free(htab);
196 dbm_close();
197
198 /*
199 * In man(1) mode, prefer matches in earlier trees
200 * over matches in later trees.
201 */
202
203 if (cur && search->firstmatch)
204 break;
205 }
206 qsort(*res, cur, sizeof(struct manpage), manpage_compare);
207 if (chdir_status && getcwd_status && chdir(buf) == -1)
208 warn("%s", buf);
209 exprfree(e);
210 *sz = cur;
211 return 1;
212 }
213
214 /*
215 * Merge the results for the expression tree rooted at e
216 * into the the result list htab.
217 */
218 static struct ohash *
219 manmerge(struct expr *e, struct ohash *htab)
220 {
221 switch (e->type) {
222 case EXPR_TERM:
223 return manmerge_term(e, htab);
224 case EXPR_OR:
225 return manmerge_or(e->child, htab);
226 case EXPR_AND:
227 return manmerge_and(e->child, htab);
228 default:
229 abort();
230 }
231 }
232
233 static struct ohash *
234 manmerge_term(struct expr *e, struct ohash *htab)
235 {
236 struct dbm_res res, *rp;
237 uint64_t ib;
238 unsigned int slot;
239 int im;
240
241 if (htab == NULL) {
242 htab = mandoc_malloc(sizeof(*htab));
243 mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page));
244 }
245
246 for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) {
247 if ((e->bits & ib) == 0)
248 continue;
249
250 switch (ib) {
251 case TYPE_arch:
252 dbm_page_byarch(&e->match);
253 break;
254 case TYPE_sec:
255 dbm_page_bysect(&e->match);
256 break;
257 case TYPE_Nm:
258 dbm_page_byname(&e->match);
259 break;
260 case TYPE_Nd:
261 dbm_page_bydesc(&e->match);
262 break;
263 default:
264 dbm_page_bymacro(im - 2, &e->match);
265 break;
266 }
267
268 /*
269 * When hashing for deduplication, use the unique
270 * page ID itself instead of a hash function;
271 * that is quite efficient.
272 */
273
274 for (;;) {
275 res = dbm_page_next();
276 if (res.page == -1)
277 break;
278 slot = ohash_lookup_memory(htab,
279 (char *)&res, sizeof(res.page), res.page);
280 if ((rp = ohash_find(htab, slot)) != NULL) {
281 rp->bits |= res.bits;
282 continue;
283 }
284 rp = mandoc_malloc(sizeof(*rp));
285 *rp = res;
286 ohash_insert(htab, slot, rp);
287 }
288 }
289 return htab;
290 }
291
292 static struct ohash *
293 manmerge_or(struct expr *e, struct ohash *htab)
294 {
295 while (e != NULL) {
296 htab = manmerge(e, htab);
297 e = e->next;
298 }
299 return htab;
300 }
301
302 static struct ohash *
303 manmerge_and(struct expr *e, struct ohash *htab)
304 {
305 struct ohash *hand, *h1, *h2;
306 struct dbm_res *res;
307 unsigned int slot1, slot2;
308
309 /* Evaluate the first term of the AND clause. */
310
311 hand = manmerge(e, NULL);
312
313 while ((e = e->next) != NULL) {
314
315 /* Evaluate the next term and prepare for ANDing. */
316
317 h2 = manmerge(e, NULL);
318 if (ohash_entries(h2) < ohash_entries(hand)) {
319 h1 = h2;
320 h2 = hand;
321 } else
322 h1 = hand;
323 hand = mandoc_malloc(sizeof(*hand));
324 mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page));
325
326 /* Keep all pages that are in both result sets. */
327
328 for (res = ohash_first(h1, &slot1); res != NULL;
329 res = ohash_next(h1, &slot1)) {
330 if (ohash_find(h2, ohash_lookup_memory(h2,
331 (char *)res, sizeof(res->page),
332 res->page)) == NULL)
333 free(res);
334 else
335 ohash_insert(hand, ohash_lookup_memory(hand,
336 (char *)res, sizeof(res->page),
337 res->page), res);
338 }
339
340 /* Discard the merged results. */
341
342 for (res = ohash_first(h2, &slot2); res != NULL;
343 res = ohash_next(h2, &slot2))
344 free(res);
345 ohash_delete(h2);
346 free(h2);
347 ohash_delete(h1);
348 free(h1);
349 }
350
351 /* Merge the result of the AND into htab. */
352
353 if (htab == NULL)
354 return hand;
355
356 for (res = ohash_first(hand, &slot1); res != NULL;
357 res = ohash_next(hand, &slot1)) {
358 slot2 = ohash_lookup_memory(htab,
359 (char *)res, sizeof(res->page), res->page);
360 if (ohash_find(htab, slot2) == NULL)
361 ohash_insert(htab, slot2, res);
362 else
363 free(res);
364 }
365
366 /* Discard the merged result. */
367
368 ohash_delete(hand);
369 free(hand);
370 return htab;
371 }
372
373 void
374 mansearch_free(struct manpage *res, size_t sz)
375 {
376 size_t i;
377
378 for (i = 0; i < sz; i++) {
379 free(res[i].file);
380 free(res[i].names);
381 free(res[i].output);
382 }
383 free(res);
384 }
385
386 static int
387 manpage_compare(const void *vp1, const void *vp2)
388 {
389 const struct manpage *mp1, *mp2;
390 int diff;
391
392 mp1 = vp1;
393 mp2 = vp2;
394 return (diff = mp2->bits - mp1->bits) ? diff :
395 (diff = mp1->sec - mp2->sec) ? diff :
396 strcasecmp(mp1->names, mp2->names);
397 }
398
399 static char *
400 buildnames(const struct dbm_page *page)
401 {
402 char *buf;
403 size_t i, sz;
404
405 sz = lstlen(page->name, 2) + 1 + lstlen(page->sect, 2) +
406 (page->arch == NULL ? 0 : 1 + lstlen(page->arch, 2)) + 2;
407 buf = mandoc_malloc(sz);
408 i = 0;
409 lstcat(buf, &i, page->name, ", ");
410 buf[i++] = '(';
411 lstcat(buf, &i, page->sect, ", ");
412 if (page->arch != NULL) {
413 buf[i++] = '/';
414 lstcat(buf, &i, page->arch, ", ");
415 }
416 buf[i++] = ')';
417 buf[i++] = '\0';
418 assert(i == sz);
419 return buf;
420 }
421
/*
 * Count the buffer space needed to print the NUL-terminated
 * list of NUL-terminated strings, when printing sep separator
 * characters between strings.
 * Characters comparing less than ' ' are not counted,
 * and neither is a terminating NUL for the output.
 */
static size_t
lstlen(const char *cp, size_t sep)
{
	size_t	 len;

	len = 0;
	/* Two NUL bytes in a row mark the end of the list. */
	while (cp[0] != '\0' || cp[1] != '\0') {
		if (cp[0] == '\0')
			len += sep;	/* Boundary between two strings. */
		else if (cp[0] >= ' ')
			len++;
		cp++;
	}
	return len;
}
443
/*
 * Print the NUL-terminated list of NUL-terminated strings
 * into the buffer, separating strings with sep.
 * Writing starts at buf[*i], and *i is advanced past the last
 * character written.  Characters comparing less than ' ' are
 * skipped, and no terminating NUL is appended.
 */
static void
lstcat(char *buf, size_t *i, const char *cp, const char *sep)
{
	const char	*s;
	size_t		 pos;

	pos = *i;
	/* Two NUL bytes in a row mark the end of the list. */
	for (; cp[0] != '\0' || cp[1] != '\0'; cp++) {
		if (cp[0] == '\0') {
			for (s = sep; *s != '\0'; s++)
				buf[pos++] = *s;
		} else if (cp[0] >= ' ')
			buf[pos++] = cp[0];
	}
	*i = pos;
}
465
/*
 * Return 1 if the string *want occurs, ignoring case, in any of
 * the strings in the NUL-terminated string list *have, or 0
 * otherwise.
 * If either argument is NULL or the list is empty, assume no
 * filtering is desired and return 1.
 */
static int
lstmatch(const char *want, const char *have)
{
	const char	*item;

	if (want == NULL || have == NULL || *have == '\0')
		return 1;

	/* Step from one string to the next until the empty string. */

	for (item = have; *item != '\0'; item += strlen(item) + 1)
		if (strcasestr(item, want) != NULL)
			return 1;
	return 0;
}
484
485 /*
486 * Build a list of values taken by the macro im in the manual page.
487 */
488 static char *
489 buildoutput(size_t im, struct dbm_page *page)
490 {
491 const char *oldoutput, *sep, *input;
492 char *output, *newoutput, *value;
493 size_t sz, i;
494
495 switch (im) {
496 case KEY_Nd:
497 return mandoc_strdup(page->desc);
498 case KEY_Nm:
499 input = page->name;
500 break;
501 case KEY_sec:
502 input = page->sect;
503 break;
504 case KEY_arch:
505 input = page->arch;
506 if (input == NULL)
507 input = "all\0";
508 break;
509 default:
510 input = NULL;
511 break;
512 }
513
514 if (input != NULL) {
515 sz = lstlen(input, 3) + 1;
516 output = mandoc_malloc(sz);
517 i = 0;
518 lstcat(output, &i, input, " # ");
519 output[i++] = '\0';
520 assert(i == sz);
521 return output;
522 }
523
524 output = NULL;
525 dbm_macro_bypage(im - 2, page->addr);
526 while ((value = dbm_macro_next()) != NULL) {
527 if (output == NULL) {
528 oldoutput = "";
529 sep = "";
530 } else {
531 oldoutput = output;
532 sep = " # ";
533 }
534 mandoc_asprintf(&newoutput, "%s%s%s", oldoutput, sep, value);
535 free(output);
536 output = newoutput;
537 }
538 return output;
539 }
540
541 /*
542 * Compile a set of string tokens into an expression.
543 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
544 * "(", "foo=bar", etc.).
545 */
546 static struct expr *
547 exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi)
548 {
549 struct expr *parent, *child;
550 int needterm, nested;
551
552 if ((nested = *argi) == argc)
553 return NULL;
554 needterm = 1;
555 parent = child = NULL;
556 while (*argi < argc) {
557 if (strcmp(")", argv[*argi]) == 0) {
558 if (needterm)
559 warnx("missing term "
560 "before closing parenthesis");
561 needterm = 0;
562 if (nested)
563 break;
564 warnx("ignoring unmatched right parenthesis");
565 ++*argi;
566 continue;
567 }
568 if (strcmp("-o", argv[*argi]) == 0) {
569 if (needterm) {
570 if (*argi > 0)
571 warnx("ignoring -o after %s",
572 argv[*argi - 1]);
573 else
574 warnx("ignoring initial -o");
575 }
576 needterm = 1;
577 ++*argi;
578 continue;
579 }
580 needterm = 0;
581 if (child == NULL) {
582 child = expr_and(search, argc, argv, argi);
583 continue;
584 }
585 if (parent == NULL) {
586 parent = mandoc_calloc(1, sizeof(*parent));
587 parent->type = EXPR_OR;
588 parent->next = NULL;
589 parent->child = child;
590 }
591 child->next = expr_and(search, argc, argv, argi);
592 child = child->next;
593 }
594 if (needterm && *argi)
595 warnx("ignoring trailing %s", argv[*argi - 1]);
596 return parent == NULL ? child : parent;
597 }
598
599 static struct expr *
600 expr_and(const struct mansearch *search, int argc, char *argv[], int *argi)
601 {
602 struct expr *parent, *child;
603 int needterm;
604
605 needterm = 1;
606 parent = child = NULL;
607 while (*argi < argc) {
608 if (strcmp(")", argv[*argi]) == 0) {
609 if (needterm)
610 warnx("missing term "
611 "before closing parenthesis");
612 needterm = 0;
613 break;
614 }
615 if (strcmp("-o", argv[*argi]) == 0)
616 break;
617 if (strcmp("-a", argv[*argi]) == 0) {
618 if (needterm) {
619 if (*argi > 0)
620 warnx("ignoring -a after %s",
621 argv[*argi - 1]);
622 else
623 warnx("ignoring initial -a");
624 }
625 needterm = 1;
626 ++*argi;
627 continue;
628 }
629 if (needterm == 0)
630 break;
631 if (child == NULL) {
632 child = exprterm(search, argc, argv, argi);
633 if (child != NULL)
634 needterm = 0;
635 continue;
636 }
637 needterm = 0;
638 if (parent == NULL) {
639 parent = mandoc_calloc(1, sizeof(*parent));
640 parent->type = EXPR_AND;
641 parent->next = NULL;
642 parent->child = child;
643 }
644 child->next = exprterm(search, argc, argv, argi);
645 if (child->next != NULL) {
646 child = child->next;
647 needterm = 0;
648 }
649 }
650 if (needterm && *argi)
651 warnx("ignoring trailing %s", argv[*argi - 1]);
652 return parent == NULL ? child : parent;
653 }
654
655 static struct expr *
656 exprterm(const struct mansearch *search, int argc, char *argv[], int *argi)
657 {
658 char errbuf[BUFSIZ];
659 struct expr *e;
660 char *key, *val;
661 uint64_t iterbit;
662 int cs, i, irc;
663
664 if (strcmp("(", argv[*argi]) == 0) {
665 ++*argi;
666 e = exprcomp(search, argc, argv, argi);
667 if (*argi < argc) {
668 assert(strcmp(")", argv[*argi]) == 0);
669 ++*argi;
670 } else
671 warnx("unclosed parenthesis");
672 return e;
673 }
674
675 if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) {
676 cs = 0;
677 ++*argi;
678 } else
679 cs = 1;
680
681 e = mandoc_calloc(1, sizeof(*e));
682 e->type = EXPR_TERM;
683 e->bits = 0;
684 e->next = NULL;
685 e->child = NULL;
686
687 if (search->argmode == ARG_NAME) {
688 e->bits = TYPE_Nm;
689 e->match.type = DBM_EXACT;
690 e->match.str = argv[(*argi)++];
691 return e;
692 }
693
694 /*
695 * Separate macro keys from search string.
696 * If needed, request regular expression handling.
697 */
698
699 if (search->argmode == ARG_WORD) {
700 e->bits = TYPE_Nm;
701 e->match.type = DBM_REGEX;
702 #if HAVE_REWB_BSD
703 mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]);
704 #elif HAVE_REWB_SYSV
705 mandoc_asprintf(&val, "\\<%s\\>", argv[*argi]);
706 #else
707 mandoc_asprintf(&val,
708 "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", argv[*argi]);
709 #endif
710 cs = 0;
711 } else if ((val = strpbrk(argv[*argi], "=~")) == NULL) {
712 e->bits = TYPE_Nm | TYPE_Nd;
713 e->match.type = DBM_SUB;
714 e->match.str = argv[*argi];
715 } else {
716 if (val == argv[*argi])
717 e->bits = TYPE_Nm | TYPE_Nd;
718 if (*val == '=') {
719 e->match.type = DBM_SUB;
720 e->match.str = val + 1;
721 } else
722 e->match.type = DBM_REGEX;
723 *val++ = '\0';
724 if (strstr(argv[*argi], "arch") != NULL)
725 cs = 0;
726 }
727
728 /* Compile regular expressions. */
729
730 if (e->match.type == DBM_REGEX) {
731 e->match.re = mandoc_malloc(sizeof(*e->match.re));
732 irc = regcomp(e->match.re, val,
733 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
734 if (irc) {
735 regerror(irc, e->match.re, errbuf, sizeof(errbuf));
736 warnx("regcomp /%s/: %s", val, errbuf);
737 }
738 if (search->argmode == ARG_WORD)
739 free(val);
740 if (irc) {
741 free(e->match.re);
742 free(e);
743 ++*argi;
744 return NULL;
745 }
746 }
747
748 if (e->bits) {
749 ++*argi;
750 return e;
751 }
752
753 /*
754 * Parse out all possible fields.
755 * If the field doesn't resolve, bail.
756 */
757
758 while (NULL != (key = strsep(&argv[*argi], ","))) {
759 if ('\0' == *key)
760 continue;
761 for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) {
762 if (0 == strcasecmp(key, mansearch_keynames[i])) {
763 e->bits |= iterbit;
764 break;
765 }
766 }
767 if (i == KEY_MAX) {
768 if (strcasecmp(key, "any"))
769 warnx("treating unknown key "
770 "\"%s\" as \"any\"", key);
771 e->bits |= ~0ULL;
772 }
773 }
774
775 ++*argi;
776 return e;
777 }
778
779 static void
780 exprfree(struct expr *e)
781 {
782 if (e->next != NULL)
783 exprfree(e->next);
784 if (e->child != NULL)
785 exprfree(e->child);
786 free(e);
787 }