git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
in apropos(1) output, sort names and avoid multiple section numbers
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.22 2014/03/17 16:31:44 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "manpath.h"
43 #include "mansearch.h"
44
/*
 * Table of search key names and its length; both are defined in
 * another translation unit.  Entry number N corresponds to bit N
 * of the type masks used in this file.
 */
extern const char *const mansearch_keynames[];
extern int mansearch_keymax;
47
/*
 * Helpers to bind one parameter of a prepared statement.
 * Each macro binds _v to parameter number _i of statement _s and
 * then increments _i, so _i has to be a modifiable lvalue.
 * If binding fails, the error message of database _db is printed
 * to stderr and execution continues.
 * SQL_BIND_BLOB binds the object _v itself (address and size),
 * so _v has to be an lvalue, too.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
63
/*
 * One term of a compiled search expression.
 * Terms form a singly linked list in the order they were given.
 * A term is matched as a substring if "substr" is set and as a
 * regular expression otherwise.
 */
struct	expr {
	/* Mask of the key types this term applies to. */
	uint64_t	 bits;
	/* String to search for, or NULL to use "regexp" instead. */
	const char	*substr;
	/* Compiled regular expression, used if "substr" is NULL. */
	regex_t		 regexp;
	/* Number of opening parentheses in front of this term. */
	int		 open;
	/* Non-zero if joined to the previous term by a logical AND. */
	int		 and;
	/* Number of closing parentheses behind this term. */
	int		 close;
	/* Following term, or NULL at the end of the list. */
	struct expr	*next;
};
73
/*
 * One page found by the search query.
 * Entries are kept in a hash table keyed on "id".
 */
struct	match {
	/* Page identifier as stored in the database. */
	uint64_t	 id;
	/* Page format; 0 means a preformatted (cat) page. */
	int		 form;
};
78
79 static void buildnames(struct manpage *, sqlite3 *,
80 sqlite3_stmt *, uint64_t,
81 const char *, int form);
82 static char *buildoutput(sqlite3 *, sqlite3_stmt *,
83 uint64_t, uint64_t);
84 static void *hash_alloc(size_t, void *);
85 static void hash_free(void *, size_t, void *);
86 static void *hash_halloc(size_t, void *);
87 static struct expr *exprcomp(const struct mansearch *,
88 int, char *[]);
89 static void exprfree(struct expr *);
90 static struct expr *exprspec(struct expr *, uint64_t,
91 const char *, const char *);
92 static struct expr *exprterm(const struct mansearch *, char *, int);
93 static void sql_append(char **sql, size_t *sz,
94 const char *newstr, int count);
95 static void sql_match(sqlite3_context *context,
96 int argc, sqlite3_value **argv);
97 static void sql_regexp(sqlite3_context *context,
98 int argc, sqlite3_value **argv);
99 static char *sql_statement(const struct expr *);
100
101 int
102 mansearch(const struct mansearch *search,
103 const struct manpaths *paths,
104 int argc, char *argv[],
105 const char *outkey,
106 struct manpage **res, size_t *sz)
107 {
108 int fd, rc, c, indexbit;
109 int64_t id;
110 uint64_t outbit, iterbit;
111 char buf[PATH_MAX];
112 char *sql;
113 struct manpage *mpage;
114 struct expr *e, *ep;
115 sqlite3 *db;
116 sqlite3_stmt *s, *s2;
117 struct match *mp;
118 struct ohash_info info;
119 struct ohash htab;
120 unsigned int idx;
121 size_t i, j, cur, maxres;
122
123 memset(&info, 0, sizeof(struct ohash_info));
124
125 info.halloc = hash_halloc;
126 info.alloc = hash_alloc;
127 info.hfree = hash_free;
128 info.key_offset = offsetof(struct match, id);
129
130 *sz = cur = maxres = 0;
131 sql = NULL;
132 *res = NULL;
133 fd = -1;
134 e = NULL;
135 rc = 0;
136
137 if (0 == argc)
138 goto out;
139 if (NULL == (e = exprcomp(search, argc, argv)))
140 goto out;
141
142 outbit = 0;
143 if (NULL != outkey) {
144 for (indexbit = 0, iterbit = 1;
145 indexbit < mansearch_keymax;
146 indexbit++, iterbit <<= 1) {
147 if (0 == strcasecmp(outkey,
148 mansearch_keynames[indexbit])) {
149 outbit = iterbit;
150 break;
151 }
152 }
153 }
154
155 /*
156 * Save a descriptor to the current working directory.
157 * Since pathnames in the "paths" variable might be relative,
158 * and we'll be chdir()ing into them, we need to keep a handle
159 * on our current directory from which to start the chdir().
160 */
161
162 if (NULL == getcwd(buf, PATH_MAX)) {
163 perror(NULL);
164 goto out;
165 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
166 perror(buf);
167 goto out;
168 }
169
170 sql = sql_statement(e);
171
172 /*
173 * Loop over the directories (containing databases) for us to
174 * search.
175 * Don't let missing/bad databases/directories phase us.
176 * In each, try to open the resident database and, if it opens,
177 * scan it for our match expression.
178 */
179
180 for (i = 0; i < paths->sz; i++) {
181 if (-1 == fchdir(fd)) {
182 perror(buf);
183 free(*res);
184 break;
185 } else if (-1 == chdir(paths->paths[i])) {
186 perror(paths->paths[i]);
187 continue;
188 }
189
190 c = sqlite3_open_v2
191 (MANDOC_DB, &db,
192 SQLITE_OPEN_READONLY, NULL);
193
194 if (SQLITE_OK != c) {
195 perror(MANDOC_DB);
196 sqlite3_close(db);
197 continue;
198 }
199
200 /*
201 * Define the SQL functions for substring
202 * and regular expression matching.
203 */
204
205 c = sqlite3_create_function(db, "match", 2,
206 SQLITE_ANY, NULL, sql_match, NULL, NULL);
207 assert(SQLITE_OK == c);
208 c = sqlite3_create_function(db, "regexp", 2,
209 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
210 assert(SQLITE_OK == c);
211
212 j = 1;
213 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
214 if (SQLITE_OK != c)
215 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
216
217 for (ep = e; NULL != ep; ep = ep->next) {
218 if (NULL == ep->substr) {
219 SQL_BIND_BLOB(db, s, j, ep->regexp);
220 } else
221 SQL_BIND_TEXT(db, s, j, ep->substr);
222 SQL_BIND_INT64(db, s, j, ep->bits);
223 }
224
225 memset(&htab, 0, sizeof(struct ohash));
226 ohash_init(&htab, 4, &info);
227
228 /*
229 * Hash each entry on its [unique] document identifier.
230 * This is a uint64_t.
231 * Instead of using a hash function, simply convert the
232 * uint64_t to a uint32_t, the hash value's type.
233 * This gives good performance and preserves the
234 * distribution of buckets in the table.
235 */
236 while (SQLITE_ROW == (c = sqlite3_step(s))) {
237 id = sqlite3_column_int64(s, 1);
238 idx = ohash_lookup_memory
239 (&htab, (char *)&id,
240 sizeof(uint64_t), (uint32_t)id);
241
242 if (NULL != ohash_find(&htab, idx))
243 continue;
244
245 mp = mandoc_calloc(1, sizeof(struct match));
246 mp->id = id;
247 mp->form = sqlite3_column_int(s, 0);
248 ohash_insert(&htab, idx, mp);
249 }
250
251 if (SQLITE_DONE != c)
252 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
253
254 sqlite3_finalize(s);
255
256 c = sqlite3_prepare_v2(db,
257 "SELECT * FROM mlinks WHERE pageid=?"
258 " ORDER BY sec, arch, name",
259 -1, &s, NULL);
260 if (SQLITE_OK != c)
261 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
262
263 c = sqlite3_prepare_v2(db,
264 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
265 -1, &s2, NULL);
266 if (SQLITE_OK != c)
267 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
268
269 for (mp = ohash_first(&htab, &idx);
270 NULL != mp;
271 mp = ohash_next(&htab, &idx)) {
272 if (cur + 1 > maxres) {
273 maxres += 1024;
274 *res = mandoc_realloc
275 (*res, maxres * sizeof(struct manpage));
276 }
277 mpage = *res + cur;
278 mpage->form = mp->form;
279 buildnames(mpage, db, s, mp->id,
280 paths->paths[i], mp->form);
281 mpage->output = outbit ?
282 buildoutput(db, s2, mp->id, outbit) : NULL;
283
284 free(mp);
285 cur++;
286 }
287
288 sqlite3_finalize(s);
289 sqlite3_finalize(s2);
290 sqlite3_close(db);
291 ohash_delete(&htab);
292 }
293 rc = 1;
294 out:
295 exprfree(e);
296 if (-1 != fd)
297 close(fd);
298 free(sql);
299 *sz = cur;
300 return(rc);
301 }
302
303 static void
304 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
305 uint64_t id, const char *path, int form)
306 {
307 char *newnames, *prevsec, *prevarch;
308 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
309 size_t i;
310 int c;
311
312 mpage->names = NULL;
313 prevsec = prevarch = NULL;
314 i = 1;
315 SQL_BIND_INT64(db, s, i, id);
316 while (SQLITE_ROW == (c = sqlite3_step(s))) {
317
318 /* Decide whether we already have some names. */
319
320 if (NULL == mpage->names) {
321 oldnames = "";
322 sep1 = "";
323 } else {
324 oldnames = mpage->names;
325 sep1 = ", ";
326 }
327
328 /* Fetch the next name. */
329
330 sec = sqlite3_column_text(s, 0);
331 arch = sqlite3_column_text(s, 1);
332 name = sqlite3_column_text(s, 2);
333
334 /* If the section changed, append the old one. */
335
336 if (NULL != prevsec &&
337 (strcmp(sec, prevsec) ||
338 strcmp(arch, prevarch))) {
339 sep2 = '\0' == *prevarch ? "" : "/";
340 if (-1 == asprintf(&newnames, "%s(%s%s%s)",
341 oldnames, prevsec, sep2, prevarch)) {
342 perror(0);
343 exit((int)MANDOCLEVEL_SYSERR);
344 }
345 free(mpage->names);
346 oldnames = mpage->names = newnames;
347 free(prevsec);
348 free(prevarch);
349 prevsec = prevarch = NULL;
350 }
351
352 /* Save the new section, to append it later. */
353
354 if (NULL == prevsec) {
355 prevsec = mandoc_strdup(sec);
356 prevarch = mandoc_strdup(arch);
357 }
358
359 /* Append the new name. */
360
361 if (-1 == asprintf(&newnames, "%s%s%s",
362 oldnames, sep1, name)) {
363 perror(0);
364 exit((int)MANDOCLEVEL_SYSERR);
365 }
366 free(mpage->names);
367 mpage->names = newnames;
368
369 /* Also save the first file name encountered. */
370
371 if (NULL != mpage->file)
372 continue;
373
374 if (form) {
375 sep1 = "man";
376 fsec = sec;
377 } else {
378 sep1 = "cat";
379 fsec = "0";
380 }
381 sep2 = '\0' == *arch ? "" : "/";
382 if (-1 == asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
383 path, sep1, sec, sep2, arch, name, fsec)) {
384 perror(0);
385 exit((int)MANDOCLEVEL_SYSERR);
386 }
387 }
388 if (SQLITE_DONE != c)
389 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
390 sqlite3_reset(s);
391
392 /* Append one final section to the names. */
393
394 if (NULL != prevsec) {
395 sep2 = '\0' == *prevarch ? "" : "/";
396 if (-1 == asprintf(&newnames, "%s(%s%s%s)",
397 mpage->names, prevsec, sep2, prevarch)) {
398 perror(0);
399 exit((int)MANDOCLEVEL_SYSERR);
400 }
401 free(mpage->names);
402 mpage->names = newnames;
403 free(prevsec);
404 free(prevarch);
405 }
406 }
407
408 static char *
409 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
410 {
411 char *output, *newoutput;
412 const char *oldoutput, *sep1, *data;
413 size_t i;
414 int c;
415
416 output = NULL;
417 i = 1;
418 SQL_BIND_INT64(db, s, i, id);
419 SQL_BIND_INT64(db, s, i, outbit);
420 while (SQLITE_ROW == (c = sqlite3_step(s))) {
421 if (NULL == output) {
422 oldoutput = "";
423 sep1 = "";
424 } else {
425 oldoutput = output;
426 sep1 = " # ";
427 }
428 data = sqlite3_column_text(s, 1);
429 if (-1 == asprintf(&newoutput, "%s%s%s",
430 oldoutput, sep1, data)) {
431 perror(0);
432 exit((int)MANDOCLEVEL_SYSERR);
433 }
434 free(output);
435 output = newoutput;
436 }
437 if (SQLITE_DONE != c)
438 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
439 sqlite3_reset(s);
440 return(output);
441 }
442
443 /*
444 * Implement substring match as an application-defined SQL function.
445 * Using the SQL LIKE or GLOB operators instead would be a bad idea
446 * because that would require escaping metacharacters in the string
447 * being searched for.
448 */
449 static void
450 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
451 {
452
453 assert(2 == argc);
454 sqlite3_result_int(context, NULL != strcasestr(
455 (const char *)sqlite3_value_text(argv[1]),
456 (const char *)sqlite3_value_text(argv[0])));
457 }
458
459 /*
460 * Implement regular expression match
461 * as an application-defined SQL function.
462 */
463 static void
464 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
465 {
466
467 assert(2 == argc);
468 sqlite3_result_int(context, !regexec(
469 (regex_t *)sqlite3_value_blob(argv[0]),
470 (const char *)sqlite3_value_text(argv[1]),
471 0, NULL, 0));
472 }
473
/*
 * Append to the string *sql, which currently has the length *sz.
 * If "count" is greater than 1, the first character of "newstr"
 * is appended "count" times; otherwise, all of "newstr" is
 * appended once.
 * The buffer is resized with mandoc_realloc(), the result is
 * NUL-terminated, and *sz is updated to the new length.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t		 addlen;

	if (1 < count) {
		addlen = (size_t)count;
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memset(*sql + *sz, *newstr, addlen);
	} else {
		addlen = strlen(newstr);
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memcpy(*sql + *sz, newstr, addlen);
	}

	*sz += addlen;
	(*sql)[*sz] = '\0';
}
488
489 /*
490 * Prepare the search SQL statement.
491 */
492 static char *
493 sql_statement(const struct expr *e)
494 {
495 char *sql;
496 size_t sz;
497 int needop;
498
499 sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
500 sz = strlen(sql);
501
502 for (needop = 0; NULL != e; e = e->next) {
503 if (e->and)
504 sql_append(&sql, &sz, " AND ", 1);
505 else if (needop)
506 sql_append(&sql, &sz, " OR ", 1);
507 if (e->open)
508 sql_append(&sql, &sz, "(", e->open);
509 sql_append(&sql, &sz, NULL == e->substr ?
510 "id IN (SELECT pageid FROM keys "
511 "WHERE key REGEXP ? AND bits & ?)" :
512 "id IN (SELECT pageid FROM keys "
513 "WHERE key MATCH ? AND bits & ?)", 1);
514 if (e->close)
515 sql_append(&sql, &sz, ")", e->close);
516 needop = 1;
517 }
518
519 return(sql);
520 }
521
522 /*
523 * Compile a set of string tokens into an expression.
524 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
525 * "(", "foo=bar", etc.).
526 */
527 static struct expr *
528 exprcomp(const struct mansearch *search, int argc, char *argv[])
529 {
530 int i, toopen, logic, igncase, toclose;
531 struct expr *first, *next, *cur;
532
533 first = cur = NULL;
534 logic = igncase = toclose = 0;
535 toopen = 1;
536
537 for (i = 0; i < argc; i++) {
538 if (0 == strcmp("(", argv[i])) {
539 if (igncase)
540 goto fail;
541 toopen++;
542 toclose++;
543 continue;
544 } else if (0 == strcmp(")", argv[i])) {
545 if (toopen || logic || igncase || NULL == cur)
546 goto fail;
547 cur->close++;
548 if (0 > --toclose)
549 goto fail;
550 continue;
551 } else if (0 == strcmp("-a", argv[i])) {
552 if (toopen || logic || igncase || NULL == cur)
553 goto fail;
554 logic = 1;
555 continue;
556 } else if (0 == strcmp("-o", argv[i])) {
557 if (toopen || logic || igncase || NULL == cur)
558 goto fail;
559 logic = 2;
560 continue;
561 } else if (0 == strcmp("-i", argv[i])) {
562 if (igncase)
563 goto fail;
564 igncase = 1;
565 continue;
566 }
567 next = exprterm(search, argv[i], !igncase);
568 if (NULL == next)
569 goto fail;
570 next->open = toopen;
571 next->and = (1 == logic);
572 if (NULL != first) {
573 cur->next = next;
574 cur = next;
575 } else
576 cur = first = next;
577 toopen = logic = igncase = 0;
578 }
579 if (toopen || logic || igncase || toclose)
580 goto fail;
581
582 cur->close++;
583 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
584 exprspec(cur, TYPE_sec, search->sec, "^%s$");
585
586 return(first);
587
588 fail:
589 if (NULL != first)
590 exprfree(first);
591 return(NULL);
592 }
593
594 static struct expr *
595 exprspec(struct expr *cur, uint64_t key, const char *value,
596 const char *format)
597 {
598 char errbuf[BUFSIZ];
599 char *cp;
600 int irc;
601
602 if (NULL == value)
603 return(cur);
604
605 if (-1 == asprintf(&cp, format, value)) {
606 perror(0);
607 exit((int)MANDOCLEVEL_SYSERR);
608 }
609 cur->next = mandoc_calloc(1, sizeof(struct expr));
610 cur = cur->next;
611 cur->and = 1;
612 cur->bits = key;
613 if (0 != (irc = regcomp(&cur->regexp, cp,
614 REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
615 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
616 fprintf(stderr, "regcomp: %s\n", errbuf);
617 cur->substr = value;
618 }
619 free(cp);
620 return(cur);
621 }
622
623 static struct expr *
624 exprterm(const struct mansearch *search, char *buf, int cs)
625 {
626 char errbuf[BUFSIZ];
627 struct expr *e;
628 char *key, *v;
629 uint64_t iterbit;
630 int i, irc;
631
632 if ('\0' == *buf)
633 return(NULL);
634
635 e = mandoc_calloc(1, sizeof(struct expr));
636
637 /*"whatis" mode uses an opaque string and default fields. */
638
639 if (MANSEARCH_WHATIS & search->flags) {
640 e->substr = buf;
641 e->bits = search->deftype;
642 return(e);
643 }
644
645 /*
646 * If no =~ is specified, search with equality over names and
647 * descriptions.
648 * If =~ begins the phrase, use name and description fields.
649 */
650
651 if (NULL == (v = strpbrk(buf, "=~"))) {
652 e->substr = buf;
653 e->bits = search->deftype;
654 return(e);
655 } else if (v == buf)
656 e->bits = search->deftype;
657
658 if ('~' == *v++) {
659 if (NULL != strstr(buf, "arch"))
660 cs = 0;
661 if (0 != (irc = regcomp(&e->regexp, v,
662 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
663 regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
664 fprintf(stderr, "regcomp: %s\n", errbuf);
665 free(e);
666 return(NULL);
667 }
668 } else
669 e->substr = v;
670 v[-1] = '\0';
671
672 /*
673 * Parse out all possible fields.
674 * If the field doesn't resolve, bail.
675 */
676
677 while (NULL != (key = strsep(&buf, ","))) {
678 if ('\0' == *key)
679 continue;
680 for (i = 0, iterbit = 1;
681 i < mansearch_keymax;
682 i++, iterbit <<= 1) {
683 if (0 == strcasecmp(key,
684 mansearch_keynames[i])) {
685 e->bits |= iterbit;
686 break;
687 }
688 }
689 if (i == mansearch_keymax) {
690 if (strcasecmp(key, "any")) {
691 free(e);
692 return(NULL);
693 }
694 e->bits |= ~0ULL;
695 }
696 }
697
698 return(e);
699 }
700
701 static void
702 exprfree(struct expr *p)
703 {
704 struct expr *pp;
705
706 while (NULL != p) {
707 pp = p->next;
708 free(p);
709 p = pp;
710 }
711 }
712
/*
 * Memory callback installed as "halloc" for the ohash table:
 * get "sz" bytes from mandoc_calloc().  "arg" is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void		*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
719
/*
 * Memory callback installed as "alloc" for the ohash table:
 * get "sz" bytes from mandoc_malloc().  "arg" is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void		*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
726
/*
 * Memory callback installed as "hfree" for the ohash table:
 * release "p" with free().  "sz" and "arg" are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}