]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
The files mandoc.c and mandoc.h contained both specialised low-level
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.23 2014/03/23 11:25:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "mandoc_aux.h"
43 #include "manpath.h"
44 #include "mansearch.h"
45
46 extern int mansearch_keymax;
47 extern const char *const mansearch_keynames[];
48
/*
 * Helpers for binding parameters of prepared statements.
 * Each macro binds the value _v to parameter number _i of the
 * statement _s and then post-increments _i, so _i has to be a
 * modifiable lvalue and is evaluated with a side effect.
 * A failing bind is only reported on stderr, using the error
 * message of the database handle _db.
 * Text and blob values are passed with SQLITE_STATIC.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text((_s), (_i)++, \
		    (_v), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64((_s), (_i)++, (_v))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_blob((_s), (_i)++, \
		    (&_v), sizeof(_v), SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
64
/*
 * One term of a compiled search expression.
 * The terms are chained into a singly linked list via "next".
 */
struct expr {
	/* type-mask: the keys this term is matched against */
	uint64_t	 bits;
	/* string to search for; NULL when "regexp" is used instead */
	const char	*substr;
	/* compiled regexp, if applicable */
	regex_t		 regexp;
	/* number of opening parentheses before this term */
	int		 open;
	/* non-zero for a logical AND before this term */
	int		 and;
	/* number of closing parentheses after this term */
	int		 close;
	/* next term in sequence, or NULL */
	struct expr	*next;
};
74
/*
 * One page found by the main query.
 * Entries are kept in a hash table keyed on "id".
 */
struct match {
	/* identifier in database */
	uint64_t	 id;
	/* 0 == catpage */
	int		 form;
};
79
80 static void buildnames(struct manpage *, sqlite3 *,
81 sqlite3_stmt *, uint64_t,
82 const char *, int form);
83 static char *buildoutput(sqlite3 *, sqlite3_stmt *,
84 uint64_t, uint64_t);
85 static void *hash_alloc(size_t, void *);
86 static void hash_free(void *, size_t, void *);
87 static void *hash_halloc(size_t, void *);
88 static struct expr *exprcomp(const struct mansearch *,
89 int, char *[]);
90 static void exprfree(struct expr *);
91 static struct expr *exprspec(struct expr *, uint64_t,
92 const char *, const char *);
93 static struct expr *exprterm(const struct mansearch *, char *, int);
94 static void sql_append(char **sql, size_t *sz,
95 const char *newstr, int count);
96 static void sql_match(sqlite3_context *context,
97 int argc, sqlite3_value **argv);
98 static void sql_regexp(sqlite3_context *context,
99 int argc, sqlite3_value **argv);
100 static char *sql_statement(const struct expr *);
101
102 int
103 mansearch(const struct mansearch *search,
104 const struct manpaths *paths,
105 int argc, char *argv[],
106 const char *outkey,
107 struct manpage **res, size_t *sz)
108 {
109 int fd, rc, c, indexbit;
110 int64_t id;
111 uint64_t outbit, iterbit;
112 char buf[PATH_MAX];
113 char *sql;
114 struct manpage *mpage;
115 struct expr *e, *ep;
116 sqlite3 *db;
117 sqlite3_stmt *s, *s2;
118 struct match *mp;
119 struct ohash_info info;
120 struct ohash htab;
121 unsigned int idx;
122 size_t i, j, cur, maxres;
123
124 memset(&info, 0, sizeof(struct ohash_info));
125
126 info.halloc = hash_halloc;
127 info.alloc = hash_alloc;
128 info.hfree = hash_free;
129 info.key_offset = offsetof(struct match, id);
130
131 *sz = cur = maxres = 0;
132 sql = NULL;
133 *res = NULL;
134 fd = -1;
135 e = NULL;
136 rc = 0;
137
138 if (0 == argc)
139 goto out;
140 if (NULL == (e = exprcomp(search, argc, argv)))
141 goto out;
142
143 outbit = 0;
144 if (NULL != outkey) {
145 for (indexbit = 0, iterbit = 1;
146 indexbit < mansearch_keymax;
147 indexbit++, iterbit <<= 1) {
148 if (0 == strcasecmp(outkey,
149 mansearch_keynames[indexbit])) {
150 outbit = iterbit;
151 break;
152 }
153 }
154 }
155
156 /*
157 * Save a descriptor to the current working directory.
158 * Since pathnames in the "paths" variable might be relative,
159 * and we'll be chdir()ing into them, we need to keep a handle
160 * on our current directory from which to start the chdir().
161 */
162
163 if (NULL == getcwd(buf, PATH_MAX)) {
164 perror(NULL);
165 goto out;
166 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
167 perror(buf);
168 goto out;
169 }
170
171 sql = sql_statement(e);
172
173 /*
174 * Loop over the directories (containing databases) for us to
175 * search.
176 * Don't let missing/bad databases/directories phase us.
177 * In each, try to open the resident database and, if it opens,
178 * scan it for our match expression.
179 */
180
181 for (i = 0; i < paths->sz; i++) {
182 if (-1 == fchdir(fd)) {
183 perror(buf);
184 free(*res);
185 break;
186 } else if (-1 == chdir(paths->paths[i])) {
187 perror(paths->paths[i]);
188 continue;
189 }
190
191 c = sqlite3_open_v2
192 (MANDOC_DB, &db,
193 SQLITE_OPEN_READONLY, NULL);
194
195 if (SQLITE_OK != c) {
196 perror(MANDOC_DB);
197 sqlite3_close(db);
198 continue;
199 }
200
201 /*
202 * Define the SQL functions for substring
203 * and regular expression matching.
204 */
205
206 c = sqlite3_create_function(db, "match", 2,
207 SQLITE_ANY, NULL, sql_match, NULL, NULL);
208 assert(SQLITE_OK == c);
209 c = sqlite3_create_function(db, "regexp", 2,
210 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
211 assert(SQLITE_OK == c);
212
213 j = 1;
214 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
215 if (SQLITE_OK != c)
216 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
217
218 for (ep = e; NULL != ep; ep = ep->next) {
219 if (NULL == ep->substr) {
220 SQL_BIND_BLOB(db, s, j, ep->regexp);
221 } else
222 SQL_BIND_TEXT(db, s, j, ep->substr);
223 SQL_BIND_INT64(db, s, j, ep->bits);
224 }
225
226 memset(&htab, 0, sizeof(struct ohash));
227 ohash_init(&htab, 4, &info);
228
229 /*
230 * Hash each entry on its [unique] document identifier.
231 * This is a uint64_t.
232 * Instead of using a hash function, simply convert the
233 * uint64_t to a uint32_t, the hash value's type.
234 * This gives good performance and preserves the
235 * distribution of buckets in the table.
236 */
237 while (SQLITE_ROW == (c = sqlite3_step(s))) {
238 id = sqlite3_column_int64(s, 1);
239 idx = ohash_lookup_memory
240 (&htab, (char *)&id,
241 sizeof(uint64_t), (uint32_t)id);
242
243 if (NULL != ohash_find(&htab, idx))
244 continue;
245
246 mp = mandoc_calloc(1, sizeof(struct match));
247 mp->id = id;
248 mp->form = sqlite3_column_int(s, 0);
249 ohash_insert(&htab, idx, mp);
250 }
251
252 if (SQLITE_DONE != c)
253 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
254
255 sqlite3_finalize(s);
256
257 c = sqlite3_prepare_v2(db,
258 "SELECT * FROM mlinks WHERE pageid=?"
259 " ORDER BY sec, arch, name",
260 -1, &s, NULL);
261 if (SQLITE_OK != c)
262 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
263
264 c = sqlite3_prepare_v2(db,
265 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
266 -1, &s2, NULL);
267 if (SQLITE_OK != c)
268 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
269
270 for (mp = ohash_first(&htab, &idx);
271 NULL != mp;
272 mp = ohash_next(&htab, &idx)) {
273 if (cur + 1 > maxres) {
274 maxres += 1024;
275 *res = mandoc_realloc
276 (*res, maxres * sizeof(struct manpage));
277 }
278 mpage = *res + cur;
279 mpage->form = mp->form;
280 buildnames(mpage, db, s, mp->id,
281 paths->paths[i], mp->form);
282 mpage->output = outbit ?
283 buildoutput(db, s2, mp->id, outbit) : NULL;
284
285 free(mp);
286 cur++;
287 }
288
289 sqlite3_finalize(s);
290 sqlite3_finalize(s2);
291 sqlite3_close(db);
292 ohash_delete(&htab);
293 }
294 rc = 1;
295 out:
296 exprfree(e);
297 if (-1 != fd)
298 close(fd);
299 free(sql);
300 *sz = cur;
301 return(rc);
302 }
303
304 static void
305 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
306 uint64_t id, const char *path, int form)
307 {
308 char *newnames, *prevsec, *prevarch;
309 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
310 size_t i;
311 int c;
312
313 mpage->names = NULL;
314 prevsec = prevarch = NULL;
315 i = 1;
316 SQL_BIND_INT64(db, s, i, id);
317 while (SQLITE_ROW == (c = sqlite3_step(s))) {
318
319 /* Decide whether we already have some names. */
320
321 if (NULL == mpage->names) {
322 oldnames = "";
323 sep1 = "";
324 } else {
325 oldnames = mpage->names;
326 sep1 = ", ";
327 }
328
329 /* Fetch the next name. */
330
331 sec = sqlite3_column_text(s, 0);
332 arch = sqlite3_column_text(s, 1);
333 name = sqlite3_column_text(s, 2);
334
335 /* If the section changed, append the old one. */
336
337 if (NULL != prevsec &&
338 (strcmp(sec, prevsec) ||
339 strcmp(arch, prevarch))) {
340 sep2 = '\0' == *prevarch ? "" : "/";
341 if (-1 == asprintf(&newnames, "%s(%s%s%s)",
342 oldnames, prevsec, sep2, prevarch)) {
343 perror(0);
344 exit((int)MANDOCLEVEL_SYSERR);
345 }
346 free(mpage->names);
347 oldnames = mpage->names = newnames;
348 free(prevsec);
349 free(prevarch);
350 prevsec = prevarch = NULL;
351 }
352
353 /* Save the new section, to append it later. */
354
355 if (NULL == prevsec) {
356 prevsec = mandoc_strdup(sec);
357 prevarch = mandoc_strdup(arch);
358 }
359
360 /* Append the new name. */
361
362 if (-1 == asprintf(&newnames, "%s%s%s",
363 oldnames, sep1, name)) {
364 perror(0);
365 exit((int)MANDOCLEVEL_SYSERR);
366 }
367 free(mpage->names);
368 mpage->names = newnames;
369
370 /* Also save the first file name encountered. */
371
372 if (NULL != mpage->file)
373 continue;
374
375 if (form) {
376 sep1 = "man";
377 fsec = sec;
378 } else {
379 sep1 = "cat";
380 fsec = "0";
381 }
382 sep2 = '\0' == *arch ? "" : "/";
383 if (-1 == asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
384 path, sep1, sec, sep2, arch, name, fsec)) {
385 perror(0);
386 exit((int)MANDOCLEVEL_SYSERR);
387 }
388 }
389 if (SQLITE_DONE != c)
390 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
391 sqlite3_reset(s);
392
393 /* Append one final section to the names. */
394
395 if (NULL != prevsec) {
396 sep2 = '\0' == *prevarch ? "" : "/";
397 if (-1 == asprintf(&newnames, "%s(%s%s%s)",
398 mpage->names, prevsec, sep2, prevarch)) {
399 perror(0);
400 exit((int)MANDOCLEVEL_SYSERR);
401 }
402 free(mpage->names);
403 mpage->names = newnames;
404 free(prevsec);
405 free(prevarch);
406 }
407 }
408
409 static char *
410 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
411 {
412 char *output, *newoutput;
413 const char *oldoutput, *sep1, *data;
414 size_t i;
415 int c;
416
417 output = NULL;
418 i = 1;
419 SQL_BIND_INT64(db, s, i, id);
420 SQL_BIND_INT64(db, s, i, outbit);
421 while (SQLITE_ROW == (c = sqlite3_step(s))) {
422 if (NULL == output) {
423 oldoutput = "";
424 sep1 = "";
425 } else {
426 oldoutput = output;
427 sep1 = " # ";
428 }
429 data = sqlite3_column_text(s, 1);
430 if (-1 == asprintf(&newoutput, "%s%s%s",
431 oldoutput, sep1, data)) {
432 perror(0);
433 exit((int)MANDOCLEVEL_SYSERR);
434 }
435 free(output);
436 output = newoutput;
437 }
438 if (SQLITE_DONE != c)
439 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
440 sqlite3_reset(s);
441 return(output);
442 }
443
444 /*
445 * Implement substring match as an application-defined SQL function.
446 * Using the SQL LIKE or GLOB operators instead would be a bad idea
447 * because that would require escaping metacharacters in the string
448 * being searched for.
449 */
450 static void
451 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
452 {
453
454 assert(2 == argc);
455 sqlite3_result_int(context, NULL != strcasestr(
456 (const char *)sqlite3_value_text(argv[1]),
457 (const char *)sqlite3_value_text(argv[0])));
458 }
459
460 /*
461 * Implement regular expression match
462 * as an application-defined SQL function.
463 */
464 static void
465 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
466 {
467
468 assert(2 == argc);
469 sqlite3_result_int(context, !regexec(
470 (regex_t *)sqlite3_value_blob(argv[0]),
471 (const char *)sqlite3_value_text(argv[1]),
472 0, NULL, 0));
473 }
474
/*
 * Append to the string *sql, which is *sz bytes long not counting
 * the terminating NUL, and update *sz accordingly.
 * If count is larger than 1, the first character of newstr is
 * appended count times; otherwise, the whole of newstr is appended
 * once.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t	 addlen;

	if (1 < count) {
		addlen = (size_t)count;
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memset(*sql + *sz, *newstr, addlen);
	} else {
		addlen = strlen(newstr);
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memcpy(*sql + *sz, newstr, addlen);
	}

	*sz += addlen;
	(*sql)[*sz] = '\0';
}
489
490 /*
491 * Prepare the search SQL statement.
492 */
493 static char *
494 sql_statement(const struct expr *e)
495 {
496 char *sql;
497 size_t sz;
498 int needop;
499
500 sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
501 sz = strlen(sql);
502
503 for (needop = 0; NULL != e; e = e->next) {
504 if (e->and)
505 sql_append(&sql, &sz, " AND ", 1);
506 else if (needop)
507 sql_append(&sql, &sz, " OR ", 1);
508 if (e->open)
509 sql_append(&sql, &sz, "(", e->open);
510 sql_append(&sql, &sz, NULL == e->substr ?
511 "id IN (SELECT pageid FROM keys "
512 "WHERE key REGEXP ? AND bits & ?)" :
513 "id IN (SELECT pageid FROM keys "
514 "WHERE key MATCH ? AND bits & ?)", 1);
515 if (e->close)
516 sql_append(&sql, &sz, ")", e->close);
517 needop = 1;
518 }
519
520 return(sql);
521 }
522
523 /*
524 * Compile a set of string tokens into an expression.
525 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
526 * "(", "foo=bar", etc.).
527 */
528 static struct expr *
529 exprcomp(const struct mansearch *search, int argc, char *argv[])
530 {
531 int i, toopen, logic, igncase, toclose;
532 struct expr *first, *next, *cur;
533
534 first = cur = NULL;
535 logic = igncase = toclose = 0;
536 toopen = 1;
537
538 for (i = 0; i < argc; i++) {
539 if (0 == strcmp("(", argv[i])) {
540 if (igncase)
541 goto fail;
542 toopen++;
543 toclose++;
544 continue;
545 } else if (0 == strcmp(")", argv[i])) {
546 if (toopen || logic || igncase || NULL == cur)
547 goto fail;
548 cur->close++;
549 if (0 > --toclose)
550 goto fail;
551 continue;
552 } else if (0 == strcmp("-a", argv[i])) {
553 if (toopen || logic || igncase || NULL == cur)
554 goto fail;
555 logic = 1;
556 continue;
557 } else if (0 == strcmp("-o", argv[i])) {
558 if (toopen || logic || igncase || NULL == cur)
559 goto fail;
560 logic = 2;
561 continue;
562 } else if (0 == strcmp("-i", argv[i])) {
563 if (igncase)
564 goto fail;
565 igncase = 1;
566 continue;
567 }
568 next = exprterm(search, argv[i], !igncase);
569 if (NULL == next)
570 goto fail;
571 next->open = toopen;
572 next->and = (1 == logic);
573 if (NULL != first) {
574 cur->next = next;
575 cur = next;
576 } else
577 cur = first = next;
578 toopen = logic = igncase = 0;
579 }
580 if (toopen || logic || igncase || toclose)
581 goto fail;
582
583 cur->close++;
584 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
585 exprspec(cur, TYPE_sec, search->sec, "^%s$");
586
587 return(first);
588
589 fail:
590 if (NULL != first)
591 exprfree(first);
592 return(NULL);
593 }
594
595 static struct expr *
596 exprspec(struct expr *cur, uint64_t key, const char *value,
597 const char *format)
598 {
599 char errbuf[BUFSIZ];
600 char *cp;
601 int irc;
602
603 if (NULL == value)
604 return(cur);
605
606 if (-1 == asprintf(&cp, format, value)) {
607 perror(0);
608 exit((int)MANDOCLEVEL_SYSERR);
609 }
610 cur->next = mandoc_calloc(1, sizeof(struct expr));
611 cur = cur->next;
612 cur->and = 1;
613 cur->bits = key;
614 if (0 != (irc = regcomp(&cur->regexp, cp,
615 REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
616 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
617 fprintf(stderr, "regcomp: %s\n", errbuf);
618 cur->substr = value;
619 }
620 free(cp);
621 return(cur);
622 }
623
624 static struct expr *
625 exprterm(const struct mansearch *search, char *buf, int cs)
626 {
627 char errbuf[BUFSIZ];
628 struct expr *e;
629 char *key, *v;
630 uint64_t iterbit;
631 int i, irc;
632
633 if ('\0' == *buf)
634 return(NULL);
635
636 e = mandoc_calloc(1, sizeof(struct expr));
637
638 /*"whatis" mode uses an opaque string and default fields. */
639
640 if (MANSEARCH_WHATIS & search->flags) {
641 e->substr = buf;
642 e->bits = search->deftype;
643 return(e);
644 }
645
646 /*
647 * If no =~ is specified, search with equality over names and
648 * descriptions.
649 * If =~ begins the phrase, use name and description fields.
650 */
651
652 if (NULL == (v = strpbrk(buf, "=~"))) {
653 e->substr = buf;
654 e->bits = search->deftype;
655 return(e);
656 } else if (v == buf)
657 e->bits = search->deftype;
658
659 if ('~' == *v++) {
660 if (NULL != strstr(buf, "arch"))
661 cs = 0;
662 if (0 != (irc = regcomp(&e->regexp, v,
663 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
664 regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
665 fprintf(stderr, "regcomp: %s\n", errbuf);
666 free(e);
667 return(NULL);
668 }
669 } else
670 e->substr = v;
671 v[-1] = '\0';
672
673 /*
674 * Parse out all possible fields.
675 * If the field doesn't resolve, bail.
676 */
677
678 while (NULL != (key = strsep(&buf, ","))) {
679 if ('\0' == *key)
680 continue;
681 for (i = 0, iterbit = 1;
682 i < mansearch_keymax;
683 i++, iterbit <<= 1) {
684 if (0 == strcasecmp(key,
685 mansearch_keynames[i])) {
686 e->bits |= iterbit;
687 break;
688 }
689 }
690 if (i == mansearch_keymax) {
691 if (strcasecmp(key, "any")) {
692 free(e);
693 return(NULL);
694 }
695 e->bits |= ~0ULL;
696 }
697 }
698
699 return(e);
700 }
701
702 static void
703 exprfree(struct expr *p)
704 {
705 struct expr *pp;
706
707 while (NULL != p) {
708 pp = p->next;
709 free(p);
710 p = pp;
711 }
712 }
713
/*
 * Allocation callback handed to ohash as "halloc":
 * return sz bytes of zeroed memory.
 * The callback argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
720
/*
 * Allocation callback handed to ohash as "alloc":
 * return sz bytes of memory.
 * The callback argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
727
/*
 * Deallocation callback handed to ohash as "hfree": release p.
 * Neither the size nor the callback argument is used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}