]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
Sync to OpenBSD:
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.21 2014/01/19 23:09:30 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "manpath.h"
43 #include "mansearch.h"
44
45 extern int mansearch_keymax;
46 extern const char *const mansearch_keynames[];
47
/*
 * Parameter binding helpers for prepared statements.
 * Each macro binds _v to parameter number _i of statement _s,
 * post-increments _i so that the next bind goes to the following
 * parameter, and prints the error message of database _db to
 * stderr if the bind fails.
 * Text and blobs are bound with SQLITE_STATIC; per the SQLite API,
 * that means the storage is not copied, so it has to stay valid
 * for as long as the statement uses it.
 * SQL_BIND_BLOB takes an object (not a pointer) and binds its bytes.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
63
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in the order they were parsed.
 * A term is either a substring match (substr != NULL) or a regular
 * expression match (substr == NULL, regexp compiled).
 */
struct	expr {
	uint64_t	 bits;		/* mask of key types to search */
	const char	*substr;	/* substring to find, or NULL */
	regex_t		 regexp;	/* compiled pattern if no substr */
	int		 open;		/* number of '(' before this term */
	int		 and;		/* joined to predecessor by AND */
	int		 close;		/* number of ')' after this term */
	struct expr	*next;		/* following term, or NULL */
};
73
/*
 * One matching page, as collected in the per-database hash table.
 * The table is keyed on "id".
 */
struct	match {
	uint64_t	 id;	/* page identifier in the database */
	int		 form;	/* zero for a preformatted (cat) page */
};
78
79 static void buildnames(struct manpage *, sqlite3 *,
80 sqlite3_stmt *, uint64_t,
81 const char *, int form);
82 static char *buildoutput(sqlite3 *, sqlite3_stmt *,
83 uint64_t, uint64_t);
84 static void *hash_alloc(size_t, void *);
85 static void hash_free(void *, size_t, void *);
86 static void *hash_halloc(size_t, void *);
87 static struct expr *exprcomp(const struct mansearch *,
88 int, char *[]);
89 static void exprfree(struct expr *);
90 static struct expr *exprspec(struct expr *, uint64_t,
91 const char *, const char *);
92 static struct expr *exprterm(const struct mansearch *, char *, int);
93 static void sql_append(char **sql, size_t *sz,
94 const char *newstr, int count);
95 static void sql_match(sqlite3_context *context,
96 int argc, sqlite3_value **argv);
97 static void sql_regexp(sqlite3_context *context,
98 int argc, sqlite3_value **argv);
99 static char *sql_statement(const struct expr *);
100
101 int
102 mansearch(const struct mansearch *search,
103 const struct manpaths *paths,
104 int argc, char *argv[],
105 const char *outkey,
106 struct manpage **res, size_t *sz)
107 {
108 int fd, rc, c, indexbit;
109 int64_t id;
110 uint64_t outbit, iterbit;
111 char buf[PATH_MAX];
112 char *sql;
113 struct manpage *mpage;
114 struct expr *e, *ep;
115 sqlite3 *db;
116 sqlite3_stmt *s, *s2;
117 struct match *mp;
118 struct ohash_info info;
119 struct ohash htab;
120 unsigned int idx;
121 size_t i, j, cur, maxres;
122
123 memset(&info, 0, sizeof(struct ohash_info));
124
125 info.halloc = hash_halloc;
126 info.alloc = hash_alloc;
127 info.hfree = hash_free;
128 info.key_offset = offsetof(struct match, id);
129
130 *sz = cur = maxres = 0;
131 sql = NULL;
132 *res = NULL;
133 fd = -1;
134 e = NULL;
135 rc = 0;
136
137 if (0 == argc)
138 goto out;
139 if (NULL == (e = exprcomp(search, argc, argv)))
140 goto out;
141
142 outbit = 0;
143 if (NULL != outkey) {
144 for (indexbit = 0, iterbit = 1;
145 indexbit < mansearch_keymax;
146 indexbit++, iterbit <<= 1) {
147 if (0 == strcasecmp(outkey,
148 mansearch_keynames[indexbit])) {
149 outbit = iterbit;
150 break;
151 }
152 }
153 }
154
155 /*
156 * Save a descriptor to the current working directory.
157 * Since pathnames in the "paths" variable might be relative,
158 * and we'll be chdir()ing into them, we need to keep a handle
159 * on our current directory from which to start the chdir().
160 */
161
162 if (NULL == getcwd(buf, PATH_MAX)) {
163 perror(NULL);
164 goto out;
165 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
166 perror(buf);
167 goto out;
168 }
169
170 sql = sql_statement(e);
171
172 /*
173 * Loop over the directories (containing databases) for us to
174 * search.
175 * Don't let missing/bad databases/directories phase us.
176 * In each, try to open the resident database and, if it opens,
177 * scan it for our match expression.
178 */
179
180 for (i = 0; i < paths->sz; i++) {
181 if (-1 == fchdir(fd)) {
182 perror(buf);
183 free(*res);
184 break;
185 } else if (-1 == chdir(paths->paths[i])) {
186 perror(paths->paths[i]);
187 continue;
188 }
189
190 c = sqlite3_open_v2
191 (MANDOC_DB, &db,
192 SQLITE_OPEN_READONLY, NULL);
193
194 if (SQLITE_OK != c) {
195 perror(MANDOC_DB);
196 sqlite3_close(db);
197 continue;
198 }
199
200 /*
201 * Define the SQL functions for substring
202 * and regular expression matching.
203 */
204
205 c = sqlite3_create_function(db, "match", 2,
206 SQLITE_ANY, NULL, sql_match, NULL, NULL);
207 assert(SQLITE_OK == c);
208 c = sqlite3_create_function(db, "regexp", 2,
209 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
210 assert(SQLITE_OK == c);
211
212 j = 1;
213 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
214 if (SQLITE_OK != c)
215 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
216
217 for (ep = e; NULL != ep; ep = ep->next) {
218 if (NULL == ep->substr) {
219 SQL_BIND_BLOB(db, s, j, ep->regexp);
220 } else
221 SQL_BIND_TEXT(db, s, j, ep->substr);
222 SQL_BIND_INT64(db, s, j, ep->bits);
223 }
224
225 memset(&htab, 0, sizeof(struct ohash));
226 ohash_init(&htab, 4, &info);
227
228 /*
229 * Hash each entry on its [unique] document identifier.
230 * This is a uint64_t.
231 * Instead of using a hash function, simply convert the
232 * uint64_t to a uint32_t, the hash value's type.
233 * This gives good performance and preserves the
234 * distribution of buckets in the table.
235 */
236 while (SQLITE_ROW == (c = sqlite3_step(s))) {
237 id = sqlite3_column_int64(s, 1);
238 idx = ohash_lookup_memory
239 (&htab, (char *)&id,
240 sizeof(uint64_t), (uint32_t)id);
241
242 if (NULL != ohash_find(&htab, idx))
243 continue;
244
245 mp = mandoc_calloc(1, sizeof(struct match));
246 mp->id = id;
247 mp->form = sqlite3_column_int(s, 0);
248 ohash_insert(&htab, idx, mp);
249 }
250
251 if (SQLITE_DONE != c)
252 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
253
254 sqlite3_finalize(s);
255
256 c = sqlite3_prepare_v2(db,
257 "SELECT * FROM mlinks WHERE pageid=?",
258 -1, &s, NULL);
259 if (SQLITE_OK != c)
260 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
261
262 c = sqlite3_prepare_v2(db,
263 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
264 -1, &s2, NULL);
265 if (SQLITE_OK != c)
266 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
267
268 for (mp = ohash_first(&htab, &idx);
269 NULL != mp;
270 mp = ohash_next(&htab, &idx)) {
271 if (cur + 1 > maxres) {
272 maxres += 1024;
273 *res = mandoc_realloc
274 (*res, maxres * sizeof(struct manpage));
275 }
276 mpage = *res + cur;
277 mpage->form = mp->form;
278 buildnames(mpage, db, s, mp->id,
279 paths->paths[i], mp->form);
280 mpage->output = outbit ?
281 buildoutput(db, s2, mp->id, outbit) : NULL;
282
283 free(mp);
284 cur++;
285 }
286
287 sqlite3_finalize(s);
288 sqlite3_finalize(s2);
289 sqlite3_close(db);
290 ohash_delete(&htab);
291 }
292 rc = 1;
293 out:
294 exprfree(e);
295 if (-1 != fd)
296 close(fd);
297 free(sql);
298 *sz = cur;
299 return(rc);
300 }
301
302 static void
303 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
304 uint64_t id, const char *path, int form)
305 {
306 char *newnames;
307 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
308 size_t i;
309 int c;
310
311 mpage->names = NULL;
312 i = 1;
313 SQL_BIND_INT64(db, s, i, id);
314 while (SQLITE_ROW == (c = sqlite3_step(s))) {
315
316 /* Assemble the list of names. */
317
318 if (NULL == mpage->names) {
319 oldnames = "";
320 sep1 = "";
321 } else {
322 oldnames = mpage->names;
323 sep1 = ", ";
324 }
325 sec = sqlite3_column_text(s, 0);
326 arch = sqlite3_column_text(s, 1);
327 name = sqlite3_column_text(s, 2);
328 sep2 = '\0' == *arch ? "" : "/";
329 if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
330 oldnames, sep1, name, sec, sep2, arch)) {
331 perror(0);
332 exit((int)MANDOCLEVEL_SYSERR);
333 }
334 free(mpage->names);
335 mpage->names = newnames;
336
337 /* Also save the first file name encountered. */
338
339 if (NULL != mpage->file)
340 continue;
341
342 if (form) {
343 sep1 = "man";
344 fsec = sec;
345 } else {
346 sep1 = "cat";
347 fsec = "0";
348 }
349 if (-1 == asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
350 path, sep1, sec, sep2, arch, name, fsec)) {
351 perror(0);
352 exit((int)MANDOCLEVEL_SYSERR);
353 }
354 }
355 if (SQLITE_DONE != c)
356 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
357 sqlite3_reset(s);
358 }
359
360 static char *
361 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
362 {
363 char *output, *newoutput;
364 const char *oldoutput, *sep1, *data;
365 size_t i;
366 int c;
367
368 output = NULL;
369 i = 1;
370 SQL_BIND_INT64(db, s, i, id);
371 SQL_BIND_INT64(db, s, i, outbit);
372 while (SQLITE_ROW == (c = sqlite3_step(s))) {
373 if (NULL == output) {
374 oldoutput = "";
375 sep1 = "";
376 } else {
377 oldoutput = output;
378 sep1 = " # ";
379 }
380 data = sqlite3_column_text(s, 1);
381 if (-1 == asprintf(&newoutput, "%s%s%s",
382 oldoutput, sep1, data)) {
383 perror(0);
384 exit((int)MANDOCLEVEL_SYSERR);
385 }
386 free(output);
387 output = newoutput;
388 }
389 if (SQLITE_DONE != c)
390 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
391 sqlite3_reset(s);
392 return(output);
393 }
394
395 /*
396 * Implement substring match as an application-defined SQL function.
397 * Using the SQL LIKE or GLOB operators instead would be a bad idea
398 * because that would require escaping metacharacters in the string
399 * being searched for.
400 */
401 static void
402 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
403 {
404
405 assert(2 == argc);
406 sqlite3_result_int(context, NULL != strcasestr(
407 (const char *)sqlite3_value_text(argv[1]),
408 (const char *)sqlite3_value_text(argv[0])));
409 }
410
411 /*
412 * Implement regular expression match
413 * as an application-defined SQL function.
414 */
415 static void
416 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
417 {
418
419 assert(2 == argc);
420 sqlite3_result_int(context, !regexec(
421 (regex_t *)sqlite3_value_blob(argv[0]),
422 (const char *)sqlite3_value_text(argv[1]),
423 0, NULL, 0));
424 }
425
/*
 * Append to the NUL-terminated string *sql of length *sz,
 * growing the buffer and updating both the pointer and the length.
 * If count is greater than one, the first character of newstr
 * is appended count times; otherwise, newstr is appended once.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t	 addlen;
	char	*tail;

	addlen = count > 1 ? (size_t)count : strlen(newstr);
	*sql = mandoc_realloc(*sql, *sz + addlen + 1);

	tail = *sql + *sz;
	if (count > 1)
		memset(tail, *newstr, addlen);
	else
		memcpy(tail, newstr, addlen);
	tail[addlen] = '\0';

	*sz += addlen;
}
440
441 /*
442 * Prepare the search SQL statement.
443 */
444 static char *
445 sql_statement(const struct expr *e)
446 {
447 char *sql;
448 size_t sz;
449 int needop;
450
451 sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
452 sz = strlen(sql);
453
454 for (needop = 0; NULL != e; e = e->next) {
455 if (e->and)
456 sql_append(&sql, &sz, " AND ", 1);
457 else if (needop)
458 sql_append(&sql, &sz, " OR ", 1);
459 if (e->open)
460 sql_append(&sql, &sz, "(", e->open);
461 sql_append(&sql, &sz, NULL == e->substr ?
462 "id IN (SELECT pageid FROM keys "
463 "WHERE key REGEXP ? AND bits & ?)" :
464 "id IN (SELECT pageid FROM keys "
465 "WHERE key MATCH ? AND bits & ?)", 1);
466 if (e->close)
467 sql_append(&sql, &sz, ")", e->close);
468 needop = 1;
469 }
470
471 return(sql);
472 }
473
474 /*
475 * Compile a set of string tokens into an expression.
476 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
477 * "(", "foo=bar", etc.).
478 */
479 static struct expr *
480 exprcomp(const struct mansearch *search, int argc, char *argv[])
481 {
482 int i, toopen, logic, igncase, toclose;
483 struct expr *first, *next, *cur;
484
485 first = cur = NULL;
486 logic = igncase = toclose = 0;
487 toopen = 1;
488
489 for (i = 0; i < argc; i++) {
490 if (0 == strcmp("(", argv[i])) {
491 if (igncase)
492 goto fail;
493 toopen++;
494 toclose++;
495 continue;
496 } else if (0 == strcmp(")", argv[i])) {
497 if (toopen || logic || igncase || NULL == cur)
498 goto fail;
499 cur->close++;
500 if (0 > --toclose)
501 goto fail;
502 continue;
503 } else if (0 == strcmp("-a", argv[i])) {
504 if (toopen || logic || igncase || NULL == cur)
505 goto fail;
506 logic = 1;
507 continue;
508 } else if (0 == strcmp("-o", argv[i])) {
509 if (toopen || logic || igncase || NULL == cur)
510 goto fail;
511 logic = 2;
512 continue;
513 } else if (0 == strcmp("-i", argv[i])) {
514 if (igncase)
515 goto fail;
516 igncase = 1;
517 continue;
518 }
519 next = exprterm(search, argv[i], !igncase);
520 if (NULL == next)
521 goto fail;
522 next->open = toopen;
523 next->and = (1 == logic);
524 if (NULL != first) {
525 cur->next = next;
526 cur = next;
527 } else
528 cur = first = next;
529 toopen = logic = igncase = 0;
530 }
531 if (toopen || logic || igncase || toclose)
532 goto fail;
533
534 cur->close++;
535 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
536 exprspec(cur, TYPE_sec, search->sec, "^%s$");
537
538 return(first);
539
540 fail:
541 if (NULL != first)
542 exprfree(first);
543 return(NULL);
544 }
545
546 static struct expr *
547 exprspec(struct expr *cur, uint64_t key, const char *value,
548 const char *format)
549 {
550 char errbuf[BUFSIZ];
551 char *cp;
552 int irc;
553
554 if (NULL == value)
555 return(cur);
556
557 if (-1 == asprintf(&cp, format, value)) {
558 perror(0);
559 exit((int)MANDOCLEVEL_SYSERR);
560 }
561 cur->next = mandoc_calloc(1, sizeof(struct expr));
562 cur = cur->next;
563 cur->and = 1;
564 cur->bits = key;
565 if (0 != (irc = regcomp(&cur->regexp, cp,
566 REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
567 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
568 fprintf(stderr, "regcomp: %s\n", errbuf);
569 cur->substr = value;
570 }
571 free(cp);
572 return(cur);
573 }
574
575 static struct expr *
576 exprterm(const struct mansearch *search, char *buf, int cs)
577 {
578 char errbuf[BUFSIZ];
579 struct expr *e;
580 char *key, *v;
581 uint64_t iterbit;
582 int i, irc;
583
584 if ('\0' == *buf)
585 return(NULL);
586
587 e = mandoc_calloc(1, sizeof(struct expr));
588
589 /*"whatis" mode uses an opaque string and default fields. */
590
591 if (MANSEARCH_WHATIS & search->flags) {
592 e->substr = buf;
593 e->bits = search->deftype;
594 return(e);
595 }
596
597 /*
598 * If no =~ is specified, search with equality over names and
599 * descriptions.
600 * If =~ begins the phrase, use name and description fields.
601 */
602
603 if (NULL == (v = strpbrk(buf, "=~"))) {
604 e->substr = buf;
605 e->bits = search->deftype;
606 return(e);
607 } else if (v == buf)
608 e->bits = search->deftype;
609
610 if ('~' == *v++) {
611 if (NULL != strstr(buf, "arch"))
612 cs = 0;
613 if (0 != (irc = regcomp(&e->regexp, v,
614 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
615 regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
616 fprintf(stderr, "regcomp: %s\n", errbuf);
617 free(e);
618 return(NULL);
619 }
620 } else
621 e->substr = v;
622 v[-1] = '\0';
623
624 /*
625 * Parse out all possible fields.
626 * If the field doesn't resolve, bail.
627 */
628
629 while (NULL != (key = strsep(&buf, ","))) {
630 if ('\0' == *key)
631 continue;
632 for (i = 0, iterbit = 1;
633 i < mansearch_keymax;
634 i++, iterbit <<= 1) {
635 if (0 == strcasecmp(key,
636 mansearch_keynames[i])) {
637 e->bits |= iterbit;
638 break;
639 }
640 }
641 if (i == mansearch_keymax) {
642 if (strcasecmp(key, "any")) {
643 free(e);
644 return(NULL);
645 }
646 e->bits |= ~0ULL;
647 }
648 }
649
650 return(e);
651 }
652
653 static void
654 exprfree(struct expr *p)
655 {
656 struct expr *pp;
657
658 while (NULL != p) {
659 pp = p->next;
660 free(p);
661 p = pp;
662 }
663 }
664
/*
 * Allocation callback for the hash table, installed as the
 * "halloc" member of struct ohash_info.
 * Returns "sz" bytes of zeroed memory; "arg" is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	(void)arg;
	return mandoc_calloc(sz, 1);
}
671
/*
 * Allocation callback for the hash table, installed as the
 * "alloc" member of struct ohash_info.
 * Returns "sz" bytes of memory; "arg" is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	(void)arg;
	return mandoc_malloc(sz);
}
678
/*
 * Deallocation callback for the hash table, installed as the
 * "hfree" member of struct ohash_info.
 * Releases "p"; the size and the extra argument are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{
	(void)sz;
	(void)arg;
	free(p);
}