]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
needs mandoc_aux, too
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.24 2014/03/23 12:11:18 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "mandoc_aux.h"
43 #include "manpath.h"
44 #include "mansearch.h"
45
46 extern int mansearch_keymax;
47 extern const char *const mansearch_keynames[];
48
/*
 * Helpers for binding one parameter of a prepared statement.
 * Each macro binds value _v to parameter slot _i of statement _s,
 * then post-increments _i so that consecutive invocations fill
 * consecutive slots.  A failed bind is reported on stderr using the
 * error message of database handle _db; it is not otherwise handled.
 * Text and blob data are bound with SQLITE_STATIC.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text((_s), (_i)++, \
		    (_v), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)

#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64((_s), (_i)++, (_v))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)

/* Binds the object _v itself (its address and sizeof), not a copy. */
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_blob((_s), (_i)++, \
		    (&_v), sizeof(_v), SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
64
/*
 * One term of a compiled search expression.
 * Terms form a singly linked list in evaluation order.
 * A term is either a substring search (substr != NULL)
 * or a regular expression search (substr == NULL, regexp compiled).
 */
struct	expr {
	uint64_t	 bits;    /* mask of key types to search in */
	const char	*substr;  /* substring to look for, or NULL */
	regex_t		 regexp;  /* compiled pattern if substr is NULL */
	int		 open;    /* number of "(" emitted before the term */
	int		 and;     /* joined to its predecessor with AND */
	int		 close;   /* number of ")" emitted after the term */
	struct expr	*next;    /* following term, or NULL at the end */
};
74
/*
 * One page matched by the search query.
 * Entries are kept in a hash table keyed on the id member.
 */
struct	match {
	uint64_t	 id;   /* page identifier in the database */
	int		 form; /* zero for a catpage, non-zero otherwise */
};
79
80 static void buildnames(struct manpage *, sqlite3 *,
81 sqlite3_stmt *, uint64_t,
82 const char *, int form);
83 static char *buildoutput(sqlite3 *, sqlite3_stmt *,
84 uint64_t, uint64_t);
85 static void *hash_alloc(size_t, void *);
86 static void hash_free(void *, size_t, void *);
87 static void *hash_halloc(size_t, void *);
88 static struct expr *exprcomp(const struct mansearch *,
89 int, char *[]);
90 static void exprfree(struct expr *);
91 static struct expr *exprspec(struct expr *, uint64_t,
92 const char *, const char *);
93 static struct expr *exprterm(const struct mansearch *, char *, int);
94 static void sql_append(char **sql, size_t *sz,
95 const char *newstr, int count);
96 static void sql_match(sqlite3_context *context,
97 int argc, sqlite3_value **argv);
98 static void sql_regexp(sqlite3_context *context,
99 int argc, sqlite3_value **argv);
100 static char *sql_statement(const struct expr *);
101
102 int
103 mansearch(const struct mansearch *search,
104 const struct manpaths *paths,
105 int argc, char *argv[],
106 const char *outkey,
107 struct manpage **res, size_t *sz)
108 {
109 int fd, rc, c, indexbit;
110 int64_t id;
111 uint64_t outbit, iterbit;
112 char buf[PATH_MAX];
113 char *sql;
114 struct manpage *mpage;
115 struct expr *e, *ep;
116 sqlite3 *db;
117 sqlite3_stmt *s, *s2;
118 struct match *mp;
119 struct ohash_info info;
120 struct ohash htab;
121 unsigned int idx;
122 size_t i, j, cur, maxres;
123
124 memset(&info, 0, sizeof(struct ohash_info));
125
126 info.halloc = hash_halloc;
127 info.alloc = hash_alloc;
128 info.hfree = hash_free;
129 info.key_offset = offsetof(struct match, id);
130
131 *sz = cur = maxres = 0;
132 sql = NULL;
133 *res = NULL;
134 fd = -1;
135 e = NULL;
136 rc = 0;
137
138 if (0 == argc)
139 goto out;
140 if (NULL == (e = exprcomp(search, argc, argv)))
141 goto out;
142
143 outbit = 0;
144 if (NULL != outkey) {
145 for (indexbit = 0, iterbit = 1;
146 indexbit < mansearch_keymax;
147 indexbit++, iterbit <<= 1) {
148 if (0 == strcasecmp(outkey,
149 mansearch_keynames[indexbit])) {
150 outbit = iterbit;
151 break;
152 }
153 }
154 }
155
156 /*
157 * Save a descriptor to the current working directory.
158 * Since pathnames in the "paths" variable might be relative,
159 * and we'll be chdir()ing into them, we need to keep a handle
160 * on our current directory from which to start the chdir().
161 */
162
163 if (NULL == getcwd(buf, PATH_MAX)) {
164 perror(NULL);
165 goto out;
166 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
167 perror(buf);
168 goto out;
169 }
170
171 sql = sql_statement(e);
172
173 /*
174 * Loop over the directories (containing databases) for us to
175 * search.
176 * Don't let missing/bad databases/directories phase us.
177 * In each, try to open the resident database and, if it opens,
178 * scan it for our match expression.
179 */
180
181 for (i = 0; i < paths->sz; i++) {
182 if (-1 == fchdir(fd)) {
183 perror(buf);
184 free(*res);
185 break;
186 } else if (-1 == chdir(paths->paths[i])) {
187 perror(paths->paths[i]);
188 continue;
189 }
190
191 c = sqlite3_open_v2
192 (MANDOC_DB, &db,
193 SQLITE_OPEN_READONLY, NULL);
194
195 if (SQLITE_OK != c) {
196 perror(MANDOC_DB);
197 sqlite3_close(db);
198 continue;
199 }
200
201 /*
202 * Define the SQL functions for substring
203 * and regular expression matching.
204 */
205
206 c = sqlite3_create_function(db, "match", 2,
207 SQLITE_ANY, NULL, sql_match, NULL, NULL);
208 assert(SQLITE_OK == c);
209 c = sqlite3_create_function(db, "regexp", 2,
210 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
211 assert(SQLITE_OK == c);
212
213 j = 1;
214 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
215 if (SQLITE_OK != c)
216 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
217
218 for (ep = e; NULL != ep; ep = ep->next) {
219 if (NULL == ep->substr) {
220 SQL_BIND_BLOB(db, s, j, ep->regexp);
221 } else
222 SQL_BIND_TEXT(db, s, j, ep->substr);
223 SQL_BIND_INT64(db, s, j, ep->bits);
224 }
225
226 memset(&htab, 0, sizeof(struct ohash));
227 ohash_init(&htab, 4, &info);
228
229 /*
230 * Hash each entry on its [unique] document identifier.
231 * This is a uint64_t.
232 * Instead of using a hash function, simply convert the
233 * uint64_t to a uint32_t, the hash value's type.
234 * This gives good performance and preserves the
235 * distribution of buckets in the table.
236 */
237 while (SQLITE_ROW == (c = sqlite3_step(s))) {
238 id = sqlite3_column_int64(s, 1);
239 idx = ohash_lookup_memory
240 (&htab, (char *)&id,
241 sizeof(uint64_t), (uint32_t)id);
242
243 if (NULL != ohash_find(&htab, idx))
244 continue;
245
246 mp = mandoc_calloc(1, sizeof(struct match));
247 mp->id = id;
248 mp->form = sqlite3_column_int(s, 0);
249 ohash_insert(&htab, idx, mp);
250 }
251
252 if (SQLITE_DONE != c)
253 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
254
255 sqlite3_finalize(s);
256
257 c = sqlite3_prepare_v2(db,
258 "SELECT * FROM mlinks WHERE pageid=?"
259 " ORDER BY sec, arch, name",
260 -1, &s, NULL);
261 if (SQLITE_OK != c)
262 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
263
264 c = sqlite3_prepare_v2(db,
265 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
266 -1, &s2, NULL);
267 if (SQLITE_OK != c)
268 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
269
270 for (mp = ohash_first(&htab, &idx);
271 NULL != mp;
272 mp = ohash_next(&htab, &idx)) {
273 if (cur + 1 > maxres) {
274 maxres += 1024;
275 *res = mandoc_realloc
276 (*res, maxres * sizeof(struct manpage));
277 }
278 mpage = *res + cur;
279 mpage->form = mp->form;
280 buildnames(mpage, db, s, mp->id,
281 paths->paths[i], mp->form);
282 mpage->output = outbit ?
283 buildoutput(db, s2, mp->id, outbit) : NULL;
284
285 free(mp);
286 cur++;
287 }
288
289 sqlite3_finalize(s);
290 sqlite3_finalize(s2);
291 sqlite3_close(db);
292 ohash_delete(&htab);
293 }
294 rc = 1;
295 out:
296 exprfree(e);
297 if (-1 != fd)
298 close(fd);
299 free(sql);
300 *sz = cur;
301 return(rc);
302 }
303
304 static void
305 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
306 uint64_t id, const char *path, int form)
307 {
308 char *newnames, *prevsec, *prevarch;
309 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
310 size_t i;
311 int c;
312
313 mpage->names = NULL;
314 prevsec = prevarch = NULL;
315 i = 1;
316 SQL_BIND_INT64(db, s, i, id);
317 while (SQLITE_ROW == (c = sqlite3_step(s))) {
318
319 /* Decide whether we already have some names. */
320
321 if (NULL == mpage->names) {
322 oldnames = "";
323 sep1 = "";
324 } else {
325 oldnames = mpage->names;
326 sep1 = ", ";
327 }
328
329 /* Fetch the next name. */
330
331 sec = sqlite3_column_text(s, 0);
332 arch = sqlite3_column_text(s, 1);
333 name = sqlite3_column_text(s, 2);
334
335 /* If the section changed, append the old one. */
336
337 if (NULL != prevsec &&
338 (strcmp(sec, prevsec) ||
339 strcmp(arch, prevarch))) {
340 sep2 = '\0' == *prevarch ? "" : "/";
341 mandoc_asprintf(&newnames, "%s(%s%s%s)",
342 oldnames, prevsec, sep2, prevarch);
343 free(mpage->names);
344 oldnames = mpage->names = newnames;
345 free(prevsec);
346 free(prevarch);
347 prevsec = prevarch = NULL;
348 }
349
350 /* Save the new section, to append it later. */
351
352 if (NULL == prevsec) {
353 prevsec = mandoc_strdup(sec);
354 prevarch = mandoc_strdup(arch);
355 }
356
357 /* Append the new name. */
358
359 mandoc_asprintf(&newnames, "%s%s%s",
360 oldnames, sep1, name);
361 free(mpage->names);
362 mpage->names = newnames;
363
364 /* Also save the first file name encountered. */
365
366 if (NULL != mpage->file)
367 continue;
368
369 if (form) {
370 sep1 = "man";
371 fsec = sec;
372 } else {
373 sep1 = "cat";
374 fsec = "0";
375 }
376 sep2 = '\0' == *arch ? "" : "/";
377 mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
378 path, sep1, sec, sep2, arch, name, fsec);
379 }
380 if (SQLITE_DONE != c)
381 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
382 sqlite3_reset(s);
383
384 /* Append one final section to the names. */
385
386 if (NULL != prevsec) {
387 sep2 = '\0' == *prevarch ? "" : "/";
388 mandoc_asprintf(&newnames, "%s(%s%s%s)",
389 mpage->names, prevsec, sep2, prevarch);
390 free(mpage->names);
391 mpage->names = newnames;
392 free(prevsec);
393 free(prevarch);
394 }
395 }
396
397 static char *
398 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
399 {
400 char *output, *newoutput;
401 const char *oldoutput, *sep1, *data;
402 size_t i;
403 int c;
404
405 output = NULL;
406 i = 1;
407 SQL_BIND_INT64(db, s, i, id);
408 SQL_BIND_INT64(db, s, i, outbit);
409 while (SQLITE_ROW == (c = sqlite3_step(s))) {
410 if (NULL == output) {
411 oldoutput = "";
412 sep1 = "";
413 } else {
414 oldoutput = output;
415 sep1 = " # ";
416 }
417 data = sqlite3_column_text(s, 1);
418 mandoc_asprintf(&newoutput, "%s%s%s",
419 oldoutput, sep1, data);
420 free(output);
421 output = newoutput;
422 }
423 if (SQLITE_DONE != c)
424 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
425 sqlite3_reset(s);
426 return(output);
427 }
428
429 /*
430 * Implement substring match as an application-defined SQL function.
431 * Using the SQL LIKE or GLOB operators instead would be a bad idea
432 * because that would require escaping metacharacters in the string
433 * being searched for.
434 */
435 static void
436 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
437 {
438
439 assert(2 == argc);
440 sqlite3_result_int(context, NULL != strcasestr(
441 (const char *)sqlite3_value_text(argv[1]),
442 (const char *)sqlite3_value_text(argv[0])));
443 }
444
445 /*
446 * Implement regular expression match
447 * as an application-defined SQL function.
448 */
449 static void
450 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
451 {
452
453 assert(2 == argc);
454 sqlite3_result_int(context, !regexec(
455 (regex_t *)sqlite3_value_blob(argv[0]),
456 (const char *)sqlite3_value_text(argv[1]),
457 0, NULL, 0));
458 }
459
/*
 * Append to the dynamically allocated string *sql, whose current
 * length (excluding the terminating NUL) is *sz.
 * If count is greater than 1, the first character of newstr is
 * appended count times; otherwise, all of newstr is appended once.
 * Both *sql and *sz are updated, and the result is NUL-terminated.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t	 addlen;

	if (1 < count) {
		addlen = (size_t)count;
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memset(*sql + *sz, *newstr, addlen);
	} else {
		addlen = strlen(newstr);
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memcpy(*sql + *sz, newstr, addlen);
	}

	*sz += addlen;
	(*sql)[*sz] = '\0';
}
474
475 /*
476 * Prepare the search SQL statement.
477 */
478 static char *
479 sql_statement(const struct expr *e)
480 {
481 char *sql;
482 size_t sz;
483 int needop;
484
485 sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
486 sz = strlen(sql);
487
488 for (needop = 0; NULL != e; e = e->next) {
489 if (e->and)
490 sql_append(&sql, &sz, " AND ", 1);
491 else if (needop)
492 sql_append(&sql, &sz, " OR ", 1);
493 if (e->open)
494 sql_append(&sql, &sz, "(", e->open);
495 sql_append(&sql, &sz, NULL == e->substr ?
496 "id IN (SELECT pageid FROM keys "
497 "WHERE key REGEXP ? AND bits & ?)" :
498 "id IN (SELECT pageid FROM keys "
499 "WHERE key MATCH ? AND bits & ?)", 1);
500 if (e->close)
501 sql_append(&sql, &sz, ")", e->close);
502 needop = 1;
503 }
504
505 return(sql);
506 }
507
508 /*
509 * Compile a set of string tokens into an expression.
510 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
511 * "(", "foo=bar", etc.).
512 */
513 static struct expr *
514 exprcomp(const struct mansearch *search, int argc, char *argv[])
515 {
516 int i, toopen, logic, igncase, toclose;
517 struct expr *first, *next, *cur;
518
519 first = cur = NULL;
520 logic = igncase = toclose = 0;
521 toopen = 1;
522
523 for (i = 0; i < argc; i++) {
524 if (0 == strcmp("(", argv[i])) {
525 if (igncase)
526 goto fail;
527 toopen++;
528 toclose++;
529 continue;
530 } else if (0 == strcmp(")", argv[i])) {
531 if (toopen || logic || igncase || NULL == cur)
532 goto fail;
533 cur->close++;
534 if (0 > --toclose)
535 goto fail;
536 continue;
537 } else if (0 == strcmp("-a", argv[i])) {
538 if (toopen || logic || igncase || NULL == cur)
539 goto fail;
540 logic = 1;
541 continue;
542 } else if (0 == strcmp("-o", argv[i])) {
543 if (toopen || logic || igncase || NULL == cur)
544 goto fail;
545 logic = 2;
546 continue;
547 } else if (0 == strcmp("-i", argv[i])) {
548 if (igncase)
549 goto fail;
550 igncase = 1;
551 continue;
552 }
553 next = exprterm(search, argv[i], !igncase);
554 if (NULL == next)
555 goto fail;
556 next->open = toopen;
557 next->and = (1 == logic);
558 if (NULL != first) {
559 cur->next = next;
560 cur = next;
561 } else
562 cur = first = next;
563 toopen = logic = igncase = 0;
564 }
565 if (toopen || logic || igncase || toclose)
566 goto fail;
567
568 cur->close++;
569 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
570 exprspec(cur, TYPE_sec, search->sec, "^%s$");
571
572 return(first);
573
574 fail:
575 if (NULL != first)
576 exprfree(first);
577 return(NULL);
578 }
579
580 static struct expr *
581 exprspec(struct expr *cur, uint64_t key, const char *value,
582 const char *format)
583 {
584 char errbuf[BUFSIZ];
585 char *cp;
586 int irc;
587
588 if (NULL == value)
589 return(cur);
590
591 mandoc_asprintf(&cp, format, value);
592 cur->next = mandoc_calloc(1, sizeof(struct expr));
593 cur = cur->next;
594 cur->and = 1;
595 cur->bits = key;
596 if (0 != (irc = regcomp(&cur->regexp, cp,
597 REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
598 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
599 fprintf(stderr, "regcomp: %s\n", errbuf);
600 cur->substr = value;
601 }
602 free(cp);
603 return(cur);
604 }
605
606 static struct expr *
607 exprterm(const struct mansearch *search, char *buf, int cs)
608 {
609 char errbuf[BUFSIZ];
610 struct expr *e;
611 char *key, *v;
612 uint64_t iterbit;
613 int i, irc;
614
615 if ('\0' == *buf)
616 return(NULL);
617
618 e = mandoc_calloc(1, sizeof(struct expr));
619
620 /*"whatis" mode uses an opaque string and default fields. */
621
622 if (MANSEARCH_WHATIS & search->flags) {
623 e->substr = buf;
624 e->bits = search->deftype;
625 return(e);
626 }
627
628 /*
629 * If no =~ is specified, search with equality over names and
630 * descriptions.
631 * If =~ begins the phrase, use name and description fields.
632 */
633
634 if (NULL == (v = strpbrk(buf, "=~"))) {
635 e->substr = buf;
636 e->bits = search->deftype;
637 return(e);
638 } else if (v == buf)
639 e->bits = search->deftype;
640
641 if ('~' == *v++) {
642 if (NULL != strstr(buf, "arch"))
643 cs = 0;
644 if (0 != (irc = regcomp(&e->regexp, v,
645 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
646 regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
647 fprintf(stderr, "regcomp: %s\n", errbuf);
648 free(e);
649 return(NULL);
650 }
651 } else
652 e->substr = v;
653 v[-1] = '\0';
654
655 /*
656 * Parse out all possible fields.
657 * If the field doesn't resolve, bail.
658 */
659
660 while (NULL != (key = strsep(&buf, ","))) {
661 if ('\0' == *key)
662 continue;
663 for (i = 0, iterbit = 1;
664 i < mansearch_keymax;
665 i++, iterbit <<= 1) {
666 if (0 == strcasecmp(key,
667 mansearch_keynames[i])) {
668 e->bits |= iterbit;
669 break;
670 }
671 }
672 if (i == mansearch_keymax) {
673 if (strcasecmp(key, "any")) {
674 free(e);
675 return(NULL);
676 }
677 e->bits |= ~0ULL;
678 }
679 }
680
681 return(e);
682 }
683
684 static void
685 exprfree(struct expr *p)
686 {
687 struct expr *pp;
688
689 while (NULL != p) {
690 pp = p->next;
691 free(p);
692 p = pp;
693 }
694 }
695
/*
 * Hash table callback: allocate sz bytes of zeroed memory.
 * The opaque argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
702
/*
 * Hash table callback: allocate sz bytes of uninitialized memory.
 * The opaque argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
709
/*
 * Hash table callback: release memory obtained from one of the
 * allocation callbacks.  Neither the size nor the opaque argument
 * is needed for that.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}