]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
document -Q and -T; from OpenBSD
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.25 2014/03/28 19:17:12 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "mandoc_aux.h"
43 #include "manpath.h"
44 #include "mansearch.h"
45
/*
 * Table of search key names, defined outside this file.
 * Code in this file walks indices 0 .. mansearch_keymax - 1 and pairs
 * the name at index i with the type bit (1 << i).
 */
extern int mansearch_keymax;
extern const char *const mansearch_keynames[];
48
/*
 * Helpers for binding parameters of a prepared statement.
 * Each one binds _v at parameter index _i of statement _s and then
 * post-increments _i, so that consecutive invocations fill
 * consecutive placeholders.
 * A failing bind is merely reported on stderr, using the error
 * message of the database handle _db; execution continues.
 * Text and blobs are passed with SQLITE_STATIC, so the caller
 * has to keep the data alive while the statement uses it.
 * SQL_BIND_BLOB takes an lvalue and binds its address and size.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
64
/*
 * One term of a compiled search expression.
 * The terms form a singly linked list in evaluation order.
 * A term with a non-NULL substr is matched as a substring;
 * otherwise, its compiled regexp is used.
 */
struct expr {
	uint64_t 	 bits;    /* type-mask: key types this term applies to */
	const char	*substr;  /* to search for, if applicable; not owned */
	regex_t		 regexp;  /* compiled regexp, if applicable */
	int		 open;    /* number of opening parentheses before */
	int		 and;     /* logical AND before; otherwise OR */
	int		 close;   /* number of closing parentheses after */
	struct expr	*next;    /* next in sequence, or NULL */
};
74
/*
 * One page found by the search query.
 * Entries are kept in an ohash table keyed on the id field
 * until they are converted to struct manpage results.
 */
struct match {
	uint64_t	 id; /* identifier in database */
	int		 form; /* 0 == catpage */
};
79
80 static void buildnames(struct manpage *, sqlite3 *,
81 sqlite3_stmt *, uint64_t,
82 const char *, int form);
83 static char *buildoutput(sqlite3 *, sqlite3_stmt *,
84 uint64_t, uint64_t);
85 static void *hash_alloc(size_t, void *);
86 static void hash_free(void *, size_t, void *);
87 static void *hash_halloc(size_t, void *);
88 static struct expr *exprcomp(const struct mansearch *,
89 int, char *[]);
90 static void exprfree(struct expr *);
91 static struct expr *exprspec(struct expr *, uint64_t,
92 const char *, const char *);
93 static struct expr *exprterm(const struct mansearch *, char *, int);
94 static void sql_append(char **sql, size_t *sz,
95 const char *newstr, int count);
96 static void sql_match(sqlite3_context *context,
97 int argc, sqlite3_value **argv);
98 static void sql_regexp(sqlite3_context *context,
99 int argc, sqlite3_value **argv);
100 static char *sql_statement(const struct expr *);
101
102 int
103 mansearch(const struct mansearch *search,
104 const struct manpaths *paths,
105 int argc, char *argv[],
106 const char *outkey,
107 struct manpage **res, size_t *sz)
108 {
109 int fd, rc, c, indexbit;
110 int64_t id;
111 uint64_t outbit, iterbit;
112 char buf[PATH_MAX];
113 char *sql;
114 struct manpage *mpage;
115 struct expr *e, *ep;
116 sqlite3 *db;
117 sqlite3_stmt *s, *s2;
118 struct match *mp;
119 struct ohash_info info;
120 struct ohash htab;
121 unsigned int idx;
122 size_t i, j, cur, maxres;
123
124 memset(&info, 0, sizeof(struct ohash_info));
125
126 info.halloc = hash_halloc;
127 info.alloc = hash_alloc;
128 info.hfree = hash_free;
129 info.key_offset = offsetof(struct match, id);
130
131 *sz = cur = maxres = 0;
132 sql = NULL;
133 *res = NULL;
134 fd = -1;
135 e = NULL;
136 rc = 0;
137
138 if (0 == argc)
139 goto out;
140 if (NULL == (e = exprcomp(search, argc, argv)))
141 goto out;
142
143 outbit = 0;
144 if (NULL != outkey) {
145 for (indexbit = 0, iterbit = 1;
146 indexbit < mansearch_keymax;
147 indexbit++, iterbit <<= 1) {
148 if (0 == strcasecmp(outkey,
149 mansearch_keynames[indexbit])) {
150 outbit = iterbit;
151 break;
152 }
153 }
154 }
155
156 /*
157 * Save a descriptor to the current working directory.
158 * Since pathnames in the "paths" variable might be relative,
159 * and we'll be chdir()ing into them, we need to keep a handle
160 * on our current directory from which to start the chdir().
161 */
162
163 if (NULL == getcwd(buf, PATH_MAX)) {
164 perror(NULL);
165 goto out;
166 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
167 perror(buf);
168 goto out;
169 }
170
171 sql = sql_statement(e);
172
173 /*
174 * Loop over the directories (containing databases) for us to
175 * search.
176 * Don't let missing/bad databases/directories phase us.
177 * In each, try to open the resident database and, if it opens,
178 * scan it for our match expression.
179 */
180
181 for (i = 0; i < paths->sz; i++) {
182 if (-1 == fchdir(fd)) {
183 perror(buf);
184 free(*res);
185 break;
186 } else if (-1 == chdir(paths->paths[i])) {
187 perror(paths->paths[i]);
188 continue;
189 }
190
191 c = sqlite3_open_v2
192 (MANDOC_DB, &db,
193 SQLITE_OPEN_READONLY, NULL);
194
195 if (SQLITE_OK != c) {
196 perror(MANDOC_DB);
197 sqlite3_close(db);
198 continue;
199 }
200
201 /*
202 * Define the SQL functions for substring
203 * and regular expression matching.
204 */
205
206 c = sqlite3_create_function(db, "match", 2,
207 SQLITE_ANY, NULL, sql_match, NULL, NULL);
208 assert(SQLITE_OK == c);
209 c = sqlite3_create_function(db, "regexp", 2,
210 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
211 assert(SQLITE_OK == c);
212
213 j = 1;
214 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
215 if (SQLITE_OK != c)
216 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
217
218 for (ep = e; NULL != ep; ep = ep->next) {
219 if (NULL == ep->substr) {
220 SQL_BIND_BLOB(db, s, j, ep->regexp);
221 } else
222 SQL_BIND_TEXT(db, s, j, ep->substr);
223 SQL_BIND_INT64(db, s, j, ep->bits);
224 }
225
226 memset(&htab, 0, sizeof(struct ohash));
227 ohash_init(&htab, 4, &info);
228
229 /*
230 * Hash each entry on its [unique] document identifier.
231 * This is a uint64_t.
232 * Instead of using a hash function, simply convert the
233 * uint64_t to a uint32_t, the hash value's type.
234 * This gives good performance and preserves the
235 * distribution of buckets in the table.
236 */
237 while (SQLITE_ROW == (c = sqlite3_step(s))) {
238 id = sqlite3_column_int64(s, 1);
239 idx = ohash_lookup_memory
240 (&htab, (char *)&id,
241 sizeof(uint64_t), (uint32_t)id);
242
243 if (NULL != ohash_find(&htab, idx))
244 continue;
245
246 mp = mandoc_calloc(1, sizeof(struct match));
247 mp->id = id;
248 mp->form = sqlite3_column_int(s, 0);
249 ohash_insert(&htab, idx, mp);
250 }
251
252 if (SQLITE_DONE != c)
253 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
254
255 sqlite3_finalize(s);
256
257 c = sqlite3_prepare_v2(db,
258 "SELECT * FROM mlinks WHERE pageid=?"
259 " ORDER BY sec, arch, name",
260 -1, &s, NULL);
261 if (SQLITE_OK != c)
262 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
263
264 c = sqlite3_prepare_v2(db,
265 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
266 -1, &s2, NULL);
267 if (SQLITE_OK != c)
268 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
269
270 for (mp = ohash_first(&htab, &idx);
271 NULL != mp;
272 mp = ohash_next(&htab, &idx)) {
273 if (cur + 1 > maxres) {
274 maxres += 1024;
275 *res = mandoc_realloc
276 (*res, maxres * sizeof(struct manpage));
277 }
278 mpage = *res + cur;
279 mpage->form = mp->form;
280 buildnames(mpage, db, s, mp->id,
281 paths->paths[i], mp->form);
282 mpage->output = outbit ?
283 buildoutput(db, s2, mp->id, outbit) : NULL;
284
285 free(mp);
286 cur++;
287 }
288
289 sqlite3_finalize(s);
290 sqlite3_finalize(s2);
291 sqlite3_close(db);
292 ohash_delete(&htab);
293 }
294 rc = 1;
295 out:
296 exprfree(e);
297 if (-1 != fd)
298 close(fd);
299 free(sql);
300 *sz = cur;
301 return(rc);
302 }
303
304 static void
305 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
306 uint64_t id, const char *path, int form)
307 {
308 char *newnames, *prevsec, *prevarch;
309 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
310 size_t i;
311 int c;
312
313 mpage->file = NULL;
314 mpage->names = NULL;
315 prevsec = prevarch = NULL;
316 i = 1;
317 SQL_BIND_INT64(db, s, i, id);
318 while (SQLITE_ROW == (c = sqlite3_step(s))) {
319
320 /* Decide whether we already have some names. */
321
322 if (NULL == mpage->names) {
323 oldnames = "";
324 sep1 = "";
325 } else {
326 oldnames = mpage->names;
327 sep1 = ", ";
328 }
329
330 /* Fetch the next name. */
331
332 sec = sqlite3_column_text(s, 0);
333 arch = sqlite3_column_text(s, 1);
334 name = sqlite3_column_text(s, 2);
335
336 /* If the section changed, append the old one. */
337
338 if (NULL != prevsec &&
339 (strcmp(sec, prevsec) ||
340 strcmp(arch, prevarch))) {
341 sep2 = '\0' == *prevarch ? "" : "/";
342 mandoc_asprintf(&newnames, "%s(%s%s%s)",
343 oldnames, prevsec, sep2, prevarch);
344 free(mpage->names);
345 oldnames = mpage->names = newnames;
346 free(prevsec);
347 free(prevarch);
348 prevsec = prevarch = NULL;
349 }
350
351 /* Save the new section, to append it later. */
352
353 if (NULL == prevsec) {
354 prevsec = mandoc_strdup(sec);
355 prevarch = mandoc_strdup(arch);
356 }
357
358 /* Append the new name. */
359
360 mandoc_asprintf(&newnames, "%s%s%s",
361 oldnames, sep1, name);
362 free(mpage->names);
363 mpage->names = newnames;
364
365 /* Also save the first file name encountered. */
366
367 if (NULL != mpage->file)
368 continue;
369
370 if (form) {
371 sep1 = "man";
372 fsec = sec;
373 } else {
374 sep1 = "cat";
375 fsec = "0";
376 }
377 sep2 = '\0' == *arch ? "" : "/";
378 mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
379 path, sep1, sec, sep2, arch, name, fsec);
380 }
381 if (SQLITE_DONE != c)
382 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
383 sqlite3_reset(s);
384
385 /* Append one final section to the names. */
386
387 if (NULL != prevsec) {
388 sep2 = '\0' == *prevarch ? "" : "/";
389 mandoc_asprintf(&newnames, "%s(%s%s%s)",
390 mpage->names, prevsec, sep2, prevarch);
391 free(mpage->names);
392 mpage->names = newnames;
393 free(prevsec);
394 free(prevarch);
395 }
396 }
397
398 static char *
399 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
400 {
401 char *output, *newoutput;
402 const char *oldoutput, *sep1, *data;
403 size_t i;
404 int c;
405
406 output = NULL;
407 i = 1;
408 SQL_BIND_INT64(db, s, i, id);
409 SQL_BIND_INT64(db, s, i, outbit);
410 while (SQLITE_ROW == (c = sqlite3_step(s))) {
411 if (NULL == output) {
412 oldoutput = "";
413 sep1 = "";
414 } else {
415 oldoutput = output;
416 sep1 = " # ";
417 }
418 data = sqlite3_column_text(s, 1);
419 mandoc_asprintf(&newoutput, "%s%s%s",
420 oldoutput, sep1, data);
421 free(output);
422 output = newoutput;
423 }
424 if (SQLITE_DONE != c)
425 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
426 sqlite3_reset(s);
427 return(output);
428 }
429
430 /*
431 * Implement substring match as an application-defined SQL function.
432 * Using the SQL LIKE or GLOB operators instead would be a bad idea
433 * because that would require escaping metacharacters in the string
434 * being searched for.
435 */
436 static void
437 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
438 {
439
440 assert(2 == argc);
441 sqlite3_result_int(context, NULL != strcasestr(
442 (const char *)sqlite3_value_text(argv[1]),
443 (const char *)sqlite3_value_text(argv[0])));
444 }
445
446 /*
447 * Implement regular expression match
448 * as an application-defined SQL function.
449 */
450 static void
451 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
452 {
453
454 assert(2 == argc);
455 sqlite3_result_int(context, !regexec(
456 (regex_t *)sqlite3_value_blob(argv[0]),
457 (const char *)sqlite3_value_text(argv[1]),
458 0, NULL, 0));
459 }
460
/*
 * Append to the NUL-terminated, growable string *sql,
 * which currently has the length *sz, and update both.
 * If count is greater than one, the first character of newstr
 * is appended count times; otherwise, all of newstr is appended.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	const int	 repeat = count > 1;
	size_t		 addlen;
	char		*dst;

	addlen = repeat ? (size_t)count : strlen(newstr);
	*sql = mandoc_realloc(*sql, *sz + addlen + 1);

	/* Write behind the existing content and terminate again. */

	dst = *sql + *sz;
	if (repeat)
		memset(dst, *newstr, addlen);
	else
		memcpy(dst, newstr, addlen);
	dst[addlen] = '\0';
	*sz += addlen;
}
475
476 /*
477 * Prepare the search SQL statement.
478 */
479 static char *
480 sql_statement(const struct expr *e)
481 {
482 char *sql;
483 size_t sz;
484 int needop;
485
486 sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
487 sz = strlen(sql);
488
489 for (needop = 0; NULL != e; e = e->next) {
490 if (e->and)
491 sql_append(&sql, &sz, " AND ", 1);
492 else if (needop)
493 sql_append(&sql, &sz, " OR ", 1);
494 if (e->open)
495 sql_append(&sql, &sz, "(", e->open);
496 sql_append(&sql, &sz, NULL == e->substr ?
497 "id IN (SELECT pageid FROM keys "
498 "WHERE key REGEXP ? AND bits & ?)" :
499 "id IN (SELECT pageid FROM keys "
500 "WHERE key MATCH ? AND bits & ?)", 1);
501 if (e->close)
502 sql_append(&sql, &sz, ")", e->close);
503 needop = 1;
504 }
505
506 return(sql);
507 }
508
509 /*
510 * Compile a set of string tokens into an expression.
511 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
512 * "(", "foo=bar", etc.).
513 */
514 static struct expr *
515 exprcomp(const struct mansearch *search, int argc, char *argv[])
516 {
517 int i, toopen, logic, igncase, toclose;
518 struct expr *first, *next, *cur;
519
520 first = cur = NULL;
521 logic = igncase = toclose = 0;
522 toopen = 1;
523
524 for (i = 0; i < argc; i++) {
525 if (0 == strcmp("(", argv[i])) {
526 if (igncase)
527 goto fail;
528 toopen++;
529 toclose++;
530 continue;
531 } else if (0 == strcmp(")", argv[i])) {
532 if (toopen || logic || igncase || NULL == cur)
533 goto fail;
534 cur->close++;
535 if (0 > --toclose)
536 goto fail;
537 continue;
538 } else if (0 == strcmp("-a", argv[i])) {
539 if (toopen || logic || igncase || NULL == cur)
540 goto fail;
541 logic = 1;
542 continue;
543 } else if (0 == strcmp("-o", argv[i])) {
544 if (toopen || logic || igncase || NULL == cur)
545 goto fail;
546 logic = 2;
547 continue;
548 } else if (0 == strcmp("-i", argv[i])) {
549 if (igncase)
550 goto fail;
551 igncase = 1;
552 continue;
553 }
554 next = exprterm(search, argv[i], !igncase);
555 if (NULL == next)
556 goto fail;
557 next->open = toopen;
558 next->and = (1 == logic);
559 if (NULL != first) {
560 cur->next = next;
561 cur = next;
562 } else
563 cur = first = next;
564 toopen = logic = igncase = 0;
565 }
566 if (toopen || logic || igncase || toclose)
567 goto fail;
568
569 cur->close++;
570 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
571 exprspec(cur, TYPE_sec, search->sec, "^%s$");
572
573 return(first);
574
575 fail:
576 if (NULL != first)
577 exprfree(first);
578 return(NULL);
579 }
580
581 static struct expr *
582 exprspec(struct expr *cur, uint64_t key, const char *value,
583 const char *format)
584 {
585 char errbuf[BUFSIZ];
586 char *cp;
587 int irc;
588
589 if (NULL == value)
590 return(cur);
591
592 mandoc_asprintf(&cp, format, value);
593 cur->next = mandoc_calloc(1, sizeof(struct expr));
594 cur = cur->next;
595 cur->and = 1;
596 cur->bits = key;
597 if (0 != (irc = regcomp(&cur->regexp, cp,
598 REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
599 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
600 fprintf(stderr, "regcomp: %s\n", errbuf);
601 cur->substr = value;
602 }
603 free(cp);
604 return(cur);
605 }
606
607 static struct expr *
608 exprterm(const struct mansearch *search, char *buf, int cs)
609 {
610 char errbuf[BUFSIZ];
611 struct expr *e;
612 char *key, *v;
613 uint64_t iterbit;
614 int i, irc;
615
616 if ('\0' == *buf)
617 return(NULL);
618
619 e = mandoc_calloc(1, sizeof(struct expr));
620
621 /*"whatis" mode uses an opaque string and default fields. */
622
623 if (MANSEARCH_WHATIS & search->flags) {
624 e->substr = buf;
625 e->bits = search->deftype;
626 return(e);
627 }
628
629 /*
630 * If no =~ is specified, search with equality over names and
631 * descriptions.
632 * If =~ begins the phrase, use name and description fields.
633 */
634
635 if (NULL == (v = strpbrk(buf, "=~"))) {
636 e->substr = buf;
637 e->bits = search->deftype;
638 return(e);
639 } else if (v == buf)
640 e->bits = search->deftype;
641
642 if ('~' == *v++) {
643 if (NULL != strstr(buf, "arch"))
644 cs = 0;
645 if (0 != (irc = regcomp(&e->regexp, v,
646 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
647 regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
648 fprintf(stderr, "regcomp: %s\n", errbuf);
649 free(e);
650 return(NULL);
651 }
652 } else
653 e->substr = v;
654 v[-1] = '\0';
655
656 /*
657 * Parse out all possible fields.
658 * If the field doesn't resolve, bail.
659 */
660
661 while (NULL != (key = strsep(&buf, ","))) {
662 if ('\0' == *key)
663 continue;
664 for (i = 0, iterbit = 1;
665 i < mansearch_keymax;
666 i++, iterbit <<= 1) {
667 if (0 == strcasecmp(key,
668 mansearch_keynames[i])) {
669 e->bits |= iterbit;
670 break;
671 }
672 }
673 if (i == mansearch_keymax) {
674 if (strcasecmp(key, "any")) {
675 free(e);
676 return(NULL);
677 }
678 e->bits |= ~0ULL;
679 }
680 }
681
682 return(e);
683 }
684
685 static void
686 exprfree(struct expr *p)
687 {
688 struct expr *pp;
689
690 while (NULL != p) {
691 pp = p->next;
692 free(p);
693 p = pp;
694 }
695 }
696
/*
 * Allocation callback for the hash table (ohash_info.halloc):
 * return sz bytes of zeroed memory.  The argument is unused.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_calloc(sz, 1);
	return(p);
}
703
/*
 * Allocation callback for the hash table (ohash_info.alloc):
 * return sz bytes of memory.  The argument is unused.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_malloc(sz);
	return(p);
}
710
/*
 * Deallocation callback for the hash table (ohash_info.hfree):
 * release the memory at p.  The size and the argument are unused.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}