]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
Use C99 uint32_t, not BSD-style u_int32_t.
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.12 2013/12/31 03:41:14 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "manpath.h"
43 #include "mansearch.h"
44
/*
 * Bind the NUL-terminated string val_ to parameter idx_ of statement
 * stmt_, then post-increment idx_ so the next bind targets the next
 * parameter.  The string is bound with SQLITE_STATIC.
 * A failed bind only prints the database error message to stderr.
 */
#define	SQL_BIND_TEXT(db_, stmt_, idx_, val_) \
	do { \
		if (sqlite3_bind_text((stmt_), (idx_)++, (val_), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((db_))); \
	} while (0)
/*
 * Bind the 64-bit integer val_ to parameter idx_ of statement stmt_,
 * then post-increment idx_.
 * A failed bind only prints the database error message to stderr.
 */
#define	SQL_BIND_INT64(db_, stmt_, idx_, val_) \
	do { \
		if (sqlite3_bind_int64((stmt_), (idx_)++, (val_)) != \
		    SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((db_))); \
	} while (0)
/*
 * Bind the object val_ itself (its address and sizeof) as a blob to
 * parameter idx_ of statement stmt_, then post-increment idx_.
 * The blob is bound with SQLITE_STATIC, so val_ must be an lvalue.
 * A failed bind only prints the database error message to stderr.
 */
#define	SQL_BIND_BLOB(db_, stmt_, idx_, val_) \
	do { \
		if (sqlite3_bind_blob((stmt_), (idx_)++, (&val_), \
		    sizeof(val_), SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((db_))); \
	} while (0)
60
/*
 * One term of a search expression.
 * Terms are chained through "next" into a singly linked list.
 * A term with a NULL "substr" is matched with "regexp" instead.
 */
struct expr {
	uint64_t bits; /* type-mask */
	const char *substr; /* to search for, if applicable */
	regex_t regexp; /* compiled regexp, if applicable */
	struct expr *next; /* next in sequence */
};
67
/*
 * One matching page collected from a database.
 * Entries are stored in an ohash table keyed on the "id" member.
 */
struct match {
	uint64_t id; /* identifier in database */
	char *file; /* relative filepath of manpage */
	char *desc; /* description of manpage */
	int form; /* 0 == catpage */
};
74
/*
 * Association of a search key name with its bit(s) in the type-mask.
 */
struct type {
	uint64_t bits;
	const char *name;
};
79
/*
 * Table of recognized search key names.
 * Names are compared case-insensitively by the lookups in this file.
 * "Fo" shares the bits of "Fn" and "Vt" shares the bits of "Va";
 * "any" selects every bit.
 * The table is terminated by an entry with zero bits and a NULL name.
 */
static const struct type types[] = {
	{ TYPE_An, "An" },
	{ TYPE_Ar, "Ar" },
	{ TYPE_At, "At" },
	{ TYPE_Bsx, "Bsx" },
	{ TYPE_Bx, "Bx" },
	{ TYPE_Cd, "Cd" },
	{ TYPE_Cm, "Cm" },
	{ TYPE_Dv, "Dv" },
	{ TYPE_Dx, "Dx" },
	{ TYPE_Em, "Em" },
	{ TYPE_Er, "Er" },
	{ TYPE_Ev, "Ev" },
	{ TYPE_Fa, "Fa" },
	{ TYPE_Fl, "Fl" },
	{ TYPE_Fn, "Fn" },
	{ TYPE_Fn, "Fo" },
	{ TYPE_Ft, "Ft" },
	{ TYPE_Fx, "Fx" },
	{ TYPE_Ic, "Ic" },
	{ TYPE_In, "In" },
	{ TYPE_Lb, "Lb" },
	{ TYPE_Li, "Li" },
	{ TYPE_Lk, "Lk" },
	{ TYPE_Ms, "Ms" },
	{ TYPE_Mt, "Mt" },
	{ TYPE_Nd, "Nd" },
	{ TYPE_Nm, "Nm" },
	{ TYPE_Nx, "Nx" },
	{ TYPE_Ox, "Ox" },
	{ TYPE_Pa, "Pa" },
	{ TYPE_Rs, "Rs" },
	{ TYPE_Sh, "Sh" },
	{ TYPE_Ss, "Ss" },
	{ TYPE_St, "St" },
	{ TYPE_Sy, "Sy" },
	{ TYPE_Tn, "Tn" },
	{ TYPE_Va, "Va" },
	{ TYPE_Va, "Vt" },
	{ TYPE_Xr, "Xr" },
	{ ~0ULL, "any" },
	{ 0ULL, NULL }
};
123
/*
 * Forward declarations of the file-local helpers:
 * result formatting (buildnames, buildoutput), ohash allocator
 * callbacks (hash_*), expression handling (expr*), and the
 * application-defined SQL functions and statement builder (sql_*).
 */
static char *buildnames(sqlite3 *, sqlite3_stmt *, uint64_t);
static char *buildoutput(sqlite3 *, sqlite3_stmt *,
		uint64_t, uint64_t);
static void *hash_alloc(size_t, void *);
static void hash_free(void *, size_t, void *);
static void *hash_halloc(size_t, void *);
static struct expr *exprcomp(const struct mansearch *,
		int, char *[]);
static void exprfree(struct expr *);
static struct expr *exprterm(const struct mansearch *, char *, int);
static void sql_match(sqlite3_context *context,
		int argc, sqlite3_value **argv);
static void sql_regexp(sqlite3_context *context,
		int argc, sqlite3_value **argv);
static char *sql_statement(const struct expr *,
		const char *, const char *);
140
141 int
142 mansearch(const struct mansearch *search,
143 const struct manpaths *paths,
144 int argc, char *argv[],
145 const char *outkey,
146 struct manpage **res, size_t *sz)
147 {
148 int fd, rc, c, ibit;
149 int64_t id;
150 uint64_t outbit;
151 char buf[PATH_MAX];
152 char *sql;
153 struct manpage *mpage;
154 struct expr *e, *ep;
155 sqlite3 *db;
156 sqlite3_stmt *s, *s2;
157 struct match *mp;
158 struct ohash_info info;
159 struct ohash htab;
160 unsigned int idx;
161 size_t i, j, cur, maxres;
162
163 memset(&info, 0, sizeof(struct ohash_info));
164
165 info.halloc = hash_halloc;
166 info.alloc = hash_alloc;
167 info.hfree = hash_free;
168 info.key_offset = offsetof(struct match, id);
169
170 *sz = cur = maxres = 0;
171 sql = NULL;
172 *res = NULL;
173 fd = -1;
174 e = NULL;
175 rc = 0;
176
177 if (0 == argc)
178 goto out;
179 if (NULL == (e = exprcomp(search, argc, argv)))
180 goto out;
181
182 outbit = 0;
183 if (NULL != outkey) {
184 for (ibit = 0; types[ibit].bits; ibit++) {
185 if (0 == strcasecmp(types[ibit].name, outkey)) {
186 outbit = types[ibit].bits;
187 break;
188 }
189 }
190 }
191
192 /*
193 * Save a descriptor to the current working directory.
194 * Since pathnames in the "paths" variable might be relative,
195 * and we'll be chdir()ing into them, we need to keep a handle
196 * on our current directory from which to start the chdir().
197 */
198
199 if (NULL == getcwd(buf, PATH_MAX)) {
200 perror(NULL);
201 goto out;
202 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
203 perror(buf);
204 goto out;
205 }
206
207 sql = sql_statement(e, search->arch, search->sec);
208
209 /*
210 * Loop over the directories (containing databases) for us to
211 * search.
212 * Don't let missing/bad databases/directories phase us.
213 * In each, try to open the resident database and, if it opens,
214 * scan it for our match expression.
215 */
216
217 for (i = 0; i < paths->sz; i++) {
218 if (-1 == fchdir(fd)) {
219 perror(buf);
220 free(*res);
221 break;
222 } else if (-1 == chdir(paths->paths[i])) {
223 perror(paths->paths[i]);
224 continue;
225 }
226
227 c = sqlite3_open_v2
228 (MANDOC_DB, &db,
229 SQLITE_OPEN_READONLY, NULL);
230
231 if (SQLITE_OK != c) {
232 perror(MANDOC_DB);
233 sqlite3_close(db);
234 continue;
235 }
236
237 /*
238 * Define the SQL functions for substring
239 * and regular expression matching.
240 */
241
242 c = sqlite3_create_function(db, "match", 2,
243 SQLITE_ANY, NULL, sql_match, NULL, NULL);
244 assert(SQLITE_OK == c);
245 c = sqlite3_create_function(db, "regexp", 2,
246 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
247 assert(SQLITE_OK == c);
248
249 j = 1;
250 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
251 if (SQLITE_OK != c)
252 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
253
254 if (NULL != search->arch)
255 SQL_BIND_TEXT(db, s, j, search->arch);
256 if (NULL != search->sec)
257 SQL_BIND_TEXT(db, s, j, search->sec);
258
259 for (ep = e; NULL != ep; ep = ep->next) {
260 if (NULL == ep->substr) {
261 SQL_BIND_BLOB(db, s, j, ep->regexp);
262 } else
263 SQL_BIND_TEXT(db, s, j, ep->substr);
264 SQL_BIND_INT64(db, s, j, ep->bits);
265 }
266
267 memset(&htab, 0, sizeof(struct ohash));
268 ohash_init(&htab, 4, &info);
269
270 /*
271 * Hash each entry on its [unique] document identifier.
272 * This is a uint64_t.
273 * Instead of using a hash function, simply convert the
274 * uint64_t to a uint32_t, the hash value's type.
275 * This gives good performance and preserves the
276 * distribution of buckets in the table.
277 */
278 while (SQLITE_ROW == (c = sqlite3_step(s))) {
279 id = sqlite3_column_int64(s, 0);
280 idx = ohash_lookup_memory
281 (&htab, (char *)&id,
282 sizeof(uint64_t), (uint32_t)id);
283
284 if (NULL != ohash_find(&htab, idx))
285 continue;
286
287 mp = mandoc_calloc(1, sizeof(struct match));
288 mp->id = id;
289 mp->file = mandoc_strdup
290 ((char *)sqlite3_column_text(s, 3));
291 mp->desc = mandoc_strdup
292 ((char *)sqlite3_column_text(s, 4));
293 mp->form = sqlite3_column_int(s, 5);
294 ohash_insert(&htab, idx, mp);
295 }
296
297 if (SQLITE_DONE != c)
298 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
299
300 sqlite3_finalize(s);
301
302 c = sqlite3_prepare_v2(db,
303 "SELECT * FROM mlinks WHERE pageid=?",
304 -1, &s, NULL);
305 if (SQLITE_OK != c)
306 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
307
308 c = sqlite3_prepare_v2(db,
309 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
310 -1, &s2, NULL);
311 if (SQLITE_OK != c)
312 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
313
314 for (mp = ohash_first(&htab, &idx);
315 NULL != mp;
316 mp = ohash_next(&htab, &idx)) {
317 if (cur + 1 > maxres) {
318 maxres += 1024;
319 *res = mandoc_realloc
320 (*res, maxres * sizeof(struct manpage));
321 }
322 mpage = *res + cur;
323 if (-1 == asprintf(&mpage->file, "%s/%s",
324 paths->paths[i], mp->file)) {
325 perror(0);
326 exit((int)MANDOCLEVEL_SYSERR);
327 }
328 mpage->desc = mp->desc;
329 mpage->form = mp->form;
330 mpage->names = buildnames(db, s, mp->id);
331 mpage->output = outbit ?
332 buildoutput(db, s2, mp->id, outbit) : NULL;
333
334 free(mp->file);
335 free(mp);
336 cur++;
337 }
338
339 sqlite3_finalize(s);
340 sqlite3_finalize(s2);
341 sqlite3_close(db);
342 ohash_delete(&htab);
343 }
344 rc = 1;
345 out:
346 exprfree(e);
347 if (-1 != fd)
348 close(fd);
349 free(sql);
350 *sz = cur;
351 return(rc);
352 }
353
354 static char *
355 buildnames(sqlite3 *db, sqlite3_stmt *s, uint64_t id)
356 {
357 char *names, *newnames;
358 const char *oldnames, *sep1, *name, *sec, *sep2, *arch;
359 size_t i;
360 int c;
361
362 names = NULL;
363 i = 1;
364 SQL_BIND_INT64(db, s, i, id);
365 while (SQLITE_ROW == (c = sqlite3_step(s))) {
366 if (NULL == names) {
367 oldnames = "";
368 sep1 = "";
369 } else {
370 oldnames = names;
371 sep1 = ", ";
372 }
373 sec = sqlite3_column_text(s, 1);
374 arch = sqlite3_column_text(s, 2);
375 name = sqlite3_column_text(s, 3);
376 sep2 = '\0' == *arch ? "" : "/";
377 if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
378 oldnames, sep1, name, sec, sep2, arch)) {
379 perror(0);
380 exit((int)MANDOCLEVEL_SYSERR);
381 }
382 free(names);
383 names = newnames;
384 }
385 if (SQLITE_DONE != c)
386 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
387 sqlite3_reset(s);
388 return(names);
389 }
390
391 static char *
392 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
393 {
394 char *output, *newoutput;
395 const char *oldoutput, *sep1, *data;
396 size_t i;
397 int c;
398
399 output = NULL;
400 i = 1;
401 SQL_BIND_INT64(db, s, i, id);
402 SQL_BIND_INT64(db, s, i, outbit);
403 while (SQLITE_ROW == (c = sqlite3_step(s))) {
404 if (NULL == output) {
405 oldoutput = "";
406 sep1 = "";
407 } else {
408 oldoutput = output;
409 sep1 = " # ";
410 }
411 data = sqlite3_column_text(s, 1);
412 if (-1 == asprintf(&newoutput, "%s%s%s",
413 oldoutput, sep1, data)) {
414 perror(0);
415 exit((int)MANDOCLEVEL_SYSERR);
416 }
417 free(output);
418 output = newoutput;
419 }
420 if (SQLITE_DONE != c)
421 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
422 sqlite3_reset(s);
423 return(output);
424 }
425
426 /*
427 * Implement substring match as an application-defined SQL function.
428 * Using the SQL LIKE or GLOB operators instead would be a bad idea
429 * because that would require escaping metacharacters in the string
430 * being searched for.
431 */
432 static void
433 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
434 {
435
436 assert(2 == argc);
437 sqlite3_result_int(context, NULL != strcasestr(
438 (const char *)sqlite3_value_text(argv[1]),
439 (const char *)sqlite3_value_text(argv[0])));
440 }
441
442 /*
443 * Implement regular expression match
444 * as an application-defined SQL function.
445 */
446 static void
447 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
448 {
449
450 assert(2 == argc);
451 sqlite3_result_int(context, !regexec(
452 (regex_t *)sqlite3_value_blob(argv[0]),
453 (const char *)sqlite3_value_text(argv[1]),
454 0, NULL, 0));
455 }
456
457 /*
458 * Prepare the search SQL statement.
459 * We search for any of the words specified in our match expression.
460 * We filter the per-doc AND expressions when collecting results.
461 */
462 static char *
463 sql_statement(const struct expr *e, const char *arch, const char *sec)
464 {
465 char *sql;
466 const char *substr = "(key MATCH ? AND bits & ?)";
467 const char *regexp = "(key REGEXP ? AND bits & ?)";
468 const char *andarch = "arch = ? AND ";
469 const char *andsec = "sec = ? AND ";
470 size_t substrsz;
471 size_t regexpsz;
472 size_t sz;
473
474 sql = mandoc_strdup
475 ("SELECT pageid,bits,key,file,desc,form,sec,arch "
476 "FROM keys "
477 "INNER JOIN mpages ON mpages.id=keys.pageid "
478 "WHERE ");
479 sz = strlen(sql);
480 substrsz = strlen(substr);
481 regexpsz = strlen(regexp);
482
483 if (NULL != arch) {
484 sz += strlen(andarch) + 1;
485 sql = mandoc_realloc(sql, sz);
486 strlcat(sql, andarch, sz);
487 }
488
489 if (NULL != sec) {
490 sz += strlen(andsec) + 1;
491 sql = mandoc_realloc(sql, sz);
492 strlcat(sql, andsec, sz);
493 }
494
495 sz += 2;
496 sql = mandoc_realloc(sql, sz);
497 strlcat(sql, "(", sz);
498
499 for ( ; NULL != e; e = e->next) {
500 sz += (NULL == e->substr ? regexpsz : substrsz) +
501 (NULL == e->next ? 3 : 5);
502 sql = mandoc_realloc(sql, sz);
503 strlcat(sql, NULL == e->substr ? regexp : substr, sz);
504 strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
505 }
506
507 return(sql);
508 }
509
510 /*
511 * Compile a set of string tokens into an expression.
512 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
513 * "(", "foo=bar", etc.).
514 */
515 static struct expr *
516 exprcomp(const struct mansearch *search, int argc, char *argv[])
517 {
518 int i, cs;
519 struct expr *first, *next, *cur;
520
521 first = cur = NULL;
522
523 for (i = 0; i < argc; i++) {
524 if (0 == strcmp("-i", argv[i])) {
525 if (++i >= argc)
526 return(NULL);
527 cs = 0;
528 } else
529 cs = 1;
530 next = exprterm(search, argv[i], cs);
531 if (NULL == next) {
532 exprfree(first);
533 return(NULL);
534 }
535 if (NULL != first) {
536 cur->next = next;
537 cur = next;
538 } else
539 cur = first = next;
540 }
541
542 return(first);
543 }
544
545 static struct expr *
546 exprterm(const struct mansearch *search, char *buf, int cs)
547 {
548 struct expr *e;
549 char *key, *v;
550 size_t i;
551
552 if ('\0' == *buf)
553 return(NULL);
554
555 e = mandoc_calloc(1, sizeof(struct expr));
556
557 /*"whatis" mode uses an opaque string and default fields. */
558
559 if (MANSEARCH_WHATIS & search->flags) {
560 e->substr = buf;
561 e->bits = search->deftype;
562 return(e);
563 }
564
565 /*
566 * If no =~ is specified, search with equality over names and
567 * descriptions.
568 * If =~ begins the phrase, use name and description fields.
569 */
570
571 if (NULL == (v = strpbrk(buf, "=~"))) {
572 e->substr = buf;
573 e->bits = search->deftype;
574 return(e);
575 } else if (v == buf)
576 e->bits = search->deftype;
577
578 if ('~' == *v++) {
579 if (regcomp(&e->regexp, v,
580 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
581 free(e);
582 return(NULL);
583 }
584 } else
585 e->substr = v;
586 v[-1] = '\0';
587
588 /*
589 * Parse out all possible fields.
590 * If the field doesn't resolve, bail.
591 */
592
593 while (NULL != (key = strsep(&buf, ","))) {
594 if ('\0' == *key)
595 continue;
596 i = 0;
597 while (types[i].bits &&
598 strcasecmp(types[i].name, key))
599 i++;
600 if (0 == types[i].bits) {
601 free(e);
602 return(NULL);
603 }
604 e->bits |= types[i].bits;
605 }
606
607 return(e);
608 }
609
610 static void
611 exprfree(struct expr *p)
612 {
613 struct expr *pp;
614
615 while (NULL != p) {
616 pp = p->next;
617 free(p);
618 p = pp;
619 }
620 }
621
/*
 * ohash callback installed as info.halloc:
 * return "sz" bytes of zero-filled memory.  "arg" is unused.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*zeroed;

	(void)arg;
	zeroed = mandoc_calloc(sz, 1);
	return(zeroed);
}
628
/*
 * ohash callback installed as info.alloc:
 * return "sz" bytes of memory.  "arg" is unused.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
635
/*
 * ohash callback installed as info.hfree:
 * release memory obtained from the callbacks above.
 * "sz" and "arg" are unused.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}