]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
To better support MLINKS, we will have to split the "docs" database
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.8 2013/10/20 00:03:05 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
40
41 #include "mandoc.h"
42 #include "manpath.h"
43 #include "mansearch.h"
44
/*
 * Helpers for binding values to the placeholders of a prepared
 * statement.  Each one binds _v at placeholder index _i of statement
 * _s and then post-increments _i, so that a sequence of invocations
 * fills consecutive placeholders.  A failed bind is reported on
 * stderr using the error message of database handle _db; execution
 * continues regardless.
 * Values are bound with SQLITE_STATIC, i.e. they are not copied.
 * SQL_BIND_BLOB binds the object _v itself (address and sizeof).
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
60
/*
 * One term of a search expression; terms are chained into a singly
 * linked list.  A term is either a substring term (substr is set) or
 * a regular expression term (substr is NULL and regexp is compiled).
 */
struct	expr {
	uint64_t	 bits;    /* mask of the key types to search */
	const char	*substr;  /* substring to look for, or NULL */
	regex_t		 regexp;  /* compiled regexp when substr is NULL */
	struct expr	*next;    /* following term, NULL at list end */
};
67
/*
 * One matching document read from a database.
 * Entries are kept in a hash table keyed on the id member.
 */
struct	match {
	uint64_t	 id;    /* document identifier in the database */
	char		*file;  /* manpage path, relative to the manpath */
	char		*desc;  /* manpage description */
	int		 form;  /* zero means catpage */
};
74
/*
 * Association of a key type bit mask with the field name
 * accepted for it in search expressions.
 */
struct	type {
	uint64_t	 bits;  /* key type mask */
	const char	*name;  /* field name as typed by the user */
};
79
80 static const struct type types[] = {
81 { TYPE_An, "An" },
82 { TYPE_Ar, "Ar" },
83 { TYPE_At, "At" },
84 { TYPE_Bsx, "Bsx" },
85 { TYPE_Bx, "Bx" },
86 { TYPE_Cd, "Cd" },
87 { TYPE_Cm, "Cm" },
88 { TYPE_Dv, "Dv" },
89 { TYPE_Dx, "Dx" },
90 { TYPE_Em, "Em" },
91 { TYPE_Er, "Er" },
92 { TYPE_Ev, "Ev" },
93 { TYPE_Fa, "Fa" },
94 { TYPE_Fl, "Fl" },
95 { TYPE_Fn, "Fn" },
96 { TYPE_Fn, "Fo" },
97 { TYPE_Ft, "Ft" },
98 { TYPE_Fx, "Fx" },
99 { TYPE_Ic, "Ic" },
100 { TYPE_In, "In" },
101 { TYPE_Lb, "Lb" },
102 { TYPE_Li, "Li" },
103 { TYPE_Lk, "Lk" },
104 { TYPE_Ms, "Ms" },
105 { TYPE_Mt, "Mt" },
106 { TYPE_Nd, "Nd" },
107 { TYPE_Nm, "Nm" },
108 { TYPE_Nx, "Nx" },
109 { TYPE_Ox, "Ox" },
110 { TYPE_Pa, "Pa" },
111 { TYPE_Rs, "Rs" },
112 { TYPE_Sh, "Sh" },
113 { TYPE_Ss, "Ss" },
114 { TYPE_St, "St" },
115 { TYPE_Sy, "Sy" },
116 { TYPE_Tn, "Tn" },
117 { TYPE_Va, "Va" },
118 { TYPE_Va, "Vt" },
119 { TYPE_Xr, "Xr" },
120 { ~0ULL, "any" },
121 { 0ULL, NULL }
122 };
123
124 static void *hash_alloc(size_t, void *);
125 static void hash_free(void *, size_t, void *);
126 static void *hash_halloc(size_t, void *);
127 static struct expr *exprcomp(const struct mansearch *,
128 int, char *[]);
129 static void exprfree(struct expr *);
130 static struct expr *exprterm(const struct mansearch *, char *, int);
131 static void sql_match(sqlite3_context *context,
132 int argc, sqlite3_value **argv);
133 static void sql_regexp(sqlite3_context *context,
134 int argc, sqlite3_value **argv);
135 static char *sql_statement(const struct expr *,
136 const char *, const char *);
137
138 int
139 mansearch(const struct mansearch *search,
140 const struct manpaths *paths,
141 int argc, char *argv[],
142 struct manpage **res, size_t *sz)
143 {
144 int fd, rc, c;
145 int64_t id;
146 char buf[PATH_MAX];
147 char *sql;
148 struct expr *e, *ep;
149 sqlite3 *db;
150 sqlite3_stmt *s;
151 struct match *mp;
152 struct ohash_info info;
153 struct ohash htab;
154 unsigned int idx;
155 size_t i, j, cur, maxres;
156
157 memset(&info, 0, sizeof(struct ohash_info));
158
159 info.halloc = hash_halloc;
160 info.alloc = hash_alloc;
161 info.hfree = hash_free;
162 info.key_offset = offsetof(struct match, id);
163
164 *sz = cur = maxres = 0;
165 sql = NULL;
166 *res = NULL;
167 fd = -1;
168 e = NULL;
169 rc = 0;
170
171 if (0 == argc)
172 goto out;
173 if (NULL == (e = exprcomp(search, argc, argv)))
174 goto out;
175
176 /*
177 * Save a descriptor to the current working directory.
178 * Since pathnames in the "paths" variable might be relative,
179 * and we'll be chdir()ing into them, we need to keep a handle
180 * on our current directory from which to start the chdir().
181 */
182
183 if (NULL == getcwd(buf, PATH_MAX)) {
184 perror(NULL);
185 goto out;
186 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
187 perror(buf);
188 goto out;
189 }
190
191 sql = sql_statement(e, search->arch, search->sec);
192
193 /*
194 * Loop over the directories (containing databases) for us to
195 * search.
196 * Don't let missing/bad databases/directories phase us.
197 * In each, try to open the resident database and, if it opens,
198 * scan it for our match expression.
199 */
200
201 for (i = 0; i < paths->sz; i++) {
202 if (-1 == fchdir(fd)) {
203 perror(buf);
204 free(*res);
205 break;
206 } else if (-1 == chdir(paths->paths[i])) {
207 perror(paths->paths[i]);
208 continue;
209 }
210
211 c = sqlite3_open_v2
212 (MANDOC_DB, &db,
213 SQLITE_OPEN_READONLY, NULL);
214
215 if (SQLITE_OK != c) {
216 perror(MANDOC_DB);
217 sqlite3_close(db);
218 continue;
219 }
220
221 /*
222 * Define the SQL functions for substring
223 * and regular expression matching.
224 */
225
226 c = sqlite3_create_function(db, "match", 2,
227 SQLITE_ANY, NULL, sql_match, NULL, NULL);
228 assert(SQLITE_OK == c);
229 c = sqlite3_create_function(db, "regexp", 2,
230 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
231 assert(SQLITE_OK == c);
232
233 j = 1;
234 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
235 if (SQLITE_OK != c)
236 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
237
238 if (NULL != search->arch)
239 SQL_BIND_TEXT(db, s, j, search->arch);
240 if (NULL != search->sec)
241 SQL_BIND_TEXT(db, s, j, search->sec);
242
243 for (ep = e; NULL != ep; ep = ep->next) {
244 if (NULL == ep->substr) {
245 SQL_BIND_BLOB(db, s, j, ep->regexp);
246 } else
247 SQL_BIND_TEXT(db, s, j, ep->substr);
248 SQL_BIND_INT64(db, s, j, ep->bits);
249 }
250
251 memset(&htab, 0, sizeof(struct ohash));
252 ohash_init(&htab, 4, &info);
253
254 /*
255 * Hash each entry on its [unique] document identifier.
256 * This is a uint64_t.
257 * Instead of using a hash function, simply convert the
258 * uint64_t to a uint32_t, the hash value's type.
259 * This gives good performance and preserves the
260 * distribution of buckets in the table.
261 */
262 while (SQLITE_ROW == (c = sqlite3_step(s))) {
263 id = sqlite3_column_int64(s, 0);
264 idx = ohash_lookup_memory
265 (&htab, (char *)&id,
266 sizeof(uint64_t), (uint32_t)id);
267
268 if (NULL != ohash_find(&htab, idx))
269 continue;
270
271 mp = mandoc_calloc(1, sizeof(struct match));
272 mp->id = id;
273 mp->file = mandoc_strdup
274 ((char *)sqlite3_column_text(s, 3));
275 mp->desc = mandoc_strdup
276 ((char *)sqlite3_column_text(s, 4));
277 mp->form = sqlite3_column_int(s, 5);
278 ohash_insert(&htab, idx, mp);
279 }
280
281 if (SQLITE_DONE != c)
282 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
283
284 sqlite3_finalize(s);
285 sqlite3_close(db);
286
287 for (mp = ohash_first(&htab, &idx);
288 NULL != mp;
289 mp = ohash_next(&htab, &idx)) {
290 if (cur + 1 > maxres) {
291 maxres += 1024;
292 *res = mandoc_realloc
293 (*res, maxres * sizeof(struct manpage));
294 }
295 strlcpy((*res)[cur].file,
296 paths->paths[i], PATH_MAX);
297 strlcat((*res)[cur].file, "/", PATH_MAX);
298 strlcat((*res)[cur].file, mp->file, PATH_MAX);
299 (*res)[cur].desc = mp->desc;
300 (*res)[cur].form = mp->form;
301 free(mp->file);
302 free(mp);
303 cur++;
304 }
305 ohash_delete(&htab);
306 }
307 rc = 1;
308 out:
309 exprfree(e);
310 if (-1 != fd)
311 close(fd);
312 free(sql);
313 *sz = cur;
314 return(rc);
315 }
316
317 /*
318 * Implement substring match as an application-defined SQL function.
319 * Using the SQL LIKE or GLOB operators instead would be a bad idea
320 * because that would require escaping metacharacters in the string
321 * being searched for.
322 */
323 static void
324 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
325 {
326
327 assert(2 == argc);
328 sqlite3_result_int(context, NULL != strcasestr(
329 (const char *)sqlite3_value_text(argv[1]),
330 (const char *)sqlite3_value_text(argv[0])));
331 }
332
333 /*
334 * Implement regular expression match
335 * as an application-defined SQL function.
336 */
337 static void
338 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
339 {
340
341 assert(2 == argc);
342 sqlite3_result_int(context, !regexec(
343 (regex_t *)sqlite3_value_blob(argv[0]),
344 (const char *)sqlite3_value_text(argv[1]),
345 0, NULL, 0));
346 }
347
348 /*
349 * Prepare the search SQL statement.
350 * We search for any of the words specified in our match expression.
351 * We filter the per-doc AND expressions when collecting results.
352 */
353 static char *
354 sql_statement(const struct expr *e, const char *arch, const char *sec)
355 {
356 char *sql;
357 const char *substr = "(key MATCH ? AND bits & ?)";
358 const char *regexp = "(key REGEXP ? AND bits & ?)";
359 const char *andarch = "arch = ? AND ";
360 const char *andsec = "sec = ? AND ";
361 size_t substrsz;
362 size_t regexpsz;
363 size_t sz;
364
365 sql = mandoc_strdup
366 ("SELECT docid,bits,key,file,desc,form,sec,arch "
367 "FROM keys "
368 "INNER JOIN docs ON docs.id=keys.docid "
369 "WHERE ");
370 sz = strlen(sql);
371 substrsz = strlen(substr);
372 regexpsz = strlen(regexp);
373
374 if (NULL != arch) {
375 sz += strlen(andarch) + 1;
376 sql = mandoc_realloc(sql, sz);
377 strlcat(sql, andarch, sz);
378 }
379
380 if (NULL != sec) {
381 sz += strlen(andsec) + 1;
382 sql = mandoc_realloc(sql, sz);
383 strlcat(sql, andsec, sz);
384 }
385
386 sz += 2;
387 sql = mandoc_realloc(sql, sz);
388 strlcat(sql, "(", sz);
389
390 for ( ; NULL != e; e = e->next) {
391 sz += (NULL == e->substr ? regexpsz : substrsz) +
392 (NULL == e->next ? 3 : 5);
393 sql = mandoc_realloc(sql, sz);
394 strlcat(sql, NULL == e->substr ? regexp : substr, sz);
395 strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
396 }
397
398 return(sql);
399 }
400
401 /*
402 * Compile a set of string tokens into an expression.
403 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
404 * "(", "foo=bar", etc.).
405 */
406 static struct expr *
407 exprcomp(const struct mansearch *search, int argc, char *argv[])
408 {
409 int i, cs;
410 struct expr *first, *next, *cur;
411
412 first = cur = NULL;
413
414 for (i = 0; i < argc; i++) {
415 if (0 == strcmp("-i", argv[i])) {
416 if (++i >= argc)
417 return(NULL);
418 cs = 0;
419 } else
420 cs = 1;
421 next = exprterm(search, argv[i], cs);
422 if (NULL == next) {
423 exprfree(first);
424 return(NULL);
425 }
426 if (NULL != first) {
427 cur->next = next;
428 cur = next;
429 } else
430 cur = first = next;
431 }
432
433 return(first);
434 }
435
436 static struct expr *
437 exprterm(const struct mansearch *search, char *buf, int cs)
438 {
439 struct expr *e;
440 char *key, *v;
441 size_t i;
442
443 if ('\0' == *buf)
444 return(NULL);
445
446 e = mandoc_calloc(1, sizeof(struct expr));
447
448 /*"whatis" mode uses an opaque string and default fields. */
449
450 if (MANSEARCH_WHATIS & search->flags) {
451 e->substr = buf;
452 e->bits = search->deftype;
453 return(e);
454 }
455
456 /*
457 * If no =~ is specified, search with equality over names and
458 * descriptions.
459 * If =~ begins the phrase, use name and description fields.
460 */
461
462 if (NULL == (v = strpbrk(buf, "=~"))) {
463 e->substr = buf;
464 e->bits = search->deftype;
465 return(e);
466 } else if (v == buf)
467 e->bits = search->deftype;
468
469 if ('~' == *v++) {
470 if (regcomp(&e->regexp, v,
471 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
472 free(e);
473 return(NULL);
474 }
475 } else
476 e->substr = v;
477 v[-1] = '\0';
478
479 /*
480 * Parse out all possible fields.
481 * If the field doesn't resolve, bail.
482 */
483
484 while (NULL != (key = strsep(&buf, ","))) {
485 if ('\0' == *key)
486 continue;
487 i = 0;
488 while (types[i].bits &&
489 strcasecmp(types[i].name, key))
490 i++;
491 if (0 == types[i].bits) {
492 free(e);
493 return(NULL);
494 }
495 e->bits |= types[i].bits;
496 }
497
498 return(e);
499 }
500
501 static void
502 exprfree(struct expr *p)
503 {
504 struct expr *pp;
505
506 while (NULL != p) {
507 pp = p->next;
508 free(p);
509 p = pp;
510 }
511 }
512
/*
 * ohash callback (info.halloc):
 * return "sz" bytes of zeroed memory; "arg" is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_calloc(sz, 1);
	return(p);
}
519
/*
 * ohash callback (info.alloc):
 * return "sz" bytes of uninitialized memory; "arg" is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_malloc(sz);
	return(p);
}
526
/*
 * ohash callback (info.hfree):
 * release memory obtained from the callbacks above;
 * "sz" and "arg" are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}