]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
One of the WARNING messages has to use the word "section" twice in two
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.6 2013/06/05 02:00:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stddef.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #ifdef HAVE_OHASH
33 #include <ohash.h>
34 #else
35 #include "compat_ohash.h"
36 #endif
37 #include <sqlite3.h>
38
39 #include "mandoc.h"
40 #include "manpath.h"
41 #include "mansearch.h"
42
/*
 * Bind a value to parameter (_i) of prepared statement (_s) and then
 * advance (_i), so that consecutive invocations fill consecutive
 * parameters.  (_i) must therefore be a modifiable lvalue.
 * A failed bind is only reported: the error message of connection (_db)
 * is printed to stderr and execution continues.
 *
 * SQL_BIND_TEXT passes SQLITE_STATIC, so the string is not copied by
 * SQLite and has to outlive the statement's use of it.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to
 * exactly one statement; a bare "if" here would capture an "else"
 * written after the invocation.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text \
		    ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64 \
		    ((_s), (_i)++, (_v))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
51
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in the order they were given.
 */
struct	expr {
	int		 glob;	/* non-zero: match "v" with GLOB, else with = */
	uint64_t	 bits;	/* mask of TYPE_* fields to search in */
	const char	*v;	/* value to look for; points into the argument */
	struct expr	*next;	/* following term, or NULL at the end */
};
58
/*
 * One matching document, as collected in the per-database hash table.
 * The table is keyed on "id".
 */
struct	match {
	uint64_t	 id;	/* document identifier in the database */
	char		*file;	/* manpage path relative to the database */
	char		*desc;	/* manpage description */
	int		 form;	/* 0 == catpage */
};
65
/*
 * Association of a field name, as written in a search term, with the
 * bit(s) it selects in an expression's type mask.
 */
struct	type {
	uint64_t	 bits;	/* type-mask bit(s) for this name */
	const char	*name;	/* field name; NULL in the table sentinel */
};
70
71 static const struct type types[] = {
72 { TYPE_An, "An" },
73 { TYPE_Ar, "Ar" },
74 { TYPE_At, "At" },
75 { TYPE_Bsx, "Bsx" },
76 { TYPE_Bx, "Bx" },
77 { TYPE_Cd, "Cd" },
78 { TYPE_Cm, "Cm" },
79 { TYPE_Dv, "Dv" },
80 { TYPE_Dx, "Dx" },
81 { TYPE_Em, "Em" },
82 { TYPE_Er, "Er" },
83 { TYPE_Ev, "Ev" },
84 { TYPE_Fa, "Fa" },
85 { TYPE_Fl, "Fl" },
86 { TYPE_Fn, "Fn" },
87 { TYPE_Fn, "Fo" },
88 { TYPE_Ft, "Ft" },
89 { TYPE_Fx, "Fx" },
90 { TYPE_Ic, "Ic" },
91 { TYPE_In, "In" },
92 { TYPE_Lb, "Lb" },
93 { TYPE_Li, "Li" },
94 { TYPE_Lk, "Lk" },
95 { TYPE_Ms, "Ms" },
96 { TYPE_Mt, "Mt" },
97 { TYPE_Nd, "Nd" },
98 { TYPE_Nm, "Nm" },
99 { TYPE_Nx, "Nx" },
100 { TYPE_Ox, "Ox" },
101 { TYPE_Pa, "Pa" },
102 { TYPE_Rs, "Rs" },
103 { TYPE_Sh, "Sh" },
104 { TYPE_Ss, "Ss" },
105 { TYPE_St, "St" },
106 { TYPE_Sy, "Sy" },
107 { TYPE_Tn, "Tn" },
108 { TYPE_Va, "Va" },
109 { TYPE_Va, "Vt" },
110 { TYPE_Xr, "Xr" },
111 { ~0ULL, "any" },
112 { 0ULL, NULL }
113 };
114
/* Allocation callbacks handed to the ohash table. */
static	void		*hash_alloc(size_t sz, void *arg);
static	void		 hash_free(void *p, size_t sz, void *arg);
static	void		*hash_halloc(size_t sz, void *arg);
/* Search expression handling. */
static	struct expr	*exprcomp(const struct mansearch *search,
				int argc, char *argv[]);
static	void		 exprfree(struct expr *p);
static	struct expr	*exprterm(const struct mansearch *search,
				char *buf);
/* SQL query construction. */
static	char		*sql_statement(const struct expr *e,
				const char *arch, const char *sec);
124
125 int
126 mansearch(const struct mansearch *search,
127 const struct manpaths *paths,
128 int argc, char *argv[],
129 struct manpage **res, size_t *sz)
130 {
131 int fd, rc, c;
132 int64_t id;
133 char buf[PATH_MAX];
134 char *sql;
135 struct expr *e, *ep;
136 sqlite3 *db;
137 sqlite3_stmt *s;
138 struct match *mp;
139 struct ohash_info info;
140 struct ohash htab;
141 unsigned int idx;
142 size_t i, j, cur, maxres;
143
144 memset(&info, 0, sizeof(struct ohash_info));
145
146 info.halloc = hash_halloc;
147 info.alloc = hash_alloc;
148 info.hfree = hash_free;
149 info.key_offset = offsetof(struct match, id);
150
151 *sz = cur = maxres = 0;
152 sql = NULL;
153 *res = NULL;
154 fd = -1;
155 e = NULL;
156 rc = 0;
157
158 if (0 == argc)
159 goto out;
160 if (NULL == (e = exprcomp(search, argc, argv)))
161 goto out;
162
163 /*
164 * Save a descriptor to the current working directory.
165 * Since pathnames in the "paths" variable might be relative,
166 * and we'll be chdir()ing into them, we need to keep a handle
167 * on our current directory from which to start the chdir().
168 */
169
170 if (NULL == getcwd(buf, PATH_MAX)) {
171 perror(NULL);
172 goto out;
173 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
174 perror(buf);
175 goto out;
176 }
177
178 sql = sql_statement(e, search->arch, search->sec);
179
180 /*
181 * Loop over the directories (containing databases) for us to
182 * search.
183 * Don't let missing/bad databases/directories phase us.
184 * In each, try to open the resident database and, if it opens,
185 * scan it for our match expression.
186 */
187
188 for (i = 0; i < paths->sz; i++) {
189 if (-1 == fchdir(fd)) {
190 perror(buf);
191 free(*res);
192 break;
193 } else if (-1 == chdir(paths->paths[i])) {
194 perror(paths->paths[i]);
195 continue;
196 }
197
198 c = sqlite3_open_v2
199 (MANDOC_DB, &db,
200 SQLITE_OPEN_READONLY, NULL);
201
202 if (SQLITE_OK != c) {
203 perror(MANDOC_DB);
204 sqlite3_close(db);
205 continue;
206 }
207
208 j = 1;
209 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
210 if (SQLITE_OK != c)
211 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
212
213 if (NULL != search->arch)
214 SQL_BIND_TEXT(db, s, j, search->arch);
215 if (NULL != search->sec)
216 SQL_BIND_TEXT(db, s, j, search->sec);
217
218 for (ep = e; NULL != ep; ep = ep->next) {
219 SQL_BIND_TEXT(db, s, j, ep->v);
220 SQL_BIND_INT64(db, s, j, ep->bits);
221 }
222
223 memset(&htab, 0, sizeof(struct ohash));
224 ohash_init(&htab, 4, &info);
225
226 /*
227 * Hash each entry on its [unique] document identifier.
228 * This is a uint64_t.
229 * Instead of using a hash function, simply convert the
230 * uint64_t to a uint32_t, the hash value's type.
231 * This gives good performance and preserves the
232 * distribution of buckets in the table.
233 */
234 while (SQLITE_ROW == (c = sqlite3_step(s))) {
235 id = sqlite3_column_int64(s, 0);
236 idx = ohash_lookup_memory
237 (&htab, (char *)&id,
238 sizeof(uint64_t), (uint32_t)id);
239
240 if (NULL != ohash_find(&htab, idx))
241 continue;
242
243 mp = mandoc_calloc(1, sizeof(struct match));
244 mp->id = id;
245 mp->file = mandoc_strdup
246 ((char *)sqlite3_column_text(s, 3));
247 mp->desc = mandoc_strdup
248 ((char *)sqlite3_column_text(s, 4));
249 mp->form = sqlite3_column_int(s, 5);
250 ohash_insert(&htab, idx, mp);
251 }
252
253 if (SQLITE_DONE != c)
254 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
255
256 sqlite3_finalize(s);
257 sqlite3_close(db);
258
259 for (mp = ohash_first(&htab, &idx);
260 NULL != mp;
261 mp = ohash_next(&htab, &idx)) {
262 if (cur + 1 > maxres) {
263 maxres += 1024;
264 *res = mandoc_realloc
265 (*res, maxres * sizeof(struct manpage));
266 }
267 strlcpy((*res)[cur].file,
268 paths->paths[i], PATH_MAX);
269 strlcat((*res)[cur].file, "/", PATH_MAX);
270 strlcat((*res)[cur].file, mp->file, PATH_MAX);
271 (*res)[cur].desc = mp->desc;
272 (*res)[cur].form = mp->form;
273 free(mp->file);
274 free(mp);
275 cur++;
276 }
277 ohash_delete(&htab);
278 }
279 rc = 1;
280 out:
281 exprfree(e);
282 if (-1 != fd)
283 close(fd);
284 free(sql);
285 *sz = cur;
286 return(rc);
287 }
288
289 /*
290 * Prepare the search SQL statement.
291 * We search for any of the words specified in our match expression.
292 * We filter the per-doc AND expressions when collecting results.
293 */
294 static char *
295 sql_statement(const struct expr *e, const char *arch, const char *sec)
296 {
297 char *sql;
298 const char *glob = "(key GLOB ? AND bits & ?)";
299 const char *eq = "(key = ? AND bits & ?)";
300 const char *andarch = "arch = ? AND ";
301 const char *andsec = "sec = ? AND ";
302 size_t globsz;
303 size_t eqsz;
304 size_t sz;
305
306 sql = mandoc_strdup
307 ("SELECT docid,bits,key,file,desc,form,sec,arch "
308 "FROM keys "
309 "INNER JOIN docs ON docs.id=keys.docid "
310 "WHERE ");
311 sz = strlen(sql);
312 globsz = strlen(glob);
313 eqsz = strlen(eq);
314
315 if (NULL != arch) {
316 sz += strlen(andarch) + 1;
317 sql = mandoc_realloc(sql, sz);
318 strlcat(sql, andarch, sz);
319 }
320
321 if (NULL != sec) {
322 sz += strlen(andsec) + 1;
323 sql = mandoc_realloc(sql, sz);
324 strlcat(sql, andsec, sz);
325 }
326
327 sz += 2;
328 sql = mandoc_realloc(sql, sz);
329 strlcat(sql, "(", sz);
330
331 for ( ; NULL != e; e = e->next) {
332 sz += (e->glob ? globsz : eqsz) +
333 (NULL == e->next ? 3 : 5);
334 sql = mandoc_realloc(sql, sz);
335 strlcat(sql, e->glob ? glob : eq, sz);
336 strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
337 }
338
339 return(sql);
340 }
341
342 /*
343 * Compile a set of string tokens into an expression.
344 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
345 * "(", "foo=bar", etc.).
346 */
347 static struct expr *
348 exprcomp(const struct mansearch *search, int argc, char *argv[])
349 {
350 int i;
351 struct expr *first, *next, *cur;
352
353 first = cur = NULL;
354
355 for (i = 0; i < argc; i++) {
356 next = exprterm(search, argv[i]);
357 if (NULL == next) {
358 exprfree(first);
359 return(NULL);
360 }
361 if (NULL != first) {
362 cur->next = next;
363 cur = next;
364 } else
365 cur = first = next;
366 }
367
368 return(first);
369 }
370
371 static struct expr *
372 exprterm(const struct mansearch *search, char *buf)
373 {
374 struct expr *e;
375 char *key, *v;
376 size_t i;
377
378 if ('\0' == *buf)
379 return(NULL);
380
381 e = mandoc_calloc(1, sizeof(struct expr));
382
383 /*"whatis" mode uses an opaque string and default fields. */
384
385 if (MANSEARCH_WHATIS & search->flags) {
386 e->v = buf;
387 e->bits = search->deftype;
388 return(e);
389 }
390
391 /*
392 * If no =~ is specified, search with equality over names and
393 * descriptions.
394 * If =~ begins the phrase, use name and description fields.
395 */
396
397 if (NULL == (v = strpbrk(buf, "=~"))) {
398 e->v = buf;
399 e->bits = search->deftype;
400 return(e);
401 } else if (v == buf)
402 e->bits = search->deftype;
403
404 e->glob = '~' == *v;
405 *v++ = '\0';
406 e->v = v;
407
408 /*
409 * Parse out all possible fields.
410 * If the field doesn't resolve, bail.
411 */
412
413 while (NULL != (key = strsep(&buf, ","))) {
414 if ('\0' == *key)
415 continue;
416 i = 0;
417 while (types[i].bits &&
418 strcasecmp(types[i].name, key))
419 i++;
420 if (0 == types[i].bits) {
421 free(e);
422 return(NULL);
423 }
424 e->bits |= types[i].bits;
425 }
426
427 return(e);
428 }
429
430 static void
431 exprfree(struct expr *p)
432 {
433 struct expr *pp;
434
435 while (NULL != p) {
436 pp = p->next;
437 free(p);
438 p = pp;
439 }
440 }
441
/*
 * "halloc" callback installed in the ohash_info by mansearch():
 * returns sz bytes obtained from mandoc_calloc().
 * The callback argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_calloc(sz, 1);
	return(p);
}
448
/*
 * "alloc" callback installed in the ohash_info by mansearch():
 * returns sz bytes obtained from mandoc_malloc().
 * The callback argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_malloc(sz);
	return(p);
}
455
/*
 * "hfree" callback installed in the ohash_info by mansearch():
 * releases p with free().
 * Neither the size nor the callback argument is used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}