]> git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
ad18f94285da9281984e890560f4d8300e612e2c
[mandoc.git] / mansearch.c
1 /* $Id: mansearch.c,v 1.2 2012/06/08 14:14:30 kristaps Exp $ */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22
23 #include <assert.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stddef.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include <ohash.h>
34 #include <sqlite3.h>
35
36 #include "mandoc.h"
37 #include "manpath.h"
38 #include "mandocdb.h"
39 #include "mansearch.h"
40
/*
 * Bind "val" to positional parameter "idx" of prepared statement "stmt"
 * and post-increment "idx" so that consecutive invocations fill
 * consecutive parameters.
 * A failed bind is reported on stderr using the error message of
 * database handle "db"; execution continues regardless.
 * The text is bound with SQLITE_STATIC, so the string is not copied.
 * The bodies are wrapped in do/while(0) so that each invocation is a
 * single statement and cannot capture a following "else".
 */
#define	BIND_TEXT(db, stmt, idx, val) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text \
		    ((stmt), (idx)++, (val), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((db))); \
	} while (0)
#define	BIND_INT64(db, stmt, idx, val) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64 \
		    ((stmt), (idx)++, (val))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((db))); \
	} while (0)
49
/*
 * A single search term, chained into a singly-linked list.
 */
struct	expr {
	int		 glob;	/* non-zero: match value as a glob */
	uint64_t	 bits;	/* OR of the TYPE_* bits to search */
	const char	*v;	/* value to look for (not owned) */
	struct expr	*next;	/* following term, or NULL at the end */
};
56
/*
 * One matching document, as stored in the per-database hash table.
 * The table is keyed on "id".
 */
struct	match {
	uint64_t	 id;	/* document identifier in the database */
	char		*file;	/* manpage path relative to the database */
	char		*desc;	/* manpage description */
	int		 form;	/* 0 == catpage */
};
63
/*
 * Maps a field name usable in a search term to its type-mask.
 */
struct	type {
	uint64_t	 bits;	/* type-mask selected by the name */
	const char	*name;	/* field name; NULL in the table sentinel */
};
68
69 static const struct type types[] = {
70 { TYPE_An, "An" },
71 { TYPE_Ar, "Ar" },
72 { TYPE_At, "At" },
73 { TYPE_Bsx, "Bsx" },
74 { TYPE_Bx, "Bx" },
75 { TYPE_Cd, "Cd" },
76 { TYPE_Cm, "Cm" },
77 { TYPE_Dv, "Dv" },
78 { TYPE_Dx, "Dx" },
79 { TYPE_Em, "Em" },
80 { TYPE_Er, "Er" },
81 { TYPE_Ev, "Ev" },
82 { TYPE_Fa, "Fa" },
83 { TYPE_Fl, "Fl" },
84 { TYPE_Fn, "Fn" },
85 { TYPE_Fn, "Fo" },
86 { TYPE_Ft, "Ft" },
87 { TYPE_Fx, "Fx" },
88 { TYPE_Ic, "Ic" },
89 { TYPE_In, "In" },
90 { TYPE_Lb, "Lb" },
91 { TYPE_Li, "Li" },
92 { TYPE_Lk, "Lk" },
93 { TYPE_Ms, "Ms" },
94 { TYPE_Mt, "Mt" },
95 { TYPE_Nd, "Nd" },
96 { TYPE_Nm, "Nm" },
97 { TYPE_Nx, "Nx" },
98 { TYPE_Ox, "Ox" },
99 { TYPE_Pa, "Pa" },
100 { TYPE_Rs, "Rs" },
101 { TYPE_Sh, "Sh" },
102 { TYPE_Ss, "Ss" },
103 { TYPE_St, "St" },
104 { TYPE_Sy, "Sy" },
105 { TYPE_Tn, "Tn" },
106 { TYPE_Va, "Va" },
107 { TYPE_Va, "Vt" },
108 { TYPE_Xr, "Xr" },
109 { ~0ULL, "any" },
110 { 0ULL, NULL }
111 };
112
/* Search-expression handling. */
static	struct expr	*exprcomp(int argc, char *argv[]);
static	struct expr	*exprterm(char *buf);
static	void		 exprfree(struct expr *p);
static	char		*sql_statement(const struct expr *e,
				const char *arch, const char *sec);

/* Allocation callbacks handed to the ohash table. */
static	void		*hash_alloc(size_t sz, void *arg);
static	void		*hash_halloc(size_t sz, void *arg);
static	void		 hash_free(void *p, size_t sz, void *arg);
121
122 int
123 mansearch(const struct manpaths *paths,
124 const char *arch, const char *sec,
125 int argc, char *argv[],
126 struct manpage **res, size_t *sz)
127 {
128 int fd, rc, c;
129 int64_t id;
130 char buf[MAXPATHLEN];
131 char *sql;
132 struct expr *e, *ep;
133 sqlite3 *db;
134 sqlite3_stmt *s;
135 struct match *mp;
136 struct ohash_info info;
137 struct ohash htab;
138 unsigned int idx;
139 size_t i, j, cur, maxres;
140
141 memset(&info, 0, sizeof(struct ohash_info));
142
143 info.halloc = hash_halloc;
144 info.alloc = hash_alloc;
145 info.hfree = hash_free;
146 info.key_offset = offsetof(struct match, id);
147
148 *sz = cur = maxres = 0;
149 sql = NULL;
150 *res = NULL;
151 fd = -1;
152 e = NULL;
153 rc = 0;
154
155 if (0 == argc)
156 goto out;
157 if (NULL == (e = exprcomp(argc, argv)))
158 goto out;
159
160 /*
161 * Save a descriptor to the current working directory.
162 * Since pathnames in the "paths" variable might be relative,
163 * and we'll be chdir()ing into them, we need to keep a handle
164 * on our current directory from which to start the chdir().
165 */
166
167 if (NULL == getcwd(buf, MAXPATHLEN)) {
168 perror(NULL);
169 goto out;
170 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
171 perror(buf);
172 goto out;
173 }
174
175 sql = sql_statement(e, arch, sec);
176
177 /*
178 * Loop over the directories (containing databases) for us to
179 * search.
180 * Don't let missing/bad databases/directories phase us.
181 * In each, try to open the resident database and, if it opens,
182 * scan it for our match expression.
183 */
184
185 for (i = 0; i < paths->sz; i++) {
186 if (-1 == fchdir(fd)) {
187 perror(buf);
188 free(*res);
189 break;
190 } else if (-1 == chdir(paths->paths[i])) {
191 perror(paths->paths[i]);
192 continue;
193 }
194
195 c = sqlite3_open_v2
196 (MANDOC_DB, &db,
197 SQLITE_OPEN_READONLY, NULL);
198
199 if (SQLITE_OK != c) {
200 perror(MANDOC_DB);
201 sqlite3_close(db);
202 continue;
203 }
204
205 j = 1;
206 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
207 if (SQLITE_OK != c)
208 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
209
210 if (NULL != arch)
211 BIND_TEXT(db, s, j, arch);
212 if (NULL != sec)
213 BIND_TEXT(db, s, j, arch);
214
215 for (ep = e; NULL != ep; ep = ep->next) {
216 BIND_TEXT(db, s, j, ep->v);
217 BIND_INT64(db, s, j, ep->bits);
218 }
219
220 memset(&htab, 0, sizeof(struct ohash));
221 ohash_init(&htab, 4, &info);
222
223 /*
224 * Hash each entry on its [unique] document identifier.
225 * This is a uint64_t.
226 * Instead of using a hash function, simply convert the
227 * uint64_t to a uint32_t, the hash value's type.
228 * This gives good performance and preserves the
229 * distribution of buckets in the table.
230 */
231 while (SQLITE_ROW == (c = sqlite3_step(s))) {
232 id = sqlite3_column_int64(s, 0);
233 idx = ohash_lookup_memory
234 (&htab, (char *)&id,
235 sizeof(uint64_t), (uint32_t)id);
236
237 if (NULL != ohash_find(&htab, idx))
238 continue;
239
240 mp = mandoc_calloc(1, sizeof(struct match));
241 mp->id = id;
242 mp->file = mandoc_strdup
243 ((char *)sqlite3_column_text(s, 3));
244 mp->desc = mandoc_strdup
245 ((char *)sqlite3_column_text(s, 4));
246 mp->form = sqlite3_column_int(s, 5);
247 ohash_insert(&htab, idx, mp);
248 }
249
250 if (SQLITE_DONE != c)
251 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
252
253 sqlite3_finalize(s);
254 sqlite3_close(db);
255
256 for (mp = ohash_first(&htab, &idx);
257 NULL != mp;
258 mp = ohash_next(&htab, &idx)) {
259 if (cur + 1 > maxres) {
260 maxres += 1024;
261 *res = mandoc_realloc
262 (*res, maxres * sizeof(struct manpage));
263 }
264 strlcpy((*res)[cur].file,
265 paths->paths[i], MAXPATHLEN);
266 strlcat((*res)[cur].file, "/", MAXPATHLEN);
267 strlcat((*res)[cur].file, mp->file, MAXPATHLEN);
268 (*res)[cur].desc = mp->desc;
269 (*res)[cur].form = mp->form;
270 free(mp->file);
271 free(mp);
272 cur++;
273 }
274 ohash_delete(&htab);
275 }
276 rc = 1;
277 out:
278 exprfree(e);
279 if (-1 != fd)
280 close(fd);
281 free(sql);
282 *sz = cur;
283 return(rc);
284 }
285
286 /*
287 * Prepare the search SQL statement.
288 * We search for any of the words specified in our match expression.
289 * We filter the per-doc AND expressions when collecting results.
290 */
291 static char *
292 sql_statement(const struct expr *e, const char *arch, const char *sec)
293 {
294 char *sql;
295 const char *glob = "(key GLOB ? AND bits & ?)";
296 const char *eq = "(key = ? AND bits & ?)";
297 const char *andarch = "arch = ? AND ";
298 const char *andsec = "sec = ? AND ";
299 size_t globsz;
300 size_t eqsz;
301 size_t sz;
302
303 sql = mandoc_strdup
304 ("SELECT docid,bits,key,file,desc,form,sec,arch "
305 "FROM keys "
306 "INNER JOIN docs ON docs.id=keys.docid "
307 "WHERE ");
308 sz = strlen(sql);
309 globsz = strlen(glob);
310 eqsz = strlen(eq);
311
312 if (NULL != arch) {
313 sz += strlen(andarch) + 1;
314 sql = mandoc_realloc(sql, sz);
315 strlcat(sql, andarch, sz);
316 }
317
318 if (NULL != sec) {
319 sz += strlen(andsec) + 1;
320 sql = mandoc_realloc(sql, sz);
321 strlcat(sql, andsec, sz);
322 }
323
324 sz += 2;
325 sql = mandoc_realloc(sql, sz);
326 strlcat(sql, "(", sz);
327
328 for ( ; NULL != e; e = e->next) {
329 sz += (e->glob ? globsz : eqsz) +
330 (NULL == e->next ? 3 : 5);
331 sql = mandoc_realloc(sql, sz);
332 strlcat(sql, e->glob ? glob : eq, sz);
333 strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
334 }
335
336 return(sql);
337 }
338
339 /*
340 * Compile a set of string tokens into an expression.
341 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
342 * "(", "foo=bar", etc.).
343 */
344 static struct expr *
345 exprcomp(int argc, char *argv[])
346 {
347 int i;
348 struct expr *first, *next, *cur;
349
350 first = cur = NULL;
351
352 for (i = 0; i < argc; i++) {
353 next = exprterm(argv[i]);
354 if (NULL == next) {
355 exprfree(first);
356 return(NULL);
357 }
358 if (NULL != first) {
359 cur->next = next;
360 cur = next;
361 } else
362 cur = first = next;
363 }
364
365 return(first);
366 }
367
368 static struct expr *
369 exprterm(char *buf)
370 {
371 struct expr *e;
372 char *key, *v;
373 size_t i;
374
375 if ('\0' == *buf)
376 return(NULL);
377
378 e = mandoc_calloc(1, sizeof(struct expr));
379
380 /*
381 * If no =~ is specified, search with equality over names and
382 * descriptions.
383 * If =~ begins the phrase, use name and description fields.
384 */
385
386 if (NULL == (v = strpbrk(buf, "=~"))) {
387 e->v = buf;
388 e->bits = TYPE_Nm | TYPE_Nd;
389 return(e);
390 } else if (v == buf)
391 e->bits = TYPE_Nm | TYPE_Nd;
392
393 e->glob = '~' == *v;
394 *v++ = '\0';
395 e->v = v;
396
397 /*
398 * Parse out all possible fields.
399 * If the field doesn't resolve, bail.
400 */
401
402 while (NULL != (key = strsep(&buf, ","))) {
403 if ('\0' == *key)
404 continue;
405 i = 0;
406 while (types[i].bits &&
407 strcasecmp(types[i].name, key))
408 i++;
409 if (0 == types[i].bits) {
410 free(e);
411 return(NULL);
412 }
413 e->bits |= types[i].bits;
414 }
415
416 return(e);
417 }
418
419 static void
420 exprfree(struct expr *p)
421 {
422 struct expr *pp;
423
424 while (NULL != p) {
425 pp = p->next;
426 free(p);
427 p = pp;
428 }
429 }
430
/*
 * "halloc" callback of the hash table: hand out "sz" bytes obtained
 * through mandoc_calloc().  The callback argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{

	(void)arg;
	return mandoc_calloc(sz, 1);
}
437
/*
 * "alloc" callback of the hash table: hand out "sz" bytes obtained
 * through mandoc_malloc().  The callback argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{

	(void)arg;
	return mandoc_malloc(sz);
}
444
445 static void
446 hash_free(void *p, size_t sz, void *arg)
447 {
448
449 free(p);
450 }