]>
git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
1 /* $Id: mansearch.c,v 1.8 2013/10/20 00:03:05 schwarze Exp $ */
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37 #include "compat_ohash.h"
43 #include "mansearch.h"
45 #define SQL_BIND_TEXT(_db, _s, _i, _v) \
46 do { if (SQLITE_OK != sqlite3_bind_text \
47 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
48 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
50 #define SQL_BIND_INT64(_db, _s, _i, _v) \
51 do { if (SQLITE_OK != sqlite3_bind_int64 \
52 ((_s), (_i)++, (_v))) \
53 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
55 #define SQL_BIND_BLOB(_db, _s, _i, _v) \
56 do { if (SQLITE_OK != sqlite3_bind_blob \
57 ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
58 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
62 uint64_t bits
; /* type-mask */
63 const char *substr
; /* to search for, if applicable */
64 regex_t regexp
; /* compiled regexp, if applicable */
65 struct expr
*next
; /* next in sequence */
69 uint64_t id
; /* identifier in database */
70 char *file
; /* relative filepath of manpage */
71 char *desc
; /* description of manpage */
72 int form
; /* 0 == catpage */
80 static const struct type types
[] = {
124 static void *hash_alloc(size_t, void *);
125 static void hash_free(void *, size_t, void *);
126 static void *hash_halloc(size_t, void *);
127 static struct expr
*exprcomp(const struct mansearch
*,
129 static void exprfree(struct expr
*);
130 static struct expr
*exprterm(const struct mansearch
*, char *, int);
131 static void sql_match(sqlite3_context
*context
,
132 int argc
, sqlite3_value
**argv
);
133 static void sql_regexp(sqlite3_context
*context
,
134 int argc
, sqlite3_value
**argv
);
135 static char *sql_statement(const struct expr
*,
136 const char *, const char *);
139 mansearch(const struct mansearch
*search
,
140 const struct manpaths
*paths
,
141 int argc
, char *argv
[],
142 struct manpage
**res
, size_t *sz
)
152 struct ohash_info info
;
155 size_t i
, j
, cur
, maxres
;
157 memset(&info
, 0, sizeof(struct ohash_info
));
159 info
.halloc
= hash_halloc
;
160 info
.alloc
= hash_alloc
;
161 info
.hfree
= hash_free
;
162 info
.key_offset
= offsetof(struct match
, id
);
164 *sz
= cur
= maxres
= 0;
173 if (NULL
== (e
= exprcomp(search
, argc
, argv
)))
177 * Save a descriptor to the current working directory.
178 * Since pathnames in the "paths" variable might be relative,
179 * and we'll be chdir()ing into them, we need to keep a handle
180 * on our current directory from which to start the chdir().
183 if (NULL
== getcwd(buf
, PATH_MAX
)) {
186 } else if (-1 == (fd
= open(buf
, O_RDONLY
, 0))) {
191 sql
= sql_statement(e
, search
->arch
, search
->sec
);
194 * Loop over the directories (containing databases) for us to
196 * Don't let missing/bad databases/directories faze us.
197 * In each, try to open the resident database and, if it opens,
198 * scan it for our match expression.
201 for (i
= 0; i
< paths
->sz
; i
++) {
202 if (-1 == fchdir(fd
)) {
206 } else if (-1 == chdir(paths
->paths
[i
])) {
207 perror(paths
->paths
[i
]);
213 SQLITE_OPEN_READONLY
, NULL
);
215 if (SQLITE_OK
!= c
) {
222 * Define the SQL functions for substring
223 * and regular expression matching.
226 c
= sqlite3_create_function(db
, "match", 2,
227 SQLITE_ANY
, NULL
, sql_match
, NULL
, NULL
);
228 assert(SQLITE_OK
== c
);
229 c
= sqlite3_create_function(db
, "regexp", 2,
230 SQLITE_ANY
, NULL
, sql_regexp
, NULL
, NULL
);
231 assert(SQLITE_OK
== c
);
234 c
= sqlite3_prepare_v2(db
, sql
, -1, &s
, NULL
);
236 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
238 if (NULL
!= search
->arch
)
239 SQL_BIND_TEXT(db
, s
, j
, search
->arch
);
240 if (NULL
!= search
->sec
)
241 SQL_BIND_TEXT(db
, s
, j
, search
->sec
);
243 for (ep
= e
; NULL
!= ep
; ep
= ep
->next
) {
244 if (NULL
== ep
->substr
) {
245 SQL_BIND_BLOB(db
, s
, j
, ep
->regexp
);
247 SQL_BIND_TEXT(db
, s
, j
, ep
->substr
);
248 SQL_BIND_INT64(db
, s
, j
, ep
->bits
);
251 memset(&htab
, 0, sizeof(struct ohash
));
252 ohash_init(&htab
, 4, &info
);
255 * Hash each entry on its [unique] document identifier.
256 * This is a uint64_t.
257 * Instead of using a hash function, simply convert the
258 * uint64_t to a uint32_t, the hash value's type.
259 * This gives good performance and preserves the
260 * distribution of buckets in the table.
262 while (SQLITE_ROW
== (c
= sqlite3_step(s
))) {
263 id
= sqlite3_column_int64(s
, 0);
264 idx
= ohash_lookup_memory
266 sizeof(uint64_t), (uint32_t)id
);
268 if (NULL
!= ohash_find(&htab
, idx
))
271 mp
= mandoc_calloc(1, sizeof(struct match
));
273 mp
->file
= mandoc_strdup
274 ((char *)sqlite3_column_text(s
, 3));
275 mp
->desc
= mandoc_strdup
276 ((char *)sqlite3_column_text(s
, 4));
277 mp
->form
= sqlite3_column_int(s
, 5);
278 ohash_insert(&htab
, idx
, mp
);
281 if (SQLITE_DONE
!= c
)
282 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
287 for (mp
= ohash_first(&htab
, &idx
);
289 mp
= ohash_next(&htab
, &idx
)) {
290 if (cur
+ 1 > maxres
) {
292 *res
= mandoc_realloc
293 (*res
, maxres
* sizeof(struct manpage
));
295 strlcpy((*res
)[cur
].file
,
296 paths
->paths
[i
], PATH_MAX
);
297 strlcat((*res
)[cur
].file
, "/", PATH_MAX
);
298 strlcat((*res
)[cur
].file
, mp
->file
, PATH_MAX
);
299 (*res
)[cur
].desc
= mp
->desc
;
300 (*res
)[cur
].form
= mp
->form
;
318 * Implement substring match as an application-defined SQL function.
319 * Using the SQL LIKE or GLOB operators instead would be a bad idea
320 * because that would require escaping metacharacters in the string
321 * being searched for.
324 sql_match(sqlite3_context
*context
, int argc
, sqlite3_value
**argv
)
328 sqlite3_result_int(context
, NULL
!= strcasestr(
329 (const char *)sqlite3_value_text(argv
[1]),
330 (const char *)sqlite3_value_text(argv
[0])));
334 * Implement regular expression match
335 * as an application-defined SQL function.
338 sql_regexp(sqlite3_context
*context
, int argc
, sqlite3_value
**argv
)
342 sqlite3_result_int(context
, !regexec(
343 (regex_t
*)sqlite3_value_blob(argv
[0]),
344 (const char *)sqlite3_value_text(argv
[1]),
349 * Prepare the search SQL statement.
350 * We search for any of the words specified in our match expression.
351 * We filter the per-doc AND expressions when collecting results.
354 sql_statement(const struct expr
*e
, const char *arch
, const char *sec
)
357 const char *substr
= "(key MATCH ? AND bits & ?)";
358 const char *regexp
= "(key REGEXP ? AND bits & ?)";
359 const char *andarch
= "arch = ? AND ";
360 const char *andsec
= "sec = ? AND ";
366 ("SELECT docid,bits,key,file,desc,form,sec,arch "
368 "INNER JOIN docs ON docs.id=keys.docid "
371 substrsz
= strlen(substr
);
372 regexpsz
= strlen(regexp
);
375 sz
+= strlen(andarch
) + 1;
376 sql
= mandoc_realloc(sql
, sz
);
377 strlcat(sql
, andarch
, sz
);
381 sz
+= strlen(andsec
) + 1;
382 sql
= mandoc_realloc(sql
, sz
);
383 strlcat(sql
, andsec
, sz
);
387 sql
= mandoc_realloc(sql
, sz
);
388 strlcat(sql
, "(", sz
);
390 for ( ; NULL
!= e
; e
= e
->next
) {
391 sz
+= (NULL
== e
->substr
? regexpsz
: substrsz
) +
392 (NULL
== e
->next
? 3 : 5);
393 sql
= mandoc_realloc(sql
, sz
);
394 strlcat(sql
, NULL
== e
->substr
? regexp
: substr
, sz
);
395 strlcat(sql
, NULL
== e
->next
? ");" : " OR ", sz
);
402 * Compile a set of string tokens into an expression.
403 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
404 * "(", "foo=bar", etc.).
407 exprcomp(const struct mansearch
*search
, int argc
, char *argv
[])
410 struct expr
*first
, *next
, *cur
;
414 for (i
= 0; i
< argc
; i
++) {
415 if (0 == strcmp("-i", argv
[i
])) {
421 next
= exprterm(search
, argv
[i
], cs
);
437 exprterm(const struct mansearch
*search
, char *buf
, int cs
)
446 e
= mandoc_calloc(1, sizeof(struct expr
));
448 /* "whatis" mode uses an opaque string and default fields. */
450 if (MANSEARCH_WHATIS
& search
->flags
) {
452 e
->bits
= search
->deftype
;
457 * If no =~ is specified, search with equality over names and
459 * If =~ begins the phrase, use name and description fields.
462 if (NULL
== (v
= strpbrk(buf
, "=~"))) {
464 e
->bits
= search
->deftype
;
467 e
->bits
= search
->deftype
;
470 if (regcomp(&e
->regexp
, v
,
471 REG_EXTENDED
| REG_NOSUB
| (cs
? 0 : REG_ICASE
))) {
480 * Parse out all possible fields.
481 * If the field doesn't resolve, bail.
484 while (NULL
!= (key
= strsep(&buf
, ","))) {
488 while (types
[i
].bits
&&
489 strcasecmp(types
[i
].name
, key
))
491 if (0 == types
[i
].bits
) {
495 e
->bits
|= types
[i
].bits
;
502 exprfree(struct expr
*p
)
514 hash_halloc(size_t sz
, void *arg
)
517 return(mandoc_calloc(sz
, 1));
521 hash_alloc(size_t sz
, void *arg
)
524 return(mandoc_malloc(sz
));
528 hash_free(void *p
, size_t sz
, void *arg
)