]>
git.cameronkatri.com Git - mandoc.git/blob - mansearch.c
1 /* $Id: mansearch.c,v 1.12 2013/12/31 03:41:14 schwarze Exp $ */
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37 #include "compat_ohash.h"
43 #include "mansearch.h"
/*
 * Parameter-binding helpers for prepared statements.
 * Each macro binds _v to parameter index _i of statement _s, then
 * post-increments _i so consecutive uses fill consecutive parameters.
 * When the bind call does not return SQLITE_OK, the message from
 * sqlite3_errmsg(_db) is printed to stderr.
 * SQL_BIND_TEXT passes length -1 and SQLITE_STATIC.
 * SQL_BIND_BLOB binds the raw bytes of the object _v itself
 * (address &_v, length sizeof(_v)), also with SQLITE_STATIC.
 * NOTE(review): with SQLITE_STATIC the bound storage has to stay valid
 * while the statement uses it -- confirm against the SQLite bind docs.
 */
45 #define SQL_BIND_TEXT(_db, _s, _i, _v) \
46 do { if (SQLITE_OK != sqlite3_bind_text \
47 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
48 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
50 #define SQL_BIND_INT64(_db, _s, _i, _v) \
51 do { if (SQLITE_OK != sqlite3_bind_int64 \
52 ((_s), (_i)++, (_v))) \
53 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
55 #define SQL_BIND_BLOB(_db, _s, _i, _v) \
56 do { if (SQLITE_OK != sqlite3_bind_blob \
57 ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
58 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
/*
 * Fields of one search term (struct expr).
 * Terms are chained through next; mansearch() binds either regexp
 * (when substr is NULL) or substr, followed by bits, for each term.
 */
62 uint64_t bits
; /* type-mask */
63 const char *substr
; /* to search for, if applicable */
64 regex_t regexp
; /* compiled regexp, if applicable */
65 struct expr
*next
; /* next in sequence */
/*
 * Fields of one matched page (struct match), as stored in the hash
 * table built by mansearch(); id is the hash key (see key_offset).
 */
69 uint64_t id
; /* identifier in database */
70 char *file
; /* relative filepath of manpage */
71 char *desc
; /* description of manpage */
72 int form
; /* 0 == catpage */
/*
 * Table of searchable field types.
 * Users in this file scan it until an entry whose bits member is 0
 * and compare the name member with strcasecmp().
 */
80 static const struct type types
[] = {
/* Forward declarations of the file-local (static) helpers defined below. */
124 static char *buildnames(sqlite3
*, sqlite3_stmt
*, uint64_t);
125 static char *buildoutput(sqlite3
*, sqlite3_stmt
*,
127 static void *hash_alloc(size_t, void *);
128 static void hash_free(void *, size_t, void *);
129 static void *hash_halloc(size_t, void *);
130 static struct expr
*exprcomp(const struct mansearch
*,
132 static void exprfree(struct expr
*);
133 static struct expr
*exprterm(const struct mansearch
*, char *, int);
134 static void sql_match(sqlite3_context
*context
,
135 int argc
, sqlite3_value
**argv
);
136 static void sql_regexp(sqlite3_context
*context
,
137 int argc
, sqlite3_value
**argv
);
138 static char *sql_statement(const struct expr
*,
139 const char *, const char *);
/*
 * Search every database directory listed in paths for pages matching
 * the expression given by argv.
 *
 * Visible flow:
 *  - *sz, cur and maxres start at 0;
 *  - argv is compiled into a term list with exprcomp();
 *  - a non-NULL outkey is looked up case-insensitively in types[] to
 *    obtain outbit;
 *  - the current directory is recorded (getcwd() + open()) so each
 *    iteration can fchdir() back before chdir()ing into paths->paths[i];
 *  - one SQL statement is built by sql_statement() and reused for every
 *    database;
 *  - per database: the "match" and "regexp" SQL functions are
 *    registered, the statement is prepared and bound, result rows are
 *    collected in a hash table keyed by page id, and each hashed entry
 *    is appended to *res (grown with mandoc_realloc()).
 *
 * NOTE(review): the return value and the error paths are not documented
 * here -- confirm against the complete function.
 */
142 mansearch(const struct mansearch
*search
,
143 const struct manpaths
*paths
,
144 int argc
, char *argv
[],
146 struct manpage
**res
, size_t *sz
)
153 struct manpage
*mpage
;
156 sqlite3_stmt
*s
, *s2
;
158 struct ohash_info info
;
161 size_t i
, j
, cur
, maxres
;
/* Allocation callbacks and key offset for the per-database hash table. */
163 memset(&info
, 0, sizeof(struct ohash_info
));
165 info
.halloc
= hash_halloc
;
166 info
.alloc
= hash_alloc
;
167 info
.hfree
= hash_free
;
168 info
.key_offset
= offsetof(struct match
, id
);
170 *sz
= cur
= maxres
= 0;
/* Compile the command-line tokens into a list of search terms. */
179 if (NULL
== (e
= exprcomp(search
, argc
, argv
)))
183 if (NULL
!= outkey
) {
184 for (ibit
= 0; types
[ibit
].bits
; ibit
++) {
185 if (0 == strcasecmp(types
[ibit
].name
, outkey
)) {
186 outbit
= types
[ibit
].bits
;
193 * Save a descriptor to the current working directory.
194 * Since pathnames in the "paths" variable might be relative,
195 * and we'll be chdir()ing into them, we need to keep a handle
196 * on our current directory from which to start the chdir().
199 if (NULL
== getcwd(buf
, PATH_MAX
)) {
202 } else if (-1 == (fd
= open(buf
, O_RDONLY
, 0))) {
207 sql
= sql_statement(e
, search
->arch
, search
->sec
);
210 * Loop over the directories (containing databases) for us to
212 * Don't let missing/bad databases/directories phase us.
213 * In each, try to open the resident database and, if it opens,
214 * scan it for our match expression.
217 for (i
= 0; i
< paths
->sz
; i
++) {
218 if (-1 == fchdir(fd
)) {
222 } else if (-1 == chdir(paths
->paths
[i
])) {
223 perror(paths
->paths
[i
]);
229 SQLITE_OPEN_READONLY
, NULL
);
231 if (SQLITE_OK
!= c
) {
238 * Define the SQL functions for substring
239 * and regular expression matching.
242 c
= sqlite3_create_function(db
, "match", 2,
243 SQLITE_ANY
, NULL
, sql_match
, NULL
, NULL
);
244 assert(SQLITE_OK
== c
);
245 c
= sqlite3_create_function(db
, "regexp", 2,
246 SQLITE_ANY
, NULL
, sql_regexp
, NULL
, NULL
);
247 assert(SQLITE_OK
== c
);
250 c
= sqlite3_prepare_v2(db
, sql
, -1, &s
, NULL
);
252 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
/*
 * Bind in the order sql_statement() emits placeholders: arch, sec,
 * then one (pattern, bits) pair per term.
 */
254 if (NULL
!= search
->arch
)
255 SQL_BIND_TEXT(db
, s
, j
, search
->arch
);
256 if (NULL
!= search
->sec
)
257 SQL_BIND_TEXT(db
, s
, j
, search
->sec
);
259 for (ep
= e
; NULL
!= ep
; ep
= ep
->next
) {
260 if (NULL
== ep
->substr
) {
261 SQL_BIND_BLOB(db
, s
, j
, ep
->regexp
);
263 SQL_BIND_TEXT(db
, s
, j
, ep
->substr
);
264 SQL_BIND_INT64(db
, s
, j
, ep
->bits
);
267 memset(&htab
, 0, sizeof(struct ohash
));
268 ohash_init(&htab
, 4, &info
);
271 * Hash each entry on its [unique] document identifier.
272 * This is a uint64_t.
273 * Instead of using a hash function, simply convert the
274 * uint64_t to a uint32_t, the hash value's type.
275 * This gives good performance and preserves the
276 * distribution of buckets in the table.
278 while (SQLITE_ROW
== (c
= sqlite3_step(s
))) {
279 id
= sqlite3_column_int64(s
, 0);
280 idx
= ohash_lookup_memory
282 sizeof(uint64_t), (uint32_t)id
);
284 if (NULL
!= ohash_find(&htab
, idx
))
287 mp
= mandoc_calloc(1, sizeof(struct match
));
289 mp
->file
= mandoc_strdup
290 ((char *)sqlite3_column_text(s
, 3));
291 mp
->desc
= mandoc_strdup
292 ((char *)sqlite3_column_text(s
, 4));
293 mp
->form
= sqlite3_column_int(s
, 5);
294 ohash_insert(&htab
, idx
, mp
);
297 if (SQLITE_DONE
!= c
)
298 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
/*
 * Two per-page lookups follow: one on mlinks, one on keys.
 * NOTE(review): presumably prepared into s and s2 respectively, since
 * buildnames() is called with s and buildoutput() with s2 -- confirm.
 */
302 c
= sqlite3_prepare_v2(db
,
303 "SELECT * FROM mlinks WHERE pageid=?",
306 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
308 c
= sqlite3_prepare_v2(db
,
309 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
312 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
314 for (mp
= ohash_first(&htab
, &idx
);
316 mp
= ohash_next(&htab
, &idx
)) {
317 if (cur
+ 1 > maxres
) {
319 *res
= mandoc_realloc
320 (*res
, maxres
* sizeof(struct manpage
));
/* The reported path is the database directory joined with the page file. */
323 if (-1 == asprintf(&mpage
->file
, "%s/%s",
324 paths
->paths
[i
], mp
->file
)) {
326 exit((int)MANDOCLEVEL_SYSERR
);
328 mpage
->desc
= mp
->desc
;
329 mpage
->form
= mp
->form
;
330 mpage
->names
= buildnames(db
, s
, mp
->id
);
331 mpage
->output
= outbit
?
332 buildoutput(db
, s2
, mp
->id
, outbit
) : NULL
;
340 sqlite3_finalize(s2
);
/*
 * Build the display string of names for the page with identifier id.
 * Binds id to statement s and steps through the rows; for each row,
 * columns 1, 2 and 3 are read as sec, arch and name and appended to the
 * accumulated string in the form name(sec) or name(sec/arch), the "/"
 * being used only when arch is a non-empty string.
 * Exits with MANDOCLEVEL_SYSERR when asprintf() fails and prints the
 * database error message when stepping ends with anything other than
 * SQLITE_DONE.
 * NOTE(review): sqlite3_column_text() may return NULL for an SQL NULL,
 * yet arch is dereferenced unconditionally -- confirm the schema
 * guarantees a non-NULL arch column.
 * NOTE(review): presumably returns the accumulated names string, owned
 * by the caller -- confirm.
 */
355 buildnames(sqlite3
*db
, sqlite3_stmt
*s
, uint64_t id
)
357 char *names
, *newnames
;
358 const char *oldnames
, *sep1
, *name
, *sec
, *sep2
, *arch
;
364 SQL_BIND_INT64(db
, s
, i
, id
);
365 while (SQLITE_ROW
== (c
= sqlite3_step(s
))) {
373 sec
= sqlite3_column_text(s
, 1);
374 arch
= sqlite3_column_text(s
, 2);
375 name
= sqlite3_column_text(s
, 3);
376 sep2
= '\0' == *arch
? "" : "/";
377 if (-1 == asprintf(&newnames
, "%s%s%s(%s%s%s)",
378 oldnames
, sep1
, name
, sec
, sep2
, arch
)) {
380 exit((int)MANDOCLEVEL_SYSERR
);
385 if (SQLITE_DONE
!= c
)
386 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
/*
 * Build the extra output string for the page with identifier id.
 * Binds id and outbit, in that order, to statement s and steps through
 * the rows; column 1 of each row is appended to the accumulated string
 * as oldoutput + sep1 + data.
 * Exits with MANDOCLEVEL_SYSERR when asprintf() fails and prints the
 * database error message when stepping ends with anything other than
 * SQLITE_DONE.
 * NOTE(review): presumably returns the accumulated output string, or
 * NULL when no row matched -- confirm.
 */
392 buildoutput(sqlite3
*db
, sqlite3_stmt
*s
, uint64_t id
, uint64_t outbit
)
394 char *output
, *newoutput
;
395 const char *oldoutput
, *sep1
, *data
;
401 SQL_BIND_INT64(db
, s
, i
, id
);
402 SQL_BIND_INT64(db
, s
, i
, outbit
);
403 while (SQLITE_ROW
== (c
= sqlite3_step(s
))) {
404 if (NULL
== output
) {
411 data
= sqlite3_column_text(s
, 1);
412 if (-1 == asprintf(&newoutput
, "%s%s%s",
413 oldoutput
, sep1
, data
)) {
415 exit((int)MANDOCLEVEL_SYSERR
);
420 if (SQLITE_DONE
!= c
)
421 fprintf(stderr
, "%s\n", sqlite3_errmsg(db
));
427 * Implement substring match as an application-defined SQL function.
428 * Using the SQL LIKE or GLOB operators instead would be a bad idea
429 * because that would require escaping metacharacters in the string
430 * being searched for.
/*
 * Callback registered in mansearch() as the two-argument SQL function
 * "match".  Sets the integer result to 1 when the text of argv[0]
 * occurs inside the text of argv[1], compared case-insensitively with
 * strcasestr(), and to 0 otherwise.
 * NOTE(review): sqlite3_value_text() can return NULL (for example for
 * an SQL NULL value); neither argument is checked before being passed
 * to strcasestr() -- confirm NULL cannot reach this function.
 */
433 sql_match(sqlite3_context
*context
, int argc
, sqlite3_value
**argv
)
437 sqlite3_result_int(context
, NULL
!= strcasestr(
438 (const char *)sqlite3_value_text(argv
[1]),
439 (const char *)sqlite3_value_text(argv
[0])));
443 * Implement regular expression match
444 * as an application-defined SQL function.
/*
 * Callback registered in mansearch() as the two-argument SQL function
 * "regexp".  The blob in argv[0] is interpreted as a compiled regex_t
 * (mansearch() binds it with SQL_BIND_BLOB) and is run with regexec()
 * against the text of argv[1]; the integer result is the logical
 * negation of the regexec() return value, so it is 1 when regexec()
 * returns 0.
 */
447 sql_regexp(sqlite3_context
*context
, int argc
, sqlite3_value
**argv
)
451 sqlite3_result_int(context
, !regexec(
452 (regex_t
*)sqlite3_value_blob(argv
[0]),
453 (const char *)sqlite3_value_text(argv
[1]),
458 * Prepare the search SQL statement.
459 * We search for any of the words specified in our match expression.
460 * We filter the per-doc AND expressions when collecting results.
/*
 * Assemble the text of the search query for the term list e.
 * The statement starts with a SELECT of
 * pageid,bits,key,file,desc,form,sec,arch joined against mpages,
 * optionally gains the "arch = ? AND " and "sec = ? AND " filters (in
 * that order), then an opening parenthesis followed by one clause per
 * term: the REGEXP clause when the term has no substr, the MATCH clause
 * otherwise.  Clauses are joined with " OR " and the last one is
 * followed by ");".
 * The buffer is grown with mandoc_realloc() before each strlcat().
 * NOTE(review): the arch and sec filters are presumably added only for
 * non-NULL arguments, mirroring the conditional binds in mansearch() --
 * confirm.
 */
463 sql_statement(const struct expr
*e
, const char *arch
, const char *sec
)
466 const char *substr
= "(key MATCH ? AND bits & ?)";
467 const char *regexp
= "(key REGEXP ? AND bits & ?)";
468 const char *andarch
= "arch = ? AND ";
469 const char *andsec
= "sec = ? AND ";
475 ("SELECT pageid,bits,key,file,desc,form,sec,arch "
477 "INNER JOIN mpages ON mpages.id=keys.pageid "
480 substrsz
= strlen(substr
);
481 regexpsz
= strlen(regexp
);
484 sz
+= strlen(andarch
) + 1;
485 sql
= mandoc_realloc(sql
, sz
);
486 strlcat(sql
, andarch
, sz
);
490 sz
+= strlen(andsec
) + 1;
491 sql
= mandoc_realloc(sql
, sz
);
492 strlcat(sql
, andsec
, sz
);
496 sql
= mandoc_realloc(sql
, sz
);
497 strlcat(sql
, "(", sz
);
499 for ( ; NULL
!= e
; e
= e
->next
) {
500 sz
+= (NULL
== e
->substr
? regexpsz
: substrsz
) +
501 (NULL
== e
->next
? 3 : 5);
502 sql
= mandoc_realloc(sql
, sz
);
503 strlcat(sql
, NULL
== e
->substr
? regexp
: substr
, sz
);
504 strlcat(sql
, NULL
== e
->next
? ");" : " OR ", sz
);
511 * Compile a set of string tokens into an expression.
512 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
513 * "(", "foo=bar", etc.).
/*
 * Walk argv[0..argc-1] and build the list of search terms.
 * The literal token "-i" is recognised specially; every other token is
 * handed to exprterm() together with the current cs flag.
 * mansearch() treats a NULL result as failure.
 * NOTE(review): "-i" presumably switches following terms to
 * case-insensitive matching (cs selects REG_ICASE in exprterm()) --
 * confirm.
 */
516 exprcomp(const struct mansearch
*search
, int argc
, char *argv
[])
519 struct expr
*first
, *next
, *cur
;
523 for (i
= 0; i
< argc
; i
++) {
524 if (0 == strcmp("-i", argv
[i
])) {
530 next
= exprterm(search
, argv
[i
], cs
);
/*
 * Build one search term from the token buf.
 * A zeroed struct expr is allocated with mandoc_calloc().
 * With MANSEARCH_WHATIS set in search->flags, bits is taken from
 * search->deftype.  Otherwise strpbrk() looks for the first = or ~
 * character in buf; the branches shown take bits from search->deftype
 * as well.  A regular expression is compiled with regcomp() using
 * REG_EXTENDED | REG_NOSUB, plus REG_ICASE when cs is zero.
 * Finally the comma-separated field names in buf are split with
 * strsep() and looked up case-insensitively in types[]; each name found
 * ORs its bits into e->bits, and a name that is not found (bits == 0
 * sentinel reached) is handled separately.
 * NOTE(review): buf is modified in place by strsep().
 */
546 exprterm(const struct mansearch
*search
, char *buf
, int cs
)
555 e
= mandoc_calloc(1, sizeof(struct expr
));
557 /* "whatis" mode uses an opaque string and default fields. */
559 if (MANSEARCH_WHATIS
& search
->flags
) {
561 e
->bits
= search
->deftype
;
566 * If no =~ is specified, search with equality over names and
568 * If =~ begins the phrase, use name and description fields.
571 if (NULL
== (v
= strpbrk(buf
, "=~"))) {
573 e
->bits
= search
->deftype
;
576 e
->bits
= search
->deftype
;
579 if (regcomp(&e
->regexp
, v
,
580 REG_EXTENDED
| REG_NOSUB
| (cs
? 0 : REG_ICASE
))) {
589 * Parse out all possible fields.
590 * If the field doesn't resolve, bail.
593 while (NULL
!= (key
= strsep(&buf
, ","))) {
597 while (types
[i
].bits
&&
598 strcasecmp(types
[i
].name
, key
))
600 if (0 == types
[i
].bits
) {
604 e
->bits
|= types
[i
].bits
;
/*
 * NOTE(review): presumably releases the term list starting at p,
 * including any compiled regular expressions -- confirm.
 */
611 exprfree(struct expr
*p
)
/*
 * Hash-table callback installed as info.halloc in mansearch().
 * Returns sz bytes of zero-filled memory from mandoc_calloc();
 * arg is not used in the allocation call.
 */
623 hash_halloc(size_t sz
, void *arg
)
626 return(mandoc_calloc(sz
, 1));
/*
 * Hash-table callback installed as info.alloc in mansearch().
 * Returns sz bytes from mandoc_malloc(); arg is not used in the
 * allocation call.
 */
630 hash_alloc(size_t sz
, void *arg
)
633 return(mandoc_malloc(sz
));
/*
 * Hash-table callback installed as info.hfree in mansearch().
 * NOTE(review): presumably just frees p, ignoring sz and arg -- confirm.
 */
637 hash_free(void *p
, size_t sz
, void *arg
)