-/* $Id: mandocdb.c,v 1.94 2014/01/02 20:24:39 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.114 2014/01/22 20:58:39 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
#include "manpath.h"
#include "mansearch.h"
+extern int mansearch_keymax;
+extern const char *const mansearch_keynames[];
+
#define SQL_EXEC(_v) \
if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \
fprintf(stderr, "%s\n", sqlite3_errmsg(db))
};
struct str {
- char *utf8; /* key in UTF-8 form */
+ char *rendered; /* key in UTF-8 or ASCII form */
const struct mpage *mpage; /* if set, the owning parse */
uint64_t mask; /* bitmask in sequence */
- char key[]; /* the string itself */
+ char key[]; /* may contain escape sequences */
};
struct inodev {
struct mlink *next; /* singly linked list */
};
-struct title {
- char *title; /* name(sec/arch) given inside the file */
- char *file; /* file name in case of mismatch */
-};
-
enum stmt {
STMT_DELETE_PAGE = 0, /* delete mpage */
STMT_INSERT_PAGE, /* insert mpage */
};
static void dbclose(int);
-static void dbindex(const struct mpage *, struct mchars *);
+static void dbadd(const struct mpage *, struct mchars *);
static int dbopen(int);
static void dbprune(void);
static void filescan(const char *);
static void mlink_free(struct mlink *);
static void mlinks_undupe(struct mpage *);
static void mpages_free(void);
-static void mpages_merge(struct mchars *, struct mparse *, int);
+static void mpages_merge(struct mchars *, struct mparse *);
static void parse_cat(struct mpage *);
static void parse_man(struct mpage *, const struct man_node *);
static void parse_mdoc(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);
-static void putkey(const struct mpage *,
- const char *, uint64_t);
+static void putkey(const struct mpage *, char *, uint64_t);
static void putkeys(const struct mpage *,
const char *, size_t, uint64_t);
static void putmdockey(const struct mpage *,
const struct mdoc_node *, uint64_t);
+static void render_key(struct mchars *, struct str *);
static void say(const char *, const char *, ...);
static int set_basedir(const char *);
static int treescan(void);
static size_t utf8(unsigned int, char [7]);
-static void utf8key(struct mchars *, struct str *);
static char *progname;
-static int use_all; /* use all found files */
static int nodb; /* no database changes */
+static int quick; /* abort the parse early */
+static int use_all; /* use all found files */
static int verb; /* print what we're doing */
static int warnings; /* warn about crap */
+static int write_utf8; /* write UTF-8 output; else ASCII */
static int exitcode; /* to be returned by main */
static enum op op; /* operational mode */
static char basedir[PATH_MAX]; /* current base directory */
path_arg = NULL;
op = OP_DEFAULT;
- while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW")))
+ while (-1 != (ch = getopt(argc, argv, "aC:d:nQT:tu:vW")))
switch (ch) {
case ('a'):
use_all = 1;
case ('n'):
nodb = 1;
break;
+ case ('Q'):
+ quick = 1;
+ break;
+ case ('T'):
+ if (strcmp(optarg, "utf8")) {
+ fprintf(stderr, "-T%s: Unsupported "
+ "output format\n", optarg);
+ goto usage;
+ }
+ write_utf8 = 1;
+ break;
case ('t'):
CHECKOP(op, ch);
dup2(STDOUT_FILENO, STDERR_FILENO);
exitcode = (int)MANDOCLEVEL_OK;
mp = mparse_alloc(MPARSE_AUTO,
- MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ MANDOCLEVEL_FATAL, NULL, NULL, quick);
mc = mchars_alloc();
ohash_init(&mpages, 6, &mpages_info);
if (OP_TEST != op)
dbprune();
if (OP_DELETE != op)
- mpages_merge(mc, mp, 0);
+ mpages_merge(mc, mp);
dbclose(1);
} else {
/*
if (0 == dbopen(0))
goto out;
- mpages_merge(mc, mp, warnings && !use_all);
+ mpages_merge(mc, mp);
dbclose(0);
if (j + 1 < dirs.sz) {
ohash_delete(&mlinks);
return(exitcode);
usage:
- fprintf(stderr, "usage: %s [-anvW] [-C file]\n"
- " %s [-anvW] dir ...\n"
- " %s [-nvW] -d dir [file ...]\n"
+ fprintf(stderr, "usage: %s [-anQvW] [-C file] [-Tutf8]\n"
+ " %s [-anQvW] [-Tutf8] dir ...\n"
+ " %s [-nQvW] [-Tutf8] -d dir [file ...]\n"
" %s [-nvW] -u dir [file ...]\n"
- " %s -t file ...\n",
+ " %s [-Q] -t file ...\n",
progname, progname, progname,
progname, progname);
exitcode = (int)MANDOCLEVEL_BADARG;
say(file, NULL);
return;
- } else if (OP_TEST != op && strstr(buf, basedir) != buf) {
+ }
+
+ if (strstr(buf, basedir) == buf)
+ start = buf + strlen(basedir) + 1;
+ else if (OP_TEST == op)
+ start = buf;
+ else {
exitcode = (int)MANDOCLEVEL_BADARG;
say("", "%s: outside base directory", buf);
return;
- } else if (-1 == stat(buf, &st)) {
+ }
+
+ if (-1 == stat(buf, &st)) {
exitcode = (int)MANDOCLEVEL_BADARG;
say(file, NULL);
return;
say(file, "Not a regular file");
return;
}
- start = buf + strlen(basedir);
+
mlink = mandoc_calloc(1, sizeof(struct mlink));
strlcpy(mlink->file, start, sizeof(mlink->file));
* and filename to determine whether the file is parsable or not.
*/
static void
-mpages_merge(struct mchars *mc, struct mparse *mp, int check_reachable)
+mpages_merge(struct mchars *mc, struct mparse *mp)
{
- struct ohash title_table;
- struct ohash_info title_info, str_info;
+ char any[] = "any";
+ struct ohash_info str_info;
struct mpage *mpage;
struct mlink *mlink;
struct mdoc *mdoc;
struct man *man;
- struct title *title_entry;
- char *title_str;
- const char *cp;
+ char *cp;
int match;
- unsigned int pslot, tslot;
+ unsigned int pslot;
enum mandoclevel lvl;
str_info.alloc = hash_alloc;
str_info.hfree = hash_free;
str_info.key_offset = offsetof(struct str, key);
- if (check_reachable) {
- title_info.alloc = hash_alloc;
- title_info.halloc = hash_halloc;
- title_info.hfree = hash_free;
- title_info.key_offset = offsetof(struct title, title);
- ohash_init(&title_table, 6, &title_info);
- }
+ if (0 == nodb)
+ SQL_EXEC("BEGIN TRANSACTION");
mpage = ohash_first(&mpages, &pslot);
while (NULL != mpage) {
mpage->title =
mandoc_strdup(mpage->mlinks->name);
}
-
- for (mlink = mpage->mlinks; mlink; mlink = mlink->next)
+ putkey(mpage, mpage->sec, TYPE_sec);
+ putkey(mpage, '\0' == *mpage->arch ?
+ any : mpage->arch, TYPE_arch);
+
+ for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
+ if ('\0' != *mlink->dsec)
+ putkey(mpage, mlink->dsec, TYPE_sec);
+ if ('\0' != *mlink->fsec)
+ putkey(mpage, mlink->fsec, TYPE_sec);
+ putkey(mpage, '\0' == *mlink->arch ?
+ any : mlink->arch, TYPE_arch);
putkey(mpage, mlink->name, TYPE_Nm);
+ }
if (warnings && !use_all) {
match = 0;
else
parse_cat(mpage);
- /*
- * Build a title string for the file. If it matches
- * the location of the file, remember the title as
- * found; else, remember it as missing.
- */
-
- if (check_reachable) {
- if (-1 == asprintf(&title_str, "%s(%s%s%s)",
- mpage->title, mpage->sec,
- '\0' == *mpage->arch ? "" : "/",
- mpage->arch)) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
- tslot = ohash_qlookup(&title_table, title_str);
- title_entry = ohash_find(&title_table, tslot);
- if (NULL == title_entry) {
- title_entry = mandoc_malloc(
- sizeof(struct title));
- title_entry->title = title_str;
- title_entry->file = mandoc_strdup(
- match ? "" : mpage->mlinks->file);
- ohash_insert(&title_table, tslot,
- title_entry);
- } else {
- if (match)
- *title_entry->file = '\0';
- free(title_str);
- }
- }
-
- dbindex(mpage, mc);
+ dbadd(mpage, mc);
ohash_delete(&strings);
mpage = ohash_next(&mpages, &pslot);
}
- if (check_reachable) {
- title_entry = ohash_first(&title_table, &tslot);
- while (NULL != title_entry) {
- if ('\0' != *title_entry->file)
- say(title_entry->file,
- "Probably unreachable, title is %s",
- title_entry->title);
- free(title_entry->title);
- free(title_entry->file);
- free(title_entry);
- title_entry = ohash_next(&title_table, &tslot);
- }
- ohash_delete(&title_table);
- }
+ if (0 == nodb)
+ SQL_EXEC("END TRANSACTION");
}
static void
* Put a type/word pair into the word database for this particular file.
*/
static void
-putkey(const struct mpage *mpage, const char *value, uint64_t type)
+putkey(const struct mpage *mpage, char *value, uint64_t type)
{
+	char	*cp;
assert(NULL != value);
+	/*
+	 * Architecture keys are folded to lower case before being
+	 * stored.  The fold is done in place, in the caller's buffer,
+	 * which is why value is not const-qualified.
+	 * tolower(3) returns its argument unchanged unless it is an
+	 * upper-case letter, so no separate isupper(3) test is needed.
+	 */
+	if (TYPE_arch == type)
+		for (cp = value; *cp; cp++)
+			*cp = tolower((unsigned char)*cp);
putkeys(mpage, value, strlen(value), type);
}
byte = start[sz];
start[sz] = '\0';
+ /*
+ * Assume a stray trailing comma in the
+ * name list if a name begins with a dash.
+ */
+
+ if ('-' == start[0] ||
+ ('\\' == start[0] && '-' == start[1]))
+ break;
+
putkey(mpage, start, TYPE_Nm);
if (' ' == byte) {
static int
parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)
{
- const char *cp;
+ char *cp;
if (NULL == (n = n->child) || MDOC_TEXT != n->type)
return(0);
const char *cp, size_t sz, uint64_t v)
{
struct str *s;
- unsigned int slot;
const char *end;
+ uint64_t mask;
+ unsigned int slot;
+ int i;
if (0 == sz)
return;
+	/*
+	 * At verbosity level 2 and up, report each key as it is added.
+	 * The loop stops at the lowest bit set in v and the matching
+	 * entry of mansearch_keynames[] is printed as the key name.
+	 * The key text is a counted string of sz bytes, so it has to
+	 * be printed with a precision ("%.*s"), not a field width;
+	 * the '*' argument must be an int, hence the cast.
+	 */
+
+	if (verb > 1) {
+		for (i = 0, mask = 1;
+		     i < mansearch_keymax;
+		     i++, mask <<= 1)
+			if (mask & v)
+				break;
+		say(mpage->mlinks->file, "Adding key %s=%.*s",
+		    mansearch_keynames[i], (int)sz, cp);
+	}
+
end = cp + sz;
slot = ohash_qlookupi(&strings, cp, &end);
s = ohash_find(&strings, slot);
}
/*
- * Store the UTF-8 version of a key, or alias the pointer if the key has
- * no UTF-8 transcription marks in it.
+ * Store the rendered version of a key, or alias the pointer
+ * if the key contains no escape sequences.
*/
static void
-utf8key(struct mchars *mc, struct str *key)
+render_key(struct mchars *mc, struct str *key)
{
size_t sz, bsz, pos;
- char utfbuf[7], res[5];
+ char utfbuf[7], res[6];
char *buf;
const char *seq, *cpp, *val;
int len, u;
enum mandoc_esc esc;
- assert(NULL == key->utf8);
+ assert(NULL == key->rendered);
res[0] = '\\';
res[1] = '\t';
res[2] = ASCII_NBRSP;
res[3] = ASCII_HYPH;
- res[4] = '\0';
+ res[4] = ASCII_BREAK;
+ res[5] = '\0';
val = key->key;
bsz = strlen(val);
* pointer as ourselvse and get out of here.
*/
if (strcspn(val, res) == bsz) {
- key->utf8 = key->key;
+ key->rendered = key->key;
return;
}
val += sz;
}
- if (ASCII_HYPH == *val) {
+ switch (*val) {
+ case (ASCII_HYPH):
buf[pos++] = '-';
val++;
continue;
- } else if ('\t' == *val || ASCII_NBRSP == *val) {
+ case ('\t'):
+ /* FALLTHROUGH */
+ case (ASCII_NBRSP):
buf[pos++] = ' ';
val++;
+ /* FALLTHROUGH */
+ case (ASCII_BREAK):
continue;
- } else if ('\\' != *val)
+ default:
+ break;
+ }
+ if ('\\' != *val)
break;
/* Read past the slash. */
* Parse the escape sequence and see if it's a
* predefined character or special character.
*/
+
esc = mandoc_escape
((const char **)&val, &seq, &len);
if (ESCAPE_ERROR == esc)
break;
-
if (ESCAPE_SPECIAL != esc)
continue;
- if (0 == (u = mchars_spec2cp(mc, seq, len)))
- continue;
/*
- * If we have a Unicode codepoint, try to convert that
- * to a UTF-8 byte string.
+ * Render the special character
+ * as either UTF-8 or ASCII.
*/
- cpp = utfbuf;
- if (0 == (sz = utf8(u, utfbuf)))
- continue;
+
+ if (write_utf8) {
+ if (0 == (u = mchars_spec2cp(mc, seq, len)))
+ continue;
+ cpp = utfbuf;
+ if (0 == (sz = utf8(u, utfbuf)))
+ continue;
+ sz = strlen(cpp);
+ } else {
+ cpp = mchars_spec2str(mc, seq, len, &sz);
+ if (NULL == cpp)
+ continue;
+ if (ASCII_NBRSP == *cpp) {
+ cpp = " ";
+ sz = 1;
+ }
+ }
/* Copy the rendered glyph into the stream. */
- sz = strlen(cpp);
bsz += sz;
-
buf = mandoc_realloc(buf, bsz);
-
memcpy(&buf[pos], cpp, sz);
pos += sz;
}
buf[pos] = '\0';
- key->utf8 = buf;
+ key->rendered = buf;
}
/*
* Flush the current page's terms (and their bits) into the database.
* Wrap the entire set of additions in a transaction to make sqlite be a
* little faster.
- * Also, UTF-8-encode the description at the last possible moment.
+ * Also, handle escape sequences at the last possible moment.
*/
static void
-dbindex(const struct mpage *mpage, struct mchars *mc)
+dbadd(const struct mpage *mpage, struct mchars *mc)
{
struct mlink *mlink;
struct str *key;
- const char *desc;
int64_t recno;
size_t i;
unsigned int slot;
if (verb)
- say(mpage->mlinks->file, "Adding to index");
+ say(mpage->mlinks->file, "Adding to database");
if (nodb)
return;
- desc = "";
- if (NULL != mpage->desc && '\0' != *mpage->desc) {
- key = ohash_find(&strings,
- ohash_qlookup(&strings, mpage->desc));
- assert(NULL != key);
- if (NULL == key->utf8)
- utf8key(mc, key);
- desc = key->utf8;
- }
-
- SQL_EXEC("BEGIN TRANSACTION");
-
i = 1;
- /*
- * XXX The following three lines are obsolete
- * and only kept for backward compatibility
- * until apropos(1) and friends have caught up.
- */
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->file);
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->dsec);
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->arch);
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, desc);
SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
SQL_STEP(stmts[STMT_INSERT_PAGE]);
recno = sqlite3_last_insert_rowid(db);
for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
i = 1;
- SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->file);
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
for (key = ohash_first(&strings, &slot); NULL != key;
key = ohash_next(&strings, &slot)) {
assert(key->mpage == mpage);
- if (NULL == key->utf8)
- utf8key(mc, key);
+ if (NULL == key->rendered)
+ render_key(mc, key);
i = 1;
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);
- SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered);
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno);
SQL_STEP(stmts[STMT_INSERT_KEY]);
sqlite3_reset(stmts[STMT_INSERT_KEY]);
- if (key->utf8 != key->key)
- free(key->utf8);
+ if (key->rendered != key->key)
+ free(key->rendered);
free(key);
}
-
- SQL_EXEC("END TRANSACTION");
}
static void
size_t i;
unsigned int slot;
- if (nodb)
- return;
+ if (0 == nodb)
+ SQL_EXEC("BEGIN TRANSACTION");
- mpage = ohash_first(&mpages, &slot);
- while (NULL != mpage) {
+ for (mpage = ohash_first(&mpages, &slot); NULL != mpage;
+ mpage = ohash_next(&mpages, &slot)) {
mlink = mpage->mlinks;
- i = 1;
- SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], i, mlink->file);
- SQL_STEP(stmts[STMT_DELETE_PAGE]);
- sqlite3_reset(stmts[STMT_DELETE_PAGE]);
if (verb)
- say(mlink->file, "Deleted from index");
- mpage = ohash_next(&mpages, &slot);
+ say(mlink->file, "Deleting from database");
+ if (nodb)
+ continue;
+ for ( ; NULL != mlink; mlink = mlink->next) {
+ i = 1;
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->dsec);
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->arch);
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->name);
+ SQL_STEP(stmts[STMT_DELETE_PAGE]);
+ sqlite3_reset(stmts[STMT_DELETE_PAGE]);
+ }
}
+
+ if (0 == nodb)
+ SQL_EXEC("END TRANSACTION");
}
/*
return(0);
}
- /*
- * XXX The first three columns in table mpages are obsolete
- * and only kept for backward compatibility
- * until apropos(1) and friends have caught up.
- */
sql = "CREATE TABLE \"mpages\" (\n"
- " \"file\" TEXT NOT NULL,\n"
- " \"sec\" TEXT NOT NULL,\n"
- " \"arch\" TEXT NOT NULL,\n"
- " \"desc\" TEXT NOT NULL,\n"
" \"form\" INTEGER NOT NULL,\n"
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
");\n"
"\n"
"CREATE TABLE \"mlinks\" (\n"
- " \"file\" TEXT NOT NULL,\n"
" \"sec\" TEXT NOT NULL,\n"
" \"arch\" TEXT NOT NULL,\n"
" \"name\" TEXT NOT NULL,\n"
" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
- "ON DELETE CASCADE,\n"
- " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+ "ON DELETE CASCADE\n"
");\n"
"\n"
"CREATE TABLE \"keys\" (\n"
" \"bits\" INTEGER NOT NULL,\n"
" \"key\" TEXT NOT NULL,\n"
" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
- "ON DELETE CASCADE,\n"
- " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
- ");\n"
- "\n"
- "CREATE INDEX \"key_index\" ON keys (key);\n";
+ "ON DELETE CASCADE\n"
+ ");\n";
if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
exitcode = (int)MANDOCLEVEL_SYSERR;
prepare_statements:
SQL_EXEC("PRAGMA foreign_keys = ON");
- sql = "DELETE FROM mpages where file=?";
+ sql = "DELETE FROM mpages WHERE id IN "
+ "(SELECT pageid FROM mlinks WHERE "
+ "sec=? AND arch=? AND name=?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
sql = "INSERT INTO mpages "
- "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)";
+ "(form) VALUES (?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
sql = "INSERT INTO mlinks "
- "(file,sec,arch,name,pageid) VALUES (?,?,?,?,?)";
+ "(sec,arch,name,pageid) VALUES (?,?,?,?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
sql = "INSERT INTO keys "
"(bits,key,pageid) VALUES (?,?,?)";