-/* $Id: mandocdb.c,v 1.97 2014/01/05 00:29:54 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.116 2014/03/19 21:51:20 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
#endif
#include <sys/stat.h>
+#include <sys/wait.h>
#include <assert.h>
#include <ctype.h>
#include "manpath.h"
#include "mansearch.h"
+extern int mansearch_keymax;
+extern const char *const mansearch_keynames[];
+
#define SQL_EXEC(_v) \
if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \
fprintf(stderr, "%s\n", sqlite3_errmsg(db))
struct mlink *next; /* singly linked list */
};
-struct title {
- char *title; /* name(sec/arch) given inside the file */
- char *file; /* file name in case of mismatch */
-};
-
enum stmt {
STMT_DELETE_PAGE = 0, /* delete mpage */
STMT_INSERT_PAGE, /* insert mpage */
};
static void dbclose(int);
-static void dbindex(const struct mpage *, struct mchars *);
+static void dbadd(const struct mpage *, struct mchars *);
static int dbopen(int);
static void dbprune(void);
static void filescan(const char *);
static void mlink_free(struct mlink *);
static void mlinks_undupe(struct mpage *);
static void mpages_free(void);
-static void mpages_merge(struct mchars *, struct mparse *, int);
+static void mpages_merge(struct mchars *, struct mparse *);
static void parse_cat(struct mpage *);
static void parse_man(struct mpage *, const struct man_node *);
static void parse_mdoc(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);
-static void putkey(const struct mpage *,
- const char *, uint64_t);
+static void putkey(const struct mpage *, char *, uint64_t);
static void putkeys(const struct mpage *,
const char *, size_t, uint64_t);
static void putmdockey(const struct mpage *,
static int treescan(void);
static size_t utf8(unsigned int, char [7]);
+static char tempfilename[32];
static char *progname;
-static int use_all; /* use all found files */
static int nodb; /* no database changes */
+static int mparse_options; /* abort the parse early */
+static int use_all; /* use all found files */
static int verb; /* print what we're doing */
static int warnings; /* warn about crap */
static int write_utf8; /* write UTF-8 output; else ASCII */
path_arg = NULL;
op = OP_DEFAULT;
+ mparse_options = MPARSE_SO;
- while (-1 != (ch = getopt(argc, argv, "aC:d:nT:tu:vW")))
+ while (-1 != (ch = getopt(argc, argv, "aC:d:nQT:tu:vW")))
switch (ch) {
case ('a'):
use_all = 1;
case ('n'):
nodb = 1;
break;
+ case ('Q'):
+ mparse_options |= MPARSE_QUICK;
+ break;
case ('T'):
if (strcmp(optarg, "utf8")) {
fprintf(stderr, "-T%s: Unsupported "
}
exitcode = (int)MANDOCLEVEL_OK;
- mp = mparse_alloc(MPARSE_AUTO,
- MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL);
mc = mchars_alloc();
ohash_init(&mpages, 6, &mpages_info);
if (OP_TEST != op)
dbprune();
if (OP_DELETE != op)
- mpages_merge(mc, mp, 0);
+ mpages_merge(mc, mp);
dbclose(1);
} else {
/*
if (0 == dbopen(0))
goto out;
- mpages_merge(mc, mp, warnings && !use_all);
+ mpages_merge(mc, mp);
dbclose(0);
if (j + 1 < dirs.sz) {
ohash_delete(&mlinks);
return(exitcode);
usage:
- fprintf(stderr, "usage: %s [-anvW] [-C file] [-Tutf8]\n"
- " %s [-anvW] [-Tutf8] dir ...\n"
- " %s [-nvW] [-Tutf8] -d dir [file ...]\n"
+ fprintf(stderr, "usage: %s [-anQvW] [-C file] [-Tutf8]\n"
+ " %s [-anQvW] [-Tutf8] dir ...\n"
+ " %s [-nQvW] [-Tutf8] -d dir [file ...]\n"
" %s [-nvW] -u dir [file ...]\n"
- " %s -t file ...\n",
+ " %s [-Q] -t file ...\n",
progname, progname, progname,
progname, progname);
exitcode = (int)MANDOCLEVEL_BADARG;
say(file, NULL);
return;
- } else if (OP_TEST != op && strstr(buf, basedir) != buf) {
+ }
+
+ if (strstr(buf, basedir) == buf)
+ start = buf + strlen(basedir) + 1;
+ else if (OP_TEST == op)
+ start = buf;
+ else {
exitcode = (int)MANDOCLEVEL_BADARG;
say("", "%s: outside base directory", buf);
return;
- } else if (-1 == stat(buf, &st)) {
+ }
+
+ if (-1 == stat(buf, &st)) {
exitcode = (int)MANDOCLEVEL_BADARG;
say(file, NULL);
return;
say(file, "Not a regular file");
return;
}
- start = buf + strlen(basedir);
+
mlink = mandoc_calloc(1, sizeof(struct mlink));
strlcpy(mlink->file, start, sizeof(mlink->file));
* and filename to determine whether the file is parsable or not.
*/
static void
-mpages_merge(struct mchars *mc, struct mparse *mp, int check_reachable)
+mpages_merge(struct mchars *mc, struct mparse *mp) /* walks every entry of the mpages table, collects its keys and hands it to dbadd(); the check_reachable flag and its title table are gone */
{
- struct ohash title_table;
- struct ohash_info title_info, str_info;
+ char any[] = "any"; /* writable array, not a literal: putkey() takes a non-const pointer and may rewrite TYPE_arch values in place */
+ struct ohash_info str_info;
struct mpage *mpage;
struct mlink *mlink;
struct mdoc *mdoc;
struct man *man;
- struct title *title_entry;
- char *title_str;
- const char *cp;
+ char *cp;
int match;
- unsigned int pslot, tslot;
+ unsigned int pslot; /* iteration cursor for ohash_first()/ohash_next() over mpages */
enum mandoclevel lvl;
str_info.alloc = hash_alloc;
str_info.hfree = hash_free;
str_info.key_offset = offsetof(struct str, key);
- if (check_reachable) {
- title_info.alloc = hash_alloc;
- title_info.halloc = hash_halloc;
- title_info.hfree = hash_free;
- title_info.key_offset = offsetof(struct title, title);
- ohash_init(&title_table, 6, &title_info);
- }
+ if (0 == nodb) /* nodb means no database changes, so no SQL is issued at all in that mode */
+ SQL_EXEC("BEGIN TRANSACTION"); /* a single transaction now spans the whole loop over all pages */
mpage = ohash_first(&mpages, &pslot);
while (NULL != mpage) {
mandoc_strdup(mpage->mlinks->name);
}
putkey(mpage, mpage->sec, TYPE_sec);
- putkey(mpage, mpage->arch, TYPE_arch);
+ putkey(mpage, '\0' == *mpage->arch ? /* an empty architecture string is replaced by the any[] buffer */
+ any : mpage->arch, TYPE_arch);
for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
if ('\0' != *mlink->dsec)
putkey(mpage, mlink->dsec, TYPE_sec);
if ('\0' != *mlink->fsec)
putkey(mpage, mlink->fsec, TYPE_sec);
- putkey(mpage, mlink->arch, TYPE_arch);
+ putkey(mpage, '\0' == *mlink->arch ? /* same substitution for each link of the page */
+ any : mlink->arch, TYPE_arch);
putkey(mpage, mlink->name, TYPE_Nm);
}
else
parse_cat(mpage);
- /*
- * Build a title string for the file. If it matches
- * the location of the file, remember the title as
- * found; else, remember it as missing.
- */
-
- if (check_reachable) {
- if (-1 == asprintf(&title_str, "%s(%s%s%s)",
- mpage->title, mpage->sec,
- '\0' == *mpage->arch ? "" : "/",
- mpage->arch)) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
- tslot = ohash_qlookup(&title_table, title_str);
- title_entry = ohash_find(&title_table, tslot);
- if (NULL == title_entry) {
- title_entry = mandoc_malloc(
- sizeof(struct title));
- title_entry->title = title_str;
- title_entry->file = mandoc_strdup(
- match ? "" : mpage->mlinks->file);
- ohash_insert(&title_table, tslot,
- title_entry);
- } else {
- if (match)
- *title_entry->file = '\0';
- free(title_str);
- }
- }
-
- dbindex(mpage, mc);
+ dbadd(mpage, mc); /* dbadd() is the new name of dbindex() */
ohash_delete(&strings);
mpage = ohash_next(&mpages, &pslot);
}
- if (check_reachable) {
- title_entry = ohash_first(&title_table, &tslot);
- while (NULL != title_entry) {
- if ('\0' != *title_entry->file)
- say(title_entry->file,
- "Probably unreachable, title is %s",
- title_entry->title);
- free(title_entry->title);
- free(title_entry->file);
- free(title_entry);
- title_entry = ohash_next(&title_table, &tslot);
- }
- ohash_delete(&title_table);
- }
+ if (0 == nodb) /* mirrors the guard used when the transaction was opened */
+ SQL_EXEC("END TRANSACTION"); /* closes the transaction started before the page loop */
}
static void
* Put a type/word pair into the word database for this particular file.
*/
static void
-putkey(const struct mpage *mpage, const char *value, uint64_t type)
+putkey(const struct mpage *mpage, char *value, uint64_t type) /* value lost its const qualifier because TYPE_arch values are now modified in place */
{
+ char *cp; /* cursor over value during case folding */
assert(NULL != value);
+ if (TYPE_arch == type) /* only architecture keys are case-folded; other key types pass through untouched */
+ for (cp = value; *cp; cp++)
+ if (isupper((unsigned char)*cp)) /* cast keeps the ctype argument within unsigned char range */
+ *cp = _tolower((unsigned char)*cp); /* NOTE(review): _tolower() is not ISO C; tolower() would be the portable spelling - confirm before changing */
putkeys(mpage, value, strlen(value), type);
}
byte = start[sz];
start[sz] = '\0';
+ /*
+ * Assume a stray trailing comma in the
+ * name list if a name begins with a dash.
+ */
+
+ if ('-' == start[0] ||
+ ('\\' == start[0] && '-' == start[1]))
+ break;
+
putkey(mpage, start, TYPE_Nm);
if (' ' == byte) {
static int
parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)
{
- const char *cp;
+ char *cp;
if (NULL == (n = n->child) || MDOC_TEXT != n->type)
return(0);
const char *cp, size_t sz, uint64_t v)
{
struct str *s;
- unsigned int slot;
const char *end;
+ uint64_t mask;
+ unsigned int slot;
+ int i;
if (0 == sz)
return;
+ if (verb > 1) {
+ for (i = 0, mask = 1;
+ i < mansearch_keymax;
+ i++, mask <<= 1)
+ if (mask & v)
+ break;
+ say(mpage->mlinks->file, "Adding key %s=%*s",
+ mansearch_keynames[i], sz, cp);
+ }
+
end = cp + sz;
slot = ohash_qlookupi(&strings, cp, &end);
s = ohash_find(&strings, slot);
render_key(struct mchars *mc, struct str *key)
{
size_t sz, bsz, pos;
- char utfbuf[7], res[5];
+ char utfbuf[7], res[6];
char *buf;
const char *seq, *cpp, *val;
int len, u;
res[1] = '\t';
res[2] = ASCII_NBRSP;
res[3] = ASCII_HYPH;
- res[4] = '\0';
+ res[4] = ASCII_BREAK;
+ res[5] = '\0';
val = key->key;
bsz = strlen(val);
val += sz;
}
- if (ASCII_HYPH == *val) {
+ switch (*val) {
+ case (ASCII_HYPH):
buf[pos++] = '-';
val++;
continue;
- } else if ('\t' == *val || ASCII_NBRSP == *val) {
+ case ('\t'):
+ /* FALLTHROUGH */
+ case (ASCII_NBRSP):
buf[pos++] = ' ';
val++;
+ /* FALLTHROUGH */
+ case (ASCII_BREAK):
continue;
- } else if ('\\' != *val)
+ default:
+ break;
+ }
+ if ('\\' != *val)
break;
/* Read past the slash. */
* Also, handle escape sequences at the last possible moment.
*/
static void
-dbindex(const struct mpage *mpage, struct mchars *mc)
+dbadd(const struct mpage *mpage, struct mchars *mc) /* inserts the page and its links into the database; it no longer opens or closes a transaction itself, mpages_merge() does that around its loop */
{
struct mlink *mlink;
struct str *key;
- const char *desc;
int64_t recno;
size_t i;
unsigned int slot;
if (verb)
- say(mpage->mlinks->file, "Adding to index");
+ say(mpage->mlinks->file, "Adding to database"); /* printed before the nodb check below, so it also appears when nothing is written */
if (nodb)
return;
- desc = "";
- if (NULL != mpage->desc && '\0' != *mpage->desc) {
- key = ohash_find(&strings,
- ohash_qlookup(&strings, mpage->desc));
- assert(NULL != key);
- if (NULL == key->rendered)
- render_key(mc, key);
- desc = key->rendered;
- }
-
- SQL_EXEC("BEGIN TRANSACTION");
-
i = 1;
- /*
- * XXX The following three lines are obsolete
- * and only kept for backward compatibility
- * until apropos(1) and friends have caught up.
- */
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->file);
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->dsec);
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->arch);
- SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, desc);
SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
SQL_STEP(stmts[STMT_INSERT_PAGE]);
recno = sqlite3_last_insert_rowid(db);
for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
i = 1;
- SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->file);
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
free(key->rendered);
free(key);
}
-
- SQL_EXEC("END TRANSACTION");
}
static void
size_t i;
unsigned int slot;
- if (nodb)
- return;
+ if (0 == nodb)
+ SQL_EXEC("BEGIN TRANSACTION");
- mpage = ohash_first(&mpages, &slot);
- while (NULL != mpage) {
+ for (mpage = ohash_first(&mpages, &slot); NULL != mpage;
+ mpage = ohash_next(&mpages, &slot)) {
mlink = mpage->mlinks;
- i = 1;
- SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], i, mlink->file);
- SQL_STEP(stmts[STMT_DELETE_PAGE]);
- sqlite3_reset(stmts[STMT_DELETE_PAGE]);
if (verb)
- say(mlink->file, "Deleted from index");
- mpage = ohash_next(&mpages, &slot);
+ say(mlink->file, "Deleting from database");
+ if (nodb)
+ continue;
+ for ( ; NULL != mlink; mlink = mlink->next) {
+ i = 1;
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->dsec);
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->arch);
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->name);
+ SQL_STEP(stmts[STMT_DELETE_PAGE]);
+ sqlite3_reset(stmts[STMT_DELETE_PAGE]);
+ }
}
+
+ if (0 == nodb)
+ SQL_EXEC("END TRANSACTION");
}
/*
dbclose(int real)
{
size_t i;
+ int status; /* wait status of the cmp and rm child processes */
+ pid_t child;
if (nodb)
return;
if (real)
return;
- if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+ if ('\0' == *tempfilename) { /* empty name: dbopen() did not fall back to a temporary directory, so the MANDOC_DB~ file is moved into place */
+ if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB, "%s", strerror(errno));
+ }
+ return;
+ }
+
+ switch (child = fork()) { /* temporary-directory case: run cmp -s on the new file and the existing MANDOC_DB */
+ case (-1):
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("fork cmp", "%s", strerror(errno));
+ return;
+ case (0):
+ execlp("cmp", "cmp", "-s",
+ tempfilename, MANDOC_DB, NULL);
+ say("exec cmp", "%s", strerror(errno)); /* only reached when execlp() failed */
+ exit(0); /* NOTE(review): exit status 0 skips the parent's WEXITSTATUS branch, as if cmp had found no difference - confirm this is intended */
+ default:
+ break;
+ }
+ if (-1 == waitpid(child, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("wait cmp", "%s", strerror(errno));
+ } else if (WIFSIGNALED(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("cmp", "Died from a signal");
+ } else if (WEXITSTATUS(status)) { /* non-zero status from the cmp child is reported as changed data that could not be installed */
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(MANDOC_DB, NULL);
+ say(MANDOC_DB,
+ "Data changed, but cannot replace database");
+ }
+
+ *strrchr(tempfilename, '/') = '\0'; /* truncate at the last slash so only the directory part remains; relies on the name containing a slash, as built in dbopen() */
+ switch (child = fork()) { /* the temporary directory is removed regardless of the cmp outcome */
+ case (-1):
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("fork rm", "%s", strerror(errno));
+ return;
+ case (0):
+ execlp("rm", "rm", "-rf", tempfilename, NULL);
+ say("exec rm", "%s", strerror(errno)); /* only reached when execlp() failed */
+ exit((int)MANDOCLEVEL_SYSERR); /* unlike the cmp child, a failed exec here is surfaced through a non-zero exit status */
+ default:
+ break;
+ }
+ if (-1 == waitpid(child, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("wait rm", "%s", strerror(errno));
+ } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { /* death by signal and non-zero exit are reported identically */
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(tempfilename,
+ "Cannot remove temporary directory");
}
}
static int
dbopen(int real)
{
- const char *file, *sql;
+ const char *sql;
int rc, ofl;
if (nodb)
return(1);
+ *tempfilename = '\0';
ofl = SQLITE_OPEN_READWRITE;
- if (0 == real) {
- file = MANDOC_DB "~";
- if (-1 == remove(file) && ENOENT != errno) {
+
+ if (real) {
+ rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL);
+ if (SQLITE_OK != rc) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, NULL);
+ say(MANDOC_DB, "%s", sqlite3_errmsg(db));
return(0);
}
- ofl |= SQLITE_OPEN_EXCLUSIVE;
- } else
- file = MANDOC_DB;
+ goto prepare_statements;
+ }
- rc = sqlite3_open_v2(file, &db, ofl, NULL);
+ ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE;
+
+ remove(MANDOC_DB "~");
+ rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL);
if (SQLITE_OK == rc)
- goto prepare_statements;
- if (SQLITE_CANTOPEN != rc) {
+ goto create_tables;
+ if (MPARSE_QUICK & mparse_options) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, NULL);
+ say(MANDOC_DB "~", "%s", sqlite3_errmsg(db));
return(0);
}
- sqlite3_close(db);
- db = NULL;
-
- if (SQLITE_OK != (rc = sqlite3_open(file, &db))) {
+ if (strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX",
+ sizeof(tempfilename)) >= sizeof(tempfilename)) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, NULL);
+ say("/tmp/mandocdb.XXXXXX", "Filename too long");
+ return(0);
+ }
+ if (NULL == mkdtemp(tempfilename)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(tempfilename, "%s", strerror(errno));
+ return(0);
+ }
+ if (strlcat(tempfilename, "/" MANDOC_DB,
+ sizeof(tempfilename)) >= sizeof(tempfilename)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(tempfilename, "Filename too long");
+ return(0);
+ }
+ rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL);
+ if (SQLITE_OK != rc) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(tempfilename, "%s", sqlite3_errmsg(db));
return(0);
}
- /*
- * XXX The first three columns in table mpages are obsolete
- * and only kept for backward compatibility
- * until apropos(1) and friends have caught up.
- */
+create_tables:
sql = "CREATE TABLE \"mpages\" (\n"
- " \"file\" TEXT NOT NULL,\n"
- " \"sec\" TEXT NOT NULL,\n"
- " \"arch\" TEXT NOT NULL,\n"
- " \"desc\" TEXT NOT NULL,\n"
" \"form\" INTEGER NOT NULL,\n"
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
");\n"
"\n"
"CREATE TABLE \"mlinks\" (\n"
- " \"file\" TEXT NOT NULL,\n"
" \"sec\" TEXT NOT NULL,\n"
" \"arch\" TEXT NOT NULL,\n"
" \"name\" TEXT NOT NULL,\n"
" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
- "ON DELETE CASCADE,\n"
- " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+ "ON DELETE CASCADE\n"
");\n"
"\n"
"CREATE TABLE \"keys\" (\n"
" \"bits\" INTEGER NOT NULL,\n"
" \"key\" TEXT NOT NULL,\n"
" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
- "ON DELETE CASCADE,\n"
- " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
- ");\n"
- "\n"
- "CREATE INDEX \"key_index\" ON keys (key);\n";
+ "ON DELETE CASCADE\n"
+ ");\n";
if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, "%s", sqlite3_errmsg(db));
+ say(MANDOC_DB, "%s", sqlite3_errmsg(db));
return(0);
}
prepare_statements:
SQL_EXEC("PRAGMA foreign_keys = ON");
- sql = "DELETE FROM mpages where file=?";
+ sql = "DELETE FROM mpages WHERE id IN "
+ "(SELECT pageid FROM mlinks WHERE "
+ "sec=? AND arch=? AND name=?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
sql = "INSERT INTO mpages "
- "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)";
+ "(form) VALUES (?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
sql = "INSERT INTO mlinks "
- "(file,sec,arch,name,pageid) VALUES (?,?,?,?,?)";
+ "(sec,arch,name,pageid) VALUES (?,?,?,?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
sql = "INSERT INTO keys "
"(bits,key,pageid) VALUES (?,?,?)";