-/* $Id: mandocdb.c,v 1.114 2014/01/22 20:58:39 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.124 2014/03/26 21:39:38 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
#endif
#include <sys/stat.h>
+#include <sys/wait.h>
#include <assert.h>
#include <ctype.h>
#include "mdoc.h"
#include "man.h"
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "manpath.h"
#include "mansearch.h"
struct mpage {
struct inodev inodev; /* used for hashing routine */
+ int64_t recno; /* id in mpages SQL table */
enum form form; /* format from file content */
char *sec; /* section from file content */
char *arch; /* architecture from file content */
char *name; /* name from file name (not empty) */
char *fsec; /* section from file name suffix */
struct mlink *next; /* singly linked list */
+ struct mpage *mpage; /* parent */
+ int gzip; /* filename has a .gz suffix */
};
enum stmt {
};
static void dbclose(int);
-static void dbadd(const struct mpage *, struct mchars *);
+static void dbadd(struct mpage *, struct mchars *);
+static void dbadd_mlink(const struct mlink *mlink);
static int dbopen(int);
static void dbprune(void);
static void filescan(const char *);
static void mlinks_undupe(struct mpage *);
static void mpages_free(void);
static void mpages_merge(struct mchars *, struct mparse *);
-static void parse_cat(struct mpage *);
+static void parse_cat(struct mpage *, int);
static void parse_man(struct mpage *, const struct man_node *);
static void parse_mdoc(struct mpage *, const struct mdoc_node *);
static int parse_mdoc_body(struct mpage *, const struct mdoc_node *);
static int treescan(void);
static size_t utf8(unsigned int, char [7]);
+static char tempfilename[32]; /* temporary database path built by dbopen(); "" when none is in use */
static char *progname;
static int nodb; /* no database changes */
-static int quick; /* abort the parse early */
+static int mparse_options; /* MPARSE_* flags handed to mparse_alloc(), e.g. MPARSE_QUICK for -Q */
static int use_all; /* use all found files */
static int verb; /* print what we're doing */
static int warnings; /* warn about crap */
nodb = 1;
break;
case ('Q'):
- quick = 1;
+ mparse_options |= MPARSE_QUICK;
break;
case ('T'):
if (strcmp(optarg, "utf8")) {
}
exitcode = (int)MANDOCLEVEL_OK;
- mp = mparse_alloc(MPARSE_AUTO,
- MANDOCLEVEL_FATAL, NULL, NULL, quick);
+ mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL);
mc = mchars_alloc();
ohash_init(&mpages, 6, &mpages_info);
FTS *f;
FTSENT *ff;
struct mlink *mlink;
- int dform;
+ int dform, gzip;
char *dsec, *arch, *fsec, *cp;
const char *path;
const char *argv[2];
f = fts_open((char * const *)argv, FTS_LOGICAL, NULL);
if (NULL == f) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say("", NULL);
+ say("", "&fts_open");
return(0);
}
if (warnings)
say(path, "Extraneous file");
continue;
- } else if (NULL == (fsec =
- strrchr(ff->fts_name, '.'))) {
+ }
+ gzip = 0;
+ fsec = NULL;
+ while (NULL == fsec) {
+ fsec = strrchr(ff->fts_name, '.');
+ if (NULL == fsec || strcmp(fsec+1, "gz"))
+ break;
+ gzip = 1;
+ *fsec = '\0';
+ fsec = NULL;
+ }
+ if (NULL == fsec) {
if ( ! use_all) {
if (warnings)
say(path,
if (warnings)
say(path, "Skip html");
continue;
- } else if (0 == strcmp(fsec, "gz")) {
- if (warnings)
- say(path, "Skip gz");
- continue;
} else if (0 == strcmp(fsec, "ps")) {
if (warnings)
say(path, "Skip ps");
mlink->arch = arch;
mlink->name = ff->fts_name;
mlink->fsec = fsec;
+ mlink->gzip = gzip;
mlink_add(mlink, ff->fts_statp);
continue;
} else if (FTS_D != ff->fts_info &&
if (NULL == realpath(file, buf)) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, NULL);
+ say(file, "&realpath");
return;
}
if (-1 == stat(buf, &st)) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, NULL);
+ say(file, "&stat");
return;
} else if ( ! (S_IFREG & st.st_mode)) {
exitcode = (int)MANDOCLEVEL_BADARG;
} else
mlink->next = mpage->mlinks;
mpage->mlinks = mlink;
+ mlink->mpage = mpage;
}
static void
{
char any[] = "any";
struct ohash_info str_info;
- struct mpage *mpage;
- struct mlink *mlink;
+ int fd[2];
+ struct mpage *mpage, *mpage_dest;
+ struct mlink *mlink, *mlink_dest;
struct mdoc *mdoc;
struct man *man;
+ char *sodest;
char *cp;
- int match;
+ pid_t child_pid;
+ int match, status;
unsigned int pslot;
enum mandoclevel lvl;
mparse_reset(mp);
mdoc = NULL;
man = NULL;
+ sodest = NULL;
+ child_pid = 0;
+ fd[0] = -1;
+ fd[1] = -1;
+
+ if (mpage->mlinks->gzip) {
+ if (-1 == pipe(fd)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file, "&pipe gunzip");
+ goto nextpage;
+ }
+ switch (child_pid = fork()) {
+ case (-1):
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file, "&fork gunzip");
+ child_pid = 0;
+ close(fd[1]);
+ close(fd[0]);
+ goto nextpage;
+ case (0):
+ close(fd[0]);
+ if (-1 == dup2(fd[1], STDOUT_FILENO)) {
+ say(mpage->mlinks->file,
+ "&dup gunzip");
+ exit(1);
+ }
+ execlp("gunzip", "gunzip", "-c",
+ mpage->mlinks->file, NULL);
+ say(mpage->mlinks->file, "&exec gunzip");
+ exit(1);
+ default:
+ close(fd[1]);
+ break;
+ }
+ }
/*
* Try interpreting the file as mdoc(7) or man(7)
*/
if (FORM_CAT != mpage->mlinks->dform ||
FORM_CAT != mpage->mlinks->fform) {
- lvl = mparse_readfd(mp, -1, mpage->mlinks->file);
+ lvl = mparse_readfd(mp, fd[0], mpage->mlinks->file);
if (lvl < MANDOCLEVEL_FATAL)
- mparse_result(mp, &mdoc, &man);
+ mparse_result(mp, &mdoc, &man, &sodest);
}
- if (NULL != mdoc) {
+ if (NULL != sodest) {
+ mlink_dest = ohash_find(&mlinks,
+ ohash_qlookup(&mlinks, sodest));
+ if (NULL != mlink_dest) {
+
+ /* The .so target exists. */
+
+ mpage_dest = mlink_dest->mpage;
+ mlink = mpage->mlinks;
+ while (1) {
+ mlink->mpage = mpage_dest;
+
+ /*
+ * If the target was already
+ * processed, add the links
+ * to the database now.
+ * Otherwise, this will
+ * happen when we come
+ * to the target.
+ */
+
+ if (mpage_dest->recno)
+ dbadd_mlink(mlink);
+
+ if (NULL == mlink->next)
+ break;
+ mlink = mlink->next;
+ }
+
+ /* Move all links to the target. */
+
+ mlink->next = mlink_dest->next;
+ mlink_dest->next = mpage->mlinks;
+ mpage->mlinks = NULL;
+ }
+ goto nextpage;
+ } else if (NULL != mdoc) {
mpage->form = FORM_SRC;
mpage->sec =
mandoc_strdup(mdoc_meta(mdoc)->msec);
} else if (NULL != man)
parse_man(mpage, man_node(man));
else
- parse_cat(mpage);
+ parse_cat(mpage, fd[0]);
dbadd(mpage, mc);
+
+nextpage:
+ if (child_pid) {
+ if (-1 == waitpid(child_pid, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file, "&wait gunzip");
+ } else if (WIFSIGNALED(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file,
+ "gunzip died from signal %d",
+ WTERMSIG(status));
+ } else if (WEXITSTATUS(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file,
+ "gunzip failed with code %d",
+ WEXITSTATUS(status));
+ }
+ }
ohash_delete(&strings);
mpage = ohash_next(&mpages, &pslot);
}
}
static void
-parse_cat(struct mpage *mpage)
+parse_cat(struct mpage *mpage, int fd)
{
FILE *stream;
char *line, *p, *title;
size_t len, plen, titlesz;
- if (NULL == (stream = fopen(mpage->mlinks->file, "r"))) {
+ stream = (-1 == fd) ?
+ fopen(mpage->mlinks->file, "r") :
+ fdopen(fd, "r");
+ if (NULL == stream) {
if (warnings)
- say(mpage->mlinks->file, NULL);
+ say(mpage->mlinks->file, "&fopen");
return;
}
parse_man(struct mpage *mpage, const struct man_node *n)
{
const struct man_node *head, *body;
- char *start, *sv, *title;
+ char *start, *title;
char byte;
- size_t sz, titlesz;
+ size_t sz;
if (NULL == n)
return;
NULL != (head = (head->child)) &&
MAN_TEXT == head->type &&
0 == strcmp(head->string, "NAME") &&
- NULL != (body = body->child) &&
- MAN_TEXT == body->type) {
-
- title = NULL;
- titlesz = 0;
+ NULL != body->child) {
/*
* Suck the entire NAME section into memory.
* NAME sections over many lines.
*/
- for ( ; NULL != body; body = body->next) {
- if (MAN_TEXT != body->type)
- break;
- if (0 == (sz = strlen(body->string)))
- continue;
- title = mandoc_realloc
- (title, titlesz + sz + 1);
- memcpy(title + titlesz, body->string, sz);
- titlesz += sz + 1;
- title[titlesz - 1] = ' ';
- }
+ title = NULL;
+ man_deroff(&title, body);
if (NULL == title)
return;
- title = mandoc_realloc(title, titlesz + 1);
- title[titlesz] = '\0';
-
- /* Skip leading space. */
-
- sv = title;
- while (isspace((unsigned char)*sv))
- sv++;
-
- if (0 == (sz = strlen(sv))) {
- free(title);
- return;
- }
-
- /* Erase trailing space. */
-
- start = &sv[sz - 1];
- while (start > sv && isspace((unsigned char)*start))
- *start-- = '\0';
-
- if (start == sv) {
- free(title);
- return;
- }
-
- start = sv;
-
/*
* Go through a special heuristic dance here.
* Conventionally, one or more manual names are
* the name parts here.
*/
+ start = title;
for ( ;; ) {
sz = strcspn(start, " ,");
if ('\0' == start[sz])
start++;
}
- if (sv == start) {
+ if (start == title) {
putkey(mpage, start, TYPE_Nm);
free(title);
return;
return(0);
}
- if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string);
putkey(mpage, cp, TYPE_Xr);
free(cp);
return(0);
static int
parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n)
{
- size_t sz;
-
- if (MDOC_BODY != n->type)
- return(0);
- /*
- * Special-case the `Nd' because we need to put the description
- * into the document table.
- */
-
- for (n = n->child; NULL != n; n = n->next) {
- if (MDOC_TEXT == n->type) {
- if (NULL != mpage->desc) {
- sz = strlen(mpage->desc) +
- strlen(n->string) + 2;
- mpage->desc = mandoc_realloc(
- mpage->desc, sz);
- strlcat(mpage->desc, " ", sz);
- strlcat(mpage->desc, n->string, sz);
- } else
- mpage->desc = mandoc_strdup(n->string);
- }
- if (NULL != n->child)
- parse_mdoc_Nd(mpage, n);
- }
- return(1);
+ if (MDOC_BODY == n->type) /* act on body nodes only; every other node type is left alone */
+ mdoc_deroff(&mpage->desc, n); /* presumably gathers the node's text into mpage->desc, replacing the removed loop -- confirm against mdoc_deroff() */
+ return(0); /* now 0 on every path; the removed code returned 1 after handling a body */
}
static int
key->rendered = buf;
}
+static void
+dbadd_mlink(const struct mlink *mlink) /* store one link by running STMT_INSERT_LINK once */
+{
+ size_t i; /* bind position handed to the SQL_BIND_* macros */
+
+ i = 1; /* presumably advanced by each SQL_BIND_* macro; macro definitions are not visible here */
+ SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
+ SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->recno); /* id of the parent page; callers only get here once recno has been assigned */
+ SQL_STEP(stmts[STMT_INSERT_LINK]);
+ sqlite3_reset(stmts[STMT_INSERT_LINK]); /* rewind the prepared statement so the next link can reuse it */
+}
+
/*
* Flush the current page's terms (and their bits) into the database.
* Wrap the entire set of additions in a transaction to make sqlite be a
* Also, handle escape sequences at the last possible moment.
*/
static void
-dbadd(const struct mpage *mpage, struct mchars *mc)
+dbadd(struct mpage *mpage, struct mchars *mc)
{
struct mlink *mlink;
struct str *key;
- int64_t recno;
size_t i;
unsigned int slot;
i = 1;
SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
SQL_STEP(stmts[STMT_INSERT_PAGE]);
- recno = sqlite3_last_insert_rowid(db);
+ mpage->recno = sqlite3_last_insert_rowid(db);
sqlite3_reset(stmts[STMT_INSERT_PAGE]);
- for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
- i = 1;
- SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
- SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
- SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
- SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, recno);
- SQL_STEP(stmts[STMT_INSERT_LINK]);
- sqlite3_reset(stmts[STMT_INSERT_LINK]);
- }
+ for (mlink = mpage->mlinks; mlink; mlink = mlink->next)
+ dbadd_mlink(mlink);
for (key = ohash_first(&strings, &slot); NULL != key;
key = ohash_next(&strings, &slot)) {
i = 1;
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);
SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered);
- SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno);
+ SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->recno);
SQL_STEP(stmts[STMT_INSERT_KEY]);
sqlite3_reset(stmts[STMT_INSERT_KEY]);
if (key->rendered != key->key)
dbclose(int real)
{
size_t i;
+ int status;
+ pid_t child;
if (nodb)
return;
if (real)
return;
- if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+ if ('\0' == *tempfilename) {
+ if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB, "&rename");
+ }
+ return;
+ }
+
+ switch (child = fork()) {
+ case (-1):
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&fork cmp");
+ return;
+ case (0):
+ execlp("cmp", "cmp", "-s",
+ tempfilename, MANDOC_DB, NULL);
+ say("", "&exec cmp");
+ exit(0);
+ default:
+ break;
+ }
+ if (-1 == waitpid(child, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&wait cmp");
+ } else if (WIFSIGNALED(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "cmp died from signal %d", WTERMSIG(status));
+ } else if (WEXITSTATUS(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB,
+ "Data changed, but cannot replace database");
+ }
+
+ *strrchr(tempfilename, '/') = '\0';
+ switch (child = fork()) {
+ case (-1):
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(MANDOC_DB, NULL);
+ say("", "&fork rm");
+ return;
+ case (0):
+ execlp("rm", "rm", "-rf", tempfilename, NULL);
+ say("", "&exec rm");
+ exit((int)MANDOCLEVEL_SYSERR);
+ default:
+ break;
+ }
+ if (-1 == waitpid(child, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&wait rm");
+ } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "%s: Cannot remove temporary directory",
+ tempfilename);
}
}
static int
dbopen(int real)
{
- const char *file, *sql;
+ const char *sql;
int rc, ofl;
if (nodb)
return(1);
+ *tempfilename = '\0';
ofl = SQLITE_OPEN_READWRITE;
- if (0 == real) {
- file = MANDOC_DB "~";
- if (-1 == remove(file) && ENOENT != errno) {
+
+ if (real) {
+ rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL);
+ if (SQLITE_OK != rc) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, NULL);
+ say(MANDOC_DB, "%s", sqlite3_errmsg(db));
return(0);
}
- ofl |= SQLITE_OPEN_EXCLUSIVE;
- } else
- file = MANDOC_DB;
+ goto prepare_statements;
+ }
- rc = sqlite3_open_v2(file, &db, ofl, NULL);
+ ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE;
+
+ remove(MANDOC_DB "~");
+ rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL);
if (SQLITE_OK == rc)
- goto prepare_statements;
- if (SQLITE_CANTOPEN != rc) {
+ goto create_tables;
+ if (MPARSE_QUICK & mparse_options) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, NULL);
+ say(MANDOC_DB "~", "%s", sqlite3_errmsg(db));
return(0);
}
- sqlite3_close(db);
- db = NULL;
-
- if (SQLITE_OK != (rc = sqlite3_open(file, &db))) {
+ if (strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX",
+ sizeof(tempfilename)) >= sizeof(tempfilename)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "/tmp/mandocdb.XXXXXX: Filename too long");
+ return(0);
+ }
+ if (NULL == mkdtemp(tempfilename)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&%s", tempfilename);
+ return(0);
+ }
+ if (strlcat(tempfilename, "/" MANDOC_DB,
+ sizeof(tempfilename)) >= sizeof(tempfilename)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "%s/" MANDOC_DB ": Filename too long",
+ tempfilename);
+ return(0);
+ }
+ rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL);
+ if (SQLITE_OK != rc) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, NULL);
+ say("", "%s: %s", tempfilename, sqlite3_errmsg(db));
return(0);
}
+create_tables:
sql = "CREATE TABLE \"mpages\" (\n"
" \"form\" INTEGER NOT NULL,\n"
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(file, "%s", sqlite3_errmsg(db));
+ say(MANDOC_DB, "%s", sqlite3_errmsg(db));
return(0);
}
if (NULL == getcwd(startdir, PATH_MAX)) {
exitcode = (int)MANDOCLEVEL_SYSERR;
if (NULL != targetdir)
- say(".", NULL);
+ say("", "&getcwd");
return(0);
}
if (-1 == (fd = open(startdir, O_RDONLY, 0))) {
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(startdir, NULL);
+ say("", "&open %s", startdir);
return(0);
}
if (NULL == targetdir)
close(fd);
basedir[0] = '\0';
exitcode = (int)MANDOCLEVEL_SYSERR;
- say(startdir, NULL);
+ say("", "&chdir %s", startdir);
return(0);
}
if (NULL == targetdir) {
if (NULL == realpath(targetdir, basedir)) {
basedir[0] = '\0';
exitcode = (int)MANDOCLEVEL_BADARG;
- say(targetdir, NULL);
+ say("", "&%s: realpath", targetdir);
return(0);
} else if (-1 == chdir(basedir)) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say("", NULL);
+ say("", "&chdir");
return(0);
}
return(1);
say(const char *file, const char *format, ...)
{
va_list ap;
+ int use_errno;
if ('\0' != *basedir)
fprintf(stderr, "%s", basedir);
fputs("//", stderr);
if ('\0' != *file)
fprintf(stderr, "%s", file);
- fputs(": ", stderr);
- if (NULL == format) {
- perror(NULL);
- return;
+ use_errno = 1;
+ if (NULL != format) {
+ switch (*format) {
+ case ('&'):
+ format++;
+ break;
+ case ('\0'):
+ format = NULL;
+ break;
+ default:
+ use_errno = 0;
+ break;
+ }
}
-
- va_start(ap, format);
- vfprintf(stderr, format, ap);
- va_end(ap);
-
- fputc('\n', stderr);
+ if (NULL != format) {
+ if ('\0' != *basedir || '\0' != *file)
+ fputs(": ", stderr);
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ }
+ if (use_errno) {
+ if ('\0' != *basedir || '\0' != *file || NULL != format)
+ fputs(": ", stderr);
+ perror(NULL);
+ } else
+ fputc('\n', stderr);
}