about summary refs log tree commit diff stats homepage
diff options
context:
space:
mode:
-rw-r--r-- mandocdb.8 212
-rw-r--r-- mandocdb.c 2722
-rw-r--r-- mandocdb.h 3
3 files changed, 1436 insertions, 1501 deletions
diff --git a/mandocdb.8 b/mandocdb.8
index cb48359b..2c51b411 100644
--- a/mandocdb.8
+++ b/mandocdb.8
@@ -1,6 +1,6 @@
-.\" $Id: mandocdb.8,v 1.17 2011/12/25 21:00:23 schwarze Exp $
+.\" $Id: mandocdb.8,v 1.18 2012/06/08 10:43:01 kristaps Exp $
.\"
-.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: December 25 2011 $
+.Dd $Mdocdate: June 8 2012 $
.Dt MANDOCDB 8
.Os
.Sh NAME
@@ -22,17 +22,17 @@
.Nd index UNIX manuals
.Sh SYNOPSIS
.Nm
-.Op Fl avW
+.Op Fl anvW
.Op Fl C Ar file
.Nm
-.Op Fl avW
+.Op Fl anvW
.Ar dir ...
.Nm
-.Op Fl vW
+.Op Fl nvW
.Fl d Ar dir
.Op Ar
.Nm
-.Op Fl vW
+.Op Fl nvW
.Fl u Ar dir
.Op Ar
.Nm
@@ -42,21 +42,15 @@ The
.Nm
utility extracts keywords from
.Ux
-manuals and indexes them in a
-.Sx Keyword Database
-and
-.Sx Index Database
-for fast retrieval by
+manuals and indexes them in a database for fast retrieval by
.Xr apropos 1 ,
.Xr whatis 1 ,
and
-.Xr man 1 Ns 's
-.Fl k
-option.
+.Xr man 1 .
.Pp
By default,
.Nm
-creates databases in each
+creates a database in each
.Ar dir
using the files
.Sm off
@@ -70,14 +64,16 @@ and
.Op Ar arch Li /
.Ar title . Sy 0
.Sm on
-in that directory;
-existing databases are truncated.
+in that directory.
+Existing databases are replaced.
If
.Ar dir
is not provided,
.Nm
uses the default paths stipulated by
-.Xr man 1 .
+.Xr manpath 1 ,
+or
+.Xr man.conf 5 .
.Pp
The arguments are as follows:
.Bl -tag -width "-C file"
@@ -94,15 +90,17 @@ format.
Merge (remove and re-add)
.Ar
to the database in
-.Ar dir
-without truncating it.
+.Ar dir .
+.It Fl n
+Do not create or modify any database;
+scan and parse only.
.It Fl t Ar
Check the given
.Ar files
for potential problems.
-No databases are modified.
Implies
-.Fl a
+.Fl a ,
+.Fl n ,
and
.Fl W .
All diagnostic messages are printed to the standard output;
@@ -111,8 +109,7 @@ the standard error output is not used.
Remove
.Ar
from the database in
-.Ar dir
-without truncating it.
+.Ar dir .
.It Fl v
Display all files added or removed to the index.
.It Fl W
@@ -123,171 +120,28 @@ to the standard error output.
If fatal parse errors are encountered while parsing, the offending file
is printed to stderr, omitted from the index, and the parse continues
with the next input file.
-.Ss Index Database
-The index database,
-.Pa whatis.index ,
-is a
-.Xr recno 3
-database with record values consisting of
-.Pp
-.Bl -enum -compact
-.It
-the character
-.Cm d ,
-.Cm a ,
-or
-.Cm c
-to indicate the file type
-.Po
-.Xr mdoc 7 ,
-.Xr man 7 ,
-and post-formatted, respectively
-.Pc ,
-.It
-the filename relative to the databases' path,
-.It
-the manual section,
-.It
-the manual title,
-.It
-the architecture
-.Pq often empty ,
-.It
-and the description.
-.El
-.Pp
-Each of the above is NUL-terminated.
-.Pp
-If the record value is zero-length, it is unassigned.
-.Ss Keyword Database
-The keyword database,
-.Pa whatis.db ,
-is a
-.Xr btree 3
-database of NUL-terminated keywords (record length is non-zero string
-length plus one) mapping to a 16-byte binary field consisting of the
-64-bit keyword type and the 64-bit
-.Sx Index Database
-record number, both in network-byte order.
-.Pp
-The type bit-mask consists of the following
-values mapping into
-.Xr mdoc 7
-macro identifiers:
-.Pp
-.Bl -column "x0x0000000000000001ULLx" "xLix" -offset indent -compact
-.It Li 0x0000000000000001ULL Ta \&An
-.It Li 0x0000000000000002ULL Ta \&Ar
-.It Li 0x0000000000000004ULL Ta \&At
-.It Li 0x0000000000000008ULL Ta \&Bsx
-.It Li 0x0000000000000010ULL Ta \&Bx
-.It Li 0x0000000000000020ULL Ta \&Cd
-.It Li 0x0000000000000040ULL Ta \&Cm
-.It Li 0x0000000000000080ULL Ta \&Dv
-.It Li 0x0000000000000100ULL Ta \&Dx
-.It Li 0x0000000000000200ULL Ta \&Em
-.It Li 0x0000000000000400ULL Ta \&Er
-.It Li 0x0000000000000800ULL Ta \&Ev
-.It Li 0x0000000000001000ULL Ta \&Fa
-.It Li 0x0000000000002000ULL Ta \&Fl
-.It Li 0x0000000000004000ULL Ta \&Fn
-.It Li 0x0000000000008000ULL Ta \&Ft
-.It Li 0x0000000000010000ULL Ta \&Fx
-.It Li 0x0000000000020000ULL Ta \&Ic
-.It Li 0x0000000000040000ULL Ta \&In
-.It Li 0x0000000000080000ULL Ta \&Lb
-.It Li 0x0000000000100000ULL Ta \&Li
-.It Li 0x0000000000200000ULL Ta \&Lk
-.It Li 0x0000000000400000ULL Ta \&Ms
-.It Li 0x0000000000800000ULL Ta \&Mt
-.It Li 0x0000000001000000ULL Ta \&Nd
-.It Li 0x0000000002000000ULL Ta \&Nm
-.It Li 0x0000000004000000ULL Ta \&Nx
-.It Li 0x0000000008000000ULL Ta \&Ox
-.It Li 0x0000000010000000ULL Ta \&Pa
-.It Li 0x0000000020000000ULL Ta \&Rs
-.It Li 0x0000000040000000ULL Ta \&Sh
-.It Li 0x0000000080000000ULL Ta \&Ss
-.It Li 0x0000000100000000ULL Ta \&St
-.It Li 0x0000000200000000ULL Ta \&Sy
-.It Li 0x0000000400000000ULL Ta \&Tn
-.It Li 0x0000000800000000ULL Ta \&Va
-.It Li 0x0000001000000000ULL Ta \&Vt
-.It Li 0x0000002000000000ULL Ta \&Xr
-.El
-.Sh IMPLEMENTATION NOTES
-The time to construct a new database pair grows linearly with the
-number of keywords in the input files.
-However, removing or updating entries with
-.Fl u
-or
-.Fl d ,
-respectively, grows as a multiple of the index length and input size.
.Sh FILES
.Bl -tag -width Ds
-.It Pa whatis.db
-A
-.Xr btree 3
-keyword database mapping keywords to a type and file reference in
-.Pa whatis.index .
-.It Pa whatis.index
-A
-.Xr recno 3
-database of indexed file-names.
-.It Pa /etc/man.conf
-The default
-.Xr man 1
-configuration file.
+.It Pa mandocdb.db
+A database of manpages relative to the directory of the file.
+This file is portable across architectures and systems, so long as the
+manpage hierarchy it indexes does not change.
+.It Pa mandocdb.db~
+A temporary database used during scanning and parsing.
.El
.Sh EXIT STATUS
-The
-.Nm
-utility exits with one of the following values:
-.Pp
-.Bl -tag -width Ds -compact
-.It 0
-No errors occurred.
-.It 5
-Invalid command line arguments were specified.
-No input files have been read.
-.It 6
-An operating system error occurred, for example memory exhaustion or an
-error accessing input files.
-Such errors cause
-.Nm
-to exit at once, possibly in the middle of parsing or formatting a file.
-The output databases are corrupt and should be removed.
-.El
-.Sh DIAGNOSTICS
-If the following errors occur, the
-.Nm
-databases should be rebuilt.
-.Bl -diag
-.It "%s: Corrupt database"
-The keyword database file indicated by
-.Pa %s
-is unreadable.
-.It "%s: Corrupt index"
-The index database file indicated by
-.Pa %s
-is unreadable.
-.It "%s: Path too long"
-The file
-.Pa %s
-is too long.
-This usually indicates database corruption or invalid command-line
-arguments.
-.El
+.Ex -std
.Sh SEE ALSO
.Xr apropos 1 ,
.Xr man 1 ,
.Xr whatis 1 ,
-.Xr btree 3 ,
-.Xr recno 3 ,
.Xr man.conf 5
.Sh AUTHORS
The
.Nm
utility was written by
.An Kristaps Dzonsons ,
-.Mt kristaps@bsd.lv .
+.Mt kristaps@bsd.lv ,
+and
+.An Ingo Schwarze ,
+.Mt schwarze@openbsd.org .
diff --git a/mandocdb.c b/mandocdb.c
index 11e60657..824a72d7 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -1,4 +1,4 @@
-/* $Id: mandocdb.c,v 1.49 2012/05/27 17:48:57 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.50 2012/06/08 10:43:01 kristaps Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
@@ -20,42 +20,30 @@
#endif
#include <sys/param.h>
-#include <sys/types.h>
+#include <sys/stat.h>
#include <assert.h>
#include <ctype.h>
-#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <fts.h>
#include <getopt.h>
-#include <stdio.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#if defined(__linux__)
-# include <endian.h>
-# include <db_185.h>
-#elif defined(__APPLE__)
-# include <libkern/OSByteOrder.h>
-# include <db.h>
-#else
-# include <db.h>
-#endif
+#include <ohash.h>
+#include <sqlite3.h>
-#include "man.h"
#include "mdoc.h"
+#include "man.h"
#include "mandoc.h"
#include "mandocdb.h"
#include "manpath.h"
-#define MANDOC_BUFSZ BUFSIZ
-#define MANDOC_SLOP 1024
-
-#define MANDOC_SRC 0x1
-#define MANDOC_FORM 0x2
-
+/* Post a warning to stderr. */
#define WARNING(_f, _b, _fmt, _args...) \
do if (warnings) { \
fprintf(stderr, "%s: ", (_b)); \
@@ -64,114 +52,139 @@
fprintf(stderr, ": %s", (_f)); \
fprintf(stderr, "\n"); \
} while (/* CONSTCOND */ 0)
-
-/* Access to the mandoc database on disk. */
-
-struct mdb {
- char idxn[MAXPATHLEN]; /* index db filename */
- char dbn[MAXPATHLEN]; /* keyword db filename */
- DB *idx; /* index recno database */
- DB *db; /* keyword btree database */
-};
-
-/* Stack of temporarily unused index records. */
+/* Post a "verbose" message to stderr. */
+#define DEBUG(_f, _b, _fmt, _args...) \
+ do if (verb) { \
+ fprintf(stderr, "%s: ", (_b)); \
+ fprintf(stderr, (_fmt), ##_args); \
+ fprintf(stderr, ": %s\n", (_f)); \
+ } while (/* CONSTCOND */ 0)
-struct recs {
- recno_t *stack; /* pointer to a malloc'ed array */
- size_t size; /* number of allocated slots */
- size_t cur; /* current number of empty records */
- recno_t last; /* last record number in the index */
+enum op {
+ OP_DEFAULT = 0, /* new dbs from dir list or default config */
+ OP_CONFFILE, /* new databases from custom config file */
+ OP_UPDATE, /* delete/add entries in existing database */
+ OP_DELETE, /* delete entries from existing database */
+ OP_TEST /* change no databases, report potential problems */
};
-/* Tiny list for files. No need to bring in QUEUE. */
-
-struct of {
- char *fname; /* heap-allocated */
- char *sec;
- char *arch;
- char *title;
- int src_form;
- struct of *next; /* NULL for last one */
- struct of *first; /* first in list */
+enum form {
+ FORM_SRC, /* format is -man or -mdoc */
+ FORM_CAT, /* format is cat */
+ FORM_NONE /* format is unknown */
};
-/* Buffer for storing growable data. */
+struct str {
+ char *utf8; /* key in UTF-8 form */
+ const struct of *of; /* if set, the owning parse */
+ struct str *next; /* next in owning parse sequence */
+ uint64_t mask; /* bitmask in sequence */
+ char key[1]; /* the string itself */
+};
-struct buf {
- char *cp;
- size_t len; /* current length */
- size_t size; /* total buffer size */
+struct id {
+ ino_t ino;
+ dev_t dev;
};
-/* Operation we're going to perform. */
+struct of {
+ struct id id; /* used for hashing routine */
+ struct of *next; /* next in ofs */
+ enum form dform; /* path-cued form */
+ enum form sform; /* suffix-cued form */
+ char file[MAXPATHLEN]; /* filename rel. to manpath */
+ const char *desc; /* parsed description */
+ const char *sec; /* suffix-cued section (or empty) */
+ const char *dsec; /* path-cued section (or empty) */
+ const char *arch; /* path-cued arch. (or empty) */
+ const char *name; /* name (from filename) (not empty) */
+};
-enum op {
- OP_DEFAULT = 0, /* new dbs from dir list or default config */
- OP_CONFFILE, /* new databases from custom config file */
- OP_UPDATE, /* delete/add entries in existing database */
- OP_DELETE, /* delete entries from existing database */
- OP_TEST /* change no databases, report potential problems */
+enum stmt {
+ STMT_DELETE = 0, /* delete manpage */
+ STMT_INSERT_DOC, /* insert manpage */
+ STMT_INSERT_KEY, /* insert parsed key */
+ STMT__MAX
};
-#define MAN_ARGS DB *hash, \
- struct buf *buf, \
- struct buf *dbuf, \
- const struct man_node *n
-#define MDOC_ARGS DB *hash, \
- struct buf *buf, \
- struct buf *dbuf, \
- const struct mdoc_node *n, \
- const struct mdoc_meta *m
-
-static void buf_appendmdoc(struct buf *,
- const struct mdoc_node *, int);
-static void buf_append(struct buf *, const char *);
-static void buf_appendb(struct buf *,
- const void *, size_t);
-static void dbt_put(DB *, const char *, DBT *, DBT *);
-static void hash_put(DB *, const struct buf *, uint64_t);
-static void hash_reset(DB **);
-static void index_merge(const struct of *, struct mparse *,
- struct buf *, struct buf *, DB *,
- struct mdb *, struct recs *,
- const char *);
-static void index_prune(const struct of *, struct mdb *,
- struct recs *, const char *);
-static void ofile_argbuild(int, char *[],
- struct of **, const char *);
-static void ofile_dirbuild(const char *, const char *,
- const char *, int, struct of **, char *);
-static void ofile_free(struct of *);
-static void pformatted(DB *, struct buf *, struct buf *,
- const struct of *, const char *);
-static int pman_node(MAN_ARGS);
-static void pmdoc_node(MDOC_ARGS);
-static int pmdoc_head(MDOC_ARGS);
-static int pmdoc_body(MDOC_ARGS);
-static int pmdoc_Fd(MDOC_ARGS);
-static int pmdoc_In(MDOC_ARGS);
-static int pmdoc_Fn(MDOC_ARGS);
-static int pmdoc_Nd(MDOC_ARGS);
-static int pmdoc_Nm(MDOC_ARGS);
-static int pmdoc_Sh(MDOC_ARGS);
-static int pmdoc_St(MDOC_ARGS);
-static int pmdoc_Xr(MDOC_ARGS);
-
-#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
+typedef int (*mdoc_fp)(struct of *, const struct mdoc_node *);
struct mdoc_handler {
- int (*fp)(MDOC_ARGS); /* Optional handler. */
- uint64_t mask; /* Set unless handler returns 0. */
- int flags; /* For use by pmdoc_node. */
+ mdoc_fp fp; /* optional handler */
+ uint64_t mask; /* set unless handler returns 0 */
+ int flags; /* for use by pmdoc_node */
+#define MDOCF_CHILD 0x01 /* automatically index child nodes */
};
+static void dbclose(const char *, int);
+static void dbindex(struct mchars *, int,
+ const struct of *, const char *);
+static int dbopen(const char *, int);
+static void dbprune(const char *);
+static void fileadd(struct of *);
+static int filecheck(const char *);
+static void filescan(const char *, const char *);
+static struct str *hashget(const char *, size_t);
+static void *hash_alloc(size_t, void *);
+static void hash_free(void *, size_t, void *);
+static void *hash_halloc(size_t, void *);
+static void inoadd(const struct stat *, struct of *);
+static int inocheck(const struct stat *);
+static void ofadd(const char *, int, const char *,
+ const char *, const char *, const char *,
+ const char *, const struct stat *);
+static void offree(void);
+static int ofmerge(struct mchars *, struct mparse *, const char *);
+static void parse_catpage(struct of *, const char *);
+static int parse_man(struct of *,
+ const struct man_node *);
+static void parse_mdoc(struct of *, const struct mdoc_node *);
+static int parse_mdoc_body(struct of *, const struct mdoc_node *);
+static int parse_mdoc_head(struct of *, const struct mdoc_node *);
+static int parse_mdoc_Fd(struct of *, const struct mdoc_node *);
+static int parse_mdoc_Fn(struct of *, const struct mdoc_node *);
+static int parse_mdoc_In(struct of *, const struct mdoc_node *);
+static int parse_mdoc_Nd(struct of *, const struct mdoc_node *);
+static int parse_mdoc_Nm(struct of *, const struct mdoc_node *);
+static int parse_mdoc_Sh(struct of *, const struct mdoc_node *);
+static int parse_mdoc_St(struct of *, const struct mdoc_node *);
+static int parse_mdoc_Xr(struct of *, const struct mdoc_node *);
+static int path_reset(const char *, int, const char *);
+static void putkey(const struct of *,
+ const char *, uint64_t);
+static void putkeys(const struct of *,
+ const char *, int, uint64_t);
+static void putmdockey(const struct of *,
+ const struct mdoc_node *, uint64_t);
+static char *stradd(const char *);
+static char *straddbuf(const char *, size_t);
+static int treescan(const char *);
+static size_t utf8(unsigned int, char [7]);
+static void utf8key(struct mchars *, struct str *);
+static void wordaddbuf(const struct of *,
+ const char *, size_t, uint64_t);
+
+static char *progname;
+static int use_all; /* use all found files */
+static int nodb; /* no database changes */
+static int verb; /* print what we're doing */
+static int warnings; /* warn about crap */
+static enum op op; /* operational mode */
+static struct ohash inos; /* table of inodes/devices */
+static struct ohash filenames; /* table of filenames */
+static struct ohash strings; /* table of all strings */
+static struct of *ofs = NULL; /* vector of files to parse */
+static struct str *words = NULL; /* word list in current parse */
+static sqlite3 *db = NULL; /* current database */
+static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */
+
static const struct mdoc_handler mdocs[MDOC_MAX] = {
{ NULL, 0, 0 }, /* Ap */
{ NULL, 0, 0 }, /* Dd */
{ NULL, 0, 0 }, /* Dt */
{ NULL, 0, 0 }, /* Os */
- { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
- { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
+ { parse_mdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
+ { parse_mdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
{ NULL, 0, 0 }, /* Pp */
{ NULL, 0, 0 }, /* D1 */
{ NULL, 0, 0 }, /* Dl */
@@ -190,23 +203,23 @@ static const struct mdoc_handler mdocs[MDOC_MAX] = {
{ NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
{ NULL, 0, 0 }, /* Ex */
{ NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
- { pmdoc_Fd, TYPE_In, 0 }, /* Fd */
+ { parse_mdoc_Fd, TYPE_In, 0 }, /* Fd */
{ NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
- { pmdoc_Fn, 0, 0 }, /* Fn */
+ { parse_mdoc_Fn, 0, 0 }, /* Fn */
{ NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
{ NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
- { pmdoc_In, TYPE_In, 0 }, /* In */
+ { parse_mdoc_In, TYPE_In, MDOCF_CHILD }, /* In */
{ NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
- { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
- { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
+ { parse_mdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
+ { parse_mdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
{ NULL, 0, 0 }, /* Op */
{ NULL, 0, 0 }, /* Ot */
{ NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
{ NULL, 0, 0 }, /* Rv */
- { pmdoc_St, TYPE_St, 0 }, /* St */
+ { parse_mdoc_St, TYPE_St, 0 }, /* St */
{ NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
- { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
- { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
+ { parse_mdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
+ { parse_mdoc_Xr, TYPE_Xr, 0 }, /* Xr */
{ NULL, 0, 0 }, /* %A */
{ NULL, 0, 0 }, /* %B */
{ NULL, 0, 0 }, /* %D */
@@ -262,7 +275,7 @@ static const struct mdoc_handler mdocs[MDOC_MAX] = {
{ NULL, 0, 0 }, /* Ux */
{ NULL, 0, 0 }, /* Xc */
{ NULL, 0, 0 }, /* Xo */
- { pmdoc_head, TYPE_Fn, 0 }, /* Fo */
+ { parse_mdoc_head, TYPE_Fn, 0 }, /* Fo */
{ NULL, 0, 0 }, /* Fc */
{ NULL, 0, 0 }, /* Oo */
{ NULL, 0, 0 }, /* Oc */
@@ -290,30 +303,32 @@ static const struct mdoc_handler mdocs[MDOC_MAX] = {
{ NULL, 0, 0 }, /* Ta */
};
-static const char *progname;
-static int use_all; /* Use all directories and files. */
-static int verb; /* Output verbosity level. */
-static int warnings; /* Potential problems in manuals. */
-
int
main(int argc, char *argv[])
{
- struct mparse *mp; /* parse sequence */
- struct manpaths dirs;
- struct mdb mdb;
- struct recs recs;
- enum op op; /* current operation */
- const char *dir;
- int ch, i, flags;
- char dirbuf[MAXPATHLEN];
- DB *hash; /* temporary keyword hashtable */
- BTREEINFO info; /* btree configuration */
- size_t sz1, sz2;
- struct buf buf, /* keyword buffer */
- dbuf; /* description buffer */
- struct of *of; /* list of files for processing */
- extern int optind;
- extern char *optarg;
+ char cwd[MAXPATHLEN];
+ int ch, rc, fd, i;
+ size_t j, sz;
+ const char *dir;
+ struct str *s;
+ struct mchars *mc;
+ struct manpaths dirs;
+ struct mparse *mp;
+ struct ohash_info ino_info, filename_info, str_info;
+
+ memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *));
+ memset(&dirs, 0, sizeof(struct manpaths));
+
+ ino_info.halloc = filename_info.halloc =
+ str_info.halloc = hash_halloc;
+ ino_info.hfree = filename_info.hfree =
+ str_info.hfree = hash_free;
+ ino_info.alloc = filename_info.alloc =
+ str_info.alloc = hash_alloc;
+
+ ino_info.key_offset = offsetof(struct of, id);
+ filename_info.key_offset = offsetof(struct of, file);
+ str_info.key_offset = offsetof(struct str, key);
progname = strrchr(argv[0], '/');
if (progname == NULL)
@@ -321,56 +336,60 @@ main(int argc, char *argv[])
else
++progname;
- memset(&dirs, 0, sizeof(struct manpaths));
- memset(&mdb, 0, sizeof(struct mdb));
- memset(&recs, 0, sizeof(struct recs));
+ /*
+ * Remember where we started by keeping a fd open to the origin
+ * path component: throughout this utility, we chdir() a lot to
+ * handle relative paths, and by doing this, we can return to
+ * the starting point.
+ */
+ if (NULL == getcwd(cwd, MAXPATHLEN)) {
+ perror(NULL);
+ return(EXIT_FAILURE);
+ } else if (-1 == (fd = open(cwd, O_RDONLY, 0))) {
+ perror(cwd);
+ return(EXIT_FAILURE);
+ }
+
+ /*
+ * We accept a few different invocations.
+ * The CHECKOP macro makes sure that invocation styles don't
+ * clobber each other.
+ */
+#define CHECKOP(_op, _ch) do \
+ if (OP_DEFAULT != (_op)) { \
+ fprintf(stderr, "-%c: Conflicting option\n", (_ch)); \
+ goto usage; \
+ } while (/*CONSTCOND*/0)
- of = NULL;
- mp = NULL;
- hash = NULL;
- op = OP_DEFAULT;
dir = NULL;
+ op = OP_DEFAULT;
- while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
+ while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW")))
switch (ch) {
case ('a'):
use_all = 1;
break;
case ('C'):
- if (op) {
- fprintf(stderr,
- "-C: conflicting options\n");
- goto usage;
- }
+ CHECKOP(op, ch);
dir = optarg;
op = OP_CONFFILE;
break;
case ('d'):
- if (op) {
- fprintf(stderr,
- "-d: conflicting options\n");
- goto usage;
- }
+ CHECKOP(op, ch);
dir = optarg;
op = OP_UPDATE;
break;
+ case ('n'):
+ nodb = 1;
+ break;
case ('t'):
+ CHECKOP(op, ch);
dup2(STDOUT_FILENO, STDERR_FILENO);
- if (op) {
- fprintf(stderr,
- "-t: conflicting options\n");
- goto usage;
- }
op = OP_TEST;
- use_all = 1;
- warnings = 1;
+ nodb = warnings = 1;
break;
case ('u'):
- if (op) {
- fprintf(stderr,
- "-u: conflicting options\n");
- goto usage;
- }
+ CHECKOP(op, ch);
dir = optarg;
op = OP_DELETE;
break;
@@ -388,233 +407,561 @@ main(int argc, char *argv[])
argv += optind;
if (OP_CONFFILE == op && argc > 0) {
- fprintf(stderr, "-C: too many arguments\n");
+ fprintf(stderr, "-C: Too many arguments\n");
goto usage;
}
- memset(&info, 0, sizeof(BTREEINFO));
- info.lorder = 4321;
- info.flags = R_DUP;
+ rc = 1;
+ mp = mparse_alloc(MPARSE_AUTO,
+ MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ mc = mchars_alloc();
- mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ ohash_init(&strings, 6, &str_info);
+ ohash_init(&inos, 6, &ino_info);
+ ohash_init(&filenames, 6, &filename_info);
- memset(&buf, 0, sizeof(struct buf));
- memset(&dbuf, 0, sizeof(struct buf));
+ if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
+ /*
+ * Force processing all files.
+ */
+ use_all = 1;
+ if (NULL == dir)
+ dir = cwd;
+ /*
+ * All of these deal with a specific directory.
+ * Jump into that directory then collect files specified
+ * on the command-line.
+ */
+ if (0 == path_reset(cwd, fd, dir))
+ goto out;
+ for (i = 0; i < argc; i++)
+ filescan(argv[i], dir);
+ if (0 == dbopen(dir, 1))
+ goto out;
+ if (OP_TEST != op)
+ dbprune(dir);
+ if (OP_DELETE != op)
+ rc = ofmerge(mc, mp, dir);
+ dbclose(dir, 1);
+ } else {
+ /*
+ * If we have arguments, use them as our manpaths.
+ * If we don't, grok from manpath(1) or however else
+ * manpath_parse() wants to do it.
+ */
+ if (argc > 0) {
+ dirs.paths = mandoc_calloc
+ (argc, sizeof(char *));
+ dirs.sz = (size_t)argc;
+ for (i = 0; i < argc; i++)
+ dirs.paths[i] = mandoc_strdup(argv[i]);
+ } else
+ manpath_parse(&dirs, dir, NULL, NULL);
- buf.size = dbuf.size = MANDOC_BUFSZ;
+ /*
+ * First scan the tree rooted at a base directory.
+ * Then whack its database (if one exists), parse, and
+ * build up the database.
+ * Ignore zero-length directories and strip trailing
+ * slashes.
+ */
+ for (j = 0; j < dirs.sz; j++) {
+ sz = strlen(dirs.paths[j]);
+ if (sz && '/' == dirs.paths[j][sz - 1])
+ dirs.paths[j][--sz] = '\0';
+ if (0 == sz)
+ continue;
+ if (0 == path_reset(cwd, fd, dirs.paths[j]))
+ goto out;
+ if (0 == treescan(dirs.paths[j]))
+ goto out;
+ if (0 == path_reset(cwd, fd, dirs.paths[j]))
+ goto out;
+ if (0 == dbopen(dirs.paths[j], 0))
+ goto out;
+ if (0 == ofmerge(mc, mp, dirs.paths[j]))
+ goto out;
+ dbclose(dirs.paths[j], 0);
+ offree();
+ ohash_delete(&inos);
+ ohash_init(&inos, 6, &ino_info);
+ ohash_delete(&filenames);
+ ohash_init(&filenames, 6, &filename_info);
+ }
+ }
+out:
+ close(fd);
+ manpath_free(&dirs);
+ mchars_free(mc);
+ mparse_free(mp);
+ for (s = ohash_first(&strings, &ch);
+ NULL != s; s = ohash_next(&strings, &ch)) {
+ if (s->utf8 != s->key)
+ free(s->utf8);
+ free(s);
+ }
+ ohash_delete(&strings);
+ ohash_delete(&inos);
+ ohash_delete(&filenames);
+ offree();
+ return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
+usage:
+ fprintf(stderr, "usage: %s [-anvW] [-C file]\n"
+ " %s [-anvW] dir ...\n"
+ " %s [-nvW] -d dir [file ...]\n"
+ " %s [-nvW] -u dir [file ...]\n"
+ " %s -t file ...\n",
+ progname, progname, progname,
+ progname, progname);
+
+ return(EXIT_FAILURE);
+}
- buf.cp = mandoc_malloc(buf.size);
- dbuf.cp = mandoc_malloc(dbuf.size);
+/*
+ * Scan a directory tree rooted at "base" for manpages.
+ * We use fts(), scanning directory parts along the way for clues to our
+ * section and architecture.
+ *
+ * If use_all has been specified, grok all files.
+ * If not, sanitise paths to the following:
+ *
+ * [./]man*[/<arch>]/<name>.<section>
+ * or
+ * [./]cat<section>[/<arch>]/<name>.0
+ *
+ * TODO: accommodate multi-language directories.
+ */
+static int
+treescan(const char *base)
+{
+ FTS *f;
+ FTSENT *ff;
+ int dform;
+ char *sec;
+ const char *dsec, *arch, *cp, *name, *path;
+ const char *argv[2];
- if (OP_TEST == op) {
- ofile_argbuild(argc, argv, &of, ".");
- if (NULL == of)
- goto out;
- index_merge(of, mp, &dbuf, &buf,
- hash, &mdb, &recs, ".");
- goto out;
+ argv[0] = ".";
+ argv[1] = (char *)NULL;
+
+ /*
+ * Walk through all components under the directory, using the
+ * logical descent of files.
+ */
+ f = fts_open((char * const *)argv, FTS_LOGICAL, NULL);
+ if (NULL == f) {
+ perror(base);
+ return(0);
}
- if (OP_UPDATE == op || OP_DELETE == op) {
- strlcat(mdb.dbn, dir, MAXPATHLEN);
- strlcat(mdb.dbn, "/", MAXPATHLEN);
- sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
+ dsec = arch = NULL;
+ dform = FORM_NONE;
+
+ while (NULL != (ff = fts_read(f))) {
+ path = ff->fts_path + 2;
+ /*
+ * If we're a regular file, add an "of" by using the
+ * stored directory data and handling the filename.
+ * Disallow duplicate (hard-linked) files.
+ */
+ if (FTS_F == ff->fts_info) {
+ if ( ! use_all && ff->fts_level < 2) {
+ WARNING(path, base, "Extraneous file");
+ continue;
+ } else if (inocheck(ff->fts_statp)) {
+ WARNING(path, base, "Duplicate file");
+ continue;
+ }
+
+ cp = ff->fts_name;
- strlcat(mdb.idxn, dir, MAXPATHLEN);
- strlcat(mdb.idxn, "/", MAXPATHLEN);
- sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
+ if (0 == strcmp(cp, "mandocdb.db")) {
+ WARNING(path, base, "Skip database");
+ continue;
+ } else if (NULL != (cp = strrchr(cp, '.'))) {
+ if (0 == strcmp(cp + 1, "html")) {
+ WARNING(path, base, "Skip html");
+ continue;
+ } else if (0 == strcmp(cp + 1, "gz")) {
+ WARNING(path, base, "Skip gz");
+ continue;
+ } else if (0 == strcmp(cp + 1, "ps")) {
+ WARNING(path, base, "Skip ps");
+ continue;
+ } else if (0 == strcmp(cp + 1, "pdf")) {
+ WARNING(path, base, "Skip pdf");
+ continue;
+ }
+ }
- if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
- fprintf(stderr, "%s: path too long\n", dir);
- exit((int)MANDOCLEVEL_BADARG);
- }
+ if (NULL != (sec = strrchr(ff->fts_name, '.'))) {
+ *sec = '\0';
+ sec = stradd(sec + 1);
+ }
+ name = stradd(ff->fts_name);
+ ofadd(base, dform, path,
+ name, dsec, sec, arch, ff->fts_statp);
+ continue;
+ } else if (FTS_D != ff->fts_info &&
+ FTS_DP != ff->fts_info)
+ continue;
- flags = O_CREAT | O_RDWR;
- mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
- mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
+ switch (ff->fts_level) {
+ case (0):
+ /* Ignore the root directory. */
+ break;
+ case (1):
+ /*
+ * This might contain manX/ or catX/.
+ * Try to infer this from the name.
+ * If we're not in use_all, enforce it.
+ */
+ dsec = NULL;
+ dform = FORM_NONE;
+ cp = ff->fts_name;
+ if (FTS_DP == ff->fts_info)
+ break;
- if (NULL == mdb.db) {
- perror(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == mdb.idx) {
- perror(mdb.idxn);
- exit((int)MANDOCLEVEL_SYSERR);
+ if (0 == strncmp(cp, "man", 3)) {
+ dform = FORM_SRC;
+ dsec = stradd(cp + 3);
+ } else if (0 == strncmp(cp, "cat", 3)) {
+ dform = FORM_CAT;
+ dsec = stradd(cp + 3);
+ }
+
+ if (NULL != dsec || use_all)
+ break;
+
+ WARNING(path, base, "Unknown directory part");
+ fts_set(f, ff, FTS_SKIP);
+ break;
+ case (2):
+ /*
+ * Possibly our architecture.
+ * If we're descending, keep tabs on it.
+ */
+ arch = NULL;
+ if (FTS_DP != ff->fts_info && NULL != dsec)
+ arch = stradd(ff->fts_name);
+ break;
+ default:
+ if (FTS_DP == ff->fts_info || use_all)
+ break;
+ WARNING(path, base, "Extraneous directory part");
+ fts_set(f, ff, FTS_SKIP);
+ break;
}
+ }
- ofile_argbuild(argc, argv, &of, dir);
+ fts_close(f);
+ return(1);
+}
- if (NULL == of)
- goto out;
+/*
+ * Add a file to the file vector.
+ * Do not verify that it's a "valid" looking manpage (we'll do that
+ * later).
+ *
+ * Try to infer the manual section, architecture, and page name from the
+ * path, assuming it looks like
+ *
+ * [./]man*[/<arch>]/<name>.<section>
+ * or
+ * [./]cat<section>[/<arch>]/<name>.0
+ *
+ * Stuff this information directly into the "of" vector.
+ * See treescan() for the fts(3) version of this.
+ */
+static void
+filescan(const char *file, const char *base)
+{
+ const char *sec, *arch, *name, *dsec;
+ char *p, *start, *buf;
+ int dform;
+ struct stat st;
- index_prune(of, &mdb, &recs, dir);
+ assert(use_all);
- /*
- * Go to the root of the respective manual tree.
- * This must work or no manuals may be found (they're
- * indexed relative to the root).
- */
+ if (0 == strncmp(file, "./", 2))
+ file += 2;
- if (OP_UPDATE == op) {
- if (-1 == chdir(dir)) {
- perror(dir);
- exit((int)MANDOCLEVEL_SYSERR);
- }
- index_merge(of, mp, &dbuf, &buf, hash,
- &mdb, &recs, dir);
+ if (-1 == stat(file, &st)) {
+ WARNING(file, base, "%s", strerror(errno));
+ return;
+ } else if ( ! (S_IFREG & st.st_mode)) {
+ WARNING(file, base, "Not a regular file");
+ return;
+ } else if (inocheck(&st)) {
+ WARNING(file, base, "Duplicate file");
+ return;
+ }
+
+ buf = mandoc_strdup(file);
+ start = buf;
+ sec = arch = name = dsec = NULL;
+ dform = FORM_NONE;
+
+ /*
+ * First try to guess our directory structure.
+ * If we find a separator, try to look for man* or cat*.
+ * If we find one of these and what's underneath is a directory,
+ * assume it's an architecture.
+ */
+ if (NULL != (p = strchr(start, '/'))) {
+ *p++ = '\0';
+ if (0 == strncmp(start, "man", 3)) {
+ dform = FORM_SRC;
+ dsec = start + 3;
+ } else if (0 == strncmp(start, "cat", 3)) {
+ dform = FORM_CAT;
+ dsec = start + 3;
}
- goto out;
+ start = p;
+ if (NULL != dsec && NULL != (p = strchr(start, '/'))) {
+ *p++ = '\0';
+ arch = start;
+ start = p;
+ }
}
/*
- * Configure the directories we're going to scan.
- * If we have command-line arguments, use them.
- * If not, we use man(1)'s method (see mandocdb.8).
+ * Now check the file suffix.
+ * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
*/
+ p = strrchr(start, '\0');
+ while (p-- > start && '/' != *p && '.' != *p)
+ /* Loop. */ ;
- if (argc > 0) {
- dirs.paths = mandoc_calloc(argc, sizeof(char *));
- dirs.sz = argc;
- for (i = 0; i < argc; i++)
- dirs.paths[i] = mandoc_strdup(argv[i]);
- } else
- manpath_parse(&dirs, dir, NULL, NULL);
+ if ('.' == *p) {
+ *p++ = '\0';
+ sec = p;
+ }
- for (i = 0; i < dirs.sz; i++) {
- /*
- * Go to the root of the respective manual tree.
- * This must work or no manuals may be found:
- * They are indexed relative to the root.
- */
+ /*
+ * Now try to parse the name.
+ * Use the filename portion of the path.
+ */
+ name = start;
+ if (NULL != (p = strrchr(start, '/'))) {
+ name = p + 1;
+ *p = '\0';
+ }
- if (-1 == chdir(dirs.paths[i])) {
- perror(dirs.paths[i]);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ ofadd(base, dform, file, name, dsec, sec, arch, &st);
+ free(buf);
+}
- strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN);
- strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
+/*
+ * See fileadd().
+ */
+static int
+filecheck(const char *name)
+{
+ unsigned int index;
- flags = O_CREAT | O_TRUNC | O_RDWR;
- mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
- mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
+ index = ohash_qlookup(&filenames, name);
+ return(NULL != ohash_find(&filenames, index));
+}
- if (NULL == mdb.db) {
- perror(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == mdb.idx) {
- perror(mdb.idxn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+/*
+ * Add a file record to the filename hash (standard K&R string hashing),
+ * keyed by its path; the path must not already be present.
+ */
+static void
+fileadd(struct of *of)
+{
+ unsigned int index;
- /*
- * Search for manuals and fill the new database.
- */
+ index = ohash_qlookup(&filenames, of->file);
+ assert(NULL == ohash_find(&filenames, index));
+ ohash_insert(&filenames, index, of);
+}
- strlcpy(dirbuf, dirs.paths[i], MAXPATHLEN);
- ofile_dirbuild(".", "", "", 0, &of, dirbuf);
+/*
+ * See inoadd().
+ */
+static int
+inocheck(const struct stat *st)
+{
+ struct id id;
+ uint32_t hash;
+ unsigned int index;
- if (NULL != of) {
- index_merge(of, mp, &dbuf, &buf, hash,
- &mdb, &recs, dirs.paths[i]);
- ofile_free(of);
- of = NULL;
- }
+ memset(&id, 0, sizeof(id));
+ id.ino = hash = st->st_ino;
+ id.dev = st->st_dev;
+ index = ohash_lookup_memory
+ (&inos, (char *)&id, sizeof(id), hash);
- (*mdb.db->close)(mdb.db);
- (*mdb.idx->close)(mdb.idx);
- mdb.db = NULL;
- mdb.idx = NULL;
- }
+ return(NULL != ohash_find(&inos, index));
+}
-out:
- if (mdb.db)
- (*mdb.db->close)(mdb.db);
- if (mdb.idx)
- (*mdb.idx->close)(mdb.idx);
- if (hash)
- (*hash->close)(hash);
- if (mp)
- mparse_free(mp);
+/*
+ * The hashing function used here is quite simple: take the inode number,
+ * truncated to uint32_t, as the hash value.
+ * The lookup key itself is the full (inode, device) identifier pair.
+ */
+static void
+inoadd(const struct stat *st, struct of *of)
+{
+ uint32_t hash;
+ unsigned int index;
- manpath_free(&dirs);
- ofile_free(of);
- free(buf.cp);
- free(dbuf.cp);
- free(recs.stack);
+ of->id.ino = hash = st->st_ino;
+ of->id.dev = st->st_dev;
+ index = ohash_lookup_memory
+ (&inos, (char *)&of->id, sizeof(of->id), hash);
- return(MANDOCLEVEL_OK);
+ assert(NULL == ohash_find(&inos, index));
+ ohash_insert(&inos, index, of);
+}
-usage:
- fprintf(stderr,
- "usage: %s [-av] [-C file] | dir ... | -t file ...\n"
- " -d dir [file ...] | "
- "-u dir [file ...]\n",
- progname);
+/*
+ * Allocate a record for "file", prepend it to the global list "ofs" and
+ * register it in the inode and filename hashes.
+ * A NULL name, sec, dsec or arch is treated as the empty string.
+ * A suffix starting with 1-9 marks manual source; a suffix starting
+ * with 0 marks a catpage, whose section is taken from the directory.
+ * All strings are interned with stradd(): callers may pass pointers
+ * into a scratch buffer that is freed as soon as we return, while the
+ * record is read again much later.
+ */
+static void
+ofadd(const char *base, int dform, const char *file,
+ const char *name, const char *dsec, const char *sec,
+ const char *arch, const struct stat *st)
+{
+ struct of *of;
+ int sform;
+
+ assert(NULL != file);
+
+ if (NULL == name)
+ name = "";
+ if (NULL == sec)
+ sec = "";
+ if (NULL == dsec)
+ dsec = "";
+ if (NULL == arch)
+ arch = "";
+
+ /* sec is never NULL from here on. */
+
+ sform = FORM_NONE;
+ if (*sec >= '1' && *sec <= '9')
+ sform = FORM_SRC;
+ else if ('0' == *sec) {
+ sec = dsec;
+ sform = FORM_CAT;
+ }
+
+ of = mandoc_calloc(1, sizeof(struct of));
+ strlcpy(of->file, file, MAXPATHLEN);
+ of->name = stradd(name);
+ of->sec = stradd(sec);
+ of->dsec = stradd(dsec);
+ of->arch = stradd(arch);
+ of->sform = sform;
+ of->dform = dform;
+ of->next = ofs;
+ ofs = of;
- return((int)MANDOCLEVEL_BADARG);
+ /*
+ * Register the record in the inode (unique identifier) hash,
+ * then in the filename hash.
+ */
+ inoadd(st, of);
+ fileadd(of);
}
-void
-index_merge(const struct of *of, struct mparse *mp,
- struct buf *dbuf, struct buf *buf, DB *hash,
- struct mdb *mdb, struct recs *recs,
- const char *basedir)
+static void
+offree(void)
{
- recno_t rec;
- int ch, skip;
- DBT key, val;
- DB *files; /* temporary file name table */
- char emptystring[1] = {'\0'};
- struct mdoc *mdoc;
- struct man *man;
- char *p;
- const char *fn, *msec, *march, *mtitle;
- uint64_t mask;
- size_t sv;
- unsigned seq;
- uint64_t vbuf[2];
- char type;
-
- if (warnings) {
- files = NULL;
- hash_reset(&files);
+ struct of *of;
+
+ while (NULL != (of = ofs)) {
+ ofs = of->next;
+ free(of);
}
+}
- rec = 0;
- for (of = of->first; of; of = of->next) {
- fn = of->fname;
+/*
+ * Run through the files in the global vector "ofs" and add them to the
+ * database specified in "base".
+ *
+ * This handles the parsing scheme itself, using the cues of directory
+ * and filename to determine whether the file is parsable or not.
+ */
+static int
+ofmerge(struct mchars *mc, struct mparse *mp, const char *base)
+{
+ int form;
+ size_t sz;
+ struct mdoc *mdoc;
+ struct man *man;
+ char buf[MAXPATHLEN];
+ char *bufp;
+ const char *msec, *march, *mtitle, *cp;
+ struct of *of;
+ enum mandoclevel lvl;
+ for (of = ofs; NULL != of; of = of->next) {
/*
- * Try interpreting the file as mdoc(7) or man(7)
- * source code, unless it is already known to be
- * formatted. Fall back to formatted mode.
+ * If we're a catpage (as defined by our path), then see
+ * if a manpage exists by the same name (ignoring the
+ * suffix).
+ * If it does, then we want to use it instead of our
+ * own.
*/
+ if ( ! use_all && FORM_CAT == of->dform) {
+ sz = strlcpy(buf, of->file, MAXPATHLEN);
+ if (sz >= MAXPATHLEN) {
+ WARNING(of->file, base,
+ "Filename too long");
+ continue;
+ }
+ bufp = strstr(buf, "cat");
+ assert(NULL != bufp);
+ memcpy(bufp, "man", 3);
+ if (NULL != (bufp = strrchr(buf, '.')))
+ *++bufp = '\0';
+ strlcat(buf, of->dsec, MAXPATHLEN);
+ if (filecheck(buf)) {
+ WARNING(of->file, base, "Man "
+ "source exists: %s", buf);
+ continue;
+ }
+ }
+ words = NULL;
mparse_reset(mp);
mdoc = NULL;
man = NULL;
+ form = 0;
+ msec = of->dsec;
+ march = of->arch;
+ mtitle = of->name;
- if ((MANDOC_SRC & of->src_form ||
- ! (MANDOC_FORM & of->src_form)) &&
- MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
- mparse_result(mp, &mdoc, &man);
+ /*
+ * Try interpreting the file as mdoc(7) or man(7)
+ * source code, unless it is already known to be
+ * formatted. Fall back to formatted mode.
+ */
+ if (FORM_SRC == of->dform || FORM_SRC == of->sform) {
+ lvl = mparse_readfd(mp, -1, of->file);
+ if (lvl < MANDOCLEVEL_FATAL)
+ mparse_result(mp, &mdoc, &man);
+ }
if (NULL != mdoc) {
+ form = 1;
msec = mdoc_meta(mdoc)->msec;
march = mdoc_meta(mdoc)->arch;
- if (NULL == march)
- march = "";
mtitle = mdoc_meta(mdoc)->title;
} else if (NULL != man) {
+ form = 1;
msec = man_meta(man)->msec;
march = "";
mtitle = man_meta(man)->title;
- } else {
- msec = of->sec;
- march = of->arch;
- mtitle = of->title;
- }
+ }
+
+ if (NULL == msec)
+ msec = "";
+ if (NULL == march)
+ march = "";
+ if (NULL == mtitle)
+ mtitle = "";
/*
* Check whether the manual section given in a file
@@ -625,13 +972,11 @@ index_merge(const struct of *of, struct mparse *mp,
* section, like encrypt(1) = makekey(8). Do not skip
* manuals for such reasons.
*/
+ if ( ! use_all && form && strcasecmp(msec, of->dsec))
+ WARNING(of->file, base, "Section \"%s\" "
+ "manual in %s directory",
+ msec, of->dsec);
- skip = 0;
- assert(of->sec);
- assert(msec);
- if (strcasecmp(msec, of->sec))
- WARNING(fn, basedir, "Section \"%s\" manual "
- "in \"%s\" directory", msec, of->sec);
/*
* Manual page directories exist for each kernel
* architecture as returned by machine(1).
@@ -646,415 +991,362 @@ index_merge(const struct of *of, struct mparse *mp,
* Thus, warn about architecture mismatches,
* but don't skip manuals for this reason.
*/
-
- assert(of->arch);
- assert(march);
- if (strcasecmp(march, of->arch))
- WARNING(fn, basedir, "Architecture \"%s\" "
+ if ( ! use_all && strcasecmp(march, of->arch))
+ WARNING(of->file, base, "Architecture \"%s\" "
"manual in \"%s\" directory",
march, of->arch);
- /*
- * By default, skip a file if the title given
- * in the file disagrees with the file name.
- * Do not warn, this happens for all MLINKs.
- */
+ putkey(of, of->name, TYPE_Nm);
- assert(of->title);
- assert(mtitle);
- if (strcasecmp(mtitle, of->title))
- skip = 1;
+ if (NULL != mdoc) {
+ if (NULL != (cp = mdoc_meta(mdoc)->name))
+ putkey(of, cp, TYPE_Nm);
+ parse_mdoc(of, mdoc_node(mdoc));
+ } else if (NULL != man)
+ parse_man(of, man_node(man));
+ else
+ parse_catpage(of, base);
- /*
- * Build a title string for the file. If it matches
- * the location of the file, remember the title as
- * found; else, remember it as missing.
- */
+ dbindex(mc, form, of, base);
+ }
- if (warnings) {
- buf->len = 0;
- buf_appendb(buf, mtitle, strlen(mtitle));
- buf_appendb(buf, "(", 1);
- buf_appendb(buf, msec, strlen(msec));
- if ('\0' != *march) {
- buf_appendb(buf, "/", 1);
- buf_appendb(buf, march, strlen(march));
- }
- buf_appendb(buf, ")", 2);
- for (p = buf->cp; '\0' != *p; p++)
- *p = tolower(*p);
- key.data = buf->cp;
- key.size = buf->len;
- val.data = NULL;
- val.size = 0;
- if (0 == skip)
- val.data = emptystring;
- else {
- ch = (*files->get)(files, &key, &val, 0);
- if (ch < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (ch > 0) {
- val.data = (void *)fn;
- val.size = strlen(fn) + 1;
- } else
- val.data = NULL;
- }
- if (NULL != val.data &&
- (*files->put)(files, &key, &val, 0) < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- }
- }
+ return(1);
+}
- if (skip && !use_all)
- continue;
+/*
+ * Extract the description from a preformatted manual (catpage).
+ * Reads of->file, collects the indented lines following the first
+ * unindented line after the first blank line, and stores the text
+ * after the first "- " as of->desc and as a TYPE_Nd key.
+ * Emits a WARNING and leaves of->desc untouched if the file cannot
+ * be opened or no such text is found.
+ */
+static void
+parse_catpage(struct of *of, const char *base)
+{
+ FILE *stream;
+ char *line, *p, *title;
+ size_t len, plen, titlesz;
- /*
- * The index record value consists of a nil-terminated
- * filename, a nil-terminated manual section, and a
- * nil-terminated description. Use the actual
- * location of the file, such that the user can find
- * it with man(1). Since the description may not be
- * set, we set a sentinel to see if we're going to
- * write a nil byte in its place.
- */
+ if (NULL == (stream = fopen(of->file, "r"))) {
+ WARNING(of->file, base, "%s", strerror(errno));
+ return;
+ }
- dbuf->len = 0;
- type = mdoc ? 'd' : (man ? 'a' : 'c');
- buf_appendb(dbuf, &type, 1);
- buf_appendb(dbuf, fn, strlen(fn) + 1);
- buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
- buf_appendb(dbuf, of->title, strlen(of->title) + 1);
- buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
+ /* Skip to first blank line. */
- sv = dbuf->len;
+ while (NULL != (line = fgetln(stream, &len)))
+ if ('\n' == *line)
+ break;
- /*
- * Collect keyword/mask pairs.
- * Each pair will become a new btree node.
- */
+ /*
+ * Assume the first line that is not indented
+ * is the first section header. Skip to it.
+ */
- hash_reset(&hash);
- if (mdoc)
- pmdoc_node(hash, buf, dbuf,
- mdoc_node(mdoc), mdoc_meta(mdoc));
- else if (man)
- pman_node(hash, buf, dbuf, man_node(man));
- else
- pformatted(hash, buf, dbuf, of, basedir);
+ while (NULL != (line = fgetln(stream, &len)))
+ if ('\n' != *line && ' ' != *line)
+ break;
+
+ /*
+ * Read up until the next section into a buffer.
+ * Strip the leading whitespace from each read line and turn
+ * its trailing newline into a separating space.
+ * Ignore empty (whitespace-only) lines.
+ */
- /* Test mode, do not access any database. */
+ titlesz = 0;
+ title = NULL;
- if (NULL == mdb->db || NULL == mdb->idx)
+ while (NULL != (line = fgetln(stream, &len))) {
+ if (' ' != *line || '\n' != line[len - 1])
+ break;
+ while (len > 0 && isspace((unsigned char)*line)) {
+ line++;
+ len--;
+ }
+ /*
+ * The newline is whitespace too, so a blank line is
+ * stripped down to nothing: without this check we
+ * would store to title[titlesz - 1] with titlesz
+ * possibly still zero.
+ */
+ if (0 == len)
continue;
+ title = mandoc_realloc(title, titlesz + len);
+ memcpy(title + titlesz, line, len);
+ titlesz += len;
+ title[titlesz - 1] = ' ';
+ }
- /*
- * Make sure the file name is always registered
- * as an .Nm search key.
- */
- buf->len = 0;
- buf_append(buf, of->title);
- hash_put(hash, buf, TYPE_Nm);
-
- /*
- * Reclaim an empty index record, if available.
- * Use its record number for all new btree nodes.
- */
-
- if (recs->cur > 0) {
- recs->cur--;
- rec = recs->stack[(int)recs->cur];
- } else if (recs->last > 0) {
- rec = recs->last;
- recs->last = 0;
- } else
- rec++;
- vbuf[1] = htobe64(rec);
-
- /*
- * Copy from the in-memory hashtable of pending
- * keyword/mask pairs into the database.
- */
+ /*
+ * If no page content can be found, or the input line
+ * is already the next section header, or there is no
+ * trailing newline, there is nothing to use as the
+ * description: warn and give up.
+ */
- seq = R_FIRST;
- while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
- seq = R_NEXT;
- assert(sizeof(uint64_t) == val.size);
- memcpy(&mask, val.data, val.size);
- vbuf[0] = htobe64(mask);
- val.size = sizeof(vbuf);
- val.data = &vbuf;
- dbt_put(mdb->db, mdb->dbn, &key, &val);
- }
- if (ch < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ if (NULL == title || '\0' == *title) {
+ WARNING(of->file, base, "Cannot find NAME section");
+ fclose(stream);
+ free(title);
+ return;
+ }
- /*
- * Apply to the index. If we haven't had a description
- * set, put an empty one in now.
- */
+ title = mandoc_realloc(title, titlesz + 1);
+ title[titlesz] = '\0';
- if (dbuf->len == sv)
- buf_appendb(dbuf, "", 1);
+ /*
+ * Skip to the first dash.
+ * Use the remaining line as the description; no length
+ * limit is applied here.
+ */
- key.data = &rec;
- key.size = sizeof(recno_t);
+ if (NULL != (p = strstr(title, "- "))) {
+ for (p += 2; ' ' == *p || '\b' == *p; p++)
+ /* Skip to next word. */ ;
+ } else {
+ WARNING(of->file, base, "No dash in title line");
+ p = title;
+ }
- val.data = dbuf->cp;
- val.size = dbuf->len;
+ plen = strlen(p);
- if (verb)
- printf("%s: Adding to index: %s\n", basedir, fn);
+ /*
+ * Strip backspace-encoding from line: drop each backspace
+ * together with the character before it, or just the
+ * backspace if it is the first character.
+ * Each memmove() also carries the terminating NUL along.
+ */
- dbt_put(mdb->idx, mdb->idxn, &key, &val);
+ while (NULL != (line = memchr(p, '\b', plen))) {
+ len = line - p;
+ if (0 == len) {
+ memmove(line, line + 1, plen--);
+ continue;
+ }
+ memmove(line - 1, line + 1, plen - len);
+ plen -= 2;
}
- /*
- * Iterate the remembered file titles and check that
- * all files can be found by their main title.
- */
-
- if (warnings) {
- seq = R_FIRST;
- while (0 == (*files->seq)(files, &key, &val, seq)) {
- seq = R_NEXT;
- if (val.size)
- WARNING((char *)val.data, basedir,
- "Probably unreachable, title "
- "is %s", (char *)key.data);
- }
- (*files->close)(files);
- }
+ of->desc = stradd(p);
+ putkey(of, p, TYPE_Nd);
+ fclose(stream);
+ free(title);
}
/*
- * Scan through all entries in the index file `idx' and prune those
- * entries in `ofile'.
- * Pruning consists of removing from `db', then invalidating the entry
- * in `idx' (zeroing its value size).
+ * Put a type/word pair into the word database for this particular file.
*/
static void
-index_prune(const struct of *ofile, struct mdb *mdb,
- struct recs *recs, const char *basedir)
+putkey(const struct of *of, const char *value, uint64_t type)
{
- const struct of *of;
- const char *fn;
- uint64_t vbuf[2];
- unsigned seq, sseq;
- DBT key, val;
- int ch;
-
- recs->cur = 0;
- seq = R_FIRST;
- while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
- seq = R_NEXT;
- assert(sizeof(recno_t) == key.size);
- memcpy(&recs->last, key.data, key.size);
- /* Deleted records are zero-sized. Skip them. */
-
- if (0 == val.size)
- goto cont;
-
- /*
- * Make sure we're sane.
- * Read past our mdoc/man/cat type to the next string,
- * then make sure it's bounded by a NUL.
- * Failing any of these, we go into our error handler.
- */
+ assert(NULL != value);
+ wordaddbuf(of, value, strlen(value), type);
+}
- fn = (char *)val.data + 1;
- if (NULL == memchr(fn, '\0', val.size - 1))
- break;
+/*
+ * Like putkey() but for unterminated strings.
+ */
+static void
+putkeys(const struct of *of, const char *value, int sz, uint64_t type)
+{
- /*
- * Search for the file in those we care about.
- * XXX: build this into a tree. Too slow.
- */
+ wordaddbuf(of, value, sz, type);
+}
- for (of = ofile->first; of; of = of->next)
- if (0 == strcmp(fn, of->fname))
- break;
+/*
+ * Grok all nodes at or below a certain mdoc node into putkey().
+ */
+static void
+putmdockey(const struct of *of, const struct mdoc_node *n, uint64_t m)
+{
- if (NULL == of)
- continue;
+ for ( ; NULL != n; n = n->next) {
+ if (NULL != n->child)
+ putmdockey(of, n->child, m);
+ if (MDOC_TEXT == n->type)
+ putkey(of, n->string, m);
+ }
+}
- /*
- * Search through the keyword database, throwing out all
- * references to our file.
- */
+static int
+parse_man(struct of *of, const struct man_node *n)
+{
+ const struct man_node *head, *body;
+ char *start, *sv, *title;
+ char byte;
+ size_t sz, titlesz;
- sseq = R_FIRST;
- while (0 == (ch = (*mdb->db->seq)(mdb->db,
- &key, &val, sseq))) {
- sseq = R_NEXT;
- if (sizeof(vbuf) != val.size)
- break;
+ if (NULL == n)
+ return(0);
- memcpy(vbuf, val.data, val.size);
- if (recs->last != betoh64(vbuf[1]))
- continue;
+ /*
+ * We're only searching for one thing: the first text child in
+ * the BODY of a NAME section. Since we don't keep track of
+ * sections in -man, run some hoops to find out whether we're in
+ * the correct section or not.
+ */
- if ((ch = (*mdb->db->del)(mdb->db,
- &key, R_CURSOR)) < 0)
- break;
- }
+ if (MAN_BODY == n->type && MAN_SH == n->tok) {
+ body = n;
+ assert(body->parent);
+ if (NULL != (head = body->parent->head) &&
+ 1 == head->nchild &&
+ NULL != (head = (head->child)) &&
+ MAN_TEXT == head->type &&
+ 0 == strcmp(head->string, "NAME") &&
+ NULL != (body = body->child) &&
+ MAN_TEXT == body->type) {
- if (ch < 0) {
- perror(mdb->dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (1 != ch) {
- fprintf(stderr, "%s: corrupt database\n",
- mdb->dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ title = NULL;
+ titlesz = 0;
- if (verb)
- printf("%s: Deleting from index: %s\n",
- basedir, fn);
+ /*
+ * Suck the entire NAME section into memory.
+ * Yes, we might run away.
+ * But too many manuals have big, spread-out
+ * NAME sections over many lines.
+ */
- val.size = 0;
- ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
+ for ( ; NULL != body; body = body->next) {
+ if (MAN_TEXT != body->type)
+ break;
+ if (0 == (sz = strlen(body->string)))
+ continue;
+ title = mandoc_realloc
+ (title, titlesz + sz + 1);
+ memcpy(title + titlesz, body->string, sz);
+ titlesz += sz + 1;
+ title[titlesz - 1] = ' ';
+ }
+ if (NULL == title)
+ return(1);
- if (ch < 0)
- break;
-cont:
- if (recs->cur >= recs->size) {
- recs->size += MANDOC_SLOP;
- recs->stack = mandoc_realloc(recs->stack,
- recs->size * sizeof(recno_t));
- }
+ title = mandoc_realloc(title, titlesz + 1);
+ title[titlesz] = '\0';
- recs->stack[(int)recs->cur] = recs->last;
- recs->cur++;
- }
+ /* Skip leading space. */
- if (ch < 0) {
- perror(mdb->idxn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (1 != ch) {
- fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ sv = title;
+ while (isspace((unsigned char)*sv))
+ sv++;
- recs->last++;
-}
+ if (0 == (sz = strlen(sv))) {
+ free(title);
+ return(1);
+ }
-/*
- * Grow the buffer (if necessary) and copy in a binary string.
- */
-static void
-buf_appendb(struct buf *buf, const void *cp, size_t sz)
-{
+ /* Erase trailing space. */
- /* Overshoot by MANDOC_BUFSZ. */
+ start = &sv[sz - 1];
+ while (start > sv && isspace((unsigned char)*start))
+ *start-- = '\0';
- while (buf->len + sz >= buf->size) {
- buf->size = buf->len + sz + MANDOC_BUFSZ;
- buf->cp = mandoc_realloc(buf->cp, buf->size);
- }
+ if (start == sv) {
+ free(title);
+ return(1);
+ }
- memcpy(buf->cp + (int)buf->len, cp, sz);
- buf->len += sz;
-}
+ start = sv;
-/*
- * Append a nil-terminated string to the buffer.
- * This can be invoked multiple times.
- * The buffer string will be nil-terminated.
- * If invoked multiple times, a space is put between strings.
- */
-static void
-buf_append(struct buf *buf, const char *cp)
-{
- size_t sz;
+ /*
+ * Go through a special heuristic dance here.
+ * Conventionally, one or more manual names are
+ * comma-specified prior to a whitespace, then a
+ * dash, then a description. Try to puzzle out
+ * the name parts here.
+ */
- if (0 == (sz = strlen(cp)))
- return;
+ for ( ;; ) {
+ sz = strcspn(start, " ,");
+ if ('\0' == start[sz])
+ break;
- if (buf->len)
- buf->cp[(int)buf->len - 1] = ' ';
+ byte = start[sz];
+ start[sz] = '\0';
- buf_appendb(buf, cp, sz + 1);
-}
+ putkey(of, start, TYPE_Nm);
-/*
- * Recursively add all text from a given node.
- * This is optimised for general mdoc nodes in this context, which do
- * not consist of subexpressions and having a recursive call for n->next
- * would be wasteful.
- * The "f" variable should be 0 unless called from pmdoc_Nd for the
- * description buffer, which does not start at the beginning of the
- * buffer.
- */
-static void
-buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
-{
+ if (' ' == byte) {
+ start += sz + 1;
+ break;
+ }
- for ( ; n; n = n->next) {
- if (n->child)
- buf_appendmdoc(buf, n->child, f);
+ assert(',' == byte);
+ start += sz + 1;
+ while (' ' == *start)
+ start++;
+ }
- if (MDOC_TEXT == n->type && f) {
- f = 0;
- buf_appendb(buf, n->string,
- strlen(n->string) + 1);
- } else if (MDOC_TEXT == n->type)
- buf_append(buf, n->string);
+ if (sv == start) {
+ putkey(of, start, TYPE_Nm);
+ free(title);
+ return(1);
+ }
- }
-}
+ while (isspace((unsigned char)*start))
+ start++;
-static void
-hash_reset(DB **db)
-{
- DB *hash;
+ if (0 == strncmp(start, "-", 1))
+ start += 1;
+ else if (0 == strncmp(start, "\\-\\-", 4))
+ start += 4;
+ else if (0 == strncmp(start, "\\-", 2))
+ start += 2;
+ else if (0 == strncmp(start, "\\(en", 4))
+ start += 4;
+ else if (0 == strncmp(start, "\\(em", 4))
+ start += 4;
- if (NULL != (hash = *db))
- (*hash->close)(hash);
+ while (' ' == *start)
+ start++;
- *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
- if (NULL == *db) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
+ assert(NULL == of->desc);
+ of->desc = stradd(start);
+ putkey(of, start, TYPE_Nd);
+ free(title);
+ return(1);
+ }
}
-}
-/* ARGSUSED */
-static int
-pmdoc_head(MDOC_ARGS)
-{
+ for (n = n->child; n; n = n->next)
+ if (parse_man(of, n))
+ return(1);
- return(MDOC_HEAD == n->type);
+ return(0);
}
-/* ARGSUSED */
-static int
-pmdoc_body(MDOC_ARGS)
+static void
+parse_mdoc(struct of *of, const struct mdoc_node *n)
{
- return(MDOC_BODY == n->type);
+ assert(NULL != n);
+ for (n = n->child; NULL != n; n = n->next) {
+ switch (n->type) {
+ case (MDOC_ELEM):
+ /* FALLTHROUGH */
+ case (MDOC_BLOCK):
+ /* FALLTHROUGH */
+ case (MDOC_HEAD):
+ /* FALLTHROUGH */
+ case (MDOC_BODY):
+ /* FALLTHROUGH */
+ case (MDOC_TAIL):
+ if (NULL != mdocs[n->tok].fp)
+ if (0 == (*mdocs[n->tok].fp)(of, n))
+ break;
+
+ if (MDOCF_CHILD & mdocs[n->tok].flags)
+ putmdockey(of, n->child, mdocs[n->tok].mask);
+ break;
+ default:
+ assert(MDOC_ROOT != n->type);
+ continue;
+ }
+ if (NULL != n->child)
+ parse_mdoc(of, n);
+ }
}
-/* ARGSUSED */
static int
-pmdoc_Fd(MDOC_ARGS)
+parse_mdoc_Fd(struct of *of, const struct mdoc_node *n)
{
const char *start, *end;
size_t sz;
- if (SEC_SYNOPSIS != n->sec)
- return(0);
- if (NULL == (n = n->child) || MDOC_TEXT != n->type)
+ if (SEC_SYNOPSIS != n->sec ||
+ NULL == (n = n->child) ||
+ MDOC_TEXT != n->type)
return(0);
/*
* Only consider those `Fd' macro fields that begin with an
* "inclusion" token (versus, e.g., #define).
*/
+
if (strcmp("#include", n->string))
return(0);
@@ -1077,121 +1369,115 @@ pmdoc_Fd(MDOC_ARGS)
if ('>' == *end || '"' == *end)
end--;
- assert(end >= start);
-
- buf_appendb(buf, start, (size_t)(end - start + 1));
- buf_appendb(buf, "", 1);
+ if (end > start)
+ putkeys(of, start, end - start + 1, TYPE_In);
return(1);
}
-/* ARGSUSED */
+/*
+ * Store the first text child of an `In' macro as a TYPE_In key.
+ * Returns 0 if there is no leading text child, which makes parse_mdoc()
+ * skip its generic child indexing for this node; returns 1 otherwise.
+ * The guard must reject a missing or non-text child: the child's string
+ * is dereferenced right below it.
+ */
static int
-pmdoc_In(MDOC_ARGS)
+parse_mdoc_In(struct of *of, const struct mdoc_node *n)
{
if (NULL == n->child || MDOC_TEXT != n->child->type)
return(0);
- buf_append(buf, n->child->string);
+ putkey(of, n->child->string, TYPE_In);
return(1);
}
-/* ARGSUSED */
static int
-pmdoc_Fn(MDOC_ARGS)
+parse_mdoc_Fn(struct of *of, const struct mdoc_node *n)
{
- struct mdoc_node *nn;
const char *cp;
- nn = n->child;
-
- if (NULL == nn || MDOC_TEXT != nn->type)
+ if (NULL == (n = n->child) || MDOC_TEXT != n->type)
return(0);
- /* .Fn "struct type *name" "char *arg" */
-
- cp = strrchr(nn->string, ' ');
- if (NULL == cp)
- cp = nn->string;
+ /*
+ * Parse: .Fn "struct type *name" "char *arg".
+ * First strip away pointer symbol.
+ * Then store the function name, then type.
+ * Finally, store the arguments.
+ */
- /* Strip away pointer symbol. */
+ if (NULL == (cp = strrchr(n->string, ' ')))
+ cp = n->string;
while ('*' == *cp)
cp++;
- /* Store the function name. */
-
- buf_append(buf, cp);
- hash_put(hash, buf, TYPE_Fn);
+ putkey(of, cp, TYPE_Fn);
- /* Store the function type. */
-
- if (nn->string < cp) {
- buf->len = 0;
- buf_appendb(buf, nn->string, cp - nn->string);
- buf_appendb(buf, "", 1);
- hash_put(hash, buf, TYPE_Ft);
- }
+ if (n->string < cp)
+ putkeys(of, n->string, cp - n->string, TYPE_Ft);
- /* Store the arguments. */
-
- for (nn = nn->next; nn; nn = nn->next) {
- if (MDOC_TEXT != nn->type)
- continue;
- buf->len = 0;
- buf_append(buf, nn->string);
- hash_put(hash, buf, TYPE_Fa);
- }
+ for (n = n->next; NULL != n; n = n->next)
+ if (MDOC_TEXT == n->type)
+ putkey(of, n->string, TYPE_Fa);
return(0);
}
-/* ARGSUSED */
static int
-pmdoc_St(MDOC_ARGS)
+parse_mdoc_St(struct of *of, const struct mdoc_node *n)
{
if (NULL == n->child || MDOC_TEXT != n->child->type)
return(0);
- buf_append(buf, n->child->string);
+ putkey(of, n->child->string, TYPE_St);
return(1);
}
-/* ARGSUSED */
static int
-pmdoc_Xr(MDOC_ARGS)
+parse_mdoc_Xr(struct of *of, const struct mdoc_node *n)
{
if (NULL == (n = n->child))
return(0);
- buf_appendb(buf, n->string, strlen(n->string));
-
- if (NULL != (n = n->next)) {
- buf_appendb(buf, ".", 1);
- buf_appendb(buf, n->string, strlen(n->string) + 1);
- } else
- buf_appendb(buf, ".", 2);
-
+ putkey(of, n->string, TYPE_Xr);
return(1);
}
-/* ARGSUSED */
+/*
+ * Append to *desc, separated by single spaces, the string of every
+ * text node at or below the sibling list starting at "n".
+ * *desc is grown with mandoc_realloc() and stays NULL if no text node
+ * is found.
+ */
+static void
+parse_mdoc_Nd_text(char **desc, const struct mdoc_node *n)
+{
+ size_t sz;
+
+ for ( ; NULL != n; n = n->next) {
+ if (MDOC_TEXT == n->type) {
+ sz = strlen(n->string) + 1;
+ if (NULL == *desc) {
+ *desc = mandoc_realloc(NULL, sz);
+ **desc = '\0';
+ } else {
+ sz += strlen(*desc) + 1;
+ *desc = mandoc_realloc(*desc, sz);
+ strlcat(*desc, " ", sz);
+ }
+ strlcat(*desc, n->string, sz);
+ }
+ if (NULL != n->child)
+ parse_mdoc_Nd_text(desc, n->child);
+ }
+}
+
+/*
+ * Record the text of an `Nd' body as the description of the file.
+ * Returns 0 for any node other than the body, which makes parse_mdoc()
+ * skip its generic child indexing; returns 1 for the body.
+ */
static int
-pmdoc_Nd(MDOC_ARGS)
+parse_mdoc_Nd(struct of *of, const struct mdoc_node *n)
{
+ char *desc;
if (MDOC_BODY != n->type)
return(0);
- buf_appendmdoc(dbuf, n->child, 1);
+ /*
+ * Special-case the `Nd' because we need to put the description
+ * into the document table.
+ * Text nested inside child macros belongs to the description
+ * too, so collect it recursively: calling ourselves on a child
+ * would bail out at once because a child is not a body node.
+ */
+
+ desc = NULL;
+ parse_mdoc_Nd_text(&desc, n->child);
+
+ of->desc = NULL != desc ? stradd(desc) : NULL;
+ free(desc);
return(1);
}
-/* ARGSUSED */
static int
-pmdoc_Nm(MDOC_ARGS)
+parse_mdoc_Nm(struct of *of, const struct mdoc_node *n)
{
if (SEC_NAME == n->sec)
@@ -1199,711 +1485,507 @@ pmdoc_Nm(MDOC_ARGS)
else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
return(0);
- if (NULL == n->child)
- buf_append(buf, m->name);
-
return(1);
}
-/* ARGSUSED */
static int
-pmdoc_Sh(MDOC_ARGS)
+parse_mdoc_Sh(struct of *of, const struct mdoc_node *n)
{
return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
}
-static void
-hash_put(DB *db, const struct buf *buf, uint64_t mask)
+static int
+parse_mdoc_head(struct of *of, const struct mdoc_node *n)
{
- uint64_t oldmask;
- DBT key, val;
- int rc;
-
- if (buf->len < 2)
- return;
-
- key.data = buf->cp;
- key.size = buf->len;
- if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (0 == rc) {
- assert(sizeof(uint64_t) == val.size);
- memcpy(&oldmask, val.data, val.size);
- mask |= oldmask;
- }
+ return(MDOC_HEAD == n->type);
+}
- val.data = &mask;
- val.size = sizeof(uint64_t);
+static int
+parse_mdoc_body(struct of *of, const struct mdoc_node *n)
+{
- if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ return(MDOC_BODY == n->type);
}
-static void
-dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+/*
+ * See straddbuf().
+ */
+static char *
+stradd(const char *cp)
{
- assert(key->size);
- assert(val->size);
-
- if (0 == (*db->put)(db, key, val, 0))
- return;
-
- perror(dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- /* NOTREACHED */
+ return(straddbuf(cp, strlen(cp)));
}
/*
- * Call out to per-macro handlers after clearing the persistent database
- * key. If the macro sets the database key, flush it to the database.
+ * This looks up or adds a string to the string table.
+ * The string table is a table of all strings encountered during parse
+ * or file scan.
+ * In using it, we avoid having thousands of (e.g.) "cat1" string
+ * allocations for the "of" table.
+ * We also have a layer atop the string table for keeping track of words
+ * in a parse sequence (see wordaddbuf()).
*/
-static void
-pmdoc_node(MDOC_ARGS)
+static char *
+straddbuf(const char *cp, size_t sz)
{
+ struct str *s;
+ unsigned int index;
+ const char *end;
- if (NULL == n)
- return;
+ if (NULL != (s = hashget(cp, sz)))
+ return(s->key);
- switch (n->type) {
- case (MDOC_HEAD):
- /* FALLTHROUGH */
- case (MDOC_BODY):
- /* FALLTHROUGH */
- case (MDOC_TAIL):
- /* FALLTHROUGH */
- case (MDOC_BLOCK):
- /* FALLTHROUGH */
- case (MDOC_ELEM):
- buf->len = 0;
+ s = mandoc_calloc(sizeof(struct str) + sz, 1);
+ memcpy(s->key, cp, sz);
- /*
- * Both NULL handlers and handlers returning true
- * request using the data. Only skip the element
- * when the handler returns false.
- */
+ end = cp + sz;
+ index = ohash_qlookupi(&strings, cp, &end);
+ assert(NULL == ohash_find(&strings, index));
+ ohash_insert(&strings, index, s);
+ return(s->key);
+}
- if (NULL != mdocs[n->tok].fp &&
- 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
- break;
+static struct str *
+hashget(const char *cp, size_t sz)
+{
+ unsigned int index;
+ const char *end;
- /*
- * For many macros, use the text from all children.
- * Set zero flags for macros not needing this.
- * In that case, the handler must fill the buffer.
- */
+ end = cp + sz;
+ index = ohash_qlookupi(&strings, cp, &end);
+ return(ohash_find(&strings, index));
+}
- if (MDOCF_CHILD & mdocs[n->tok].flags)
- buf_appendmdoc(buf, n->child, 0);
+/*
+ * Add a word to the current parse sequence.
+ * Within the hashtable of strings, we maintain a list of strings that
+ * are currently indexed.
+ * Each of these ("words") has a bitmask modified within the parse.
+ * When we finish a parse, we'll dump the list, then remove the head
+ * entry -- since the next parse will have a new "of", it can keep track
+ * of its entries without conflict.
+ */
+static void
+wordaddbuf(const struct of *of,
+ const char *cp, size_t sz, uint64_t v)
+{
+ struct str *s;
+ unsigned int index;
+ const char *end;
- /*
- * Cover the most common case:
- * Automatically stage one string per element.
- * Set a zero mask for macros not needing this.
- * Additional staging can be done in the handler.
- */
+ if (0 == sz)
+ return;
+
+ s = hashget(cp, sz);
- if (mdocs[n->tok].mask)
- hash_put(hash, buf, mdocs[n->tok].mask);
- break;
- default:
- break;
+ if (NULL != s && of == s->of) {
+ s->mask |= v;
+ return;
+ } else if (NULL == s) {
+ s = mandoc_calloc(sizeof(struct str) + sz, 1);
+ memcpy(s->key, cp, sz);
+ end = cp + sz;
+ index = ohash_qlookupi(&strings, cp, &end);
+ assert(NULL == ohash_find(&strings, index));
+ ohash_insert(&strings, index, s);
}
- pmdoc_node(hash, buf, dbuf, n->child, m);
- pmdoc_node(hash, buf, dbuf, n->next, m);
+ s->next = words;
+ s->of = of;
+ s->mask = v;
+ words = s;
}
-static int
-pman_node(MAN_ARGS)
+/*
+ * Take a Unicode codepoint and produce its UTF-8 encoding.
+ * This isn't the best way to do this, but it works.
+ * The magic numbers are from the UTF-8 packaging.
+ * They're not as scary as they seem: read the UTF-8 spec for details.
+ */
+static size_t
+utf8(unsigned int cp, char out[7])
{
- const struct man_node *head, *body;
- char *start, *sv, *title;
- size_t sz, titlesz;
-
- if (NULL == n)
+ size_t rc;
+
+ rc = 0;
+ if (cp <= 0x0000007F) {
+ rc = 1;
+ out[0] = (char)cp;
+ } else if (cp <= 0x000007FF) {
+ rc = 2;
+ out[0] = (cp >> 6 & 31) | 192;
+ out[1] = (cp & 63) | 128;
+ } else if (cp <= 0x0000FFFF) {
+ rc = 3;
+ out[0] = (cp >> 12 & 15) | 224;
+ out[1] = (cp >> 6 & 63) | 128;
+ out[2] = (cp & 63) | 128;
+ } else if (cp <= 0x001FFFFF) {
+ rc = 4;
+ out[0] = (cp >> 18 & 7) | 240;
+ out[1] = (cp >> 12 & 63) | 128;
+ out[2] = (cp >> 6 & 63) | 128;
+ out[3] = (cp & 63) | 128;
+ } else if (cp <= 0x03FFFFFF) {
+ rc = 5;
+ out[0] = (cp >> 24 & 3) | 248;
+ out[1] = (cp >> 18 & 63) | 128;
+ out[2] = (cp >> 12 & 63) | 128;
+ out[3] = (cp >> 6 & 63) | 128;
+ out[4] = (cp & 63) | 128;
+ } else if (cp <= 0x7FFFFFFF) {
+ rc = 6;
+ out[0] = (cp >> 30 & 1) | 252;
+ out[1] = (cp >> 24 & 63) | 128;
+ out[2] = (cp >> 18 & 63) | 128;
+ out[3] = (cp >> 12 & 63) | 128;
+ out[4] = (cp >> 6 & 63) | 128;
+ out[5] = (cp & 63) | 128;
+ } else
return(0);
- /*
- * We're only searching for one thing: the first text child in
- * the BODY of a NAME section. Since we don't keep track of
- * sections in -man, run some hoops to find out whether we're in
- * the correct section or not.
- */
-
- if (MAN_BODY == n->type && MAN_SH == n->tok) {
- body = n;
- assert(body->parent);
- if (NULL != (head = body->parent->head) &&
- 1 == head->nchild &&
- NULL != (head = (head->child)) &&
- MAN_TEXT == head->type &&
- 0 == strcmp(head->string, "NAME") &&
- NULL != (body = body->child) &&
- MAN_TEXT == body->type) {
-
- title = NULL;
- titlesz = 0;
- /*
- * Suck the entire NAME section into memory.
- * Yes, we might run away.
- * But too many manuals have big, spread-out
- * NAME sections over many lines.
- */
- for ( ; NULL != body; body = body->next) {
- if (MAN_TEXT != body->type)
- break;
- if (0 == (sz = strlen(body->string)))
- continue;
- title = mandoc_realloc
- (title, titlesz + sz + 1);
- memcpy(title + titlesz, body->string, sz);
- titlesz += sz + 1;
- title[(int)titlesz - 1] = ' ';
- }
- if (NULL == title)
- return(0);
-
- title = mandoc_realloc(title, titlesz + 1);
- title[(int)titlesz] = '\0';
-
- /* Skip leading space. */
-
- sv = title;
- while (isspace((unsigned char)*sv))
- sv++;
+ out[rc] = '\0';
+ return(rc);
+}
- if (0 == (sz = strlen(sv))) {
- free(title);
- return(0);
- }
+/*
+ * Store the UTF-8 version of a key, or alias the pointer if the key has
+ * no UTF-8 transcription marks in it.
+ */
+static void
+utf8key(struct mchars *mc, struct str *key)
+{
+ size_t sz, bsz, pos;
+ char utfbuf[7], res[5];
+ char *buf;
+ const char *seq, *cpp, *val;
+ int len, u;
+ enum mandoc_esc esc;
- /* Erase trailing space. */
+ assert(NULL == key->utf8);
- start = &sv[sz - 1];
- while (start > sv && isspace((unsigned char)*start))
- *start-- = '\0';
+ res[0] = '\\';
+ res[1] = '\t';
+ res[2] = ASCII_NBRSP;
+ res[3] = ASCII_HYPH;
+ res[4] = '\0';
- if (start == sv) {
- free(title);
- return(0);
- }
+ val = key->key;
+ bsz = strlen(val);
- start = sv;
+ /*
+ * Pre-check: if we have no stop-characters, then alias the
+ * UTF-8 pointer to the key itself and get out of here.
+ */
+ if (strcspn(val, res) == bsz) {
+ key->utf8 = key->key;
+ return;
+ }
- /*
- * Go through a special heuristic dance here.
- * This is why -man manuals are great!
- * (I'm being sarcastic: my eyes are bleeding.)
- * Conventionally, one or more manual names are
- * comma-specified prior to a whitespace, then a
- * dash, then a description. Try to puzzle out
- * the name parts here.
- */
+ /* Pre-allocate by the length of the input */
- for ( ;; ) {
- sz = strcspn(start, " ,");
- if ('\0' == start[(int)sz])
- break;
+ buf = mandoc_malloc(++bsz);
+ pos = 0;
- buf->len = 0;
- buf_appendb(buf, start, sz);
- buf_appendb(buf, "", 1);
+ while ('\0' != *val) {
+ /*
+ * Halt on the first escape sequence.
+ * This also halts on the end of string, in which case
+ * we just copy, fallthrough, and exit the loop.
+ */
+ if ((sz = strcspn(val, res)) > 0) {
+ memcpy(&buf[pos], val, sz);
+ pos += sz;
+ val += sz;
+ }
- hash_put(hash, buf, TYPE_Nm);
+ if (ASCII_HYPH == *val) {
+ buf[pos++] = '-';
+ val++;
+ continue;
+ } else if ('\t' == *val || ASCII_NBRSP == *val) {
+ buf[pos++] = ' ';
+ val++;
+ continue;
+ } else if ('\\' != *val)
+ break;
- if (' ' == start[(int)sz]) {
- start += (int)sz + 1;
- break;
- }
+ /* Read past the backslash. */
- assert(',' == start[(int)sz]);
- start += (int)sz + 1;
- while (' ' == *start)
- start++;
- }
+ val++;
+ u = 0;
- buf->len = 0;
+ /*
+ * Parse the escape sequence and see if it's a
+ * predefined character or special character.
+ */
+ esc = mandoc_escape
+ ((const char **)&val, &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
- if (sv == start) {
- buf_append(buf, start);
- free(title);
- return(1);
- }
+ if (ESCAPE_SPECIAL != esc)
+ continue;
+ if (0 == (u = mchars_spec2cp(mc, seq, len)))
+ continue;
- while (isspace((unsigned char)*start))
- start++;
+ /*
+ * If we have a Unicode codepoint, try to convert that
+ * to a UTF-8 byte string.
+ */
+ cpp = utfbuf;
+ if (0 == (sz = utf8(u, utfbuf)))
+ continue;
- if (0 == strncmp(start, "-", 1))
- start += 1;
- else if (0 == strncmp(start, "\\-\\-", 4))
- start += 4;
- else if (0 == strncmp(start, "\\-", 2))
- start += 2;
- else if (0 == strncmp(start, "\\(en", 4))
- start += 4;
- else if (0 == strncmp(start, "\\(em", 4))
- start += 4;
+ /* Copy the rendered glyph into the stream. */
- while (' ' == *start)
- start++;
+ sz = strlen(cpp);
+ bsz += sz;
- sz = strlen(start) + 1;
- buf_appendb(dbuf, start, sz);
- buf_appendb(buf, start, sz);
+ buf = mandoc_realloc(buf, bsz);
- hash_put(hash, buf, TYPE_Nd);
- free(title);
- }
+ memcpy(&buf[pos], cpp, sz);
+ pos += sz;
}
- for (n = n->child; n; n = n->next)
- if (pman_node(hash, buf, dbuf, n))
- return(1);
-
- return(0);
+ buf[pos] = '\0';
+ key->utf8 = buf;
}
/*
- * Parse a formatted manual page.
- * By necessity, this involves rather crude guesswork.
+ * Flush the current page's terms (and their bits) into the database.
+ * Wrap the entire set of additions in a transaction to make sqlite be a
+ * little faster.
+ * Also, UTF-8-encode the description at the last possible moment.
*/
static void
-pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
- const struct of *of, const char *basedir)
+dbindex(struct mchars *mc, int form,
+ const struct of *of, const char *base)
{
- FILE *stream;
- char *line, *p, *title;
- size_t len, plen, titlesz;
-
- if (NULL == (stream = fopen(of->fname, "r"))) {
- WARNING(of->fname, basedir, "%s", strerror(errno));
- return;
- }
-
- /*
- * Always use the title derived from the filename up front,
- * do not even try to find it in the file. This also makes
- * sure we don't end up with an orphan index record, even if
- * the file content turns out to be completely unintelligible.
- */
+ struct str *key;
+ const char *desc;
+ int64_t recno;
- buf->len = 0;
- buf_append(buf, of->title);
- hash_put(hash, buf, TYPE_Nm);
-
- /* Skip to first blank line. */
+ DEBUG(of->file, base, "Adding to index");
- while (NULL != (line = fgetln(stream, &len)))
- if ('\n' == *line)
- break;
-
- /*
- * Assume the first line that is not indented
- * is the first section header. Skip to it.
- */
-
- while (NULL != (line = fgetln(stream, &len)))
- if ('\n' != *line && ' ' != *line)
- break;
-
- /*
- * Read up until the next section into a buffer.
- * Strip the leading and trailing newline from each read line,
- * appending a trailing space.
- * Ignore empty (whitespace-only) lines.
- */
-
- titlesz = 0;
- title = NULL;
-
- while (NULL != (line = fgetln(stream, &len))) {
- if (' ' != *line || '\n' != line[(int)len - 1])
- break;
- while (len > 0 && isspace((unsigned char)*line)) {
- line++;
- len--;
- }
- if (1 == len)
- continue;
- title = mandoc_realloc(title, titlesz + len);
- memcpy(title + titlesz, line, len);
- titlesz += len;
- title[(int)titlesz - 1] = ' ';
- }
-
- /*
- * If no page content can be found, or the input line
- * is already the next section header, or there is no
- * trailing newline, reuse the page title as the page
- * description.
- */
-
- if (NULL == title || '\0' == *title) {
- WARNING(of->fname, basedir,
- "Cannot find NAME section");
- buf_appendb(dbuf, buf->cp, buf->size);
- hash_put(hash, buf, TYPE_Nd);
- fclose(stream);
- free(title);
+ if (nodb)
return;
- }
-
- title = mandoc_realloc(title, titlesz + 1);
- title[(int)titlesz] = '\0';
-
- /*
- * Skip to the first dash.
- * Use the remaining line as the description (no more than 70
- * bytes).
- */
- if (NULL != (p = strstr(title, "- "))) {
- for (p += 2; ' ' == *p || '\b' == *p; p++)
- /* Skip to next word. */ ;
- } else {
- WARNING(of->fname, basedir,
- "No dash in title line");
- p = title;
+ desc = "";
+ if (NULL != of->desc) {
+ key = hashget(of->desc, strlen(of->desc));
+ assert(NULL != key);
+ if (NULL == key->utf8)
+ utf8key(mc, key);
+ desc = key->utf8;
}
- plen = strlen(p);
-
- /* Strip backspace-encoding from line. */
-
- while (NULL != (line = memchr(p, '\b', plen))) {
- len = line - p;
- if (0 == len) {
- memmove(line, line + 1, plen--);
- continue;
- }
- memmove(line - 1, line + 1, plen - len);
- plen -= 2;
+ sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, NULL);
+
+ sqlite3_bind_text
+ (stmts[STMT_INSERT_DOC], 1,
+ of->file, -1, SQLITE_STATIC);
+ sqlite3_bind_text
+ (stmts[STMT_INSERT_DOC], 2,
+ of->sec, -1, SQLITE_STATIC);
+ sqlite3_bind_text
+ (stmts[STMT_INSERT_DOC], 3,
+ of->arch, -1, SQLITE_STATIC);
+ sqlite3_bind_text
+ (stmts[STMT_INSERT_DOC], 4,
+ desc, -1, SQLITE_STATIC);
+ sqlite3_bind_int
+ (stmts[STMT_INSERT_DOC], 5, form);
+ sqlite3_step(stmts[STMT_INSERT_DOC]);
+ recno = sqlite3_last_insert_rowid(db);
+ sqlite3_reset(stmts[STMT_INSERT_DOC]);
+
+ for (key = words; NULL != key; key = key->next) {
+ assert(key->of == of);
+ if (NULL == key->utf8)
+ utf8key(mc, key);
+ sqlite3_bind_int64
+ (stmts[STMT_INSERT_KEY], 1, key->mask);
+ sqlite3_bind_text
+ (stmts[STMT_INSERT_KEY], 2,
+ key->utf8, -1, SQLITE_STATIC);
+ sqlite3_bind_int64
+ (stmts[STMT_INSERT_KEY], 3, recno);
+ sqlite3_step(stmts[STMT_INSERT_KEY]);
+ sqlite3_reset(stmts[STMT_INSERT_KEY]);
}
- buf_appendb(dbuf, p, plen + 1);
- buf->len = 0;
- buf_appendb(buf, p, plen + 1);
- hash_put(hash, buf, TYPE_Nd);
- fclose(stream);
- free(title);
+ sqlite3_exec(db, "COMMIT TRANSACTION", NULL, NULL, NULL);
+
}
static void
-ofile_argbuild(int argc, char *argv[],
- struct of **of, const char *basedir)
+dbprune(const char *base)
{
- char buf[MAXPATHLEN];
- const char *sec, *arch, *title;
- char *p;
- int i, src_form;
- struct of *nof;
-
- for (i = 0; i < argc; i++) {
+ struct of *of;
- /*
- * Try to infer the manual section, architecture and
- * page title from the path, assuming it looks like
- * man*[/<arch>]/<title>.<section> or
- * cat<section>[/<arch>]/<title>.0
- */
-
- if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
- fprintf(stderr, "%s: Path too long\n", argv[i]);
- continue;
- }
- sec = arch = title = "";
- src_form = 0;
- p = strrchr(buf, '\0');
- while (p-- > buf) {
- if ('\0' == *sec && '.' == *p) {
- sec = p + 1;
- *p = '\0';
- if ('0' == *sec)
- src_form |= MANDOC_FORM;
- else if ('1' <= *sec && '9' >= *sec)
- src_form |= MANDOC_SRC;
- continue;
- }
- if ('/' != *p)
- continue;
- if ('\0' == *title) {
- title = p + 1;
- *p = '\0';
- continue;
- }
- if (0 == strncmp("man", p + 1, 3))
- src_form |= MANDOC_SRC;
- else if (0 == strncmp("cat", p + 1, 3))
- src_form |= MANDOC_FORM;
- else
- arch = p + 1;
- break;
- }
- if ('\0' == *title) {
- WARNING(argv[i], basedir,
- "Cannot deduce title from filename");
- title = buf;
- }
-
- /*
- * Build the file structure.
- */
-
- nof = mandoc_calloc(1, sizeof(struct of));
- nof->fname = mandoc_strdup(argv[i]);
- nof->sec = mandoc_strdup(sec);
- nof->arch = mandoc_strdup(arch);
- nof->title = mandoc_strdup(title);
- nof->src_form = src_form;
-
- /*
- * Add the structure to the list.
- */
+ if (nodb)
+ return;
- if (NULL == *of) {
- *of = nof;
- (*of)->first = nof;
- } else {
- nof->first = (*of)->first;
- (*of)->next = nof;
- *of = nof;
- }
+ for (of = ofs; NULL != of; of = of->next) {
+ sqlite3_bind_text
+ (stmts[STMT_DELETE], 1,
+ of->file, -1, SQLITE_STATIC);
+ sqlite3_step(stmts[STMT_DELETE]);
+ sqlite3_reset(stmts[STMT_DELETE]);
+ DEBUG(of->file, base, "Deleted from index");
}
}
/*
- * Recursively build up a list of files to parse.
- * We use this instead of ftw() and so on because I don't want global
- * variables hanging around.
- * This ignores the mandocdb.db and mandocdb.index files, but assumes that
- * everything else is a manual.
- * Pass in a pointer to a NULL structure for the first invocation.
+ * Close an existing database and its prepared statements.
+ * If "real" is not set, rename the temporary file into the real one.
*/
static void
-ofile_dirbuild(const char *dir, const char* psec, const char *parch,
- int p_src_form, struct of **of, char *basedir)
+dbclose(const char *base, int real)
{
- char buf[MAXPATHLEN];
- size_t sz;
- DIR *d;
- const char *fn, *sec, *arch;
- char *p, *q, *suffix;
- struct of *nof;
- struct dirent *dp;
- int src_form;
-
- if (NULL == (d = opendir(dir))) {
- WARNING("", dir, "%s", strerror(errno));
- return;
- }
+ size_t i;
+ char file[MAXPATHLEN];
- while (NULL != (dp = readdir(d))) {
- fn = dp->d_name;
-
- if ('.' == *fn)
- continue;
+ if (nodb)
+ return;
- src_form = p_src_form;
+ for (i = 0; i < STMT__MAX; i++) {
+ sqlite3_finalize(stmts[i]);
+ stmts[i] = NULL;
+ }
- if (DT_DIR == dp->d_type) {
- sec = psec;
- arch = parch;
+ sqlite3_close(db);
+ db = NULL;
- /*
- * By default, only use directories called:
- * man<section>/[<arch>/] or
- * cat<section>/[<arch>/]
- */
+ if (real)
+ return;
- if ('\0' == *sec) {
- if(0 == strncmp("man", fn, 3)) {
- src_form |= MANDOC_SRC;
- sec = fn + 3;
- } else if (0 == strncmp("cat", fn, 3)) {
- src_form |= MANDOC_FORM;
- sec = fn + 3;
- } else {
- WARNING(fn, basedir, "Bad section");
- if (use_all)
- sec = fn;
- else
- continue;
- }
- } else if ('\0' == *arch) {
- if (NULL != strchr(fn, '.')) {
- WARNING(fn, basedir, "Bad architecture");
- if (0 == use_all)
- continue;
- }
- arch = fn;
- } else {
- WARNING(fn, basedir, "Excessive subdirectory");
- if (0 == use_all)
- continue;
- }
+ strlcpy(file, MANDOC_DB, MAXPATHLEN);
+ strlcat(file, "~", MAXPATHLEN);
+ if (-1 == rename(file, MANDOC_DB))
+ perror(MANDOC_DB);
+}
- buf[0] = '\0';
- strlcat(buf, dir, MAXPATHLEN);
- strlcat(buf, "/", MAXPATHLEN);
- strlcat(basedir, "/", MAXPATHLEN);
- strlcat(basedir, fn, MAXPATHLEN);
- sz = strlcat(buf, fn, MAXPATHLEN);
+/*
+ * This is straightforward stuff.
+ * Open a database connection to a "temporary" database, then open a set
+ * of prepared statements we'll use over and over again.
+ * If "real" is set, we use the existing database; if not, we truncate a
+ * temporary one.
+ * Must be matched by dbclose().
+ */
+static int
+dbopen(const char *base, int real)
+{
+ char file[MAXPATHLEN];
+ const char *sql;
+ int rc, ofl;
+ size_t sz;
- if (MAXPATHLEN <= sz) {
- WARNING(fn, basedir, "Path too long");
- continue;
- }
+ if (nodb)
+ return(1);
- ofile_dirbuild(buf, sec, arch,
- src_form, of, basedir);
+ sz = strlcpy(file, MANDOC_DB, MAXPATHLEN);
+ if ( ! real)
+ sz = strlcat(file, "~", MAXPATHLEN);
- p = strrchr(basedir, '/');
- *p = '\0';
- continue;
- }
+ if (sz >= MAXPATHLEN) {
+ fprintf(stderr, "%s: Path too long\n", file);
+ return(0);
+ }
- if (DT_REG != dp->d_type) {
- WARNING(fn, basedir, "Not a regular file");
- continue;
- }
- if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
- continue;
- if ('\0' == *psec) {
- WARNING(fn, basedir, "File outside section");
- if (0 == use_all)
- continue;
- }
+ if ( ! real)
+ remove(file);
- /*
- * By default, skip files where the file name suffix
- * does not agree with the section directory
- * they are located in.
- */
+ ofl = SQLITE_OPEN_PRIVATECACHE | SQLITE_OPEN_READWRITE |
+ (0 == real ? SQLITE_OPEN_EXCLUSIVE : 0);
- suffix = strrchr(fn, '.');
- if (NULL == suffix) {
- WARNING(fn, basedir, "No filename suffix");
- if (0 == use_all)
- continue;
- } else if ((MANDOC_SRC & src_form &&
- strcmp(suffix + 1, psec)) ||
- (MANDOC_FORM & src_form &&
- strcmp(suffix + 1, "0"))) {
- WARNING(fn, basedir, "Wrong filename suffix");
- if (0 == use_all)
- continue;
- if ('0' == suffix[1])
- src_form |= MANDOC_FORM;
- else if ('1' <= suffix[1] && '9' >= suffix[1])
- src_form |= MANDOC_SRC;
- }
+ rc = sqlite3_open_v2(file, &db, ofl, NULL);
+ if (SQLITE_OK == rc)
+ return(1);
+ if (SQLITE_CANTOPEN != rc) {
+ perror(file);
+ return(0);
+ }
- /*
- * Skip formatted manuals if a source version is
- * available. Ignore the age: it is very unlikely
- * that people install newer formatted base manuals
- * when they used to have source manuals before,
- * and in ports, old manuals get removed on update.
- */
- if (0 == use_all && MANDOC_FORM & src_form &&
- '\0' != *psec) {
- buf[0] = '\0';
- strlcat(buf, dir, MAXPATHLEN);
- p = strrchr(buf, '/');
- if ('\0' != *parch && NULL != p)
- for (p--; p > buf; p--)
- if ('/' == *p)
- break;
- if (NULL == p)
- p = buf;
- else
- p++;
- if (0 == strncmp("cat", p, 3))
- memcpy(p, "man", 3);
- strlcat(buf, "/", MAXPATHLEN);
- sz = strlcat(buf, fn, MAXPATHLEN);
- if (sz >= MAXPATHLEN) {
- WARNING(fn, basedir, "Path too long");
- continue;
- }
- q = strrchr(buf, '.');
- if (NULL != q && p < q++) {
- *q = '\0';
- sz = strlcat(buf, psec, MAXPATHLEN);
- if (sz >= MAXPATHLEN) {
- WARNING(fn, basedir, "Path too long");
- continue;
- }
- if (0 == access(buf, R_OK))
- continue;
- }
- }
+ sqlite3_close(db);
+ db = NULL;
- buf[0] = '\0';
- assert('.' == dir[0]);
- if ('/' == dir[1]) {
- strlcat(buf, dir + 2, MAXPATHLEN);
- strlcat(buf, "/", MAXPATHLEN);
- }
- sz = strlcat(buf, fn, MAXPATHLEN);
- if (sz >= MAXPATHLEN) {
- WARNING(fn, basedir, "Path too long");
- continue;
- }
+ if (SQLITE_OK != (rc = sqlite3_open(file, &db))) {
+ perror(file);
+ return(0);
+ }
- nof = mandoc_calloc(1, sizeof(struct of));
- nof->fname = mandoc_strdup(buf);
- nof->sec = mandoc_strdup(psec);
- nof->arch = mandoc_strdup(parch);
- nof->src_form = src_form;
+ sql = "CREATE TABLE \"docs\" (\n"
+ " \"file\" TEXT NOT NULL,\n"
+ " \"sec\" TEXT NOT NULL,\n"
+ " \"arch\" TEXT NOT NULL,\n"
+ " \"desc\" TEXT NOT NULL,\n"
+ " \"form\" INTEGER NOT NULL,\n"
+ " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+ ");\n"
+ "\n"
+ "CREATE TABLE \"keys\" (\n"
+ " \"bits\" INTEGER NOT NULL,\n"
+ " \"key\" TEXT NOT NULL,\n"
+ " \"docid\" INTEGER NOT NULL REFERENCES docs(id) "
+ "ON DELETE CASCADE,\n"
+ " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+ ");\n"
+ "\n"
+ "CREATE INDEX \"key_index\" ON keys (key);\n";
+
+ if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
+ perror(sqlite3_errmsg(db));
+ return(0);
+ }
- /*
- * Remember the file name without the extension,
- * to be used as the page title in the database.
- */
+ sql = "DELETE FROM docs where file=?";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE], NULL);
+ sql = "INSERT INTO docs "
+ "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_DOC], NULL);
+ sql = "INSERT INTO keys "
+ "(bits,key,docid) VALUES (?,?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
+ return(1);
+}
- if (NULL != suffix)
- *suffix = '\0';
- nof->title = mandoc_strdup(fn);
+static void *
+hash_halloc(size_t sz, void *arg)
+{
- /*
- * Add the structure to the list.
- */
+ return(mandoc_calloc(sz, 1));
+}
- if (NULL == *of) {
- *of = nof;
- (*of)->first = nof;
- } else {
- nof->first = (*of)->first;
- (*of)->next = nof;
- *of = nof;
- }
- }
+static void *
+hash_alloc(size_t sz, void *arg)
+{
- closedir(d);
+ return(mandoc_malloc(sz));
}
static void
-ofile_free(struct of *of)
+hash_free(void *p, size_t sz, void *arg)
{
- struct of *nof;
- if (NULL != of)
- of = of->first;
+ free(p);
+}
- while (NULL != of) {
- nof = of->next;
- free(of->fname);
- free(of->sec);
- free(of->arch);
- free(of->title);
- free(of);
- of = nof;
+static int
+path_reset(const char *cwd, int fd, const char *base)
+{
+
+ if (-1 == fchdir(fd)) {
+ perror(cwd);
+ return(0);
+ } else if (-1 == chdir(base)) {
+ perror(base);
+ return(0);
}
+ return(1);
}
diff --git a/mandocdb.h b/mandocdb.h
index bda0536e..07456bd1 100644
--- a/mandocdb.h
+++ b/mandocdb.h
@@ -1,4 +1,4 @@
-/* $Id: mandocdb.h,v 1.6 2012/03/23 02:52:33 kristaps Exp $ */
+/* $Id: mandocdb.h,v 1.7 2012/06/08 10:43:01 kristaps Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -18,7 +18,6 @@
#define MANDOCDB_H
#define MANDOC_DB "mandocdb.db"
-#define MANDOC_IDX "mandocdb.index"
#define TYPE_An 0x0000000000000001ULL
#define TYPE_Ar 0x0000000000000002ULL