-/* $Id: mandocdb.c,v 1.259 2018/08/17 20:33:37 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.267 2020/04/03 11:35:01 schwarze Exp $ */
/*
+ * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011-2018 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Implementation of the makewhatis(8) program.
*/
#include "config.h"
#include "roff.h"
#include "mdoc.h"
#include "man.h"
+#include "mandoc_parse.h"
#include "manconf.h"
#include "mansearch.h"
#include "dba_array.h"
int mandocdb(int, char *[]);
static void dbadd(struct dba *, struct mpage *);
-static void dbadd_mlink(const struct mlink *mlink);
+static void dbadd_mlink(const struct mlink *);
static void dbprune(struct dba *);
static void dbwrite(struct dba *);
static void filescan(const char *);
static int exitcode; /* to be returned by main */
static enum op op; /* operational mode */
static char basedir[PATH_MAX]; /* current base directory */
+static size_t basedir_len; /* strlen(basedir) */
static struct mpage *mpage_head; /* list of distinct manual pages */
static struct ohash mpages; /* table of distinct manual pages */
static struct ohash mlinks; /* table of directory entries */
* clobber each other.
*/
#define CHECKOP(_op, _ch) do \
- if (OP_DEFAULT != (_op)) { \
+ if ((_op) != OP_DEFAULT) { \
warnx("-%c: Conflicting option", (_ch)); \
goto usage; \
} while (/*CONSTCOND*/0)
+ mparse_options = MPARSE_VALIDATE;
path_arg = NULL;
op = OP_DEFAULT;
- while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")))
+ while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1)
switch (ch) {
case 'a':
use_all = 1;
mparse_options |= MPARSE_QUICK;
break;
case 'T':
- if (strcmp(optarg, "utf8")) {
+ if (strcmp(optarg, "utf8") != 0) {
warnx("-T%s: Unsupported output format",
optarg);
goto usage;
}
#endif
- if (OP_CONFFILE == op && argc > 0) {
+ if (op == OP_CONFFILE && argc > 0) {
warnx("-C: Too many arguments");
goto usage;
}
exitcode = (int)MANDOCLEVEL_OK;
mchars_alloc();
- mp = mparse_alloc(mparse_options, MANDOCERR_MAX, NULL,
- MANDOC_OS_OTHER, NULL);
+ mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL);
mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
- if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
+ if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) {
/*
* Most of these deal with a specific directory.
* Jump into that directory first.
*/
- if (OP_TEST != op && 0 == set_basedir(path_arg, 1))
+ if (op != OP_TEST && set_basedir(path_arg, 1) == 0)
goto out;
dba = nodb ? dba_new(128) : dba_read(MANDOC_DB);
" from scratch", strerror(errno));
exitcode = (int)MANDOCLEVEL_OK;
op = OP_DEFAULT;
- if (0 == treescan())
+ if (treescan() == 0)
goto out;
dba = dba_new(128);
}
- if (OP_DELETE != op)
+ if (op != OP_DELETE)
mpages_merge(dba, mp);
if (nodb == 0)
dbwrite(dba);
sz = strlen(conf.manpath.paths[j]);
if (sz && conf.manpath.paths[j][sz - 1] == '/')
conf.manpath.paths[j][--sz] = '\0';
- if (0 == sz)
+ if (sz == 0)
continue;
if (j) {
offsetof(struct mlink, file));
}
- if ( ! set_basedir(conf.manpath.paths[j], argc > 0))
+ if (set_basedir(conf.manpath.paths[j], argc > 0) == 0)
continue;
- if (0 == treescan())
+ if (treescan() == 0)
continue;
dba = dba_new(128);
mpages_merge(dba, mp);
say(path, "&realpath");
continue;
}
- if (strstr(buf, basedir) != buf
+ if (strncmp(buf, basedir, basedir_len) != 0
#ifdef HOMEBREWDIR
- && strstr(buf, HOMEBREWDIR) != buf
+ && strncmp(buf, HOMEBREWDIR, strlen(HOMEBREWDIR))
#endif
) {
if (warnings) say("",
* See treescan() for the fts(3) version of this.
*/
static void
-filescan(const char *file)
+filescan(const char *infile)
{
- char buf[PATH_MAX];
struct stat st;
struct mlink *mlink;
- char *p, *start;
+ char *linkfile, *p, *realdir, *start, *usefile;
+ size_t realdir_len;
assert(use_all);
- if (0 == strncmp(file, "./", 2))
- file += 2;
+ if (strncmp(infile, "./", 2) == 0)
+ infile += 2;
/*
* We have to do lstat(2) before realpath(3) loses
* we want to use the orginal file name, while for
* regular files, we want to use the real path.
*/
- if (-1 == lstat(file, &st)) {
+ if (lstat(infile, &st) == -1) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "&lstat");
+ say(infile, "&lstat");
return;
- } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) {
+ } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "Not a regular file");
+ say(infile, "Not a regular file");
return;
}
* We have to resolve the file name to the real path
* in any case for the base directory check.
*/
- if (NULL == realpath(file, buf)) {
+ if ((usefile = realpath(infile, NULL)) == NULL) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "&realpath");
+ say(infile, "&realpath");
return;
}
- if (OP_TEST == op)
- start = buf;
- else if (strstr(buf, basedir) == buf)
- start = buf + strlen(basedir);
+ if (op == OP_TEST)
+ start = usefile;
+ else if (strncmp(usefile, basedir, basedir_len) == 0)
+ start = usefile + basedir_len;
#ifdef HOMEBREWDIR
- else if (strstr(buf, HOMEBREWDIR) == buf)
- start = buf;
+ else if (strncmp(usefile, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0)
+ start = usefile;
#endif
else {
exitcode = (int)MANDOCLEVEL_BADARG;
- say("", "%s: outside base directory", buf);
+ say("", "%s: outside base directory", infile);
+ free(usefile);
return;
}
* Now we are sure the file is inside our tree.
* If it is a symbolic link, ignore the real path
* and use the original name.
- * This implies passing stuff like "cat1/../man1/foo.1"
- * on the command line won't work. So don't do that.
- * Note the stat(2) can still fail if the link target
- * doesn't exist.
*/
- if (S_IFLNK & st.st_mode) {
- if (-1 == stat(buf, &st)) {
+ do {
+ if (S_ISLNK(st.st_mode) == 0)
+ break;
+
+ /*
+ * Some implementations of realpath(3) may succeed
+ * even if the target of the link does not exist,
+ * so check again for extra safety.
+ */
+ if (stat(usefile, &st) == -1) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "&stat");
+ say(infile, "&stat");
+ free(usefile);
return;
}
- if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
- say(file, "Filename too long");
- return;
+ linkfile = mandoc_strdup(infile);
+ if (op == OP_TEST) {
+ free(usefile);
+ start = usefile = linkfile;
+ break;
}
- start = buf;
- if (OP_TEST != op && strstr(buf, basedir) == buf)
- start += strlen(basedir);
- }
+ if (strncmp(infile, basedir, basedir_len) == 0) {
+ free(usefile);
+ usefile = linkfile;
+ start = usefile + basedir_len;
+ break;
+ }
+
+ /*
+ * This symbolic link points into the basedir
+ * from the outside. Let's see whether any of
+ * the parent directories resolve to the basedir.
+ */
+ p = strchr(linkfile, '\0');
+ do {
+ while (*--p != '/')
+ continue;
+ *p = '\0';
+ if ((realdir = realpath(linkfile, NULL)) == NULL) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say(infile, "&realpath");
+ free(linkfile);
+ free(usefile);
+ return;
+ }
+ realdir_len = strlen(realdir) + 1;
+ free(realdir);
+ *p = '/';
+ } while (realdir_len > basedir_len);
+
+ /*
+ * If one of the directories resolves to the basedir,
+ * use the rest of the original name.
+ * Otherwise, the best we can do
+ * is to use the filename pointed to.
+ */
+ if (realdir_len == basedir_len) {
+ free(usefile);
+ usefile = linkfile;
+ start = p + 1;
+ } else {
+ free(linkfile);
+ start = usefile + basedir_len;
+ }
+ } while (/* CONSTCOND */ 0);
mlink = mandoc_calloc(1, sizeof(struct mlink));
mlink->dform = FORM_NONE;
sizeof(mlink->file)) {
say(start, "Filename too long");
free(mlink);
+ free(usefile);
return;
}
* but outside our tree, guess the base directory.
*/
- if (op == OP_TEST || (start == buf && *start == '/')) {
- if (strncmp(buf, "man/", 4) == 0)
- start = buf + 4;
- else if ((start = strstr(buf, "/man/")) != NULL)
+ if (op == OP_TEST || (start == usefile && *start == '/')) {
+ if (strncmp(usefile, "man/", 4) == 0)
+ start = usefile + 4;
+ else if ((start = strstr(usefile, "/man/")) != NULL)
start += 5;
else
- start = buf;
+ start = usefile;
}
/*
* If we find one of these and what's underneath is a directory,
* assume it's an architecture.
*/
- if (NULL != (p = strchr(start, '/'))) {
+ if ((p = strchr(start, '/')) != NULL) {
*p++ = '\0';
- if (0 == strncmp(start, "man", 3)) {
+ if (strncmp(start, "man", 3) == 0) {
mlink->dform = FORM_SRC;
mlink->dsec = start + 3;
- } else if (0 == strncmp(start, "cat", 3)) {
+ } else if (strncmp(start, "cat", 3) == 0) {
mlink->dform = FORM_CAT;
mlink->dsec = start + 3;
}
start = p;
- if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
+ if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) {
*p++ = '\0';
mlink->arch = start;
start = p;
* Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
*/
p = strrchr(start, '\0');
- while (p-- > start && '/' != *p && '.' != *p)
- /* Loop. */ ;
+ while (p-- > start && *p != '/' && *p != '.')
+ continue;
- if ('.' == *p) {
+ if (*p == '.') {
*p++ = '\0';
mlink->fsec = p;
}
* Use the filename portion of the path.
*/
mlink->name = start;
- if (NULL != (p = strrchr(start, '/'))) {
+ if ((p = strrchr(start, '/')) != NULL) {
mlink->name = p + 1;
*p = '\0';
}
mlink_add(mlink, &st);
+ free(usefile);
}
static void
{
struct mpage *mpage, *mpage_dest;
struct mlink *mlink, *mlink_dest;
- struct roff_man *man;
- char *sodest;
+ struct roff_meta *meta;
char *cp;
int fd;
mandoc_ohash_init(&names, 4, offsetof(struct str, key));
mandoc_ohash_init(&strings, 6, offsetof(struct str, key));
mparse_reset(mp);
- man = NULL;
- sodest = NULL;
+ meta = NULL;
if ((fd = mparse_open(mp, mlink->file)) == -1) {
say(mlink->file, "&open");
mparse_readfd(mp, fd, mlink->file);
close(fd);
fd = -1;
- mparse_result(mp, &man, &sodest);
+ meta = mparse_result(mp);
}
- if (sodest != NULL) {
+ if (meta != NULL && meta->sodest != NULL) {
mlink_dest = ohash_find(&mlinks,
- ohash_qlookup(&mlinks, sodest));
+ ohash_qlookup(&mlinks, meta->sodest));
if (mlink_dest == NULL) {
- mandoc_asprintf(&cp, "%s.gz", sodest);
+ mandoc_asprintf(&cp, "%s.gz", meta->sodest);
mlink_dest = ohash_find(&mlinks,
ohash_qlookup(&mlinks, cp));
free(cp);
mlink->next = mlink_dest->next;
mlink_dest->next = mpage->mlinks;
mpage->mlinks = NULL;
+ goto nextpage;
}
- goto nextpage;
- } else if (man != NULL && man->macroset == MACROSET_MDOC) {
- mdoc_validate(man);
+ meta->macroset = MACROSET_NONE;
+ }
+ if (meta != NULL && meta->macroset == MACROSET_MDOC) {
mpage->form = FORM_SRC;
- mpage->sec = man->meta.msec;
+ mpage->sec = meta->msec;
mpage->sec = mandoc_strdup(
mpage->sec == NULL ? "" : mpage->sec);
- mpage->arch = man->meta.arch;
+ mpage->arch = meta->arch;
mpage->arch = mandoc_strdup(
mpage->arch == NULL ? "" : mpage->arch);
- mpage->title = mandoc_strdup(man->meta.title);
- } else if (man != NULL && man->macroset == MACROSET_MAN) {
- man_validate(man);
- if (*man->meta.msec != '\0' ||
- *man->meta.title != '\0') {
+ mpage->title = mandoc_strdup(meta->title);
+ } else if (meta != NULL && meta->macroset == MACROSET_MAN) {
+ if (*meta->msec != '\0' || *meta->title != '\0') {
mpage->form = FORM_SRC;
- mpage->sec = mandoc_strdup(man->meta.msec);
+ mpage->sec = mandoc_strdup(meta->msec);
mpage->arch = mandoc_strdup(mlink->arch);
- mpage->title = mandoc_strdup(man->meta.title);
+ mpage->title = mandoc_strdup(meta->title);
} else
- man = NULL;
+ meta = NULL;
}
assert(mpage->desc == NULL);
- if (man == NULL) {
- mpage->form = FORM_CAT;
+ if (meta == NULL || meta->sodest != NULL) {
mpage->sec = mandoc_strdup(mlink->dsec);
mpage->arch = mandoc_strdup(mlink->arch);
mpage->title = mandoc_strdup(mlink->name);
- parse_cat(mpage, fd);
- } else if (man->macroset == MACROSET_MDOC)
- parse_mdoc(mpage, &man->meta, man->first);
+ if (meta == NULL) {
+ mpage->form = FORM_CAT;
+ parse_cat(mpage, fd);
+ } else
+ mpage->form = FORM_SRC;
+ } else if (meta->macroset == MACROSET_MDOC)
+ parse_mdoc(mpage, meta, meta->first);
else
- parse_man(mpage, &man->meta, man->first);
+ parse_man(mpage, meta, meta->first);
if (mpage->desc == NULL) {
mpage->desc = mandoc_strdup(mlink->name);
if (warnings)
static char startdir[PATH_MAX];
static int getcwd_status; /* 1 = ok, 2 = failure */
static int chdir_status; /* 1 = changed directory */
- char *cp;
/*
* Remember the original working directory, if possible.
* Do not error out if the current directory is not
* searchable: Maybe it won't be needed after all.
*/
- if (0 == getcwd_status) {
- if (NULL == getcwd(startdir, sizeof(startdir))) {
+ if (getcwd_status == 0) {
+ if (getcwd(startdir, sizeof(startdir)) == NULL) {
getcwd_status = 2;
(void)strlcpy(startdir, strerror(errno),
sizeof(startdir));
* Do not use it any longer, not even for messages.
*/
*basedir = '\0';
+ basedir_len = 0;
/*
* If and only if the directory was changed earlier and
* the next directory to process is given as a relative path,
* first go back, or bail out if that is impossible.
*/
- if (chdir_status && '/' != *targetdir) {
- if (2 == getcwd_status) {
+ if (chdir_status && *targetdir != '/') {
+ if (getcwd_status == 2) {
exitcode = (int)MANDOCLEVEL_SYSERR;
say("", "getcwd: %s", startdir);
return 0;
}
- if (-1 == chdir(startdir)) {
+ if (chdir(startdir) == -1) {
exitcode = (int)MANDOCLEVEL_SYSERR;
say("", "&chdir %s", startdir);
return 0;
* pathname and append a trailing slash, such that
* we can reliably check whether files are inside.
*/
- if (NULL == realpath(targetdir, basedir)) {
+ if (realpath(targetdir, basedir) == NULL) {
if (report_baddir || errno != ENOENT) {
exitcode = (int)MANDOCLEVEL_BADARG;
say("", "&%s: realpath", targetdir);
}
+ *basedir = '\0';
return 0;
- } else if (-1 == chdir(basedir)) {
+ } else if (chdir(basedir) == -1) {
if (report_baddir || errno != ENOENT) {
exitcode = (int)MANDOCLEVEL_BADARG;
say("", "&chdir");
}
+ *basedir = '\0';
return 0;
}
chdir_status = 1;
- cp = strchr(basedir, '\0');
- if ('/' != cp[-1]) {
- if (cp - basedir >= PATH_MAX - 1) {
+ basedir_len = strlen(basedir);
+ if (basedir[basedir_len - 1] != '/') {
+ if (basedir_len >= PATH_MAX - 1) {
exitcode = (int)MANDOCLEVEL_SYSERR;
say("", "Filename too long");
+ *basedir = '\0';
+ basedir_len = 0;
return 0;
}
- *cp++ = '/';
- *cp = '\0';
+ basedir[basedir_len++] = '/';
+ basedir[basedir_len] = '\0';
}
return 1;
}
va_list ap;
int use_errno;
- if ('\0' != *basedir)
+ if (*basedir != '\0')
fprintf(stderr, "%s", basedir);
- if ('\0' != *basedir && '\0' != *file)
+ if (*basedir != '\0' && *file != '\0')
fputc('/', stderr);
- if ('\0' != *file)
+ if (*file != '\0')
fprintf(stderr, "%s", file);
use_errno = 1;
- if (NULL != format) {
+ if (format != NULL) {
switch (*format) {
case '&':
format++;
break;
}
}
- if (NULL != format) {
- if ('\0' != *basedir || '\0' != *file)
+ if (format != NULL) {
+ if (*basedir != '\0' || *file != '\0')
fputs(": ", stderr);
va_start(ap, format);
vfprintf(stderr, format, ap);
va_end(ap);
}
if (use_errno) {
- if ('\0' != *basedir || '\0' != *file || NULL != format)
+ if (*basedir != '\0' || *file != '\0' || format != NULL)
fputs(": ", stderr);
perror(NULL);
} else