+/*
+ * Parse a formatted manual page.
+ * By necessity, this involves rather crude guesswork.
+ *
+ * The title from of->title is always handed to hash_put() as TYPE_Nm.
+ * The description is guessed from the line following the first
+ * section header: text after a leading dash if there is one, else the
+ * whole line, limited to 70 bytes.  It is appended to dbuf and handed
+ * to hash_put() as TYPE_Nd.  If no such line exists, the title buffer
+ * is reused as the description.
+ *
+ * A file that cannot be opened is reported with perror(3) and
+ * otherwise ignored.
+ */
+static void
+pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
+		const struct of *of)
+{
+	FILE		*stream;
+	char		*line, *p;
+	size_t		 len, plen;
+
+	if (NULL == (stream = fopen(of->fname, "r"))) {
+		perror(of->fname);
+		return;
+	}
+
+	/*
+	 * Always use the title derived from the filename up front,
+	 * do not even try to find it in the file.  This also makes
+	 * sure we don't end up with an orphan index record, even if
+	 * the file content turns out to be completely unintelligible.
+	 */
+
+	buf->len = 0;
+	buf_append(buf, of->title);
+	hash_put(hash, buf, TYPE_Nm);
+
+	while (NULL != (line = fgetln(stream, &len)) && '\n' != *line)
+		/* Skip to first blank line. */ ;
+
+	while (NULL != (line = fgetln(stream, &len)) &&
+			('\n' == *line || ' ' == *line))
+		/* Skip to first section header. */ ;
+
+	/*
+	 * The loop above stopped on the section header itself (or on
+	 * end of file); the line of interest is the one after it.
+	 * If no page content can be found,
+	 * reuse the page title as the page description.
+	 */
+
+	if (NULL == (line = fgetln(stream, &len))) {
+		/*
+		 * NOTE(review): this passes buf->size, not buf->len;
+		 * confirm against struct buf that this is intended.
+		 */
+		buf_appendb(dbuf, buf->cp, buf->size);
+		hash_put(hash, buf, TYPE_Nd);
+		fclose(stream);
+		return;
+	}
+
+	/*
+	 * Do not close the stream yet: the memory returned by
+	 * fgetln() belongs to the stream and becomes invalid as soon
+	 * as the stream is closed.
+	 */
+
+	/*
+	 * If there is a dash, skip to the text following it.
+	 * Runs of dashes, blanks and backspaces (overstrike
+	 * sequences) after the first dash are skipped as well.
+	 */
+
+	for (p = line, plen = len; plen; p++, plen--)
+		if ('-' == *p)
+			break;
+	for ( ; plen; p++, plen--)
+		if ('-' != *p && ' ' != *p && '\b' != *p)
+			break;
+
+	/* No dash, or nothing after it: fall back to the whole line. */
+
+	if (0 == plen) {
+		p = line;
+		plen = len;
+	}
+
+	/*
+	 * Copy the rest of the line, but no more than 70 bytes.
+	 * The final byte (normally the newline) is overwritten with
+	 * the terminating NUL, so the terminator is included in plen.
+	 */
+
+	if (70 < plen)
+		plen = 70;
+	p[plen - 1] = '\0';
+	buf_appendb(dbuf, p, plen);
+	buf->len = 0;
+	buf_appendb(buf, p, plen);
+	hash_put(hash, buf, TYPE_Nd);
+
+	/* The line has been copied; the stream may go away now. */
+
+	fclose(stream);
+}
+
+/*
+ * Build file structures for the argc paths given in argv and append
+ * them to the list.
+ *
+ * On entry *of is either NULL (empty list) or the current last entry
+ * of the list.  On return *of is the most recently appended entry;
+ * every entry's "first" member points to the head of the list.
+ * Paths that do not fit into MAXPATHLEN bytes are reported on stderr
+ * and skipped.
+ */
+static void
+ofile_argbuild(int argc, char *argv[], struct of **of)
+{
+	char		 buf[MAXPATHLEN];
+	char		*sec, *arch, *title, *p;
+	int		 i, src_form;
+	struct of	*nof;
+
+	for (i = 0; i < argc; i++) {
+
+		/*
+		 * Try to infer the manual section, architecture and
+		 * page title from the path, assuming it looks like
+		 *   man*[/<arch>]/<title>.<section>   or
+		 *   cat<section>[/<arch>]/<title>.0
+		 */
+
+		if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
+			fprintf(stderr, "%s: Path too long\n", argv[i]);
+			continue;
+		}
+		sec = arch = title = NULL;
+		src_form = 0;
+
+		/*
+		 * Scan the copy backwards from its terminating NUL,
+		 * cutting it into NUL-terminated pieces as we go.
+		 * Decrement inside the loop so that p never moves
+		 * to before the start of buf.
+		 */
+
+		p = strrchr(buf, '\0');
+		while (p > buf) {
+			p--;
+
+			/* The last dot starts the section suffix. */
+
+			if (NULL == sec && '.' == *p) {
+				sec = p + 1;
+				*p = '\0';
+				if ('0' == *sec)
+					src_form |= MANDOC_FORM;
+				else if ('1' <= *sec && '9' >= *sec)
+					src_form |= MANDOC_SRC;
+				continue;
+			}
+			if ('/' != *p)
+				continue;
+
+			/* The last slash starts the page title. */
+
+			if (NULL == title) {
+				title = p + 1;
+				*p = '\0';
+				continue;
+			}
+
+			/*
+			 * Second to last slash: p + 1 is the name of
+			 * the directory containing the file.  A man*
+			 * or cat* directory tells us the format;
+			 * anything else is taken to be the
+			 * architecture.  strncmp(3) returns zero on
+			 * a match, so the tests must compare with 0.
+			 */
+
+			if (0 == strncmp("man", p + 1, 3))
+				src_form |= MANDOC_SRC;
+			else if (0 == strncmp("cat", p + 1, 3))
+				src_form |= MANDOC_FORM;
+			else
+				arch = p + 1;
+			break;
+		}
+
+		/* No slash at all: the whole remaining string is the title. */
+
+		if (NULL == title)
+			title = buf;
+
+		/*
+		 * Build the file structure.
+		 * The section and architecture stay NULL if unknown.
+		 */
+
+		nof = mandoc_calloc(1, sizeof(struct of));
+		nof->fname = mandoc_strdup(argv[i]);
+		if (NULL != sec)
+			nof->sec = mandoc_strdup(sec);
+		if (NULL != arch)
+			nof->arch = mandoc_strdup(arch);
+		nof->title = mandoc_strdup(title);
+		nof->src_form = src_form;
+
+		/*
+		 * Add the structure to the list.
+		 */
+
+		if (verb > 2)
+			printf("%s: Scheduling\n", argv[i]);
+		if (NULL == *of) {
+			*of = nof;
+			(*of)->first = nof;
+		} else {
+			nof->first = (*of)->first;
+			(*of)->next = nof;
+			*of = nof;
+		}
+	}
+}
+
+/*
+ * Recursively build up a list of files to parse.
+ * We use this instead of ftw() and so on because I don't want global
+ * variables hanging around.
+ * This ignores the mandoc.db and mandoc.index files, but assumes that
+ * everything else is a manual.
+ * Pass in a pointer to a NULL structure for the first invocation.
+ *
+ * dir is the directory to scan.  psec, parch and p_src_form are the
+ * section, architecture and format flags inherited from the parent
+ * directories (NULL / 0 if not yet known).  On return *of is the most
+ * recently appended entry; every entry's "first" member points to the
+ * head of the list.
+ *
+ * Returns 1 on success.  Returns 0 if dir cannot be opened, if the
+ * path of a subdirectory is too long, or if scanning a subdirectory
+ * fails.  The directory stream is closed on every return path.
+ */
+static int
+ofile_dirbuild(const char *dir, const char* psec, const char *parch,
+		int p_src_form, struct of **of)
+{
+	char		 buf[MAXPATHLEN];
+	struct stat	 sb;
+	size_t		 sz;
+	DIR		*d;
+	const char	*fn, *sec, *arch;
+	char		*p, *q, *suffix;
+	struct of	*nof;
+	struct dirent	*dp;
+	int		 src_form;
+
+	if (NULL == (d = opendir(dir))) {
+		perror(dir);
+		return(0);
+	}
+
+	while (NULL != (dp = readdir(d))) {
+		fn = dp->d_name;
+
+		/* Skip ".", ".." and all other dot files. */
+
+		if ('.' == *fn)
+			continue;
+
+		src_form = p_src_form;
+
+		if (DT_DIR == dp->d_type) {
+			sec = psec;
+			arch = parch;
+
+			/*
+			 * By default, only use directories called:
+			 *   man<section>/[<arch>/] or
+			 *   cat<section>/[<arch>/]
+			 */
+
+			if (NULL == sec) {
+				if(0 == strncmp("man", fn, 3)) {
+					src_form |= MANDOC_SRC;
+					sec = fn + 3;
+				} else if (0 == strncmp("cat", fn, 3)) {
+					src_form |= MANDOC_FORM;
+					sec = fn + 3;
+				} else if (use_all)
+					sec = fn;
+				else
+					continue;
+			} else if (NULL == arch && (use_all ||
+					NULL == strchr(fn, '.')))
+				arch = fn;
+			else if (0 == use_all)
+				continue;
+
+			buf[0] = '\0';
+			strlcat(buf, dir, MAXPATHLEN);
+			strlcat(buf, "/", MAXPATHLEN);
+			sz = strlcat(buf, fn, MAXPATHLEN);
+
+			if (MAXPATHLEN <= sz) {
+				fprintf(stderr, "%s: Path too long\n", dir);
+				/* Do not leak the directory stream. */
+				closedir(d);
+				return(0);
+			}
+
+			if (verb > 2)
+				printf("%s: Scanning\n", buf);
+
+			if ( ! ofile_dirbuild(buf, sec, arch,
+					src_form, of)) {
+				/* Do not leak the directory stream. */
+				closedir(d);
+				return(0);
+			}
+
+			/* A directory is never a manual itself. */
+
+			continue;
+		}
+
+		/*
+		 * Only regular files below a section directory are
+		 * candidates, and the database files are not manuals.
+		 */
+
+		if (DT_REG != dp->d_type ||
+		    (NULL == psec && !use_all) ||
+		    !strcmp(MANDOC_DB, fn) ||
+		    !strcmp(MANDOC_IDX, fn))
+			continue;
+
+		/*
+		 * By default, skip files where the file name suffix
+		 * does not agree with the section directory
+		 * they are located in.
+		 * (psec cannot be NULL here unless use_all is set,
+		 * see the test above.)
+		 */
+
+		suffix = strrchr(fn, '.');
+		if (0 == use_all) {
+			if (NULL == suffix)
+				continue;
+			if ((MANDOC_SRC & src_form &&
+			     strcmp(suffix + 1, psec)) ||
+			    (MANDOC_FORM & src_form &&
+			     strcmp(suffix + 1, "0")))
+				continue;
+		}
+		if (NULL != suffix) {
+			if ('0' == suffix[1])
+				src_form |= MANDOC_FORM;
+			else if ('1' <= suffix[1] && '9' >= suffix[1])
+				src_form |= MANDOC_SRC;
+		}
+
+
+		/*
+		 * Skip formatted manuals if a source version is
+		 * available.  Ignore the age: it is very unlikely
+		 * that people install newer formatted base manuals
+		 * when they used to have source manuals before,
+		 * and in ports, old manuals get removed on update.
+		 */
+		if (0 == use_all && MANDOC_FORM & src_form &&
+		    NULL != psec) {
+			/*
+			 * Construct the name the source version would
+			 * have: replace a leading "cat" in the last
+			 * component of dir by "man" ...
+			 */
+			buf[0] = '\0';
+			strlcat(buf, dir, MAXPATHLEN);
+			p = strrchr(buf, '/');
+			if (NULL == p)
+				p = buf;
+			else
+				p++;
+			if (0 == strncmp("cat", p, 3))
+				memcpy(p, "man", 3);
+			strlcat(buf, "/", MAXPATHLEN);
+			sz = strlcat(buf, fn, MAXPATHLEN);
+			if (sz >= MAXPATHLEN) {
+				fprintf(stderr, "%s: Path too long\n", buf);
+				continue;
+			}
+			/*
+			 * ... and replace the file name suffix
+			 * by the section name.
+			 */
+			q = strrchr(buf, '.');
+			if (NULL != q && p < q++) {
+				*q = '\0';
+				sz = strlcat(buf, psec, MAXPATHLEN);
+				if (sz >= MAXPATHLEN) {
+					fprintf(stderr,
+					    "%s: Path too long\n", buf);
+					continue;
+				}
+				if (0 == stat(buf, &sb))
+					continue;
+			}
+		}
+
+		buf[0] = '\0';
+		strlcat(buf, dir, MAXPATHLEN);
+		strlcat(buf, "/", MAXPATHLEN);
+		sz = strlcat(buf, fn, MAXPATHLEN);
+		if (sz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long\n", dir);
+			continue;
+		}
+
+		nof = mandoc_calloc(1, sizeof(struct of));
+		nof->fname = mandoc_strdup(buf);
+		if (NULL != psec)
+			nof->sec = mandoc_strdup(psec);
+		if (NULL != parch)
+			nof->arch = mandoc_strdup(parch);
+		nof->src_form = src_form;
+
+		/*
+		 * Remember the file name without the extension,
+		 * to be used as the page title in the database.
+		 * This truncates dp->d_name in place, which is fine
+		 * because buf already holds the complete path.
+		 */
+
+		if (NULL != suffix)
+			*suffix = '\0';
+		nof->title = mandoc_strdup(fn);
+
+		/*
+		 * Add the structure to the list.
+		 */
+
+		if (verb > 2)
+			printf("%s: Scheduling\n", buf);
+		if (NULL == *of) {
+			*of = nof;
+			(*of)->first = nof;
+		} else {
+			nof->first = (*of)->first;
+			(*of)->next = nof;
+			*of = nof;
+		}
+	}
+
+	closedir(d);
+	return(1);
+}
+
+/*
+ * Release a list of file structures.
+ * Starting at of, follow the "next" links and free the strings owned
+ * by each entry, then the entry itself.  Passing NULL is a no-op.
+ */
+static void
+ofile_free(struct of *of)
+{
+	struct of	*cur, *following;
+
+	for (cur = of; NULL != cur; cur = following) {
+		/* Fetch the link first; cur is gone after free(). */
+		following = cur->next;
+
+		free(cur->title);
+		free(cur->arch);
+		free(cur->sec);
+		free(cur->fname);
+		free(cur);
+	}
+}
+