-/* $Id: cgi.c,v 1.2 2011/11/09 22:05:56 kristaps Exp $ */
+/* $Id: cgi.c,v 1.42 2012/03/24 01:46:25 kristaps Exp $ */
+/*
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/wait.h>
+
#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <dirent.h>
#include <fcntl.h>
+#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdarg.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
-#include "apropos.h"
+#include "apropos_db.h"
#include "mandoc.h"
+#include "mdoc.h"
+#include "man.h"
+#include "main.h"
+#include "manpath.h"
+#include "mandocdb.h"
+
+#ifdef __linux__
+# include <db_185.h>
+#else
+# include <db.h>
+#endif
-/*
- * The page a request is trying to make.
- */
+/*
+ * The page a request resolves to; main() dispatches on this value.
+ * PAGE__MAX is also the initial "nothing matched" value, which main()
+ * answers with resp_error404().
+ */
enum page {
PAGE_INDEX,
PAGE_SEARCH,
+ PAGE_SHOW,
PAGE__MAX
};
-/*
- * Key-value pair.
- * Both key and val are on the heap.
- */
-struct kval {
- char *key;
- char *val;
+/*
+ * One manual root found by pathgen().
+ * Both strings are heap copies that main() frees before exiting.
+ */
+struct paths {
+ char *name; /* path sans leading "./", with '/' replaced by ' ' */
+ char *path; /* directory path relative to the cache directory */
};
/*
- * The media type, determined by suffix, of the requesting or responding
- * context.
+ * A query as passed to the search function.
+ * The strings point into the query-string buffer that http_parse()
+ * splits in place; they are NULL when the field was not supplied.
*/
-enum media {
- MEDIA_HTML,
- MEDIA__MAX
+struct query {
+ const char *arch; /* architecture */
+ const char *sec; /* manual section */
+ const char *expr; /* unparsed expression string */
+ int manroot; /* index into req->p (or -1 if unknown) */
+ int legacy; /* non-zero when "apropos=0" was given */
};
-/*
- * An HTTP request.
- */
+/*
+ * Everything known about the request being served.
+ */
struct req {
- struct kval *fields; /* query fields */
- size_t fieldsz;
- enum media media;
+ struct query q; /* parsed query string, see http_parse() */
+ struct paths *p; /* manual roots collected by pathgen() */
+ size_t psz; /* number of entries in p */
enum page page;
};
-#if 0
-static void html_printtext(const char *);
-#endif
-static int kval_decode(char *);
-static void kval_parse(struct kval **, size_t *, char *);
-static void kval_free(struct kval *, size_t);
+static int atou(const char *, unsigned *);
+static void catman(const struct req *, const char *);
+static int cmp(const void *, const void *);
+static void format(const struct req *, const char *);
+static void html_print(const char *);
+static void html_printquery(const struct req *);
+static void html_putchar(char);
+static int http_decode(char *);
+static void http_parse(struct req *, char *);
+static void http_print(const char *);
+static void http_putchar(char);
+static void http_printquery(const struct req *);
+static int pathstop(DIR *);
+static void pathgen(DIR *, char *, struct req *);
static void pg_index(const struct req *, char *);
static void pg_search(const struct req *, char *);
-#if 0
-static void pg_searchres(struct rec *, size_t, void *);
-#endif
+static void pg_show(const struct req *, char *);
+static void resp_bad(void);
+static void resp_baddb(void);
+static void resp_error400(void);
+static void resp_error404(const char *);
+static void resp_begin_html(int, const char *);
+static void resp_begin_http(int, const char *);
+static void resp_end_html(void);
+static void resp_index(const struct req *);
+static void resp_search(struct res *, size_t, void *);
+static void resp_searchform(const struct req *);
+
+static const char *progname; /* cgi script name */
+static const char *cache; /* cache directory */
+static const char *css; /* css directory */
+static const char *host; /* hostname */
+/*
+ * Name of each page, indexed by enum page.
+ * NOTE(review): presumably compared against the first PATH_INFO
+ * component in main() -- confirm against the full file.
+ */
static const char * const pages[PAGE__MAX] = {
"index", /* PAGE_INDEX */
"search", /* PAGE_SEARCH */
+ "show", /* PAGE_SHOW */
};
-static const char * const medias[MEDIA__MAX] = {
- "html", /* MEDIA_HTML */
-};
+/*
+ * Convert a decimal string into a non-negative integer.
+ * This follows the error-checking idiom from OpenBSD's strtol(3)
+ * and is used instead of strtonum(3) for portability's sake.
+ * Returns 1 and stores the value in *v on success.
+ * Returns 0 and leaves *v untouched for an empty string, trailing
+ * garbage, or a value outside of [0, INT_MAX].
+ */
+static int
+atou(const char *buf, unsigned *v)
+{
+ char *end;
+ long num;
+
+ errno = 0;
+ num = strtol(buf, &end, 10);
+
+ /* Nothing to parse, or something left over after the digits. */
+ if ('\0' == buf[0] || '\0' != *end)
+ return(0);
+
+ /* Overflowed the long itself. */
+ if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
+ return(0);
+
+ /* Fits a long but not a non-negative int. */
+ if (num < 0 || num > INT_MAX)
+ return(0);
+
+ *v = (unsigned int)num;
+ return(1);
+}
-#if 0
+/*
+ * Print one character to standard output, escaping HTML along the way:
+ * the double quote, ampersand, greater-than and less-than characters
+ * are replaced by their entity references so the character is safe in
+ * both text and double-quoted attribute values.
+ * This will pass non-ASCII straight to output: be warned!
+ */
static void
-html_printtext(const char *p)
+html_putchar(char c)
{
- char c;
- while ('\0' != *p)
- switch ((c = *p++)) {
- case ('"'):
- printf(""e;");
- break;
- case ('&'):
- printf("&");
- break;
- case ('>'):
- printf(">");
- break;
- case ('<'):
- printf("<");
- break;
- default:
- putchar((unsigned char)c);
- break;
- }
+ switch (c) {
+ case ('"'):
+ printf("&quot;");
+ break;
+ case ('&'):
+ printf("&amp;");
+ break;
+ case ('>'):
+ printf("&gt;");
+ break;
+ case ('<'):
+ printf("&lt;");
+ break;
+ default:
+ putchar((unsigned char)c);
+ break;
+ }
}
-#endif
+/*
+ * Print the current query as "&expr=...&sec=...&arch=...", with every
+ * value URL-encoded by http_print().
+ * Fields that were not supplied are printed with an empty value.
+ */
+static void
+http_printquery(const struct req *req)
+{
+ const struct query *q;
+
+ q = &req->q;
+
+ fputs("&expr=", stdout);
+ http_print(NULL == q->expr ? "" : q->expr);
+ fputs("&sec=", stdout);
+ http_print(NULL == q->sec ? "" : q->sec);
+ fputs("&arch=", stdout);
+ http_print(NULL == q->arch ? "" : q->arch);
+}
+
+/*
+ * Print the current query as "expr=...", "sec=..." and "arch=..."
+ * pairs for embedding in an HTML attribute value.
+ * Values are escaped by html_print(), and the separating ampersands are
+ * written as "&amp;" because a bare '&' is not valid inside an HTML
+ * attribute.
+ * Fields that were not supplied are printed with an empty value.
+ */
static void
-kval_free(struct kval *p, size_t sz)
+html_printquery(const struct req *req)
{
- int i;
- for (i = 0; i < (int)sz; i++) {
- free(p[i].key);
- free(p[i].val);
- }
- free(p);
+ printf("&amp;expr=");
+ html_print(req->q.expr ? req->q.expr : "");
+ printf("&amp;sec=");
+ html_print(req->q.sec ? req->q.sec : "");
+ printf("&amp;arch=");
+ html_print(req->q.arch ? req->q.arch : "");
+}
+
+/*
+ * Print a string one character at a time through http_putchar(),
+ * which URL-encodes it.
+ * A NULL string prints nothing.
+ */
+static void
+http_print(const char *p)
+{
+ const char *cp;
+
+ if (NULL != p)
+ for (cp = p; '\0' != *cp; cp++)
+ http_putchar(*cp);
+}
+
+/*
+ * Print a string one character at a time through html_putchar(),
+ * which escapes the HTML special characters.
+ * A NULL string prints nothing.
+ */
+static void
+html_print(const char *p)
+{
+ const char *cp;
+
+ if (NULL != p)
+ for (cp = p; '\0' != *cp; cp++)
+ html_putchar(*cp);
}
/*
* Parse out key-value pairs from an HTTP request variable.
- * This can be either a cookie or a POST/GET string.
+ * This can be either a cookie or a POST/GET string, although man.cgi
+ * uses only GET for simplicity.
+ *
+ * The buffer p is split in place on ';' and '&' and the resulting
+ * values are stored, by pointer, in req->q (which is zeroed first).
+ * Pairs with an empty key or a missing or empty value are skipped.
+ * Parsing stops at the first pair that http_decode() rejects.
+ * "query" is accepted as an alias of "expr", and "sektion" as an alias
+ * of "sec".
*/
static void
-kval_parse(struct kval **kv, size_t *kvsz, char *p)
+http_parse(struct req *req, char *p)
{
- char *key, *val;
- size_t sz, cur;
+ char *key, *val, *manroot;
+ int i, legacy;
- cur = 0;
+ memset(&req->q, 0, sizeof(struct query));
- while (p && '\0' != *p) {
- while (' ' == *p)
- p++;
+ legacy = -1;
+ manroot = NULL;
+ while ('\0' != *p) {
key = p;
val = NULL;
- if (NULL != (p = strchr(p, '='))) {
+ /* Terminate this pair and step to the start of the next. */
+ p += (int)strcspn(p, ";&");
+ if ('\0' != *p)
*p++ = '\0';
- val = p;
-
- sz = strcspn(p, ";&");
- /* LINTED */
- p += sz;
-
- if ('\0' != *p)
- *p++ = '\0';
- } else {
- p = key;
- sz = strcspn(p, ";&");
- /* LINTED */
- p += sz;
-
- if ('\0' != *p)
- p++;
- continue;
- }
+ /* Split the pair into key and value at the first '='. */
+ if (NULL != (val = strchr(key, '=')))
+ *val++ = '\0';
- if ('\0' == *key || '\0' == *val)
+ if ('\0' == *key || NULL == val || '\0' == *val)
continue;
/* Just abort handling. */
- if ( ! kval_decode(key))
- return;
- if ( ! kval_decode(val))
- return;
+ if ( ! http_decode(key))
+ break;
+ if (NULL != val && ! http_decode(val))
+ break;
- if (*kvsz + 1 >= cur) {
- cur++;
- *kv = mandoc_realloc
- (*kv, cur * sizeof(struct kval));
- }
+ if (0 == strcmp(key, "expr"))
+ req->q.expr = val;
+ else if (0 == strcmp(key, "query"))
+ req->q.expr = val;
+ else if (0 == strcmp(key, "sec"))
+ req->q.sec = val;
+ else if (0 == strcmp(key, "sektion"))
+ req->q.sec = val;
+ else if (0 == strcmp(key, "arch"))
+ req->q.arch = val;
+ else if (0 == strcmp(key, "manpath"))
+ manroot = val;
+ else if (0 == strcmp(key, "apropos"))
+ legacy = 0 == strcmp(val, "0");
+ }
+
+ /* Test for old man.cgi compatibility mode. */
+
+ req->q.legacy = legacy > 0;
+
+ /*
+ * Section "0" means no section when in legacy mode.
+ * For some man.cgi scripts, "default" arch is none.
+ */
+
+ if (req->q.legacy && NULL != req->q.sec)
+ if (0 == strcmp(req->q.sec, "0"))
+ req->q.sec = NULL;
+ if (req->q.legacy && NULL != req->q.arch)
+ if (0 == strcmp(req->q.arch, "default"))
+ req->q.arch = NULL;
+
+ /*
+ * Default to first manroot: without a "manpath" key the index
+ * stays at the 0 left by the memset() above.
+ * A "manpath" that matches no known root name yields -1.
+ */
+
+ if (NULL != manroot) {
+ for (i = 0; i < (int)req->psz; i++)
+ if (0 == strcmp(req->p[i].name, manroot))
+ break;
+ req->q.manroot = i < (int)req->psz ? i : -1;
+ }
+}
+
+/*
+ * Print one character to standard output, URL-encoding it:
+ * alphanumerics pass through, a space becomes '+', and everything else
+ * is written as '%' followed by two hexadecimal digits.
+ */
+static void
+http_putchar(char c)
+{
- (*kv)[(int)*kvsz].key = mandoc_strdup(key);
- (*kv)[(int)*kvsz].val = mandoc_strdup(val);
- (*kvsz)++;
+ if (isalnum((unsigned char)c)) {
+ putchar((unsigned char)c);
+ return;
+ } else if (' ' == c) {
+ putchar('+');
+ return;
}
+ /*
+ * Convert to unsigned char first: where char is signed, a byte
+ * above 0x7f would otherwise be sign-extended and printed as
+ * eight hex digits instead of two.
+ */
+ printf("%%%.2x", (unsigned char)c);
}
/*
- * In-place HTTP-decode a string. The standard explanation is that this
- * turns "%4e+foo" into "n foo" in the regular way. This is done
- * in-place over the allocated string.
+ * HTTP-decode a string. The standard explanation is that this turns
+ * "%4e+foo" into "n foo" in the regular way. This is done in-place
+ * over the allocated string.
*/
static int
-kval_decode(char *p)
+http_decode(char *p)
{
char hex[3];
int c;
return(1);
}
+/*
+ * Print the CGI response headers and the blank line that ends them,
+ * then flush standard output.
+ * A "Status" header carrying code and msg is sent for anything other
+ * than 200; msg is not used when code is 200.
+ */
+static void
+resp_begin_http(int code, const char *msg)
+{
+
+ if (200 != code)
+ printf("Status: %d %s\n", code, msg);
+
+ fputs("Content-Type: text/html; charset=utf-8\n", stdout);
+ fputs("Cache-Control: no-cache\n", stdout);
+ fputs("Pragma: no-cache\n", stdout);
+ putchar('\n');
+
+ fflush(stdout);
+}
-/* ARGSUSED */
+/*
+ * Print the response headers via resp_begin_http(), followed by the
+ * HTML prologue up to and including the opening BODY tag.
+ * Both stylesheet links are rooted at the css directory.
+ */
static void
-pg_index(const struct req *req, char *path)
+resp_begin_html(int code, const char *msg)
+{
+
+ resp_begin_http(code, msg);
+
+ fputs("<!DOCTYPE HTML PUBLIC "
+ " \"-//W3C//DTD HTML 4.01//EN\""
+ " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
+ "<HTML>\n"
+ "<HEAD>\n"
+ "<META HTTP-EQUIV=\"Content-Type\""
+ " CONTENT=\"text/html; charset=utf-8\">\n", stdout);
+ printf("<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
+ " TYPE=\"text/css\" media=\"all\">\n", css);
+ printf("<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
+ " TYPE=\"text/css\" media=\"all\">\n", css);
+ fputs("<TITLE>System Manpage Reference</TITLE>\n"
+ "</HEAD>\n"
+ "<BODY>\n"
+ "<!-- Begin page content. //-->\n", stdout);
+}
+
+/*
+ * Close the BODY and HTML elements opened by resp_begin_html().
+ */
+static void
+resp_end_html(void)
{
+ fputs("</BODY>\n"
+ "</HTML>\n", stdout);
}
-#if 0
+/*
+ * Print the search form, pre-filled with the values of the current
+ * query (HTML-escaped).
+ * The manpath selector is only shown when more than one manual root is
+ * known; the current root is preselected, or the first one when the
+ * query's root is -1.
+ */
static void
-pg_searchres(struct rec *recs, size_t sz, void *arg)
+resp_searchform(const struct req *req)
{
int i;
- const char *pg;
- if (NULL == (pg = getenv("SCRIPT_NAME")))
- pg = "";
+ puts("<!-- Begin search form. //-->");
+ printf("<DIV ID=\"mancgi\">\n"
+ "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
+ "<FIELDSET>\n"
+ "<LEGEND>Search Parameters</LEGEND>\n"
+ "<INPUT TYPE=\"submit\" "
+ " VALUE=\"Search\"> for manuals satisfying \n"
+ "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
+ progname);
+ html_print(req->q.expr ? req->q.expr : "");
+ printf("\">, section "
+ "<INPUT TYPE=\"text\""
+ " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
+ html_print(req->q.sec ? req->q.sec : "");
+ printf("\">, arch "
+ "<INPUT TYPE=\"text\""
+ " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
+ html_print(req->q.arch ? req->q.arch : "");
+ printf("\">");
+ if (req->psz > 1) {
+ puts(", <SELECT NAME=\"manpath\">");
+ for (i = 0; i < (int)req->psz; i++) {
+ printf("<OPTION %s VALUE=\"",
+ (i == req->q.manroot) ||
+ (0 == i && -1 == req->q.manroot) ?
+ "SELECTED=\"selected\"" : "");
+ html_print(req->p[i].name);
+ printf("\">");
+ html_print(req->p[i].name);
+ puts("</OPTION>");
+ }
+ puts("</SELECT>");
+ }
+ puts(".\n"
+ "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
+ "</FIELDSET>\n"
+ "</FORM>\n"
+ "</DIV>");
+ puts("<!-- End search form. //-->");
+}
+
+/*
+ * Send the index page: the HTML prologue, the search form, and the
+ * HTML epilogue, with status 200.
+ */
+static void
+resp_index(const struct req *req)
+{
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ resp_end_html();
+}
+
+/*
+ * Send a complete page with status 400 telling the user that the query
+ * was malformed and linking back to the index page.
+ */
+static void
+resp_error400(void)
+{
+
+ resp_begin_html(400, "Query Malformed");
+ printf("<H1>Malformed Query</H1>\n"
+ "<P>\n"
+ "The query you entered was malformed.\n"
+ "Try again from the\n"
+ "<A HREF=\"%s/index.html\">main page</A>.\n"
+ "</P>", progname);
+ resp_end_html();
+}
+
+/*
+ * Send a complete page with status 404 naming the page that could not
+ * be found and linking back to the index page.
+ * The page name is HTML-escaped; a NULL name prints as nothing.
+ */
+static void
+resp_error404(const char *page)
+{
- for (i = 0; i < (int)sz; i++) {
- printf("<A HREF=\"%s/show/%u.html\">",
- pg, recs[i].rec);
- html_printtext(recs[i].title);
+ resp_begin_html(404, "Not Found");
+ puts("<H1>Page Not Found</H1>\n"
+ "<P>\n"
+ "The page you're looking for, ");
+ printf("<B>");
+ html_print(page);
+ printf("</B>,\n"
+ "could not be found.\n"
+ "Try searching from the\n"
+ "<A HREF=\"%s/index.html\">main page</A>.\n"
+ "</P>", progname);
+ resp_end_html();
+}
+
+/*
+ * Send a complete page with status 500 for failures that are not tied
+ * to a particular database.
+ */
+static void
+resp_bad(void)
+{
+ resp_begin_html(500, "Internal Server Error");
+ puts("<P>Generic badness happened.</P>");
+ resp_end_html();
+}
+
+/*
+ * Send a complete page with status 500 reporting a broken database.
+ */
+static void
+resp_baddb(void)
+{
+
+ resp_begin_html(500, "Internal Server Error");
+ puts("<P>Your database is broken.</P>");
+ resp_end_html();
+}
+
+/*
+ * Present search results; pg_search() hands this function to
+ * apropos_search() and also calls it directly.
+ * r holds sz results, of which only those flagged as matched are shown.
+ * arg is the struct req of the current request and must not be NULL,
+ * even when sz is 0: the search form printed below reads it.
+ * Exactly one match answers with a 303 redirect to that manual.
+ * Otherwise a page is sent with either a "No results found" note or a
+ * table of the matches sorted by title.
+ * Note that r is sorted in place.
+ */
+static void
+resp_search(struct res *r, size_t sz, void *arg)
+{
+ size_t i, matched;
+ const struct req *req;
+
+ req = (const struct req *)arg;
+
+ if (sz > 0)
+ assert(req->q.manroot >= 0);
+
+ for (matched = i = 0; i < sz; i++)
+ if (r[i].matched)
+ matched++;
+
+ if (1 == matched) {
+ for (i = 0; i < sz; i++)
+ if (r[i].matched)
+ break;
+ /*
+ * If we have just one result, then jump there now
+ * without any delay.
+ */
+ puts("Status: 303 See Other");
+ printf("Location: http://%s%s/show/%d/%u/%u.html?",
+ host, progname, req->q.manroot,
+ r[i].volume, r[i].rec);
+ http_printquery(req);
+ puts("\n"
+ "Content-Type: text/html; charset=utf-8\n");
+ return;
+ }
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+
+ puts("<DIV CLASS=\"results\">");
+
+ if (0 == matched) {
+ puts("<P>\n"
+ "No results found.\n"
+ "</P>\n"
+ "</DIV>");
+ resp_end_html();
+ return;
+ }
+
+ qsort(r, sz, sizeof(struct res), cmp);
+
+ puts("<TABLE>");
+
+ for (i = 0; i < sz; i++) {
+ if ( ! r[i].matched)
+ continue;
+ /* Link path: manroot index, volume, record number. */
+ printf("<TR>\n"
+ "<TD CLASS=\"title\">\n"
+ "<A HREF=\"%s/show/%d/%u/%u.html?",
+ progname, req->q.manroot,
+ r[i].volume, r[i].rec);
+ html_printquery(req);
+ printf("\">");
+ html_print(r[i].title);
putchar('(');
- html_printtext(recs[i].cat);
- puts(")</A>");
+ html_print(r[i].cat);
+ if (r[i].arch && '\0' != *r[i].arch) {
+ putchar('/');
+ html_print(r[i].arch);
+ }
+ printf(")</A>\n"
+ "</TD>\n"
+ "<TD CLASS=\"desc\">");
+ html_print(r[i].desc);
+ puts("</TD>\n"
+ "</TR>");
}
+
+ puts("</TABLE>\n"
+ "</DIV>");
+ resp_end_html();
}
-#endif
+/*
+ * Page handler for the index page.
+ * The path argument is not used.
+ */
+/* ARGSUSED */
static void
-pg_search(const struct req *req, char *path)
+pg_index(const struct req *req, char *path)
{
+
+ resp_index(req);
+}
+
+/*
+ * Send a preformatted manual (catpage) as an HTML page.
+ * The file is read line by line and the backspace overstrike sequences
+ * of the formatted text are turned into markup:
+ * "_\bX" prints X in italic, a few known character pairs print as a
+ * single '*' or '+', and any other "X\bY" prints Y in bold.
+ * All text is HTML-escaped.
+ * If the file cannot be opened, resp_baddb() is sent instead.
+ */
+static void
+catman(const struct req *req, const char *file)
+{
+ FILE *f;
+ size_t len;
int i;
- struct opts opt;
+ char *p;
+ int italic, bold;
- for (i = 0; i < (int)req->fieldsz; i++)
- if (0 == strcmp(req->fields[i].key, "key"))
- break;
+ if (NULL == (f = fopen(file, "r"))) {
+ resp_baddb();
+ return;
+ }
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ puts("<DIV CLASS=\"catman\">\n"
+ "<PRE>");
+
+ while (NULL != (p = fgetln(f, &len))) {
+ bold = italic = 0;
+ for (i = 0; i < (int)len - 1; i++) {
+ /*
+ * This means that the catpage is out of state.
+ * Ignore it and keep going (although the
+ * catpage is bogus).
+ */
+
+ if ('\b' == p[i] || '\n' == p[i])
+ continue;
+
+ /*
+ * Print a regular character.
+ * Close out any bold/italic scopes.
+ * If we're in back-space mode, make sure we'll
+ * have something to enter when we backspace.
+ */
+
+ if ('\b' != p[i + 1]) {
+ if (italic)
+ printf("</I>");
+ if (bold)
+ printf("</B>");
+ italic = bold = 0;
+ html_putchar(p[i]);
+ continue;
+ } else if (i + 2 >= (int)len)
+ continue;
+
+ /*
+ * From here on p[i + 1] is a backspace and
+ * p[i + 2] is the overstriking character.
+ */
+
+ /* Italic mode. */
+
+ if ('_' == p[i]) {
+ if (bold)
+ printf("</B>");
+ if ( ! italic)
+ printf("<I>");
+ bold = 0;
+ italic = 1;
+ i += 2;
+ html_putchar(p[i]);
+ continue;
+ }
+
+ /*
+ * Handle funny behaviour troff-isms.
+ * These grok'd from the original man2html.c.
+ */
+
+ if (('+' == p[i] && 'o' == p[i + 2]) ||
+ ('o' == p[i] && '+' == p[i + 2]) ||
+ ('|' == p[i] && '=' == p[i + 2]) ||
+ ('=' == p[i] && '|' == p[i + 2]) ||
+ ('*' == p[i] && '=' == p[i + 2]) ||
+ ('=' == p[i] && '*' == p[i + 2]) ||
+ ('*' == p[i] && '|' == p[i + 2]) ||
+ ('|' == p[i] && '*' == p[i + 2])) {
+ if (italic)
+ printf("</I>");
+ if (bold)
+ printf("</B>");
+ italic = bold = 0;
+ putchar('*');
+ i += 2;
+ continue;
+ } else if (('|' == p[i] && '-' == p[i + 2]) ||
+ ('-' == p[i] && '|' == p[i + 2]) ||
+ ('+' == p[i] && '-' == p[i + 2]) ||
+ ('-' == p[i] && '+' == p[i + 2]) ||
+ ('+' == p[i] && '|' == p[i + 2]) ||
+ ('|' == p[i] && '+' == p[i + 2])) {
+ /*
+ * The second character of each pair must be read
+ * from p[i + 2]: p[i + 1] is always the backspace
+ * here, so testing it could never match.
+ */
+ if (italic)
+ printf("</I>");
+ if (bold)
+ printf("</B>");
+ italic = bold = 0;
+ putchar('+');
+ i += 2;
+ continue;
+ }
+
+ /* Bold mode. */
+
+ if (italic)
+ printf("</I>");
+ if ( ! bold)
+ printf("<B>");
+ bold = 1;
+ italic = 0;
+ i += 2;
+ html_putchar(p[i]);
+ }
+
+ /*
+ * Clean up the last character.
+ * We can get to a newline; don't print that.
+ */
+
+ if (italic)
+ printf("</I>");
+ if (bold)
+ printf("</B>");
+
+ if (i == (int)len - 1 && '\n' != p[i])
+ html_putchar(p[i]);
+
+ putchar('\n');
+ }
+
+ puts("</PRE>\n"
+ "</DIV>\n"
+ "</BODY>\n"
+ "</HTML>");
- if (i == (int)req->fieldsz)
+ fclose(f);
+}
+
+/*
+ * Parse a manual source file and send it as an HTML page: the HTML
+ * prologue and search form first, then the formatted manual.
+ * If the file cannot be opened, fails to parse, or yields neither an
+ * mdoc nor a man result, resp_baddb() is sent instead.
+ * The parser is released on every path once it has been allocated.
+ */
+static void
+format(const struct req *req, const char *file)
+{
+ struct mparse *mp;
+ int fd;
+ struct mdoc *mdoc;
+ struct man *man;
+ void *vp;
+ enum mandoclevel rc;
+ char opts[MAXPATHLEN + 128];
+
+ if (-1 == (fd = open(file, O_RDONLY, 0))) {
+ resp_baddb();
+ return;
+ }
+
+ mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+ rc = mparse_readfd(mp, fd, file);
+ close(fd);
+
+ if (rc >= MANDOCLEVEL_FATAL) {
+ resp_baddb();
+ /* Do not leak the parser when bailing out. */
+ mparse_free(mp);
+ return;
+ }
+
+ /* Formatter options: cross references link back to the search page. */
+ snprintf(opts, sizeof(opts), "fragment,"
+ "man=%s/search.html?sec=%%S&expr=%%N,"
+ /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
+ progname);
+
+ mparse_result(mp, &mdoc, &man);
+ if (NULL == man && NULL == mdoc) {
+ resp_baddb();
+ mparse_free(mp);
return;
+ }
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+
+ vp = html_alloc(opts);
+
+ if (NULL != mdoc)
+ html_mdoc(vp, mdoc);
+ else
+ html_man(vp, man);
+
+ puts("</BODY>\n"
+ "</HTML>");
+
+ html_free(vp);
+ mparse_free(mp);
+}
+
+/*
+ * Page handler for showing a single manual.
+ * The path has the form "manroot/volume/record", all three being
+ * non-negative decimal numbers: the index of the manual root in
+ * req->p, the index of a directory listed by that root's
+ * etc/catman.conf, and the record number in that directory's index
+ * database.
+ * Malformed or out-of-range input is answered with resp_error400();
+ * unusable directories or databases with resp_baddb().
+ * Note that this changes the working directory and modifies path.
+ */
+static void
+pg_show(const struct req *req, char *path)
+{
+ struct manpaths ps;
+ size_t sz;
+ char *sub;
+ char file[MAXPATHLEN];
+ const char *cp;
+ int rc, catm;
+ unsigned int vol, rec, mr;
+ DB *idx;
+ DBT key, val;
+
+ idx = NULL;
+
+ /* Parse out mroot, volume, and record from the path. */
+
+ if (NULL == path || NULL == (sub = strchr(path, '/'))) {
+ resp_error400();
+ return;
+ }
+ *sub++ = '\0';
+ if ( ! atou(path, &mr)) {
+ resp_error400();
+ return;
+ }
+ path = sub;
+ if (NULL == (sub = strchr(path, '/'))) {
+ resp_error400();
+ return;
+ }
+ *sub++ = '\0';
+ if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
+ resp_error400();
+ return;
+ } else if (mr >= (unsigned int)req->psz) {
+ resp_error400();
+ return;
+ }
+
+ /*
+ * Begin by chdir()ing into the manroot.
+ * This way we can pick up the database files, which are
+ * relative to the manpath root.
+ */
+
+ if (-1 == chdir(req->p[(int)mr].path)) {
+ perror(req->p[(int)mr].path);
+ resp_baddb();
+ return;
+ }
+
+ memset(&ps, 0, sizeof(struct manpaths));
+ manpath_manconf(&ps, "etc/catman.conf");
+
+ if (vol >= (unsigned int)ps.sz) {
+ resp_error400();
+ goto out;
+ }
+
+ /* Remember the length of the directory part for reuse below. */
+ sz = strlcpy(file, ps.paths[vol], MAXPATHLEN);
+ assert(sz < MAXPATHLEN);
+ strlcat(file, "/", MAXPATHLEN);
+ strlcat(file, MANDOC_IDX, MAXPATHLEN);
+
+ /* Open the index recno(3) database. */
+
+ idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
+ if (NULL == idx) {
+ perror(file);
+ resp_baddb();
+ goto out;
+ }
+
+ key.data = &rec;
+ key.size = 4;
+
+ /* A negative result is a database error, a positive one a bad request. */
+ if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
+ rc < 0 ? resp_baddb() : resp_error400();
+ goto out;
+ } else if (0 == val.size) {
+ resp_baddb();
+ goto out;
+ }
+
+ /*
+ * The first byte of the record selects the renderer: 'c' means a
+ * preformatted catpage, anything else is handed to format().
+ * The file name follows and must be NUL-terminated within the
+ * record.
+ */
+ cp = (char *)val.data;
+ catm = 'c' == *cp++;
+
+ if (NULL == memchr(cp, '\0', val.size - 1))
+ resp_baddb();
+ else {
+ /* Cut file back to the directory and append the file name. */
+ file[(int)sz] = '\0';
+ strlcat(file, "/", MAXPATHLEN);
+ strlcat(file, cp, MAXPATHLEN);
+ if (catm)
+ catman(req, file);
+ else
+ format(req, file);
+ }
+out:
+ if (idx)
+ (*idx->close)(idx);
+ manpath_free(&ps);
+}
+
+/*
+ * Page handler for searches.
+ * Changes into the selected manual root, splits the query expression
+ * on white space, compiles it (as a legacy term or as a full
+ * expression) and passes it to apropos_search(), which reports through
+ * resp_search().
+ * Without a usable manual root, or when the expression does not
+ * compile, an empty result page is sent.
+ * The path argument is not used.
+ */
+static void
+pg_search(const struct req *req, char *path)
+{
+ size_t tt, ressz;
+ struct manpaths ps;
+ int i, sz, rc;
+ const char *ep, *start;
+ struct res *res;
+ char **cp;
+ struct opts opt;
+ struct expr *expr;
+
+ if (req->q.manroot < 0 || 0 == req->psz) {
+ resp_search(NULL, 0, (void *)req);
+ return;
+ }
memset(&opt, 0, sizeof(struct opts));
- /*opt.types = TYPE_NAME | TYPE_DESC;
- apropos_search(&opt, req->fields[i].val, NULL, pg_searchres);*/
+
+ ep = req->q.expr;
+ opt.arch = req->q.arch;
+ opt.cat = req->q.sec;
+ rc = -1;
+ sz = 0;
+ cp = NULL;
+ ressz = 0;
+ res = NULL;
+
+ /*
+ * Begin by chdir()ing into the root of the manpath.
+ * This way we can pick up the database files, which are
+ * relative to the manpath root.
+ */
+
+ assert(req->q.manroot < (int)req->psz);
+ if (-1 == (chdir(req->p[req->q.manroot].path))) {
+ perror(req->p[req->q.manroot].path);
+ resp_search(NULL, 0, (void *)req);
+ return;
+ }
+
+ memset(&ps, 0, sizeof(struct manpaths));
+ manpath_manconf(&ps, "etc/catman.conf");
+
+ /*
+ * Poor man's tokenisation: just break apart by spaces.
+ * Yes, this is half-ass. But it works for now.
+ */
+
+ while (ep && isspace((unsigned char)*ep))
+ ep++;
+
+ while (ep && '\0' != *ep) {
+ cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
+ start = ep;
+ while ('\0' != *ep && ! isspace((unsigned char)*ep))
+ ep++;
+ cp[sz] = mandoc_malloc((ep - start) + 1);
+ memcpy(cp[sz], start, ep - start);
+ cp[sz++][ep - start] = '\0';
+ while (isspace((unsigned char)*ep))
+ ep++;
+ }
+
+ /*
+ * Pump down into apropos backend.
+ * The resp_search() function is called with the results.
+ */
+
+ expr = req->q.legacy ?
+ termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
+
+ if (NULL != expr)
+ rc = apropos_search
+ (ps.sz, ps.paths, &opt, expr, tt,
+ (void *)req, &ressz, &res, resp_search);
+
+ /*
+ * ...unless errors occurred.
+ * rc is still -1 when no expression could be compiled.
+ * resp_search() needs the request even for an empty result,
+ * because it prints the search form from it; passing NULL here
+ * would make it dereference a null pointer.
+ */
+
+ if (0 == rc)
+ resp_baddb();
+ else if (-1 == rc)
+ resp_search(NULL, 0, (void *)req);
+
+ for (i = 0; i < sz; i++)
+ free(cp[i]);
+
+ free(cp);
+ resfree(res, ressz);
+ exprfree(expr);
+ manpath_free(&ps);
}
int
main(void)
{
int i;
+ char buf[MAXPATHLEN];
+ DIR *cwd;
struct req req;
- char *p;
- char *path, *subpath, *suffix;
+ char *p, *path, *subpath;
+
+ /* Scan our run-time environment. */
+
+ if (NULL == (cache = getenv("CACHE_DIR")))
+ cache = "/cache/man.cgi";
+
+ if (NULL == (progname = getenv("SCRIPT_NAME")))
+ progname = "";
+
+ if (NULL == (css = getenv("CSS_DIR")))
+ css = "";
+
+ if (NULL == (host = getenv("HTTP_HOST")))
+ host = "localhost";
+
+ /*
+ * First we change directory into the cache directory so that
+ * subsequent scanning for manpath directories is rooted
+ * relative to the same position.
+ */
+
+ if (-1 == chdir(cache)) {
+ perror(cache);
+ resp_bad();
+ return(EXIT_FAILURE);
+ } else if (NULL == (cwd = opendir(cache))) {
+ perror(cache);
+ resp_bad();
+ return(EXIT_FAILURE);
+ }
memset(&req, 0, sizeof(struct req));
+ strlcpy(buf, ".", MAXPATHLEN);
+ pathgen(cwd, buf, &req);
+ closedir(cwd);
+
+ /* Next parse out the query string. */
+
if (NULL != (p = getenv("QUERY_STRING")))
- kval_parse(&req.fields, &req.fieldsz, p);
+ http_parse(&req, p);
- suffix = subpath = path = NULL;
+ /*
+ * Now juggle paths to extract information.
+ * We want to extract our filetype (the file suffix), the
+ * initial path component, then the trailing component(s).
+ * Start with leading subpath component.
+ */
- req.media = MEDIA_HTML;
+ subpath = path = NULL;
req.page = PAGE__MAX;
if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
req.page = PAGE_INDEX;
+
if (NULL != path && '/' == *path && '\0' == *++path)
req.page = PAGE_INDEX;
- if (NULL != path && NULL != (suffix = strrchr(path, '.')))
- if (NULL != suffix && NULL == strchr(suffix, '/'))
- *suffix++ = '\0';
+ /* Strip file suffix. */
+
+ if (NULL != path && NULL != (p = strrchr(path, '.')))
+ if (NULL != p && NULL == strchr(p, '/'))
+ *p++ = '\0';
+
+ /* Resolve subpath component. */
if (NULL != path && NULL != (subpath = strchr(path, '/')))
- *subpath++ = '\0';
+ *subpath++ = '\0';
- if (NULL != suffix && '\0' != *suffix)
- for (i = 0; i < (int)MEDIA__MAX; i++)
- if (0 == strcmp(medias[i], suffix)) {
- req.media = (enum media)i;
- break;
- }
+ /* Map path into one we recognise. */
if (NULL != path && '\0' != *path)
for (i = 0; i < (int)PAGE__MAX; i++)
break;
}
+ /* Route pages. */
+
switch (req.page) {
case (PAGE_INDEX):
pg_index(&req, subpath);
case (PAGE_SEARCH):
pg_search(&req, subpath);
break;
+ case (PAGE_SHOW):
+ pg_show(&req, subpath);
+ break;
default:
- /* Blah */
+ resp_error404(path);
break;
}
- kval_free(req.fields, req.fieldsz);
+ for (i = 0; i < (int)req.psz; i++) {
+ free(req.p[i].path);
+ free(req.p[i].name);
+ }
+
+ free(req.p);
return(EXIT_SUCCESS);
}
+
+/*
+ * qsort(3) comparison callback for struct res: orders results by
+ * title, ignoring case.
+ */
+static int
+cmp(const void *p1, const void *p2)
+{
+ const struct res *lhs, *rhs;
+
+ lhs = (const struct res *)p1;
+ rhs = (const struct res *)p2;
+
+ return(strcasecmp(lhs->title, rhs->title));
+}
+
+/*
+ * Check whether an open "etc" directory holds a regular file named
+ * catman.conf. If it does, the enclosing path contains a tree created
+ * by catman(8) and should be used for indexing.
+ * Returns 1 if the file is found and 0 otherwise.
+ * The directory stream is read from its current position and is not
+ * rewound afterwards.
+ */
+static int
+pathstop(DIR *dir)
+{
+ struct dirent *ent;
+
+ for (;;) {
+ if (NULL == (ent = readdir(dir)))
+ return(0);
+ if (DT_REG != ent->d_type)
+ continue;
+ if (0 == strcmp(ent->d_name, "catman.conf"))
+ return(1);
+ }
+}
+
+/*
+ * Scan for indexable paths.
+ * This adds all paths with "etc/catman.conf" to the buffer.
+ *
+ * dir is the open directory named by path.
+ * path is a buffer of MAXPATHLEN bytes that is modified in place and
+ * reused for every level of the recursion.
+ * Matching directories are appended to req->p, growing req->psz.
+ * A directory that qualifies is not searched any deeper; hidden
+ * directories (names starting with '.') are never entered.
+ * Scanning of the current directory is abandoned as soon as a path
+ * gets too long or a subdirectory cannot be opened.
+ */
+static void
+pathgen(DIR *dir, char *path, struct req *req)
+{
+ struct dirent *d;
+ char *cp;
+ DIR *cd;
+ int rc;
+ size_t sz, ssz;
+
+ /* sz is the length of path including the trailing slash. */
+ sz = strlcat(path, "/", MAXPATHLEN);
+ if (sz >= MAXPATHLEN) {
+ fprintf(stderr, "%s: Path too long", path);
+ return;
+ }
+
+ /*
+ * First, scan for the "etc" directory.
+ * If it's found, then see if it should cause us to stop. This
+ * happens when a catman.conf is found in the directory.
+ */
+
+ rc = 0;
+ while (0 == rc && NULL != (d = readdir(dir))) {
+ if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
+ continue;
+
+ path[(int)sz] = '\0';
+ ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+ if (ssz >= MAXPATHLEN) {
+ fprintf(stderr, "%s: Path too long", path);
+ return;
+ } else if (NULL == (cd = opendir(path))) {
+ perror(path);
+ return;
+ }
+
+ rc = pathstop(cd);
+ closedir(cd);
+ }
+
+ if (rc > 0) {
+ /* This also strips the trailing slash. */
+ path[(int)--sz] = '\0';
+ req->p = mandoc_realloc
+ (req->p,
+ (req->psz + 1) * sizeof(struct paths));
+ /*
+ * Strip out the leading "./" unless we're just a ".",
+ * in which case use an empty string as our name.
+ */
+ req->p[(int)req->psz].path = mandoc_strdup(path);
+ req->p[(int)req->psz].name =
+ cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
+ req->psz++;
+ /*
+ * The name is just the path with all the slashes
+ * replaced by spaces. Simple but effective.
+ */
+ for ( ; '\0' != *cp; cp++)
+ if ('/' == *cp)
+ *cp = ' ';
+ return;
+ }
+
+ /*
+ * If no etc/catman.conf was found, recursively enter child
+ * directory and continue scanning.
+ */
+
+ rewinddir(dir);
+ while (NULL != (d = readdir(dir))) {
+ if (DT_DIR != d->d_type || '.' == d->d_name[0])
+ continue;
+
+ /* Cut path back to this directory before appending the child. */
+ path[(int)sz] = '\0';
+ ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+ if (ssz >= MAXPATHLEN) {
+ fprintf(stderr, "%s: Path too long", path);
+ return;
+ } else if (NULL == (cd = opendir(path))) {
+ perror(path);
+ return;
+ }
+
+ pathgen(cd, path, req);
+ closedir(cd);
+ }
+}