aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/cgi.c
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-11-23 10:01:04 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-11-23 10:01:04 +0000
commita978cf1b0ea016d50b9a383f181e6ae1dd41b6a1 (patch)
treeef8f496f5303fa5eeba705f0089d7fb66f5e36b3 /cgi.c
parent7782f51bf466a5e1a44d6ffcc03af9b68b95f17c (diff)
downloadmandoc-a978cf1b0ea016d50b9a383f181e6ae1dd41b6a1.tar.gz
mandoc-a978cf1b0ea016d50b9a383f181e6ae1dd41b6a1.tar.zst
mandoc-a978cf1b0ea016d50b9a383f181e6ae1dd41b6a1.zip
man.cgi works for the non-jailed case.
In other words, if you drop this into a cgi-bin directory, it will Just Work for your system's manuals (it of course needs access to mandoc(1) and your file system, hence "non-jailed"). The notion of a jailed case is much more subtle and is being worked on now.
Diffstat (limited to 'cgi.c')
-rw-r--r--cgi.c503
1 files changed, 422 insertions, 81 deletions
diff --git a/cgi.c b/cgi.c
index d3b018ad..ac9438e2 100644
--- a/cgi.c
+++ b/cgi.c
@@ -1,80 +1,131 @@
-/* $Id: cgi.c,v 1.5 2011/11/20 12:39:08 kristaps Exp $ */
+/* $Id: cgi.c,v 1.6 2011/11/23 10:01:04 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/wait.h>
+
#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
#include <fcntl.h>
+#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
-#include "mandoc.h"
-#include "mandocdb.h"
#include "apropos_db.h"
+#include "mandoc.h"
+#include "manpath.h"
+
+#ifdef __linux__
+# include <db_185.h>
+#else
+# include <db.h>
+#endif
-/*
- * The page a request is trying to make.
- */
enum page {
PAGE_INDEX,
PAGE_SEARCH,
+ PAGE_SHOW,
PAGE__MAX
};
-/*
- * Key-value pair.
- * Both key and val are on the heap.
- */
struct kval {
char *key;
char *val;
};
-/*
- * The media type, determined by suffix, of the requesting or responding
- * context.
- */
-enum media {
- MEDIA_HTML,
- MEDIA__MAX
-};
-
-/*
- * An HTTP request.
- */
struct req {
- struct kval *fields; /* query fields */
+ struct kval *fields;
size_t fieldsz;
- enum media media;
enum page page;
};
-#if 0
-static void html_printtext(const char *);
-#endif
+static int atou(const char *, unsigned *);
+static void html_print(const char *);
static int kval_decode(char *);
static void kval_parse(struct kval **, size_t *, char *);
static void kval_free(struct kval *, size_t);
-static void pg_index(const struct req *, char *);
-static void pg_search(const struct req *, char *);
-#if 0
-static void pg_searchres(struct rec *, size_t, void *);
-#endif
+static void pg_index(const struct manpaths *,
+ const struct req *, char *);
+static void pg_search(const struct manpaths *,
+ const struct req *, char *);
+static void pg_show(const struct manpaths *,
+ const struct req *, char *);
+static void resp_baddb(void);
+static void resp_badexpr(const struct req *);
+static void resp_badmanual(void);
+static void resp_begin_html(int, const char *);
+static void resp_begin_http(int, const char *);
+static void resp_end_html(void);
+static void resp_index(const struct req *);
+static void resp_search(struct res *, size_t, void *);
+static void resp_searchform(const struct req *);
+
+static const char *progname;
+static const char *host;
static const char * const pages[PAGE__MAX] = {
"index", /* PAGE_INDEX */
"search", /* PAGE_SEARCH */
+ "show", /* PAGE_SHOW */
};
-static const char * const medias[MEDIA__MAX] = {
- "html", /* MEDIA_HTML */
-};
+/*
+ * This is just OpenBSD's strtol(3) suggestion.
+ * I use it instead of strtonum(3) for portability's sake.
+ */
+static int
+atou(const char *buf, unsigned *v)
+{
+ char *ep;
+ long lval;
+
+ errno = 0;
+ lval = strtol(buf, &ep, 10);
+ if (buf[0] == '\0' || *ep != '\0')
+ return(0);
+ if ((errno == ERANGE && (lval == LONG_MAX ||
+ lval == LONG_MIN)) ||
+ (lval > UINT_MAX || lval < 0))
+ return(0);
+
+ *v = (unsigned int)lval;
+ return(1);
+}
-#if 0
+/*
+ * Print a word, escaping HTML along the way.
+ * This will pass non-ASCII straight to output: be warned!
+ */
static void
-html_printtext(const char *p)
+html_print(const char *p)
{
char c;
+
+ if (NULL == p)
+ return;
while ('\0' != *p)
switch ((c = *p++)) {
@@ -95,7 +146,6 @@ html_printtext(const char *p)
break;
}
}
-#endif
static void
kval_free(struct kval *p, size_t sz)
@@ -111,7 +161,8 @@ kval_free(struct kval *p, size_t sz)
/*
* Parse out key-value pairs from an HTTP request variable.
- * This can be either a cookie or a POST/GET string.
+ * This can be either a cookie or a POST/GET string, although man.cgi
+ * uses only GET for simplicity.
*/
static void
kval_parse(struct kval **kv, size_t *kvsz, char *p)
@@ -172,9 +223,9 @@ kval_parse(struct kval **kv, size_t *kvsz, char *p)
}
/*
- * In-place HTTP-decode a string. The standard explanation is that this
- * turns "%4e+foo" into "n foo" in the regular way. This is done
- * in-place over the allocated string.
+ * HTTP-decode a string. The standard explanation is that this turns
+ * "%4e+foo" into "N foo" in the regular way. This is done in-place
+ * over the allocated string.
*/
static int
kval_decode(char *p)
@@ -205,51 +256,320 @@ kval_decode(char *p)
return(1);
}
+static void
+resp_begin_http(int code, const char *msg)
+{
+
+ if (200 != code)
+ printf("Status: %d %s\n", code, msg);
+
+ puts("Content-Type: text/html; charset=utf-8" "\n"
+ "Cache-Control: no-cache" "\n"
+ "Pragma: no-cache" "\n"
+ "");
+
+ fflush(stdout);
+}
+
+static void
+resp_begin_html(int code, const char *msg)
+{
+
+ resp_begin_http(code, msg);
+
+ puts("<!DOCTYPE HTML PUBLIC " "\n"
+ " \"-//W3C//DTD HTML 4.01//EN\"" "\n"
+ " \"http://www.w3.org/TR/html4/strict.dtd\">" "\n"
+ "<HTML>" "\n"
+ " <HEAD>" "\n"
+ " <TITLE>System Manpage Reference</TITLE>" "\n"
+ " </HEAD>" "\n"
+ " <BODY>" "\n"
+ "<!-- Begin page content. //-->");
+}
+
+static void
+resp_end_html(void)
+{
+
+ puts(" </BODY>\n</HTML>");
+}
+
+static void
+resp_searchform(const struct req *req)
+{
+ int i;
+ const char *expr, *sec, *arch;
+
+ expr = sec = arch = "";
+
+ for (i = 0; i < (int)req->fieldsz; i++)
+ if (0 == strcmp(req->fields[i].key, "expr"))
+ expr = req->fields[i].val;
+ else if (0 == strcmp(req->fields[i].key, "sec"))
+ sec = req->fields[i].val;
+ else if (0 == strcmp(req->fields[i].key, "arch"))
+ arch = req->fields[i].val;
+
+ puts("<!-- Begin search form. //-->");
+ printf("<FORM ACTION=\"");
+ html_print(progname);
+ printf("/search\" METHOD=\"get\">\n");
+ puts(" <FIELDSET>" "\n"
+ " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
+ printf(" Terms: <INPUT TYPE=\"text\" "
+ "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
+ html_print(expr);
+ puts("\">");
+ printf(" Section: <INPUT TYPE=\"text\" "
+ "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
+ html_print(sec);
+ puts("\">");
+ printf(" Arch: <INPUT TYPE=\"text\" "
+ "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
+ html_print(arch);
+ puts("\">");
+ puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
+}
+
+static void
+resp_index(const struct req *req)
+{
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ resp_end_html();
+}
+
+static void
+resp_badmanual(void)
+{
+
+ resp_begin_html(404, "Not Found");
+ puts("<P>Requested manual not found.</P>");
+ resp_end_html();
+}
+
+static void
+resp_badexpr(const struct req *req)
+{
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ puts("<P>Your search didn't work.</P>");
+ resp_end_html();
+}
-/* ARGSUSED */
static void
-pg_index(const struct req *req, char *path)
+resp_baddb(void)
{
+ resp_begin_html(500, "Internal Server Error");
+ puts("<P>Your database is broken.</P>");
+ resp_end_html();
}
-#if 0
static void
-pg_searchres(struct rec *recs, size_t sz, void *arg)
+resp_search(struct res *r, size_t sz, void *arg)
{
int i;
- const char *pg;
- if (NULL == (pg = getenv("SCRIPT_NAME")))
- pg = "";
+ if (1 == sz) {
+ /*
+ * If we have just one result, then jump there now
+ * without any delay.
+ */
+ puts("Status: 303 See Other");
+ printf("Location: http://%s%s/show/%u/%u.html\n",
+ host, progname,
+ r[0].volume, r[0].rec);
+ puts("Content-Type: text/html; charset=utf-8\n");
+ return;
+ }
+
+ resp_begin_html(200, NULL);
+ resp_searchform((const struct req *)arg);
+
+ if (0 == sz)
+ puts("<P>No results found.</P>");
for (i = 0; i < (int)sz; i++) {
- printf("<A HREF=\"%s/show/%u.html\">",
- pg, recs[i].rec);
- html_printtext(recs[i].title);
+ printf("<P><A HREF=\"");
+ html_print(progname);
+ printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
+ html_print(r[i].title);
putchar('(');
- html_printtext(recs[i].cat);
- puts(")</A>");
+ html_print(r[i].cat);
+ if (r[i].arch && '\0' != *r[i].arch) {
+ putchar('/');
+ html_print(r[i].arch);
+ }
+ printf(")</A> ");
+ html_print(r[i].desc);
+ puts("</P>");
}
+
+ resp_end_html();
}
-#endif
+/* ARGSUSED */
static void
-pg_search(const struct req *req, char *path)
+pg_index(const struct manpaths *ps, const struct req *req, char *path)
{
- int i;
- struct opts opt;
- for (i = 0; i < (int)req->fieldsz; i++)
- if (0 == strcmp(req->fields[i].key, "key"))
- break;
+ resp_index(req);
+}
- if (i == (int)req->fieldsz)
+static void
+pg_show(const struct manpaths *ps, const struct req *req, char *path)
+{
+ pid_t pid;
+ char *sub;
+ char file[MAXPATHLEN], cmd[MAXPATHLEN];
+ int rc;
+ unsigned int vol, rec;
+ DB *db;
+ DBT key, val;
+
+ if (NULL == path) {
+ resp_badmanual();
return;
+ } else if (NULL == (sub = strrchr(path, '/'))) {
+ resp_badmanual();
+ return;
+ } else
+ *sub++ = '\0';
+
+ if ( ! (atou(path, &vol) && atou(sub, &rec))) {
+ resp_badmanual();
+ return;
+ } else if (vol >= (unsigned int)ps->sz) {
+ resp_badmanual();
+ return;
+ }
+
+ strlcpy(file, ps->paths[vol], MAXPATHLEN);
+ strlcat(file, "/mandoc.index", MAXPATHLEN);
+
+ /* Open the index recno(3) database. */
+
+ db = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
+ if (NULL == db) {
+ resp_baddb();
+ return;
+ }
+
+ key.data = &rec;
+ key.size = 4;
+
+ if (0 != (rc = (*db->get)(db, &key, &val, 0))) {
+ rc < 0 ? resp_baddb() : resp_badmanual();
+ (*db->close)(db);
+ return;
+ }
+
+ /* Extract filename: the first NUL-terminated entry. */
+
+ strlcpy(file, ps->paths[vol], MAXPATHLEN);
+ strlcat(file, "/", MAXPATHLEN);
+ strlcat(file, (char *)val.data, MAXPATHLEN);
+
+ (*db->close)(db);
+
+ strlcpy(cmd, "man=", MAXPATHLEN);
+ strlcat(cmd, progname, MAXPATHLEN);
+ strlcat(cmd, "/search?expr=%N&sec=%S", MAXPATHLEN);
+
+ /* Get ready to call the child mandoc(1) process. */
+
+ if (-1 == (pid = fork()))
+ exit(EXIT_FAILURE);
+
+ if (pid > 0) {
+ waitpid(pid, NULL, 0);
+ return;
+ }
+
+ dup2(STDOUT_FILENO, STDERR_FILENO);
+
+ puts("Content-Type: text/html; charset=utf-8\n");
+
+ fflush(stdout);
+
+ execlp("mandoc", "mandoc", "-T",
+ "html", "-O", cmd, file, (char *)NULL);
+}
+
+static void
+pg_search(const struct manpaths *ps, const struct req *req, char *path)
+{
+ size_t tt;
+ int i, sz, rc;
+ const char *ep, *start;
+ char **cp;
+ struct opts opt;
+ struct expr *expr;
+
+ expr = NULL;
+ cp = NULL;
+ ep = NULL;
+ sz = 0;
memset(&opt, 0, sizeof(struct opts));
- /*opt.types = TYPE_NAME | TYPE_DESC;
- apropos_search(&opt, req->fields[i].val, NULL, pg_searchres);*/
+
+ for (sz = i = 0; i < (int)req->fieldsz; i++)
+ if (0 == strcmp(req->fields[i].key, "expr"))
+ ep = req->fields[i].val;
+ else if (0 == strcmp(req->fields[i].key, "sec"))
+ opt.cat = req->fields[i].val;
+ else if (0 == strcmp(req->fields[i].key, "arch"))
+ opt.arch = req->fields[i].val;
+
+ /*
+ * Poor man's tokenisation.
+ * Just break apart by spaces.
+ * Yes, this is half-ass. But it works for now.
+ */
+
+ while (ep && isspace((unsigned char)*ep))
+ ep++;
+
+ while (ep && '\0' != *ep) {
+ cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
+ start = ep;
+ while ('\0' != *ep && ! isspace((unsigned char)*ep))
+ ep++;
+ cp[sz] = mandoc_malloc((ep - start) + 1);
+ memcpy(cp[sz], start, ep - start);
+ cp[sz++][ep - start] = '\0';
+ while (isspace((unsigned char)*ep))
+ ep++;
+ }
+
+ rc = -1;
+
+ /*
+ * Pump down into apropos backend.
+ * The resp_search() function is called with the results.
+ */
+
+ if (NULL != (expr = exprcomp(sz, cp, &tt)))
+ rc = apropos_search
+ (ps->sz, ps->paths, &opt,
+ expr, tt, (void *)req, resp_search);
+
+ /* ...unless errors occurred. */
+
+ if (0 == rc)
+ resp_baddb();
+ else if (-1 == rc)
+ resp_badexpr(req);
+
+ for (i = 0; i < sz; i++)
+ free(cp[i]);
+
+ free(cp);
+ exprfree(expr);
}
int
@@ -257,37 +577,47 @@ main(void)
{
int i;
struct req req;
- char *p;
- char *path, *subpath, *suffix;
+ char *p, *path, *subpath;
+ struct manpaths paths;
+
+ /* HTTP init: read and parse the query string. */
+
+ progname = getenv("SCRIPT_NAME");
+ if (NULL == progname)
+ progname = "";
+
+ host = getenv("HTTP_HOST");
+ if (NULL == host)
+ host = "localhost";
memset(&req, 0, sizeof(struct req));
if (NULL != (p = getenv("QUERY_STRING")))
kval_parse(&req.fields, &req.fieldsz, p);
- suffix = subpath = path = NULL;
+ /* Resolve leading subpath component. */
- req.media = MEDIA_HTML;
+ subpath = path = NULL;
req.page = PAGE__MAX;
if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
req.page = PAGE_INDEX;
+
if (NULL != path && '/' == *path && '\0' == *++path)
req.page = PAGE_INDEX;
- if (NULL != path && NULL != (suffix = strrchr(path, '.')))
- if (NULL != suffix && NULL == strchr(suffix, '/'))
- *suffix++ = '\0';
+ /* Strip file suffix. */
+
+ if (NULL != path && NULL != (p = strrchr(path, '.')))
+ if (NULL != p && NULL == strchr(p, '/'))
+ *p++ = '\0';
+
+ /* Resolve subpath component. */
if (NULL != path && NULL != (subpath = strchr(path, '/')))
- *subpath++ = '\0';
+ *subpath++ = '\0';
- if (NULL != suffix && '\0' != *suffix)
- for (i = 0; i < (int)MEDIA__MAX; i++)
- if (0 == strcmp(medias[i], suffix)) {
- req.media = (enum media)i;
- break;
- }
+ /* Map path into one we recognise. */
if (NULL != path && '\0' != *path)
for (i = 0; i < (int)PAGE__MAX; i++)
@@ -296,18 +626,29 @@ main(void)
break;
}
+ /* Initialise MANPATH. */
+
+ memset(&paths, 0, sizeof(struct manpaths));
+ manpath_parse(&paths, NULL, NULL);
+
+ /* Route pages. */
+
switch (req.page) {
case (PAGE_INDEX):
- pg_index(&req, subpath);
+ pg_index(&paths, &req, subpath);
break;
case (PAGE_SEARCH):
- pg_search(&req, subpath);
+ pg_search(&paths, &req, subpath);
+ break;
+ case (PAGE_SHOW):
+ pg_show(&paths, &req, subpath);
break;
default:
- /* Blah */
break;
}
+ manpath_free(&paths);
kval_free(req.fields, req.fieldsz);
+
return(EXIT_SUCCESS);
}