]> git.cameronkatri.com Git - mandoc.git/blobdiff - cgi.c
store the operating system name obtained from uname(3) in the adequate
[mandoc.git] / cgi.c
diff --git a/cgi.c b/cgi.c
index 3a47e02b738c376d679d498e07027ff2b26cdb6e..91310ce404b4d2440f60c0508a07c7aa8cc93644 100644 (file)
--- a/cgi.c
+++ b/cgi.c
@@ -1,7 +1,7 @@
-/*     $Id: cgi.c,v 1.146 2017/01/25 03:49:38 schwarze Exp $ */
+/* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */
 /*
+ * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -14,6 +14,8 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Implementation of the man.cgi(8) program.
  */
 #include "config.h"
 
@@ -21,7 +23,9 @@
 #include <sys/time.h>
 
 #include <ctype.h>
+#if HAVE_ERR
 #include <err.h>
+#endif
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -36,6 +40,7 @@
 #include "roff.h"
 #include "mdoc.h"
 #include "man.h"
+#include "mandoc_parse.h"
 #include "main.h"
 #include "manconf.h"
 #include "mansearch.h"
@@ -67,18 +72,21 @@ enum        focus {
 static void             html_print(const char *);
 static void             html_putchar(char);
 static int              http_decode(char *);
+static void             http_encode(const char *);
 static void             parse_manpath_conf(struct req *);
-static void             parse_path_info(struct req *req, const char *path);
+static void             parse_path_info(struct req *, const char *);
 static void             parse_query_string(struct req *, const char *);
 static void             pg_error_badrequest(const char *);
 static void             pg_error_internal(void);
 static void             pg_index(const struct req *);
-static void             pg_noresult(const struct req *, const char *);
+static void             pg_noresult(const struct req *, int, const char *,
+                               const char *);
+static void             pg_redirect(const struct req *, const char *);
 static void             pg_search(const struct req *);
 static void             pg_searchres(const struct req *,
                                struct manpage *, size_t);
 static void             pg_show(struct req *, const char *);
-static void             resp_begin_html(int, const char *);
+static void             resp_begin_html(int, const char *, const char *);
 static void             resp_begin_http(int, const char *);
 static void             resp_catman(const struct req *, const char *);
 static void             resp_copy(const char *);
@@ -87,6 +95,7 @@ static        void             resp_format(const struct req *, const char *);
 static void             resp_searchform(const struct req *, enum focus);
 static void             resp_show(const struct req *, const char *);
 static void             set_query_attr(char **, char **);
+static int              validate_arch(const char *);
 static int              validate_filename(const char *);
 static int              validate_manpath(const struct req *, const char *);
 static int              validate_urifrag(const char *);
@@ -113,16 +122,18 @@ static    const char *const sec_names[] = {
 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
 
 static const char *const arch_names[] = {
-    "amd64",       "alpha",       "armv7",     "arm64",
-    "hppa",        "i386",        "landisk",
-    "loongson",    "luna88k",     "macppc",      "mips64",
-    "octeon",      "sgi",         "socppc",      "sparc64",
+    "amd64",       "alpha",       "armv7",       "arm64",
+    "hppa",        "i386",        "landisk",     "loongson",
+    "luna88k",     "macppc",      "mips64",      "octeon",
+    "powerpc64",   "riscv64",     "sparc64",
+
     "amiga",       "arc",         "armish",      "arm32",
     "atari",       "aviion",      "beagle",      "cats",
     "hppa64",      "hp300",
     "ia64",        "mac68k",      "mvme68k",     "mvme88k",
     "mvmeppc",     "palm",        "pc532",       "pegasos",
-    "pmax",        "powerpc",     "solbourne",   "sparc",
+    "pmax",        "powerpc",     "sgi",         "socppc",
+    "solbourne",   "sparc",
     "sun3",        "vax",         "wgrisc",      "x68k",
     "zaurus"
 };
@@ -137,16 +148,16 @@ html_putchar(char c)
 {
 
        switch (c) {
-       case ('"'):
+       case '"':
                printf("&quot;");
                break;
-       case ('&'):
+       case '&':
                printf("&amp;");
                break;
-       case ('>'):
+       case '>':
                printf("&gt;");
                break;
-       case ('<'):
+       case '<':
                printf("&lt;");
                break;
        default:
@@ -312,6 +323,18 @@ http_decode(char *p)
        return 1;
 }
 
+static void
+http_encode(const char *p)
+{
+       for (; *p != '\0'; p++) {
+               if (isalnum((unsigned char)*p) == 0 &&
+                   strchr("-._~", *p) == NULL)
+                       printf("%%%2.2X", (unsigned char)*p);
+               else
+                       putchar(*p);
+       }
+}
+
 static void
 resp_begin_http(int code, const char *msg)
 {
@@ -321,6 +344,8 @@ resp_begin_http(int code, const char *msg)
 
        printf("Content-Type: text/html; charset=utf-8\r\n"
             "Cache-Control: no-cache\r\n"
+            "Content-Security-Policy: default-src 'none'; "
+            "style-src 'self' 'unsafe-inline'\r\n"
             "Pragma: no-cache\r\n"
             "\r\n");
 
@@ -343,8 +368,10 @@ resp_copy(const char *filename)
 }
 
 static void
-resp_begin_html(int code, const char *msg)
+resp_begin_html(int code, const char *msg, const char *file)
 {
+       const char      *name, *sec, *cp;
+       int              namesz, secsz;
 
        resp_begin_http(code, msg);
 
@@ -352,12 +379,39 @@ resp_begin_html(int code, const char *msg)
               "<html>\n"
               "<head>\n"
               "  <meta charset=\"UTF-8\"/>\n"
+              "  <meta name=\"viewport\""
+                     " content=\"width=device-width, initial-scale=1.0\">\n"
               "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
               " type=\"text/css\" media=\"all\">\n"
-              "  <title>%s</title>\n"
+              "  <title>",
+              CSS_DIR);
+       if (file != NULL) {
+               cp = strrchr(file, '/');
+               name = cp == NULL ? file : cp + 1;
+               cp = strrchr(name, '.');
+               namesz = cp == NULL ? strlen(name) : cp - name;
+               sec = NULL;
+               if (cp != NULL && cp[1] != '0') {
+                       sec = cp + 1;
+                       secsz = strlen(sec);
+               } else if (name - file > 1) {
+                       for (cp = name - 2; cp >= file; cp--) {
+                               if (*cp < '1' || *cp > '9')
+                                       continue;
+                               sec = cp;
+                               secsz = name - cp - 1;
+                               break;
+                       }
+               }
+               printf("%.*s", namesz, name);
+               if (sec != NULL)
+                       printf("(%.*s)", secsz, sec);
+               fputs(" - ", stdout);
+       }
+       printf("%s</title>\n"
               "</head>\n"
               "<body>\n",
-              CSS_DIR, CUSTOMIZE_TITLE);
+              CUSTOMIZE_TITLE);
 
        resp_copy(MAN_DIR "/header.html");
 }
@@ -377,14 +431,15 @@ resp_searchform(const struct req *req, enum focus focus)
 {
        int              i;
 
-       printf("<form action=\"/%s\" method=\"get\">\n"
+       printf("<form action=\"/%s\" method=\"get\" "
+              "autocomplete=\"off\" autocapitalize=\"none\">\n"
               "  <fieldset>\n"
               "    <legend>Manual Page Search Parameters</legend>\n",
               scriptname);
 
        /* Write query input box. */
 
-       printf("    <input type=\"text\" name=\"query\" value=\"");
+       printf("    <input type=\"search\" name=\"query\" value=\"");
        if (req->q.query != NULL)
                html_print(req->q.query);
        printf( "\" size=\"40\"");
@@ -420,7 +475,7 @@ resp_searchform(const struct req *req, enum focus focus)
                printf(" selected=\"selected\"");
        puts(">All Architectures</option>");
        for (i = 0; i < arch_MAX; i++) {
-               printf("      <option value=\"%s\"", arch_names[i]);
+               printf("      <option");
                if (NULL != req->q.arch &&
                    0 == strcmp(arch_names[i], req->q.arch))
                        printf(" selected=\"selected\"");
@@ -433,12 +488,10 @@ resp_searchform(const struct req *req, enum focus focus)
        if (req->psz > 1) {
                puts("    <select name=\"manpath\">");
                for (i = 0; i < (int)req->psz; i++) {
-                       printf("      <option ");
+                       printf("      <option");
                        if (strcmp(req->q.manpath, req->p[i]) == 0)
-                               printf("selected=\"selected\" ");
-                       printf("value=\"");
-                       html_print(req->p[i]);
-                       printf("\">");
+                               printf(" selected=\"selected\"");
+                       printf(">");
                        html_print(req->p[i]);
                        puts("</option>");
                }
@@ -475,6 +528,18 @@ validate_manpath(const struct req *req, const char* manpath)
        return 0;
 }
 
+static int
+validate_arch(const char *arch)
+{
+       int      i;
+
+       for (i = 0; i < arch_MAX; i++)
+               if (strcmp(arch, arch_names[i]) == 0)
+                       return 1;
+
+       return 0;
+}
+
 static int
 validate_filename(const char *file)
 {
@@ -490,7 +555,7 @@ static void
 pg_index(const struct req *req)
 {
 
-       resp_begin_html(200, NULL);
+       resp_begin_html(200, NULL, NULL);
        resp_searchform(req, FOCUS_QUERY);
        printf("<p>\n"
               "This web interface is documented in the\n"
@@ -505,12 +570,13 @@ pg_index(const struct req *req)
 }
 
 static void
-pg_noresult(const struct req *req, const char *msg)
+pg_noresult(const struct req *req, int code, const char *http_msg,
+    const char *user_msg)
 {
-       resp_begin_html(200, NULL);
+       resp_begin_html(code, http_msg, NULL);
        resp_searchform(req, FOCUS_QUERY);
        puts("<p>");
-       puts(msg);
+       puts(user_msg);
        puts("</p>");
        resp_end_html();
 }
@@ -519,7 +585,7 @@ static void
 pg_error_badrequest(const char *msg)
 {
 
-       resp_begin_html(400, "Bad Request");
+       resp_begin_html(400, "Bad Request", NULL);
        puts("<h1>Bad Request</h1>\n"
             "<p>\n");
        puts(msg);
@@ -532,11 +598,30 @@ pg_error_badrequest(const char *msg)
 static void
 pg_error_internal(void)
 {
-       resp_begin_html(500, "Internal Server Error");
+       resp_begin_html(500, "Internal Server Error", NULL);
        puts("<p>Internal Server Error</p>");
        resp_end_html();
 }
 
+static void
+pg_redirect(const struct req *req, const char *name)
+{
+       printf("Status: 303 See Other\r\n"
+           "Location: /");
+       if (*scriptname != '\0')
+               printf("%s/", scriptname);
+       if (strcmp(req->q.manpath, req->p[0]))
+               printf("%s/", req->q.manpath);
+       if (req->q.arch != NULL)
+               printf("%s/", req->q.arch);
+       http_encode(name);
+       if (req->q.sec != NULL) {
+               putchar('.');
+               http_encode(req->q.sec);
+       }
+       printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
+}
+
 static void
 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
 {
@@ -560,47 +645,25 @@ pg_searchres(const struct req *req, struct manpage *r, size_t sz)
                 * If we have just one result, then jump there now
                 * without any delay.
                 */
-               printf("Status: 303 See Other\r\n");
-               printf("Location: http://%s/%s%s%s/%s",
-                   HTTP_HOST, scriptname,
-                   *scriptname == '\0' ? "" : "/",
-                   req->q.manpath, r[0].file);
-               printf("\r\n"
-                    "Content-Type: text/html; charset=utf-8\r\n"
-                    "\r\n");
+               printf("Status: 303 See Other\r\n"
+                   "Location: /");
+               if (*scriptname != '\0')
+                       printf("%s/", scriptname);
+               if (strcmp(req->q.manpath, req->p[0]))
+                       printf("%s/", req->q.manpath);
+               printf("%s\r\n"
+                   "Content-Type: text/html; charset=utf-8\r\n\r\n",
+                   r[0].file);
                return;
        }
 
-       resp_begin_html(200, NULL);
-       resp_searchform(req,
-           req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
-
-       if (sz > 1) {
-               puts("<table class=\"results\">");
-               for (i = 0; i < sz; i++) {
-                       printf("  <tr>\n"
-                              "    <td>"
-                              "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
-                           scriptname, *scriptname == '\0' ? "" : "/",
-                           req->q.manpath, r[i].file);
-                       html_print(r[i].names);
-                       printf("</a></td>\n"
-                              "    <td><span class=\"Nd\">");
-                       html_print(r[i].output);
-                       puts("</span></td>\n"
-                            "  </tr>");
-               }
-               puts("</table>");
-       }
-
        /*
         * In man(1) mode, show one of the pages
         * even if more than one is found.
         */
 
+       iuse = 0;
        if (req->q.equal || sz == 1) {
-               puts("<hr>");
-               iuse = 0;
                priouse = 20;
                archpriouse = 3;
                for (i = 0; i < sz; i++) {
@@ -633,6 +696,36 @@ pg_searchres(const struct req *req, struct manpage *r, size_t sz)
                        priouse = prio;
                        iuse = i;
                }
+               resp_begin_html(200, NULL, r[iuse].file);
+       } else
+               resp_begin_html(200, NULL, NULL);
+
+       resp_searchform(req,
+           req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
+
+       if (sz > 1) {
+               puts("<table class=\"results\">");
+               for (i = 0; i < sz; i++) {
+                       printf("  <tr>\n"
+                              "    <td>"
+                              "<a class=\"Xr\" href=\"/");
+                       if (*scriptname != '\0')
+                               printf("%s/", scriptname);
+                       if (strcmp(req->q.manpath, req->p[0]))
+                               printf("%s/", req->q.manpath);
+                       printf("%s\">", r[i].file);
+                       html_print(r[i].names);
+                       printf("</a></td>\n"
+                              "    <td><span class=\"Nd\">");
+                       html_print(r[i].output);
+                       puts("</span></td>\n"
+                            "  </tr>");
+               }
+               puts("</table>");
+       }
+
+       if (req->q.equal || sz == 1) {
+               puts("<hr>");
                resp_show(req, r[iuse].file);
        }
 
@@ -781,7 +874,7 @@ resp_format(const struct req *req, const char *file)
 {
        struct manoutput conf;
        struct mparse   *mp;
-       struct roff_man *man;
+       struct roff_meta *meta;
        void            *vp;
        int              fd;
        int              usepath;
@@ -792,36 +885,25 @@ resp_format(const struct req *req, const char *file)
        }
 
        mchars_alloc();
-       mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
-           MANDOCLEVEL_BADARG, NULL, req->q.manpath);
+       mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
+           MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
        mparse_readfd(mp, fd, file);
        close(fd);
+       meta = mparse_result(mp);
 
        memset(&conf, 0, sizeof(conf));
        conf.fragment = 1;
        conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
        usepath = strcmp(req->q.manpath, req->p[0]);
-       mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
+       mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
+           scriptname, *scriptname == '\0' ? "" : "/",
            usepath ? req->q.manpath : "", usepath ? "/" : "");
 
-       mparse_result(mp, &man, NULL);
-       if (man == NULL) {
-               warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
-               pg_error_internal();
-               mparse_free(mp);
-               mchars_free();
-               return;
-       }
-
        vp = html_alloc(&conf);
-
-       if (man->macroset == MACROSET_MDOC) {
-               mdoc_validate(man);
-               html_mdoc(vp, man);
-       } else {
-               man_validate(man);
-               html_man(vp, man);
-       }
+       if (meta->macroset == MACROSET_MDOC)
+               html_mdoc(vp, meta);
+       else
+               html_man(vp, meta);
 
        html_free(vp);
        mparse_free(mp);
@@ -884,7 +966,7 @@ pg_show(struct req *req, const char *fullpath)
                return;
        }
 
-       resp_begin_html(200, NULL);
+       resp_begin_html(200, NULL, file);
        resp_searchform(req, FOCUS_NONE);
        resp_show(req, file);
        resp_end_html();
@@ -954,10 +1036,15 @@ pg_search(const struct req *req)
                }
        }
 
-       if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
-               pg_noresult(req, "You entered an invalid query.");
-       else if (0 == ressz)
-               pg_noresult(req, "No results found.");
+       res = NULL;
+       ressz = 0;
+       if (req->isquery && req->q.equal && argc == 1)
+               pg_redirect(req, argv[0]);
+       else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
+               pg_noresult(req, 400, "Bad Request",
+                   "You entered an invalid query.");
+       else if (ressz == 0)
+               pg_noresult(req, 404, "Not Found", "No results found.");
        else
                pg_searchres(req, res, ressz);
 
@@ -976,6 +1063,22 @@ main(void)
        const char      *querystring;
        int              i;
 
+#if HAVE_PLEDGE
+       /*
+        * The "rpath" pledge could be revoked after mparse_readfd()
+        * if the file desciptor to "/footer.html" would be opened
+        * up front, but it's probably not worth the complication
+        * of the code it would cause: it would require scattering
+        * pledge() calls in multiple low-level resp_*() functions.
+        */
+
+       if (pledge("stdio rpath", NULL) == -1) {
+               warn("pledge");
+               pg_error_internal();
+               return EXIT_FAILURE;
+       }
+#endif
+
        /* Poor man's ReDoS mitigation. */
 
        itimer.it_value.tv_sec = 2;
@@ -1013,7 +1116,8 @@ main(void)
 
        if (*path != '\0') {
                parse_path_info(&req, path);
-               if (req.q.manpath == NULL || access(path, F_OK) == -1)
+               if (req.q.manpath == NULL || req.q.sec == NULL ||
+                   *req.q.query == '\0' || access(path, F_OK) == -1)
                        path = "";
        } else if ((querystring = getenv("QUERY_STRING")) != NULL)
                parse_query_string(&req, querystring);
@@ -1028,7 +1132,7 @@ main(void)
                return EXIT_FAILURE;
        }
 
-       if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
+       if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
                pg_error_badrequest(
                    "You specified an invalid architecture.");
                return EXIT_FAILURE;
@@ -1054,80 +1158,74 @@ main(void)
 }
 
 /*
- * If PATH_INFO is not a file name, translate it to a query.
+ * Translate PATH_INFO to a query.
  */
 static void
 parse_path_info(struct req *req, const char *path)
 {
-       char    *dir[4];
-       int      i;
+       const char      *name, *sec, *end;
 
        req->isquery = 0;
        req->q.equal = 1;
-       req->q.manpath = mandoc_strdup(path);
+       req->q.manpath = NULL;
        req->q.arch = NULL;
 
        /* Mandatory manual page name. */
-       if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
-               req->q.query = req->q.manpath;
-               req->q.manpath = NULL;
-       } else
-               *req->q.query++ = '\0';
+       if ((name = strrchr(path, '/')) == NULL)
+               name = path;
+       else
+               name++;
 
        /* Optional trailing section. */
-       if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
-               if(isdigit((unsigned char)req->q.sec[1])) {
-                       *req->q.sec++ = '\0';
-                       req->q.sec = mandoc_strdup(req->q.sec);
-               } else
-                       req->q.sec = NULL;
+       sec = strrchr(name, '.');
+       if (sec != NULL && isdigit((unsigned char)*++sec)) {
+               req->q.query = mandoc_strndup(name, sec - name - 1);
+               req->q.sec = mandoc_strdup(sec);
+       } else {
+               req->q.query = mandoc_strdup(name);
+               req->q.sec = NULL;
        }
 
        /* Handle the case of name[.section] only. */
-       if (req->q.manpath == NULL)
+       if (name == path)
                return;
-       req->q.query = mandoc_strdup(req->q.query);
-
-       /* Split directory components. */
-       dir[i = 0] = req->q.manpath;
-       while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
-               if (++i == 3) {
-                       pg_error_badrequest(
-                           "You specified too many directory components.");
-                       exit(EXIT_FAILURE);
-               }
-               *dir[i]++ = '\0';
-       }
 
        /* Optional manpath. */
-       if ((i = validate_manpath(req, req->q.manpath)) == 0)
+       end = strchr(path, '/');
+       req->q.manpath = mandoc_strndup(path, end - path);
+       if (validate_manpath(req, req->q.manpath)) {
+               path = end + 1;
+               if (name == path)
+                       return;
+       } else {
+               free(req->q.manpath);
                req->q.manpath = NULL;
-       else if (dir[1] == NULL)
-               return;
+       }
 
        /* Optional section. */
-       if (strncmp(dir[i], "man", 3) == 0) {
+       if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
+               path += 3;
+               end = strchr(path, '/');
                free(req->q.sec);
-               req->q.sec = mandoc_strdup(dir[i++] + 3);
+               req->q.sec = mandoc_strndup(path, end - path);
+               path = end + 1;
+               if (name == path)
+                       return;
        }
-       if (dir[i] == NULL) {
-               if (req->q.manpath == NULL)
-                       free(dir[0]);
-               return;
+
+       /* Optional architecture. */
+       end = strchr(path, '/');
+       if (end + 1 != name) {
+               pg_error_badrequest(
+                   "You specified too many directory components.");
+               exit(EXIT_FAILURE);
        }
-       if (dir[i + 1] != NULL) {
+       req->q.arch = mandoc_strndup(path, end - path);
+       if (validate_arch(req->q.arch) == 0) {
                pg_error_badrequest(
                    "You specified an invalid directory component.");
                exit(EXIT_FAILURE);
        }
-
-       /* Optional architecture. */
-       if (i) {
-               req->q.arch = mandoc_strdup(dir[i]);
-               if (req->q.manpath == NULL)
-                       free(dir[0]);
-       } else
-               req->q.arch = dir[0];
 }
 
 /*