]>
git.cameronkatri.com Git - mandoc.git/blob - cgi.c
1 /* $Id: cgi.c,v 1.116 2016/01/04 12:36:26 schwarze Exp $ */
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
33 #include "mandoc_aux.h"
40 #include "mansearch.h"
44 * A query as passed to the search function.
47 char *manpath
; /* desired manual directory */
48 char *arch
; /* architecture */
49 char *sec
; /* manual section */
50 char *query
; /* unparsed query expression */
51 int equal
; /* match whole names, not substrings */
56 char **p
; /* array of available manpaths */
57 size_t psz
; /* number of available manpaths */
/*
 * Forward declarations of the file-local helpers, in alphabetical
 * order.  Parameter names are spelled out so that each prototype
 * documents what it expects.
 */
static void catman(const struct req *req, const char *file);
static void format(const struct req *req, const char *file);
static void html_print(const char *p);
static void html_putchar(char c);
static int http_decode(char *p);
static void http_parse(struct req *req, const char *qs);
static void pathgen(struct req *req);
static void pg_error_badrequest(const char *msg);
static void pg_error_internal(void);
static void pg_index(const struct req *req);
static void pg_noresult(const struct req *req, const char *msg);
static void pg_search(const struct req *req);
static void pg_searchres(const struct req *req,
		struct manpage *r, size_t sz);
static void pg_show(struct req *req, const char *fullpath);
static void resp_begin_html(int code, const char *msg);
static void resp_begin_http(int code, const char *msg);
static void resp_copy(const char *filename);
static void resp_end_html(void);
static void resp_searchform(const struct req *req);
static void resp_show(const struct req *req, const char *file);
static void set_query_attr(char **attr, char **val);
static int validate_filename(const char *file);
static int validate_manpath(const struct req *req, const char *manpath);
static int validate_urifrag(const char *frag);
/*
 * Name of this CGI script.  main() fetches it from the SCRIPT_NAME
 * environment variable and checks it with validate_urifrag().
 */
static const char *scriptname;

/*
 * Priority value for each of the manual sections 1 through 9.
 * pg_searchres() takes the first digit 1-9 found in a result's file
 * name and looks up sec_prios[digit - '1'].
 * NOTE(review): how the looked-up priorities are compared is not
 * visible here -- confirm which direction is preferred.
 */
static const int sec_prios[] = {
	1,	/* section 1 */
	4,	/* section 2 */
	5,	/* section 3 */
	8,	/* section 4 */
	6,	/* section 5 */
	3,	/* section 6 */
	7,	/* section 7 */
	2,	/* section 8 */
	9	/* section 9 */
};
89 static const char *const sec_numbers
[] = {
90 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
92 static const char *const sec_names
[] = {
94 "1 - General Commands",
96 "3 - Library Functions",
101 "7 - Miscellaneous Information",
102 "8 - System Manager\'s Manual",
103 "9 - Kernel Developer\'s Manual"
105 static const int sec_MAX
= sizeof(sec_names
) / sizeof(char *);
107 static const char *const arch_names
[] = {
108 "amd64", "alpha", "armish", "armv7",
109 "aviion", "hppa", "hppa64", "i386",
110 "ia64", "landisk", "loongson", "luna88k",
111 "macppc", "mips64", "octeon", "sgi",
112 "socppc", "solbourne", "sparc", "sparc64",
114 "amiga", "arc", "arm32", "atari",
115 "beagle", "cats", "hp300", "mac68k",
116 "mvme68k", "mvme88k", "mvmeppc", "palm",
117 "pc532", "pegasos", "pmax", "powerpc",
118 "sun3", "wgrisc", "x68k"
120 static const int arch_MAX
= sizeof(arch_names
) / sizeof(char *);
123 * Print a character, escaping HTML along the way.
124 * This will pass non-ASCII straight to output: be warned!
144 putchar((unsigned char)c
);
150 * Call through to html_putchar().
151 * Accepts NULL strings.
154 html_print(const char *p
)
164 * Transfer the responsibility for the allocated string *val
165 * to the query structure.
168 set_query_attr(char **attr
, char **val
)
181 * Parse the QUERY_STRING for key-value pairs
182 * and store the values into the query structure.
185 http_parse(struct req
*req
, const char *qs
)
190 req
->q
.manpath
= NULL
;
197 while (*qs
!= '\0') {
201 keysz
= strcspn(qs
, "=;&");
202 key
= mandoc_strndup(qs
, keysz
);
207 /* Parse one value. */
209 valsz
= strcspn(++qs
, ";&");
210 val
= mandoc_strndup(qs
, valsz
);
213 /* Decode and catch encoding errors. */
215 if ( ! (http_decode(key
) && http_decode(val
)))
218 /* Handle key-value pairs. */
220 if ( ! strcmp(key
, "query"))
221 set_query_attr(&req
->q
.query
, &val
);
223 else if ( ! strcmp(key
, "apropos"))
224 req
->q
.equal
= !strcmp(val
, "0");
226 else if ( ! strcmp(key
, "manpath")) {
228 if ( ! strncmp(val
, "OpenBSD ", 8)) {
234 set_query_attr(&req
->q
.manpath
, &val
);
237 else if ( ! (strcmp(key
, "sec")
239 && strcmp(key
, "sektion")
242 if ( ! strcmp(val
, "0"))
244 set_query_attr(&req
->q
.sec
, &val
);
247 else if ( ! strcmp(key
, "arch")) {
248 if ( ! strcmp(val
, "default"))
250 set_query_attr(&req
->q
.arch
, &val
);
254 * The key must be freed in any case.
255 * The val may have been handed over to the query
256 * structure, in which case it is now NULL.
270 * HTTP-decode a string. The standard explanation is that this turns
271 * "%4e+foo" into "N foo" in the regular way. This is done in-place
272 * over the allocated string.
284 for ( ; '\0' != *p
; p
++, q
++) {
286 if ('\0' == (hex
[0] = *(p
+ 1)))
288 if ('\0' == (hex
[1] = *(p
+ 2)))
290 if (1 != sscanf(hex
, "%x", &c
))
298 *q
= '+' == *p
? ' ' : *p
;
306 resp_begin_http(int code
, const char *msg
)
310 printf("Status: %d %s\r\n", code
, msg
);
312 printf("Content-Type: text/html; charset=utf-8\r\n"
313 "Cache-Control: no-cache\r\n"
314 "Pragma: no-cache\r\n"
321 resp_copy(const char *filename
)
327 if ((fd
= open(filename
, O_RDONLY
)) != -1) {
329 while ((sz
= read(fd
, buf
, sizeof(buf
))) > 0)
330 write(STDOUT_FILENO
, buf
, sz
);
335 resp_begin_html(int code
, const char *msg
)
338 resp_begin_http(code
, msg
);
340 printf("<!DOCTYPE html>\n"
343 "<META CHARSET=\"UTF-8\" />\n"
344 "<LINK REL=\"stylesheet\" HREF=\"%s/mandoc.css\""
345 " TYPE=\"text/css\" media=\"all\">\n"
346 "<TITLE>%s</TITLE>\n"
349 "<!-- Begin page content. //-->\n",
350 CSS_DIR
, CUSTOMIZE_TITLE
);
352 resp_copy(MAN_DIR
"/header.html");
359 resp_copy(MAN_DIR
"/footer.html");
366 resp_searchform(const struct req
*req
)
370 puts("<!-- Begin search form. //-->");
371 printf("<DIV ID=\"mancgi\">\n"
372 "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
374 "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
377 /* Write query input box. */
379 printf( "<TABLE><TR><TD>\n"
380 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
381 if (NULL
!= req
->q
.query
)
382 html_print(req
->q
.query
);
383 puts("\" SIZE=\"40\">");
385 /* Write submission and reset buttons. */
387 printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
388 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
390 /* Write show radio button */
392 printf( "</TD><TD>\n"
393 "<INPUT TYPE=\"radio\" ");
395 printf("CHECKED=\"checked\" ");
396 printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
397 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
399 /* Write section selector. */
401 puts( "</TD></TR><TR><TD>\n"
402 "<SELECT NAME=\"sec\">");
403 for (i
= 0; i
< sec_MAX
; i
++) {
404 printf("<OPTION VALUE=\"%s\"", sec_numbers
[i
]);
405 if (NULL
!= req
->q
.sec
&&
406 0 == strcmp(sec_numbers
[i
], req
->q
.sec
))
407 printf(" SELECTED=\"selected\"");
408 printf(">%s</OPTION>\n", sec_names
[i
]);
412 /* Write architecture selector. */
414 printf( "<SELECT NAME=\"arch\">\n"
415 "<OPTION VALUE=\"default\"");
416 if (NULL
== req
->q
.arch
)
417 printf(" SELECTED=\"selected\"");
418 puts(">All Architectures</OPTION>");
419 for (i
= 0; i
< arch_MAX
; i
++) {
420 printf("<OPTION VALUE=\"%s\"", arch_names
[i
]);
421 if (NULL
!= req
->q
.arch
&&
422 0 == strcmp(arch_names
[i
], req
->q
.arch
))
423 printf(" SELECTED=\"selected\"");
424 printf(">%s</OPTION>\n", arch_names
[i
]);
428 /* Write manpath selector. */
431 puts("<SELECT NAME=\"manpath\">");
432 for (i
= 0; i
< (int)req
->psz
; i
++) {
434 if (strcmp(req
->q
.manpath
, req
->p
[i
]) == 0)
435 printf("SELECTED=\"selected\" ");
437 html_print(req
->p
[i
]);
439 html_print(req
->p
[i
]);
445 /* Write search radio button */
447 printf( "</TD><TD>\n"
448 "<INPUT TYPE=\"radio\" ");
449 if (0 == req
->q
.equal
)
450 printf("CHECKED=\"checked\" ");
451 printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
452 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
454 puts("</TD></TR></TABLE>\n"
458 puts("<!-- End search form. //-->");
462 validate_urifrag(const char *frag
)
465 while ('\0' != *frag
) {
466 if ( ! (isalnum((unsigned char)*frag
) ||
467 '-' == *frag
|| '.' == *frag
||
468 '/' == *frag
|| '_' == *frag
))
476 validate_manpath(const struct req
*req
, const char* manpath
)
480 if ( ! strcmp(manpath
, "mandoc"))
483 for (i
= 0; i
< req
->psz
; i
++)
484 if ( ! strcmp(manpath
, req
->p
[i
]))
491 validate_filename(const char *file
)
494 if ('.' == file
[0] && '/' == file
[1])
497 return ! (strstr(file
, "../") || strstr(file
, "/..") ||
498 (strncmp(file
, "man", 3) && strncmp(file
, "cat", 3)));
502 pg_index(const struct req
*req
)
505 resp_begin_html(200, NULL
);
506 resp_searchform(req
);
508 "This web interface is documented in the\n"
509 "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
511 "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
512 "manual explains the query syntax.\n"
514 scriptname
, scriptname
);
519 pg_noresult(const struct req
*req
, const char *msg
)
521 resp_begin_html(200, NULL
);
522 resp_searchform(req
);
530 pg_error_badrequest(const char *msg
)
533 resp_begin_html(400, "Bad Request");
534 puts("<H1>Bad Request</H1>\n"
537 printf("Try again from the\n"
538 "<A HREF=\"%s\">main page</A>.\n"
544 pg_error_internal(void)
546 resp_begin_html(500, "Internal Server Error");
547 puts("<P>Internal Server Error</P>");
552 pg_searchres(const struct req
*req
, struct manpage
*r
, size_t sz
)
554 char *arch
, *archend
;
555 size_t i
, iuse
, isec
;
556 int archprio
, archpriouse
;
560 for (i
= 0; i
< sz
; i
++) {
561 if (validate_filename(r
[i
].file
))
563 fprintf(stderr
, "invalid filename %s in %s database\n",
564 r
[i
].file
, req
->q
.manpath
);
571 * If we have just one result, then jump there now
574 printf("Status: 303 See Other\r\n");
575 printf("Location: http://%s%s/%s/%s",
576 HTTP_HOST
, scriptname
, req
->q
.manpath
, r
[0].file
);
578 "Content-Type: text/html; charset=utf-8\r\n"
583 resp_begin_html(200, NULL
);
584 resp_searchform(req
);
585 puts("<DIV CLASS=\"results\">");
588 for (i
= 0; i
< sz
; i
++) {
590 "<TD CLASS=\"title\">\n"
591 "<A HREF=\"%s/%s/%s",
592 scriptname
, req
->q
.manpath
, r
[i
].file
);
594 html_print(r
[i
].names
);
597 "<TD CLASS=\"desc\">");
598 html_print(r
[i
].output
);
607 * In man(1) mode, show one of the pages
608 * even if more than one is found.
616 for (i
= 0; i
< sz
; i
++) {
617 isec
= strcspn(r
[i
].file
, "123456789");
618 sec
= r
[i
].file
[isec
];
621 prio
= sec_prios
[sec
- '1'];
622 if (NULL
== req
->q
.arch
) {
624 (NULL
== (arch
= strchr(
625 r
[i
].file
+ isec
, '/'))) ? 3 :
626 (NULL
== (archend
= strchr(
627 arch
+ 1, '/'))) ? 0 :
628 strncmp(arch
, "amd64/",
629 archend
- arch
) ? 2 : 1;
630 if (archprio
< archpriouse
) {
631 archpriouse
= archprio
;
636 if (archprio
> archpriouse
)
644 resp_show(req
, r
[iuse
].file
);
651 catman(const struct req
*req
, const char *file
)
660 if ((f
= fopen(file
, "r")) == NULL
) {
661 puts("<P>You specified an invalid manual file.</P>");
665 puts("<DIV CLASS=\"catman\">\n"
671 while ((len
= getline(&p
, &sz
, f
)) != -1) {
673 for (i
= 0; i
< len
- 1; i
++) {
675 * This means that the catpage is out of state.
676 * Ignore it and keep going (although the
680 if ('\b' == p
[i
] || '\n' == p
[i
])
684 * Print a regular character.
685 * Close out any bold/italic scopes.
686 * If we're in back-space mode, make sure we'll
687 * have something to enter when we backspace.
690 if ('\b' != p
[i
+ 1]) {
698 } else if (i
+ 2 >= len
)
716 * Handle funny behaviour troff-isms.
717 * These grok'd from the original man2html.c.
720 if (('+' == p
[i
] && 'o' == p
[i
+ 2]) ||
721 ('o' == p
[i
] && '+' == p
[i
+ 2]) ||
722 ('|' == p
[i
] && '=' == p
[i
+ 2]) ||
723 ('=' == p
[i
] && '|' == p
[i
+ 2]) ||
724 ('*' == p
[i
] && '=' == p
[i
+ 2]) ||
725 ('=' == p
[i
] && '*' == p
[i
+ 2]) ||
726 ('*' == p
[i
] && '|' == p
[i
+ 2]) ||
727 ('|' == p
[i
] && '*' == p
[i
+ 2])) {
736 } else if (('|' == p
[i
] && '-' == p
[i
+ 2]) ||
737 ('-' == p
[i
] && '|' == p
[i
+ 1]) ||
738 ('+' == p
[i
] && '-' == p
[i
+ 1]) ||
739 ('-' == p
[i
] && '+' == p
[i
+ 1]) ||
740 ('+' == p
[i
] && '|' == p
[i
+ 1]) ||
741 ('|' == p
[i
] && '+' == p
[i
+ 1])) {
765 * Clean up the last character.
766 * We can get to a newline; don't print that.
774 if (i
== len
- 1 && p
[i
] != '\n')
788 format(const struct req
*req
, const char *file
)
790 struct manoutput conf
;
792 struct roff_man
*man
;
797 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
798 puts("<P>You specified an invalid manual file.</P>");
803 mp
= mparse_alloc(MPARSE_SO
, MANDOCLEVEL_BADARG
, NULL
, req
->q
.manpath
);
804 mparse_readfd(mp
, fd
, file
);
807 memset(&conf
, 0, sizeof(conf
));
809 usepath
= strcmp(req
->q
.manpath
, req
->p
[0]);
810 mandoc_asprintf(&conf
.man
, "%s?query=%%N&sec=%%S%s%s%s%s",
812 req
->q
.arch
? "&arch=" : "",
813 req
->q
.arch
? req
->q
.arch
: "",
814 usepath
? "&manpath=" : "",
815 usepath
? req
->q
.manpath
: "");
817 mparse_result(mp
, &man
, NULL
);
819 fprintf(stderr
, "fatal mandoc error: %s/%s\n",
820 req
->q
.manpath
, file
);
827 vp
= html_alloc(&conf
);
829 if (man
->macroset
== MACROSET_MDOC
) {
844 resp_show(const struct req
*req
, const char *file
)
847 if ('.' == file
[0] && '/' == file
[1])
857 pg_show(struct req
*req
, const char *fullpath
)
862 if ((file
= strchr(fullpath
, '/')) == NULL
) {
864 "You did not specify a page to show.");
867 manpath
= mandoc_strndup(fullpath
, file
- fullpath
);
870 if ( ! validate_manpath(req
, manpath
)) {
872 "You specified an invalid manpath.");
878 * Begin by chdir()ing into the manpath.
879 * This way we can pick up the database files, which are
880 * relative to the manpath root.
883 if (chdir(manpath
) == -1) {
884 fprintf(stderr
, "chdir %s: %s\n",
885 manpath
, strerror(errno
));
891 if (strcmp(manpath
, "mandoc")) {
892 free(req
->q
.manpath
);
893 req
->q
.manpath
= manpath
;
897 if ( ! validate_filename(file
)) {
899 "You specified an invalid manual file.");
903 resp_begin_html(200, NULL
);
904 resp_searchform(req
);
905 resp_show(req
, file
);
910 pg_search(const struct req
*req
)
912 struct mansearch search
;
913 struct manpaths paths
;
916 char *query
, *rp
, *wp
;
921 * Begin by chdir()ing into the root of the manpath.
922 * This way we can pick up the database files, which are
923 * relative to the manpath root.
926 if (-1 == (chdir(req
->q
.manpath
))) {
927 fprintf(stderr
, "chdir %s: %s\n",
928 req
->q
.manpath
, strerror(errno
));
933 search
.arch
= req
->q
.arch
;
934 search
.sec
= req
->q
.sec
;
935 search
.outkey
= "Nd";
936 search
.argmode
= req
->q
.equal
? ARG_NAME
: ARG_EXPR
;
937 search
.firstmatch
= 1;
940 paths
.paths
= mandoc_malloc(sizeof(char *));
941 paths
.paths
[0] = mandoc_strdup(".");
944 * Break apart at spaces with backslash-escaping.
949 rp
= query
= mandoc_strdup(req
->q
.query
);
951 while (isspace((unsigned char)*rp
))
955 argv
= mandoc_reallocarray(argv
, argc
+ 1, sizeof(char *));
956 argv
[argc
++] = wp
= rp
;
958 if (isspace((unsigned char)*rp
)) {
963 if (rp
[0] == '\\' && rp
[1] != '\0')
974 if (0 == mansearch(&search
, &paths
, argc
, argv
, &res
, &ressz
))
975 pg_noresult(req
, "You entered an invalid query.");
977 pg_noresult(req
, "No results found.");
979 pg_searchres(req
, res
, ressz
);
982 mansearch_free(res
, ressz
);
983 free(paths
.paths
[0]);
991 struct itimerval itimer
;
993 const char *querystring
;
996 /* Poor man's ReDoS mitigation. */
998 itimer
.it_value
.tv_sec
= 2;
999 itimer
.it_value
.tv_usec
= 0;
1000 itimer
.it_interval
.tv_sec
= 2;
1001 itimer
.it_interval
.tv_usec
= 0;
1002 if (setitimer(ITIMER_VIRTUAL
, &itimer
, NULL
) == -1) {
1003 fprintf(stderr
, "setitimer: %s\n", strerror(errno
));
1004 pg_error_internal();
1005 return EXIT_FAILURE
;
1008 /* Scan our run-time environment. */
1010 if (NULL
== (scriptname
= getenv("SCRIPT_NAME")))
1013 if ( ! validate_urifrag(scriptname
)) {
1014 fprintf(stderr
, "unsafe SCRIPT_NAME \"%s\"\n",
1016 pg_error_internal();
1017 return EXIT_FAILURE
;
1021 * First we change directory into the MAN_DIR so that
1022 * subsequent scanning for manpath directories is rooted
1023 * relative to the same position.
1026 if (-1 == chdir(MAN_DIR
)) {
1027 fprintf(stderr
, "MAN_DIR: %s: %s\n",
1028 MAN_DIR
, strerror(errno
));
1029 pg_error_internal();
1030 return EXIT_FAILURE
;
1033 memset(&req
, 0, sizeof(struct req
));
1036 /* Next parse out the query string. */
1038 if (NULL
!= (querystring
= getenv("QUERY_STRING")))
1039 http_parse(&req
, querystring
);
1041 if (req
.q
.manpath
== NULL
)
1042 req
.q
.manpath
= mandoc_strdup(req
.p
[0]);
1043 else if ( ! validate_manpath(&req
, req
.q
.manpath
)) {
1044 pg_error_badrequest(
1045 "You specified an invalid manpath.");
1046 return EXIT_FAILURE
;
1049 if ( ! (NULL
== req
.q
.arch
|| validate_urifrag(req
.q
.arch
))) {
1050 pg_error_badrequest(
1051 "You specified an invalid architecture.");
1052 return EXIT_FAILURE
;
1055 /* Dispatch to the three different pages. */
1057 path
= getenv("PATH_INFO");
1060 else if ('/' == *path
)
1064 pg_show(&req
, path
);
1065 else if (NULL
!= req
.q
.query
)
1070 free(req
.q
.manpath
);
1074 for (i
= 0; i
< (int)req
.psz
; i
++)
1077 return EXIT_SUCCESS
;
1081 * Scan for indexable paths.
1084 pathgen(struct req
*req
)
1091 if (NULL
== (fp
= fopen("manpath.conf", "r"))) {
1092 fprintf(stderr
, "%s/manpath.conf: %s\n",
1093 MAN_DIR
, strerror(errno
));
1094 pg_error_internal();
1101 while ((len
= getline(&dp
, &dpsz
, fp
)) != -1) {
1102 if (dp
[len
- 1] == '\n')
1104 req
->p
= mandoc_realloc(req
->p
,
1105 (req
->psz
+ 1) * sizeof(char *));
1106 if ( ! validate_urifrag(dp
)) {
1107 fprintf(stderr
, "%s/manpath.conf contains "
1108 "unsafe path \"%s\"\n", MAN_DIR
, dp
);
1109 pg_error_internal();
1112 if (NULL
!= strchr(dp
, '/')) {
1113 fprintf(stderr
, "%s/manpath.conf contains "
1114 "path with slash \"%s\"\n", MAN_DIR
, dp
);
1115 pg_error_internal();
1118 req
->p
[req
->psz
++] = dp
;
1124 if ( req
->p
== NULL
) {
1125 fprintf(stderr
, "%s/manpath.conf is empty\n", MAN_DIR
);
1126 pg_error_internal();