]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Ignore unreasonably large spacing modifiers in tbl layouts.
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.173 2020/06/29 19:22:09 schwarze Exp $ */
2 /*
3 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the man.cgi(8) program.
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23 #include <sys/time.h>
24
25 #include <ctype.h>
26 #if HAVE_ERR
27 #include <err.h>
28 #endif
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "mandoc_aux.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "mdoc.h"
42 #include "man.h"
43 #include "mandoc_parse.h"
44 #include "main.h"
45 #include "manconf.h"
46 #include "mansearch.h"
47 #include "cgi.h"
48
/*
 * A query as passed to the search function.
 * The string members are heap-allocated and released at the end of
 * main(); set_query_attr() stores NULL for empty values.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
59
/*
 * State of one CGI request: the parsed query together with the list
 * of manpaths that parse_manpath_conf() read from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
66
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
71
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
102
/*
 * URI path of this program as used in generated links; it is printed
 * after a leading '/' and may be the empty string.
 */
static	const char	 *scriptname = SCRIPT_NAME;
104
/*
 * Display priority of the sections 1 to 9, indexed by section
 * digit minus one; smaller values are preferred by pg_searchres().
 */
static	const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the options in the section selector of the search form. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
123
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",	"loongson",
	"luna88k",	"macppc",	"mips64",	"octeon",
	"powerpc64",	"sgi",		"socppc",	"sparc64",

	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
140
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by character entities.
 * Bytes outside ASCII are written unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
167
/*
 * Write a string to standard output with HTML escaping
 * by passing each character to html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
181
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
198
199 /*
200 * Parse the QUERY_STRING for key-value pairs
201 * and store the values into the query structure.
202 */
203 static void
204 parse_query_string(struct req *req, const char *qs)
205 {
206 char *key, *val;
207 size_t keysz, valsz;
208
209 req->isquery = 1;
210 req->q.manpath = NULL;
211 req->q.arch = NULL;
212 req->q.sec = NULL;
213 req->q.query = NULL;
214 req->q.equal = 1;
215
216 key = val = NULL;
217 while (*qs != '\0') {
218
219 /* Parse one key. */
220
221 keysz = strcspn(qs, "=;&");
222 key = mandoc_strndup(qs, keysz);
223 qs += keysz;
224 if (*qs != '=')
225 goto next;
226
227 /* Parse one value. */
228
229 valsz = strcspn(++qs, ";&");
230 val = mandoc_strndup(qs, valsz);
231 qs += valsz;
232
233 /* Decode and catch encoding errors. */
234
235 if ( ! (http_decode(key) && http_decode(val)))
236 goto next;
237
238 /* Handle key-value pairs. */
239
240 if ( ! strcmp(key, "query"))
241 set_query_attr(&req->q.query, &val);
242
243 else if ( ! strcmp(key, "apropos"))
244 req->q.equal = !strcmp(val, "0");
245
246 else if ( ! strcmp(key, "manpath")) {
247 #ifdef COMPAT_OLDURI
248 if ( ! strncmp(val, "OpenBSD ", 8)) {
249 val[7] = '-';
250 if ('C' == val[8])
251 val[8] = 'c';
252 }
253 #endif
254 set_query_attr(&req->q.manpath, &val);
255 }
256
257 else if ( ! (strcmp(key, "sec")
258 #ifdef COMPAT_OLDURI
259 && strcmp(key, "sektion")
260 #endif
261 )) {
262 if ( ! strcmp(val, "0"))
263 *val = '\0';
264 set_query_attr(&req->q.sec, &val);
265 }
266
267 else if ( ! strcmp(key, "arch")) {
268 if ( ! strcmp(val, "default"))
269 *val = '\0';
270 set_query_attr(&req->q.arch, &val);
271 }
272
273 /*
274 * The key must be freed in any case.
275 * The val may have been handed over to the query
276 * structure, in which case it is now NULL.
277 */
278 next:
279 free(key);
280 key = NULL;
281 free(val);
282 val = NULL;
283
284 if (*qs != '\0')
285 qs++;
286 }
287 }
288
/*
 * Return the value of one hexadecimal digit, or -1 if the character
 * is not a hexadecimal digit.  The terminating NUL yields -1, too.
 */
static int
http_hexval(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return -1;
}

/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * A '%' has to be followed by exactly two hexadecimal digits,
 * and the escape must not encode a NUL byte.
 * Return 1 on success or 0 if the input is malformed; in the
 * latter case, the content of the buffer is unspecified.
 */
static int
http_decode(char *p)
{
	char		*q;
	int		 hi, lo, c;

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Check the digits one by one rather than using
		 * sscanf("%x"), which also accepts leading white
		 * space, a sign, and a single digit followed by
		 * garbage.  The short-circuit evaluation makes sure
		 * that p[2] is not read if p[1] ends the string.
		 */

		if ((hi = http_hexval(p[1])) == -1 ||
		    (lo = http_hexval(p[2])) == -1)
			return 0;
		if ((c = hi * 16 + lo) == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
324
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and "-._~" are written as they are,
 * every other byte is written as '%' plus two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
336
/*
 * Write the CGI response header for an HTML document and flush it.
 * A "Status:" line is only written if the code differs from 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
353
/*
 * Copy the content of a file to standard output.
 * A file that cannot be opened is silently skipped.
 * Buffered output is flushed first because the copy bypasses stdio
 * and writes to the file descriptor directly.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nread = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may accept less than requested, so keep
		 * going until the whole chunk has been written.
		 */

		off = 0;
		while (off < nread) {
			nwritten = write(STDOUT_FILENO, buf + off,
			    (size_t)(nread - off));
			if (nwritten == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += nwritten;
		}
	}
	close(fd);
}
368
369 static void
370 resp_begin_html(int code, const char *msg, const char *file)
371 {
372 char *cp;
373
374 resp_begin_http(code, msg);
375
376 printf("<!DOCTYPE html>\n"
377 "<html>\n"
378 "<head>\n"
379 " <meta charset=\"UTF-8\"/>\n"
380 " <meta name=\"viewport\""
381 " content=\"width=device-width, initial-scale=1.0\">\n"
382 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
383 " type=\"text/css\" media=\"all\">\n"
384 " <title>",
385 CSS_DIR);
386 if (file != NULL) {
387 if ((cp = strrchr(file, '/')) != NULL)
388 file = cp + 1;
389 if ((cp = strrchr(file, '.')) != NULL) {
390 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
391 } else
392 printf("%s - ", file);
393 }
394 printf("%s</title>\n"
395 "</head>\n"
396 "<body>\n",
397 CUSTOMIZE_TITLE);
398
399 resp_copy(MAN_DIR "/header.html");
400 }
401
402 static void
403 resp_end_html(void)
404 {
405
406 resp_copy(MAN_DIR "/footer.html");
407
408 puts("</body>\n"
409 "</html>");
410 }
411
412 static void
413 resp_searchform(const struct req *req, enum focus focus)
414 {
415 int i;
416
417 printf("<form action=\"/%s\" method=\"get\" "
418 "autocomplete=\"off\" autocapitalize=\"none\">\n"
419 " <fieldset>\n"
420 " <legend>Manual Page Search Parameters</legend>\n",
421 scriptname);
422
423 /* Write query input box. */
424
425 printf(" <input type=\"search\" name=\"query\" value=\"");
426 if (req->q.query != NULL)
427 html_print(req->q.query);
428 printf( "\" size=\"40\"");
429 if (focus == FOCUS_QUERY)
430 printf(" autofocus");
431 puts(">");
432
433 /* Write submission buttons. */
434
435 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
436 "man</button>\n"
437 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
438 "apropos</button>\n"
439 " <br/>\n");
440
441 /* Write section selector. */
442
443 puts(" <select name=\"sec\">");
444 for (i = 0; i < sec_MAX; i++) {
445 printf(" <option value=\"%s\"", sec_numbers[i]);
446 if (NULL != req->q.sec &&
447 0 == strcmp(sec_numbers[i], req->q.sec))
448 printf(" selected=\"selected\"");
449 printf(">%s</option>\n", sec_names[i]);
450 }
451 puts(" </select>");
452
453 /* Write architecture selector. */
454
455 printf( " <select name=\"arch\">\n"
456 " <option value=\"default\"");
457 if (NULL == req->q.arch)
458 printf(" selected=\"selected\"");
459 puts(">All Architectures</option>");
460 for (i = 0; i < arch_MAX; i++) {
461 printf(" <option");
462 if (NULL != req->q.arch &&
463 0 == strcmp(arch_names[i], req->q.arch))
464 printf(" selected=\"selected\"");
465 printf(">%s</option>\n", arch_names[i]);
466 }
467 puts(" </select>");
468
469 /* Write manpath selector. */
470
471 if (req->psz > 1) {
472 puts(" <select name=\"manpath\">");
473 for (i = 0; i < (int)req->psz; i++) {
474 printf(" <option");
475 if (strcmp(req->q.manpath, req->p[i]) == 0)
476 printf(" selected=\"selected\"");
477 printf(">");
478 html_print(req->p[i]);
479 puts("</option>");
480 }
481 puts(" </select>");
482 }
483
484 puts(" </fieldset>\n"
485 "</form>");
486 }
487
/*
 * Check that a string consists of alphanumeric characters
 * and the characters '-', '.', '/', and '_' only.
 * Return 1 if so, or 0 if any other character is found.
 * The empty string is considered valid.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
501
502 static int
503 validate_manpath(const struct req *req, const char* manpath)
504 {
505 size_t i;
506
507 for (i = 0; i < req->psz; i++)
508 if ( ! strcmp(manpath, req->p[i]))
509 return 1;
510
511 return 0;
512 }
513
514 static int
515 validate_arch(const char *arch)
516 {
517 int i;
518
519 for (i = 0; i < arch_MAX; i++)
520 if (strcmp(arch, arch_names[i]) == 0)
521 return 1;
522
523 return 0;
524 }
525
/*
 * Check that a file name is acceptable for a manual page:
 * after an optional leading "./", it has to start with "man" or
 * "cat" and must contain neither "../" nor "/..".
 * Return 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse attempts to step out of the directory. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
536
537 static void
538 pg_index(const struct req *req)
539 {
540
541 resp_begin_html(200, NULL, NULL);
542 resp_searchform(req, FOCUS_QUERY);
543 printf("<p>\n"
544 "This web interface is documented in the\n"
545 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
546 "manual, and the\n"
547 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
548 "manual explains the query syntax.\n"
549 "</p>\n",
550 scriptname, *scriptname == '\0' ? "" : "/",
551 scriptname, *scriptname == '\0' ? "" : "/");
552 resp_end_html();
553 }
554
555 static void
556 pg_noresult(const struct req *req, int code, const char *http_msg,
557 const char *user_msg)
558 {
559 resp_begin_html(code, http_msg, NULL);
560 resp_searchform(req, FOCUS_QUERY);
561 puts("<p>");
562 puts(user_msg);
563 puts("</p>");
564 resp_end_html();
565 }
566
567 static void
568 pg_error_badrequest(const char *msg)
569 {
570
571 resp_begin_html(400, "Bad Request", NULL);
572 puts("<h1>Bad Request</h1>\n"
573 "<p>\n");
574 puts(msg);
575 printf("Try again from the\n"
576 "<a href=\"/%s\">main page</a>.\n"
577 "</p>", scriptname);
578 resp_end_html();
579 }
580
/*
 * Write a "500 Internal Server Error" page.
 * No details are given to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	printf("%s\n", "<p>Internal Server Error</p>");
	resp_end_html();
}
588
589 static void
590 pg_redirect(const struct req *req, const char *name)
591 {
592 printf("Status: 303 See Other\r\n"
593 "Location: /");
594 if (*scriptname != '\0')
595 printf("%s/", scriptname);
596 if (strcmp(req->q.manpath, req->p[0]))
597 printf("%s/", req->q.manpath);
598 if (req->q.arch != NULL)
599 printf("%s/", req->q.arch);
600 http_encode(name);
601 if (req->q.sec != NULL) {
602 putchar('.');
603 http_encode(req->q.sec);
604 }
605 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
606 }
607
608 static void
609 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
610 {
611 char *arch, *archend;
612 const char *sec;
613 size_t i, iuse;
614 int archprio, archpriouse;
615 int prio, priouse;
616
617 for (i = 0; i < sz; i++) {
618 if (validate_filename(r[i].file))
619 continue;
620 warnx("invalid filename %s in %s database",
621 r[i].file, req->q.manpath);
622 pg_error_internal();
623 return;
624 }
625
626 if (req->isquery && sz == 1) {
627 /*
628 * If we have just one result, then jump there now
629 * without any delay.
630 */
631 printf("Status: 303 See Other\r\n"
632 "Location: /");
633 if (*scriptname != '\0')
634 printf("%s/", scriptname);
635 if (strcmp(req->q.manpath, req->p[0]))
636 printf("%s/", req->q.manpath);
637 printf("%s\r\n"
638 "Content-Type: text/html; charset=utf-8\r\n\r\n",
639 r[0].file);
640 return;
641 }
642
643 /*
644 * In man(1) mode, show one of the pages
645 * even if more than one is found.
646 */
647
648 iuse = 0;
649 if (req->q.equal || sz == 1) {
650 priouse = 20;
651 archpriouse = 3;
652 for (i = 0; i < sz; i++) {
653 sec = r[i].file;
654 sec += strcspn(sec, "123456789");
655 if (sec[0] == '\0')
656 continue;
657 prio = sec_prios[sec[0] - '1'];
658 if (sec[1] != '/')
659 prio += 10;
660 if (req->q.arch == NULL) {
661 archprio =
662 ((arch = strchr(sec + 1, '/'))
663 == NULL) ? 3 :
664 ((archend = strchr(arch + 1, '/'))
665 == NULL) ? 0 :
666 strncmp(arch, "amd64/",
667 archend - arch) ? 2 : 1;
668 if (archprio < archpriouse) {
669 archpriouse = archprio;
670 priouse = prio;
671 iuse = i;
672 continue;
673 }
674 if (archprio > archpriouse)
675 continue;
676 }
677 if (prio >= priouse)
678 continue;
679 priouse = prio;
680 iuse = i;
681 }
682 resp_begin_html(200, NULL, r[iuse].file);
683 } else
684 resp_begin_html(200, NULL, NULL);
685
686 resp_searchform(req,
687 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
688
689 if (sz > 1) {
690 puts("<table class=\"results\">");
691 for (i = 0; i < sz; i++) {
692 printf(" <tr>\n"
693 " <td>"
694 "<a class=\"Xr\" href=\"/");
695 if (*scriptname != '\0')
696 printf("%s/", scriptname);
697 if (strcmp(req->q.manpath, req->p[0]))
698 printf("%s/", req->q.manpath);
699 printf("%s\">", r[i].file);
700 html_print(r[i].names);
701 printf("</a></td>\n"
702 " <td><span class=\"Nd\">");
703 html_print(r[i].output);
704 puts("</span></td>\n"
705 " </tr>");
706 }
707 puts("</table>");
708 }
709
710 if (req->q.equal || sz == 1) {
711 puts("<hr>");
712 resp_show(req, r[iuse].file);
713 }
714
715 resp_end_html();
716 }
717
/*
 * Write a preformatted manual page (catpage) as HTML.
 * Backspace overstrike sequences are translated: "_\bX" becomes
 * italic X, a few special character pairs become '*' or '+',
 * and any other "X\bY" becomes bold Y.
 * All other text is written with HTML escaping.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The partner character is always p[i + 2];
			 * testing p[i + 1] could never match here.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
854
855 static void
856 resp_format(const struct req *req, const char *file)
857 {
858 struct manoutput conf;
859 struct mparse *mp;
860 struct roff_meta *meta;
861 void *vp;
862 int fd;
863 int usepath;
864
865 if (-1 == (fd = open(file, O_RDONLY, 0))) {
866 puts("<p>You specified an invalid manual file.</p>");
867 return;
868 }
869
870 mchars_alloc();
871 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
872 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
873 mparse_readfd(mp, fd, file);
874 close(fd);
875 meta = mparse_result(mp);
876
877 memset(&conf, 0, sizeof(conf));
878 conf.fragment = 1;
879 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
880 usepath = strcmp(req->q.manpath, req->p[0]);
881 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
882 scriptname, *scriptname == '\0' ? "" : "/",
883 usepath ? req->q.manpath : "", usepath ? "/" : "");
884
885 vp = html_alloc(&conf);
886 if (meta->macroset == MACROSET_MDOC)
887 html_mdoc(vp, meta);
888 else
889 html_man(vp, meta);
890
891 html_free(vp);
892 mparse_free(mp);
893 mchars_free();
894 free(conf.man);
895 free(conf.style);
896 }
897
/*
 * Write one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are treated as preformatted pages,
 * all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	switch (*file) {
	case 'c':
		resp_catman(req, file);
		break;
	default:
		resp_format(req, file);
		break;
	}
}
910
911 static void
912 pg_show(struct req *req, const char *fullpath)
913 {
914 char *manpath;
915 const char *file;
916
917 if ((file = strchr(fullpath, '/')) == NULL) {
918 pg_error_badrequest(
919 "You did not specify a page to show.");
920 return;
921 }
922 manpath = mandoc_strndup(fullpath, file - fullpath);
923 file++;
924
925 if ( ! validate_manpath(req, manpath)) {
926 pg_error_badrequest(
927 "You specified an invalid manpath.");
928 free(manpath);
929 return;
930 }
931
932 /*
933 * Begin by chdir()ing into the manpath.
934 * This way we can pick up the database files, which are
935 * relative to the manpath root.
936 */
937
938 if (chdir(manpath) == -1) {
939 warn("chdir %s", manpath);
940 pg_error_internal();
941 free(manpath);
942 return;
943 }
944 free(manpath);
945
946 if ( ! validate_filename(file)) {
947 pg_error_badrequest(
948 "You specified an invalid manual file.");
949 return;
950 }
951
952 resp_begin_html(200, NULL, file);
953 resp_searchform(req, FOCUS_NONE);
954 resp_show(req, file);
955 resp_end_html();
956 }
957
958 static void
959 pg_search(const struct req *req)
960 {
961 struct mansearch search;
962 struct manpaths paths;
963 struct manpage *res;
964 char **argv;
965 char *query, *rp, *wp;
966 size_t ressz;
967 int argc;
968
969 /*
970 * Begin by chdir()ing into the root of the manpath.
971 * This way we can pick up the database files, which are
972 * relative to the manpath root.
973 */
974
975 if (chdir(req->q.manpath) == -1) {
976 warn("chdir %s", req->q.manpath);
977 pg_error_internal();
978 return;
979 }
980
981 search.arch = req->q.arch;
982 search.sec = req->q.sec;
983 search.outkey = "Nd";
984 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
985 search.firstmatch = 1;
986
987 paths.sz = 1;
988 paths.paths = mandoc_malloc(sizeof(char *));
989 paths.paths[0] = mandoc_strdup(".");
990
991 /*
992 * Break apart at spaces with backslash-escaping.
993 */
994
995 argc = 0;
996 argv = NULL;
997 rp = query = mandoc_strdup(req->q.query);
998 for (;;) {
999 while (isspace((unsigned char)*rp))
1000 rp++;
1001 if (*rp == '\0')
1002 break;
1003 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1004 argv[argc++] = wp = rp;
1005 for (;;) {
1006 if (isspace((unsigned char)*rp)) {
1007 *wp = '\0';
1008 rp++;
1009 break;
1010 }
1011 if (rp[0] == '\\' && rp[1] != '\0')
1012 rp++;
1013 if (wp != rp)
1014 *wp = *rp;
1015 if (*rp == '\0')
1016 break;
1017 wp++;
1018 rp++;
1019 }
1020 }
1021
1022 res = NULL;
1023 ressz = 0;
1024 if (req->isquery && req->q.equal && argc == 1)
1025 pg_redirect(req, argv[0]);
1026 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1027 pg_noresult(req, 400, "Bad Request",
1028 "You entered an invalid query.");
1029 else if (ressz == 0)
1030 pg_noresult(req, 404, "Not Found", "No results found.");
1031 else
1032 pg_searchres(req, res, ressz);
1033
1034 free(query);
1035 mansearch_free(res, ressz);
1036 free(paths.paths[0]);
1037 free(paths.paths);
1038 }
1039
1040 int
1041 main(void)
1042 {
1043 struct req req;
1044 struct itimerval itimer;
1045 const char *path;
1046 const char *querystring;
1047 int i;
1048
1049 #if HAVE_PLEDGE
1050 /*
1051 * The "rpath" pledge could be revoked after mparse_readfd()
1052 * if the file desciptor to "/footer.html" would be opened
1053 * up front, but it's probably not worth the complication
1054 * of the code it would cause: it would require scattering
1055 * pledge() calls in multiple low-level resp_*() functions.
1056 */
1057
1058 if (pledge("stdio rpath", NULL) == -1) {
1059 warn("pledge");
1060 pg_error_internal();
1061 return EXIT_FAILURE;
1062 }
1063 #endif
1064
1065 /* Poor man's ReDoS mitigation. */
1066
1067 itimer.it_value.tv_sec = 2;
1068 itimer.it_value.tv_usec = 0;
1069 itimer.it_interval.tv_sec = 2;
1070 itimer.it_interval.tv_usec = 0;
1071 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1072 warn("setitimer");
1073 pg_error_internal();
1074 return EXIT_FAILURE;
1075 }
1076
1077 /*
1078 * First we change directory into the MAN_DIR so that
1079 * subsequent scanning for manpath directories is rooted
1080 * relative to the same position.
1081 */
1082
1083 if (chdir(MAN_DIR) == -1) {
1084 warn("MAN_DIR: %s", MAN_DIR);
1085 pg_error_internal();
1086 return EXIT_FAILURE;
1087 }
1088
1089 memset(&req, 0, sizeof(struct req));
1090 req.q.equal = 1;
1091 parse_manpath_conf(&req);
1092
1093 /* Parse the path info and the query string. */
1094
1095 if ((path = getenv("PATH_INFO")) == NULL)
1096 path = "";
1097 else if (*path == '/')
1098 path++;
1099
1100 if (*path != '\0') {
1101 parse_path_info(&req, path);
1102 if (req.q.manpath == NULL || req.q.sec == NULL ||
1103 *req.q.query == '\0' || access(path, F_OK) == -1)
1104 path = "";
1105 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1106 parse_query_string(&req, querystring);
1107
1108 /* Validate parsed data and add defaults. */
1109
1110 if (req.q.manpath == NULL)
1111 req.q.manpath = mandoc_strdup(req.p[0]);
1112 else if ( ! validate_manpath(&req, req.q.manpath)) {
1113 pg_error_badrequest(
1114 "You specified an invalid manpath.");
1115 return EXIT_FAILURE;
1116 }
1117
1118 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1119 pg_error_badrequest(
1120 "You specified an invalid architecture.");
1121 return EXIT_FAILURE;
1122 }
1123
1124 /* Dispatch to the three different pages. */
1125
1126 if ('\0' != *path)
1127 pg_show(&req, path);
1128 else if (NULL != req.q.query)
1129 pg_search(&req);
1130 else
1131 pg_index(&req);
1132
1133 free(req.q.manpath);
1134 free(req.q.arch);
1135 free(req.q.sec);
1136 free(req.q.query);
1137 for (i = 0; i < (int)req.psz; i++)
1138 free(req.p[i]);
1139 free(req.p);
1140 return EXIT_SUCCESS;
1141 }
1142
1143 /*
1144 * Translate PATH_INFO to a query.
1145 */
1146 static void
1147 parse_path_info(struct req *req, const char *path)
1148 {
1149 const char *name, *sec, *end;
1150
1151 req->isquery = 0;
1152 req->q.equal = 1;
1153 req->q.manpath = NULL;
1154 req->q.arch = NULL;
1155
1156 /* Mandatory manual page name. */
1157 if ((name = strrchr(path, '/')) == NULL)
1158 name = path;
1159 else
1160 name++;
1161
1162 /* Optional trailing section. */
1163 sec = strrchr(name, '.');
1164 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1165 req->q.query = mandoc_strndup(name, sec - name - 1);
1166 req->q.sec = mandoc_strdup(sec);
1167 } else {
1168 req->q.query = mandoc_strdup(name);
1169 req->q.sec = NULL;
1170 }
1171
1172 /* Handle the case of name[.section] only. */
1173 if (name == path)
1174 return;
1175
1176 /* Optional manpath. */
1177 end = strchr(path, '/');
1178 req->q.manpath = mandoc_strndup(path, end - path);
1179 if (validate_manpath(req, req->q.manpath)) {
1180 path = end + 1;
1181 if (name == path)
1182 return;
1183 } else {
1184 free(req->q.manpath);
1185 req->q.manpath = NULL;
1186 }
1187
1188 /* Optional section. */
1189 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1190 path += 3;
1191 end = strchr(path, '/');
1192 free(req->q.sec);
1193 req->q.sec = mandoc_strndup(path, end - path);
1194 path = end + 1;
1195 if (name == path)
1196 return;
1197 }
1198
1199 /* Optional architecture. */
1200 end = strchr(path, '/');
1201 if (end + 1 != name) {
1202 pg_error_badrequest(
1203 "You specified too many directory components.");
1204 exit(EXIT_FAILURE);
1205 }
1206 req->q.arch = mandoc_strndup(path, end - path);
1207 if (validate_arch(req->q.arch) == 0) {
1208 pg_error_badrequest(
1209 "You specified an invalid directory component.");
1210 exit(EXIT_FAILURE);
1211 }
1212 }
1213
1214 /*
1215 * Scan for indexable paths.
1216 */
1217 static void
1218 parse_manpath_conf(struct req *req)
1219 {
1220 FILE *fp;
1221 char *dp;
1222 size_t dpsz;
1223 ssize_t len;
1224
1225 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1226 warn("%s/manpath.conf", MAN_DIR);
1227 pg_error_internal();
1228 exit(EXIT_FAILURE);
1229 }
1230
1231 dp = NULL;
1232 dpsz = 0;
1233
1234 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1235 if (dp[len - 1] == '\n')
1236 dp[--len] = '\0';
1237 req->p = mandoc_realloc(req->p,
1238 (req->psz + 1) * sizeof(char *));
1239 if ( ! validate_urifrag(dp)) {
1240 warnx("%s/manpath.conf contains "
1241 "unsafe path \"%s\"", MAN_DIR, dp);
1242 pg_error_internal();
1243 exit(EXIT_FAILURE);
1244 }
1245 if (strchr(dp, '/') != NULL) {
1246 warnx("%s/manpath.conf contains "
1247 "path with slash \"%s\"", MAN_DIR, dp);
1248 pg_error_internal();
1249 exit(EXIT_FAILURE);
1250 }
1251 req->p[req->psz++] = dp;
1252 dp = NULL;
1253 dpsz = 0;
1254 }
1255 free(dp);
1256
1257 if (req->p == NULL) {
1258 warnx("%s/manpath.conf is empty", MAN_DIR);
1259 pg_error_internal();
1260 exit(EXIT_FAILURE);
1261 }
1262 }