]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
delete the entry for a crash that was already fixed
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.171 2020/01/10 15:21:19 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "mandoc_parse.h"
42 #include "main.h"
43 #include "manconf.h"
44 #include "mansearch.h"
45 #include "cgi.h"
46
/*
 * Search parameters handed to the search function.
 * The string members are either NULL or heap-allocated.
 */
struct query {
	char		*manpath;	/* desired manual directory */
	char		*arch;		/* architecture */
	char		*sec;		/* manual section */
	char		*query;		/* unparsed query expression */
	int		 equal;		/* match whole names, not substrings */
};

/*
 * State of one CGI request: the query itself plus the list
 * of manpaths this installation offers.
 */
struct req {
	struct query	  q;
	char		**p;		/* array of available manpaths */
	size_t		  psz;		/* number of available manpaths */
	int		  isquery;	/* QUERY_STRING used, not PATH_INFO */
};

/*
 * Which element of the search form, if any, gets the input focus.
 */
enum focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
69
/* Forward declarations of the file-local functions. */
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	int		 http_decode(char *p);
static	void		 http_encode(const char *p);
static	void		 parse_manpath_conf(struct req *req);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *req, const char *qs);
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req, int code,
				const char *http_msg, const char *user_msg);
static	void		 pg_redirect(const struct req *req, const char *name);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);
static	void		 resp_begin_html(int code, const char *msg,
				const char *file);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_catman(const struct req *req, const char *file);
static	void		 resp_copy(const char *filename);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *req, const char *file);
static	void		 resp_searchform(const struct req *req,
				enum focus focus);
static	void		 resp_show(const struct req *req, const char *file);
static	void		 set_query_attr(char **attr, char **val);
static	int		 validate_arch(const char *arch);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);
100
101 static const char *scriptname = SCRIPT_NAME;
102
103 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
104 static const char *const sec_numbers[] = {
105 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
106 };
107 static const char *const sec_names[] = {
108 "All Sections",
109 "1 - General Commands",
110 "2 - System Calls",
111 "3 - Library Functions",
112 "3p - Perl Library",
113 "4 - Device Drivers",
114 "5 - File Formats",
115 "6 - Games",
116 "7 - Miscellaneous Information",
117 "8 - System Manager\'s Manual",
118 "9 - Kernel Developer\'s Manual"
119 };
120 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
121
122 static const char *const arch_names[] = {
123 "amd64", "alpha", "armv7", "arm64",
124 "hppa", "i386", "landisk",
125 "loongson", "luna88k", "macppc", "mips64",
126 "octeon", "sgi", "socppc", "sparc64",
127 "amiga", "arc", "armish", "arm32",
128 "atari", "aviion", "beagle", "cats",
129 "hppa64", "hp300",
130 "ia64", "mac68k", "mvme68k", "mvme88k",
131 "mvmeppc", "palm", "pc532", "pegasos",
132 "pmax", "powerpc", "solbourne", "sparc",
133 "sun3", "vax", "wgrisc", "x68k",
134 "zaurus"
135 };
136 static const int arch_MAX = sizeof(arch_names) / sizeof(char *);
137
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * All other bytes, non-ASCII included, are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
164
/*
 * Write a string to standard output through html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
178
/*
 * Move ownership of the allocated string *val into *attr.
 * The previous content of *attr is freed.  An empty string is
 * freed and stored as NULL.  *val is always NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);
	incoming = *val;
	*val = NULL;

	if (*incoming == '\0') {
		free(incoming);
		incoming = NULL;
	}
	*attr = incoming;
}
195
196 /*
197 * Parse the QUERY_STRING for key-value pairs
198 * and store the values into the query structure.
199 */
200 static void
201 parse_query_string(struct req *req, const char *qs)
202 {
203 char *key, *val;
204 size_t keysz, valsz;
205
206 req->isquery = 1;
207 req->q.manpath = NULL;
208 req->q.arch = NULL;
209 req->q.sec = NULL;
210 req->q.query = NULL;
211 req->q.equal = 1;
212
213 key = val = NULL;
214 while (*qs != '\0') {
215
216 /* Parse one key. */
217
218 keysz = strcspn(qs, "=;&");
219 key = mandoc_strndup(qs, keysz);
220 qs += keysz;
221 if (*qs != '=')
222 goto next;
223
224 /* Parse one value. */
225
226 valsz = strcspn(++qs, ";&");
227 val = mandoc_strndup(qs, valsz);
228 qs += valsz;
229
230 /* Decode and catch encoding errors. */
231
232 if ( ! (http_decode(key) && http_decode(val)))
233 goto next;
234
235 /* Handle key-value pairs. */
236
237 if ( ! strcmp(key, "query"))
238 set_query_attr(&req->q.query, &val);
239
240 else if ( ! strcmp(key, "apropos"))
241 req->q.equal = !strcmp(val, "0");
242
243 else if ( ! strcmp(key, "manpath")) {
244 #ifdef COMPAT_OLDURI
245 if ( ! strncmp(val, "OpenBSD ", 8)) {
246 val[7] = '-';
247 if ('C' == val[8])
248 val[8] = 'c';
249 }
250 #endif
251 set_query_attr(&req->q.manpath, &val);
252 }
253
254 else if ( ! (strcmp(key, "sec")
255 #ifdef COMPAT_OLDURI
256 && strcmp(key, "sektion")
257 #endif
258 )) {
259 if ( ! strcmp(val, "0"))
260 *val = '\0';
261 set_query_attr(&req->q.sec, &val);
262 }
263
264 else if ( ! strcmp(key, "arch")) {
265 if ( ! strcmp(val, "default"))
266 *val = '\0';
267 set_query_attr(&req->q.arch, &val);
268 }
269
270 /*
271 * The key must be freed in any case.
272 * The val may have been handed over to the query
273 * structure, in which case it is now NULL.
274 */
275 next:
276 free(key);
277 key = NULL;
278 free(val);
279 val = NULL;
280
281 if (*qs != '\0')
282 qs++;
283 }
284 }
285
286 /*
287 * HTTP-decode a string. The standard explanation is that this turns
288 * "%4e+foo" into "n foo" in the regular way. This is done in-place
289 * over the allocated string.
290 */
291 static int
292 http_decode(char *p)
293 {
294 char hex[3];
295 char *q;
296 int c;
297
298 hex[2] = '\0';
299
300 q = p;
301 for ( ; '\0' != *p; p++, q++) {
302 if ('%' == *p) {
303 if ('\0' == (hex[0] = *(p + 1)))
304 return 0;
305 if ('\0' == (hex[1] = *(p + 2)))
306 return 0;
307 if (1 != sscanf(hex, "%x", &c))
308 return 0;
309 if ('\0' == c)
310 return 0;
311
312 *q = (char)c;
313 p += 2;
314 } else
315 *q = '+' == *p ? ' ' : *p;
316 }
317
318 *q = '\0';
319 return 1;
320 }
321
/*
 * Write a string to standard output, percent-encoding every
 * byte that is neither alphanumeric nor one of "-._~".
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) != 0 || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
333
/*
 * Write the HTTP response headers and flush standard output.
 * A Status line is only written when the code is not 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
350
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Buffered stdio output is flushed first because the copy
 * bypasses stdio and writes to the descriptor directly.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nread = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may be partial; loop until the chunk is out. */

		for (off = 0; off < nread; off += nwritten) {
			nwritten = write(STDOUT_FILENO, buf + off,
			    (size_t)(nread - off));
			if (nwritten != -1)
				continue;
			if (errno != EINTR)
				goto done;
			nwritten = 0;	/* Interrupted: retry. */
		}
	}
done:
	close(fd);
}
365
366 static void
367 resp_begin_html(int code, const char *msg, const char *file)
368 {
369 char *cp;
370
371 resp_begin_http(code, msg);
372
373 printf("<!DOCTYPE html>\n"
374 "<html>\n"
375 "<head>\n"
376 " <meta charset=\"UTF-8\"/>\n"
377 " <meta name=\"viewport\""
378 " content=\"width=device-width, initial-scale=1.0\">\n"
379 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
380 " type=\"text/css\" media=\"all\">\n"
381 " <title>",
382 CSS_DIR);
383 if (file != NULL) {
384 if ((cp = strrchr(file, '/')) != NULL)
385 file = cp + 1;
386 if ((cp = strrchr(file, '.')) != NULL) {
387 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
388 } else
389 printf("%s - ", file);
390 }
391 printf("%s</title>\n"
392 "</head>\n"
393 "<body>\n",
394 CUSTOMIZE_TITLE);
395
396 resp_copy(MAN_DIR "/header.html");
397 }
398
399 static void
400 resp_end_html(void)
401 {
402
403 resp_copy(MAN_DIR "/footer.html");
404
405 puts("</body>\n"
406 "</html>");
407 }
408
409 static void
410 resp_searchform(const struct req *req, enum focus focus)
411 {
412 int i;
413
414 printf("<form action=\"/%s\" method=\"get\" "
415 "autocomplete=\"off\" autocapitalize=\"none\">\n"
416 " <fieldset>\n"
417 " <legend>Manual Page Search Parameters</legend>\n",
418 scriptname);
419
420 /* Write query input box. */
421
422 printf(" <input type=\"search\" name=\"query\" value=\"");
423 if (req->q.query != NULL)
424 html_print(req->q.query);
425 printf( "\" size=\"40\"");
426 if (focus == FOCUS_QUERY)
427 printf(" autofocus");
428 puts(">");
429
430 /* Write submission buttons. */
431
432 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
433 "man</button>\n"
434 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
435 "apropos</button>\n"
436 " <br/>\n");
437
438 /* Write section selector. */
439
440 puts(" <select name=\"sec\">");
441 for (i = 0; i < sec_MAX; i++) {
442 printf(" <option value=\"%s\"", sec_numbers[i]);
443 if (NULL != req->q.sec &&
444 0 == strcmp(sec_numbers[i], req->q.sec))
445 printf(" selected=\"selected\"");
446 printf(">%s</option>\n", sec_names[i]);
447 }
448 puts(" </select>");
449
450 /* Write architecture selector. */
451
452 printf( " <select name=\"arch\">\n"
453 " <option value=\"default\"");
454 if (NULL == req->q.arch)
455 printf(" selected=\"selected\"");
456 puts(">All Architectures</option>");
457 for (i = 0; i < arch_MAX; i++) {
458 printf(" <option");
459 if (NULL != req->q.arch &&
460 0 == strcmp(arch_names[i], req->q.arch))
461 printf(" selected=\"selected\"");
462 printf(">%s</option>\n", arch_names[i]);
463 }
464 puts(" </select>");
465
466 /* Write manpath selector. */
467
468 if (req->psz > 1) {
469 puts(" <select name=\"manpath\">");
470 for (i = 0; i < (int)req->psz; i++) {
471 printf(" <option");
472 if (strcmp(req->q.manpath, req->p[i]) == 0)
473 printf(" selected=\"selected\"");
474 printf(">");
475 html_print(req->p[i]);
476 puts("</option>");
477 }
478 puts(" </select>");
479 }
480
481 puts(" </fieldset>\n"
482 "</form>");
483 }
484
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if (isalnum((unsigned char)*cp) == 0)
			return 0;
	}
	return 1;
}
498
499 static int
500 validate_manpath(const struct req *req, const char* manpath)
501 {
502 size_t i;
503
504 for (i = 0; i < req->psz; i++)
505 if ( ! strcmp(manpath, req->p[i]))
506 return 1;
507
508 return 0;
509 }
510
511 static int
512 validate_arch(const char *arch)
513 {
514 int i;
515
516 for (i = 0; i < arch_MAX; i++)
517 if (strcmp(arch, arch_names[i]) == 0)
518 return 1;
519
520 return 0;
521 }
522
/*
 * Check a manual page file name relative to a manpath.
 * After an optional leading "./", the name must start with
 * "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could climb out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
533
534 static void
535 pg_index(const struct req *req)
536 {
537
538 resp_begin_html(200, NULL, NULL);
539 resp_searchform(req, FOCUS_QUERY);
540 printf("<p>\n"
541 "This web interface is documented in the\n"
542 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
543 "manual, and the\n"
544 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
545 "manual explains the query syntax.\n"
546 "</p>\n",
547 scriptname, *scriptname == '\0' ? "" : "/",
548 scriptname, *scriptname == '\0' ? "" : "/");
549 resp_end_html();
550 }
551
552 static void
553 pg_noresult(const struct req *req, int code, const char *http_msg,
554 const char *user_msg)
555 {
556 resp_begin_html(code, http_msg, NULL);
557 resp_searchform(req, FOCUS_QUERY);
558 puts("<p>");
559 puts(user_msg);
560 puts("</p>");
561 resp_end_html();
562 }
563
564 static void
565 pg_error_badrequest(const char *msg)
566 {
567
568 resp_begin_html(400, "Bad Request", NULL);
569 puts("<h1>Bad Request</h1>\n"
570 "<p>\n");
571 puts(msg);
572 printf("Try again from the\n"
573 "<a href=\"/%s\">main page</a>.\n"
574 "</p>", scriptname);
575 resp_end_html();
576 }
577
/*
 * Write a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
585
586 static void
587 pg_redirect(const struct req *req, const char *name)
588 {
589 printf("Status: 303 See Other\r\n"
590 "Location: /");
591 if (*scriptname != '\0')
592 printf("%s/", scriptname);
593 if (strcmp(req->q.manpath, req->p[0]))
594 printf("%s/", req->q.manpath);
595 if (req->q.arch != NULL)
596 printf("%s/", req->q.arch);
597 http_encode(name);
598 if (req->q.sec != NULL) {
599 putchar('.');
600 http_encode(req->q.sec);
601 }
602 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
603 }
604
605 static void
606 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
607 {
608 char *arch, *archend;
609 const char *sec;
610 size_t i, iuse;
611 int archprio, archpriouse;
612 int prio, priouse;
613
614 for (i = 0; i < sz; i++) {
615 if (validate_filename(r[i].file))
616 continue;
617 warnx("invalid filename %s in %s database",
618 r[i].file, req->q.manpath);
619 pg_error_internal();
620 return;
621 }
622
623 if (req->isquery && sz == 1) {
624 /*
625 * If we have just one result, then jump there now
626 * without any delay.
627 */
628 printf("Status: 303 See Other\r\n"
629 "Location: /");
630 if (*scriptname != '\0')
631 printf("%s/", scriptname);
632 if (strcmp(req->q.manpath, req->p[0]))
633 printf("%s/", req->q.manpath);
634 printf("%s\r\n"
635 "Content-Type: text/html; charset=utf-8\r\n\r\n",
636 r[0].file);
637 return;
638 }
639
640 /*
641 * In man(1) mode, show one of the pages
642 * even if more than one is found.
643 */
644
645 iuse = 0;
646 if (req->q.equal || sz == 1) {
647 priouse = 20;
648 archpriouse = 3;
649 for (i = 0; i < sz; i++) {
650 sec = r[i].file;
651 sec += strcspn(sec, "123456789");
652 if (sec[0] == '\0')
653 continue;
654 prio = sec_prios[sec[0] - '1'];
655 if (sec[1] != '/')
656 prio += 10;
657 if (req->q.arch == NULL) {
658 archprio =
659 ((arch = strchr(sec + 1, '/'))
660 == NULL) ? 3 :
661 ((archend = strchr(arch + 1, '/'))
662 == NULL) ? 0 :
663 strncmp(arch, "amd64/",
664 archend - arch) ? 2 : 1;
665 if (archprio < archpriouse) {
666 archpriouse = archprio;
667 priouse = prio;
668 iuse = i;
669 continue;
670 }
671 if (archprio > archpriouse)
672 continue;
673 }
674 if (prio >= priouse)
675 continue;
676 priouse = prio;
677 iuse = i;
678 }
679 resp_begin_html(200, NULL, r[iuse].file);
680 } else
681 resp_begin_html(200, NULL, NULL);
682
683 resp_searchform(req,
684 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
685
686 if (sz > 1) {
687 puts("<table class=\"results\">");
688 for (i = 0; i < sz; i++) {
689 printf(" <tr>\n"
690 " <td>"
691 "<a class=\"Xr\" href=\"/");
692 if (*scriptname != '\0')
693 printf("%s/", scriptname);
694 if (strcmp(req->q.manpath, req->p[0]))
695 printf("%s/", req->q.manpath);
696 printf("%s\">", r[i].file);
697 html_print(r[i].names);
698 printf("</a></td>\n"
699 " <td><span class=\"Nd\">");
700 html_print(r[i].output);
701 puts("</span></td>\n"
702 " </tr>");
703 }
704 puts("</table>");
705 }
706
707 if (req->q.equal || sz == 1) {
708 puts("<hr>");
709 resp_show(req, r[iuse].file);
710 }
711
712 resp_end_html();
713 }
714
/*
 * Write a preformatted (cat) page as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * "x\by" are translated: '_' followed by a character becomes
 * italic, certain pairs of punctuation characters become '*' or
 * '+', and everything else becomes bold.  All page text is
 * HTML-escaped.  The request argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character struck over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both groups compare the two characters of
			 * the overstrike pair, p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
851
852 static void
853 resp_format(const struct req *req, const char *file)
854 {
855 struct manoutput conf;
856 struct mparse *mp;
857 struct roff_meta *meta;
858 void *vp;
859 int fd;
860 int usepath;
861
862 if (-1 == (fd = open(file, O_RDONLY, 0))) {
863 puts("<p>You specified an invalid manual file.</p>");
864 return;
865 }
866
867 mchars_alloc();
868 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
869 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
870 mparse_readfd(mp, fd, file);
871 close(fd);
872 meta = mparse_result(mp);
873
874 memset(&conf, 0, sizeof(conf));
875 conf.fragment = 1;
876 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
877 usepath = strcmp(req->q.manpath, req->p[0]);
878 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
879 scriptname, *scriptname == '\0' ? "" : "/",
880 usepath ? req->q.manpath : "", usepath ? "/" : "");
881
882 vp = html_alloc(&conf);
883 if (meta->macroset == MACROSET_MDOC)
884 html_mdoc(vp, meta);
885 else
886 html_man(vp, meta);
887
888 html_free(vp);
889 mparse_free(mp);
890 mchars_free();
891 free(conf.man);
892 free(conf.style);
893 }
894
/*
 * Write one manual page.  After an optional leading "./",
 * a file name starting with 'c' is handled as a preformatted
 * page, anything else as manual page source.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
907
908 static void
909 pg_show(struct req *req, const char *fullpath)
910 {
911 char *manpath;
912 const char *file;
913
914 if ((file = strchr(fullpath, '/')) == NULL) {
915 pg_error_badrequest(
916 "You did not specify a page to show.");
917 return;
918 }
919 manpath = mandoc_strndup(fullpath, file - fullpath);
920 file++;
921
922 if ( ! validate_manpath(req, manpath)) {
923 pg_error_badrequest(
924 "You specified an invalid manpath.");
925 free(manpath);
926 return;
927 }
928
929 /*
930 * Begin by chdir()ing into the manpath.
931 * This way we can pick up the database files, which are
932 * relative to the manpath root.
933 */
934
935 if (chdir(manpath) == -1) {
936 warn("chdir %s", manpath);
937 pg_error_internal();
938 free(manpath);
939 return;
940 }
941 free(manpath);
942
943 if ( ! validate_filename(file)) {
944 pg_error_badrequest(
945 "You specified an invalid manual file.");
946 return;
947 }
948
949 resp_begin_html(200, NULL, file);
950 resp_searchform(req, FOCUS_NONE);
951 resp_show(req, file);
952 resp_end_html();
953 }
954
955 static void
956 pg_search(const struct req *req)
957 {
958 struct mansearch search;
959 struct manpaths paths;
960 struct manpage *res;
961 char **argv;
962 char *query, *rp, *wp;
963 size_t ressz;
964 int argc;
965
966 /*
967 * Begin by chdir()ing into the root of the manpath.
968 * This way we can pick up the database files, which are
969 * relative to the manpath root.
970 */
971
972 if (chdir(req->q.manpath) == -1) {
973 warn("chdir %s", req->q.manpath);
974 pg_error_internal();
975 return;
976 }
977
978 search.arch = req->q.arch;
979 search.sec = req->q.sec;
980 search.outkey = "Nd";
981 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
982 search.firstmatch = 1;
983
984 paths.sz = 1;
985 paths.paths = mandoc_malloc(sizeof(char *));
986 paths.paths[0] = mandoc_strdup(".");
987
988 /*
989 * Break apart at spaces with backslash-escaping.
990 */
991
992 argc = 0;
993 argv = NULL;
994 rp = query = mandoc_strdup(req->q.query);
995 for (;;) {
996 while (isspace((unsigned char)*rp))
997 rp++;
998 if (*rp == '\0')
999 break;
1000 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1001 argv[argc++] = wp = rp;
1002 for (;;) {
1003 if (isspace((unsigned char)*rp)) {
1004 *wp = '\0';
1005 rp++;
1006 break;
1007 }
1008 if (rp[0] == '\\' && rp[1] != '\0')
1009 rp++;
1010 if (wp != rp)
1011 *wp = *rp;
1012 if (*rp == '\0')
1013 break;
1014 wp++;
1015 rp++;
1016 }
1017 }
1018
1019 res = NULL;
1020 ressz = 0;
1021 if (req->isquery && req->q.equal && argc == 1)
1022 pg_redirect(req, argv[0]);
1023 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1024 pg_noresult(req, 400, "Bad Request",
1025 "You entered an invalid query.");
1026 else if (ressz == 0)
1027 pg_noresult(req, 404, "Not Found", "No results found.");
1028 else
1029 pg_searchres(req, res, ressz);
1030
1031 free(query);
1032 mansearch_free(res, ressz);
1033 free(paths.paths[0]);
1034 free(paths.paths);
1035 }
1036
1037 int
1038 main(void)
1039 {
1040 struct req req;
1041 struct itimerval itimer;
1042 const char *path;
1043 const char *querystring;
1044 int i;
1045
1046 #if HAVE_PLEDGE
1047 /*
1048 * The "rpath" pledge could be revoked after mparse_readfd()
1049 * if the file desciptor to "/footer.html" would be opened
1050 * up front, but it's probably not worth the complication
1051 * of the code it would cause: it would require scattering
1052 * pledge() calls in multiple low-level resp_*() functions.
1053 */
1054
1055 if (pledge("stdio rpath", NULL) == -1) {
1056 warn("pledge");
1057 pg_error_internal();
1058 return EXIT_FAILURE;
1059 }
1060 #endif
1061
1062 /* Poor man's ReDoS mitigation. */
1063
1064 itimer.it_value.tv_sec = 2;
1065 itimer.it_value.tv_usec = 0;
1066 itimer.it_interval.tv_sec = 2;
1067 itimer.it_interval.tv_usec = 0;
1068 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1069 warn("setitimer");
1070 pg_error_internal();
1071 return EXIT_FAILURE;
1072 }
1073
1074 /*
1075 * First we change directory into the MAN_DIR so that
1076 * subsequent scanning for manpath directories is rooted
1077 * relative to the same position.
1078 */
1079
1080 if (chdir(MAN_DIR) == -1) {
1081 warn("MAN_DIR: %s", MAN_DIR);
1082 pg_error_internal();
1083 return EXIT_FAILURE;
1084 }
1085
1086 memset(&req, 0, sizeof(struct req));
1087 req.q.equal = 1;
1088 parse_manpath_conf(&req);
1089
1090 /* Parse the path info and the query string. */
1091
1092 if ((path = getenv("PATH_INFO")) == NULL)
1093 path = "";
1094 else if (*path == '/')
1095 path++;
1096
1097 if (*path != '\0') {
1098 parse_path_info(&req, path);
1099 if (req.q.manpath == NULL || req.q.sec == NULL ||
1100 *req.q.query == '\0' || access(path, F_OK) == -1)
1101 path = "";
1102 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1103 parse_query_string(&req, querystring);
1104
1105 /* Validate parsed data and add defaults. */
1106
1107 if (req.q.manpath == NULL)
1108 req.q.manpath = mandoc_strdup(req.p[0]);
1109 else if ( ! validate_manpath(&req, req.q.manpath)) {
1110 pg_error_badrequest(
1111 "You specified an invalid manpath.");
1112 return EXIT_FAILURE;
1113 }
1114
1115 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1116 pg_error_badrequest(
1117 "You specified an invalid architecture.");
1118 return EXIT_FAILURE;
1119 }
1120
1121 /* Dispatch to the three different pages. */
1122
1123 if ('\0' != *path)
1124 pg_show(&req, path);
1125 else if (NULL != req.q.query)
1126 pg_search(&req);
1127 else
1128 pg_index(&req);
1129
1130 free(req.q.manpath);
1131 free(req.q.arch);
1132 free(req.q.sec);
1133 free(req.q.query);
1134 for (i = 0; i < (int)req.psz; i++)
1135 free(req.p[i]);
1136 free(req.p);
1137 return EXIT_SUCCESS;
1138 }
1139
1140 /*
1141 * Translate PATH_INFO to a query.
1142 */
1143 static void
1144 parse_path_info(struct req *req, const char *path)
1145 {
1146 const char *name, *sec, *end;
1147
1148 req->isquery = 0;
1149 req->q.equal = 1;
1150 req->q.manpath = NULL;
1151 req->q.arch = NULL;
1152
1153 /* Mandatory manual page name. */
1154 if ((name = strrchr(path, '/')) == NULL)
1155 name = path;
1156 else
1157 name++;
1158
1159 /* Optional trailing section. */
1160 sec = strrchr(name, '.');
1161 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1162 req->q.query = mandoc_strndup(name, sec - name - 1);
1163 req->q.sec = mandoc_strdup(sec);
1164 } else {
1165 req->q.query = mandoc_strdup(name);
1166 req->q.sec = NULL;
1167 }
1168
1169 /* Handle the case of name[.section] only. */
1170 if (name == path)
1171 return;
1172
1173 /* Optional manpath. */
1174 end = strchr(path, '/');
1175 req->q.manpath = mandoc_strndup(path, end - path);
1176 if (validate_manpath(req, req->q.manpath)) {
1177 path = end + 1;
1178 if (name == path)
1179 return;
1180 } else {
1181 free(req->q.manpath);
1182 req->q.manpath = NULL;
1183 }
1184
1185 /* Optional section. */
1186 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1187 path += 3;
1188 end = strchr(path, '/');
1189 free(req->q.sec);
1190 req->q.sec = mandoc_strndup(path, end - path);
1191 path = end + 1;
1192 if (name == path)
1193 return;
1194 }
1195
1196 /* Optional architecture. */
1197 end = strchr(path, '/');
1198 if (end + 1 != name) {
1199 pg_error_badrequest(
1200 "You specified too many directory components.");
1201 exit(EXIT_FAILURE);
1202 }
1203 req->q.arch = mandoc_strndup(path, end - path);
1204 if (validate_arch(req->q.arch) == 0) {
1205 pg_error_badrequest(
1206 "You specified an invalid directory component.");
1207 exit(EXIT_FAILURE);
1208 }
1209 }
1210
1211 /*
1212 * Scan for indexable paths.
1213 */
1214 static void
1215 parse_manpath_conf(struct req *req)
1216 {
1217 FILE *fp;
1218 char *dp;
1219 size_t dpsz;
1220 ssize_t len;
1221
1222 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1223 warn("%s/manpath.conf", MAN_DIR);
1224 pg_error_internal();
1225 exit(EXIT_FAILURE);
1226 }
1227
1228 dp = NULL;
1229 dpsz = 0;
1230
1231 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1232 if (dp[len - 1] == '\n')
1233 dp[--len] = '\0';
1234 req->p = mandoc_realloc(req->p,
1235 (req->psz + 1) * sizeof(char *));
1236 if ( ! validate_urifrag(dp)) {
1237 warnx("%s/manpath.conf contains "
1238 "unsafe path \"%s\"", MAN_DIR, dp);
1239 pg_error_internal();
1240 exit(EXIT_FAILURE);
1241 }
1242 if (strchr(dp, '/') != NULL) {
1243 warnx("%s/manpath.conf contains "
1244 "path with slash \"%s\"", MAN_DIR, dp);
1245 pg_error_internal();
1246 exit(EXIT_FAILURE);
1247 }
1248 req->p[req->psz++] = dp;
1249 dp = NULL;
1250 dpsz = 0;
1251 }
1252 free(dp);
1253
1254 if (req->p == NULL) {
1255 warnx("%s/manpath.conf is empty", MAN_DIR);
1256 pg_error_internal();
1257 exit(EXIT_FAILURE);
1258 }
1259 }