]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
The wcwidth(3) of Plane 15 and Plane 16 Private Use Characters
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.174 2021/05/13 13:33:11 schwarze Exp $ */
2 /*
3 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the man.cgi(8) program.
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23 #include <sys/time.h>
24
25 #include <ctype.h>
26 #if HAVE_ERR
27 #include <err.h>
28 #endif
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "mandoc_aux.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "mdoc.h"
42 #include "man.h"
43 #include "mandoc_parse.h"
44 #include "main.h"
45 #include "manconf.h"
46 #include "mansearch.h"
47 #include "cgi.h"
48
/*
 * A query as passed to the search function.
 * The string members are either NULL or point to allocated memory;
 * set_query_attr() and main() release them with free().
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
59
/*
 * State of one request: the parsed query together with the list
 * of manpaths that parse_manpath_conf() reads from manpath.conf.
 */
struct	req {
	struct query	  q; /* parsed query parameters */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
66
/*
 * Tells resp_searchform() whether to add the "autofocus"
 * attribute to the query input box.
 */
enum	focus {
	FOCUS_NONE = 0, /* do not request the focus */
	FOCUS_QUERY /* put the focus on the query input box */
};
71
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);

/*
 * URI path of this script as configured at compile time.
 * Users of this variable prepend the leading slash themselves
 * and append a separating slash only if the name is not empty.
 */
static	const char	*scriptname = SCRIPT_NAME;
104
/*
 * Manual section tables.
 * sec_prios[] is indexed by the section digit minus one; when
 * pg_searchres() has to pick one page among several, the page
 * with the smaller value wins.
 * sec_numbers[] and sec_names[] are parallel arrays providing the
 * values and the labels of the section selector in the search form.
 */
static	const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};
/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
123
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
	"amd64",
	"alpha",
	"armv7",
	"arm64",
	"hppa",
	"i386",
	"landisk",
	"loongson",
	"luna88k",
	"macppc",
	"mips64",
	"octeon",
	"powerpc64",
	"riscv64",
	"sparc64",

	"amiga",
	"arc",
	"armish",
	"arm32",
	"atari",
	"aviion",
	"beagle",
	"cats",
	"hppa64",
	"hp300",
	"ia64",
	"mac68k",
	"mvme68k",
	"mvme88k",
	"mvmeppc",
	"palm",
	"pc532",
	"pegasos",
	"pmax",
	"powerpc",
	"sgi",
	"socppc",
	"solbourne",
	"sparc",
	"sun3",
	"vax",
	"wgrisc",
	"x68k",
	"zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
141
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by character entities.
 * Everything else, including non-ASCII bytes, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
168
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
182
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of the attribute is freed.  An empty string
 * is not stored; it is freed and the attribute becomes NULL.
 * In both cases, *val is NULL on return because the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
199
200 /*
201 * Parse the QUERY_STRING for key-value pairs
202 * and store the values into the query structure.
203 */
204 static void
205 parse_query_string(struct req *req, const char *qs)
206 {
207 char *key, *val;
208 size_t keysz, valsz;
209
210 req->isquery = 1;
211 req->q.manpath = NULL;
212 req->q.arch = NULL;
213 req->q.sec = NULL;
214 req->q.query = NULL;
215 req->q.equal = 1;
216
217 key = val = NULL;
218 while (*qs != '\0') {
219
220 /* Parse one key. */
221
222 keysz = strcspn(qs, "=;&");
223 key = mandoc_strndup(qs, keysz);
224 qs += keysz;
225 if (*qs != '=')
226 goto next;
227
228 /* Parse one value. */
229
230 valsz = strcspn(++qs, ";&");
231 val = mandoc_strndup(qs, valsz);
232 qs += valsz;
233
234 /* Decode and catch encoding errors. */
235
236 if ( ! (http_decode(key) && http_decode(val)))
237 goto next;
238
239 /* Handle key-value pairs. */
240
241 if ( ! strcmp(key, "query"))
242 set_query_attr(&req->q.query, &val);
243
244 else if ( ! strcmp(key, "apropos"))
245 req->q.equal = !strcmp(val, "0");
246
247 else if ( ! strcmp(key, "manpath")) {
248 #ifdef COMPAT_OLDURI
249 if ( ! strncmp(val, "OpenBSD ", 8)) {
250 val[7] = '-';
251 if ('C' == val[8])
252 val[8] = 'c';
253 }
254 #endif
255 set_query_attr(&req->q.manpath, &val);
256 }
257
258 else if ( ! (strcmp(key, "sec")
259 #ifdef COMPAT_OLDURI
260 && strcmp(key, "sektion")
261 #endif
262 )) {
263 if ( ! strcmp(val, "0"))
264 *val = '\0';
265 set_query_attr(&req->q.sec, &val);
266 }
267
268 else if ( ! strcmp(key, "arch")) {
269 if ( ! strcmp(val, "default"))
270 *val = '\0';
271 set_query_attr(&req->q.arch, &val);
272 }
273
274 /*
275 * The key must be freed in any case.
276 * The val may have been handed over to the query
277 * structure, in which case it is now NULL.
278 */
279 next:
280 free(key);
281 key = NULL;
282 free(val);
283 val = NULL;
284
285 if (*qs != '\0')
286 qs++;
287 }
288 }
289
290 /*
291 * HTTP-decode a string. The standard explanation is that this turns
292 * "%4e+foo" into "n foo" in the regular way. This is done in-place
293 * over the allocated string.
294 */
295 static int
296 http_decode(char *p)
297 {
298 char hex[3];
299 char *q;
300 int c;
301
302 hex[2] = '\0';
303
304 q = p;
305 for ( ; '\0' != *p; p++, q++) {
306 if ('%' == *p) {
307 if ('\0' == (hex[0] = *(p + 1)))
308 return 0;
309 if ('\0' == (hex[1] = *(p + 2)))
310 return 0;
311 if (1 != sscanf(hex, "%x", &c))
312 return 0;
313 if ('\0' == c)
314 return 0;
315
316 *q = (char)c;
317 p += 2;
318 } else
319 *q = '+' == *p ? ' ' : *p;
320 }
321
322 *q = '\0';
323 return 1;
324 }
325
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and the four characters "-._~" are
 * written as they are; every other byte is written as a percent
 * sign followed by two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
337
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line built from code and msg is written first unless
 * the code is 200; msg is not used in that case.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 fields[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(fields, stdout);
	fflush(stdout);
}
354
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 *
 * The stdio buffer is flushed first because the data is written
 * with write(2) directly and must not overtake buffered output.
 * Since write(2) may transfer fewer bytes than requested, each
 * chunk is written in a loop until it is complete.  Copying is
 * given up quietly when reading or writing fails.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw != -1)
				continue;
			if (errno != EINTR)
				goto done;
			/* Interrupted before writing anything: retry. */
			nw = 0;
		}
	}
done:
	close(fd);
}
369
370 static void
371 resp_begin_html(int code, const char *msg, const char *file)
372 {
373 char *cp;
374
375 resp_begin_http(code, msg);
376
377 printf("<!DOCTYPE html>\n"
378 "<html>\n"
379 "<head>\n"
380 " <meta charset=\"UTF-8\"/>\n"
381 " <meta name=\"viewport\""
382 " content=\"width=device-width, initial-scale=1.0\">\n"
383 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
384 " type=\"text/css\" media=\"all\">\n"
385 " <title>",
386 CSS_DIR);
387 if (file != NULL) {
388 if ((cp = strrchr(file, '/')) != NULL)
389 file = cp + 1;
390 if ((cp = strrchr(file, '.')) != NULL) {
391 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
392 } else
393 printf("%s - ", file);
394 }
395 printf("%s</title>\n"
396 "</head>\n"
397 "<body>\n",
398 CUSTOMIZE_TITLE);
399
400 resp_copy(MAN_DIR "/header.html");
401 }
402
403 static void
404 resp_end_html(void)
405 {
406
407 resp_copy(MAN_DIR "/footer.html");
408
409 puts("</body>\n"
410 "</html>");
411 }
412
413 static void
414 resp_searchform(const struct req *req, enum focus focus)
415 {
416 int i;
417
418 printf("<form action=\"/%s\" method=\"get\" "
419 "autocomplete=\"off\" autocapitalize=\"none\">\n"
420 " <fieldset>\n"
421 " <legend>Manual Page Search Parameters</legend>\n",
422 scriptname);
423
424 /* Write query input box. */
425
426 printf(" <input type=\"search\" name=\"query\" value=\"");
427 if (req->q.query != NULL)
428 html_print(req->q.query);
429 printf( "\" size=\"40\"");
430 if (focus == FOCUS_QUERY)
431 printf(" autofocus");
432 puts(">");
433
434 /* Write submission buttons. */
435
436 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
437 "man</button>\n"
438 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
439 "apropos</button>\n"
440 " <br/>\n");
441
442 /* Write section selector. */
443
444 puts(" <select name=\"sec\">");
445 for (i = 0; i < sec_MAX; i++) {
446 printf(" <option value=\"%s\"", sec_numbers[i]);
447 if (NULL != req->q.sec &&
448 0 == strcmp(sec_numbers[i], req->q.sec))
449 printf(" selected=\"selected\"");
450 printf(">%s</option>\n", sec_names[i]);
451 }
452 puts(" </select>");
453
454 /* Write architecture selector. */
455
456 printf( " <select name=\"arch\">\n"
457 " <option value=\"default\"");
458 if (NULL == req->q.arch)
459 printf(" selected=\"selected\"");
460 puts(">All Architectures</option>");
461 for (i = 0; i < arch_MAX; i++) {
462 printf(" <option");
463 if (NULL != req->q.arch &&
464 0 == strcmp(arch_names[i], req->q.arch))
465 printf(" selected=\"selected\"");
466 printf(">%s</option>\n", arch_names[i]);
467 }
468 puts(" </select>");
469
470 /* Write manpath selector. */
471
472 if (req->psz > 1) {
473 puts(" <select name=\"manpath\">");
474 for (i = 0; i < (int)req->psz; i++) {
475 printf(" <option");
476 if (strcmp(req->q.manpath, req->p[i]) == 0)
477 printf(" selected=\"selected\"");
478 printf(">");
479 html_print(req->p[i]);
480 puts("</option>");
481 }
482 puts(" </select>");
483 }
484
485 puts(" </fieldset>\n"
486 "</form>");
487 }
488
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/', and '_'.
 * Returns 1 if so, including for the empty string, or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		/* *cp is not NUL here, so the terminator cannot match. */
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
502
503 static int
504 validate_manpath(const struct req *req, const char* manpath)
505 {
506 size_t i;
507
508 for (i = 0; i < req->psz; i++)
509 if ( ! strcmp(manpath, req->p[i]))
510 return 1;
511
512 return 0;
513 }
514
515 static int
516 validate_arch(const char *arch)
517 {
518 int i;
519
520 for (i = 0; i < arch_MAX; i++)
521 if (strcmp(arch, arch_names[i]) == 0)
522 return 1;
523
524 return 0;
525 }
526
/*
 * Check a file name relative to a manpath.
 * After an optional leading "./", the name must start with "man"
 * or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could climb up the directory tree. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
537
538 static void
539 pg_index(const struct req *req)
540 {
541
542 resp_begin_html(200, NULL, NULL);
543 resp_searchform(req, FOCUS_QUERY);
544 printf("<p>\n"
545 "This web interface is documented in the\n"
546 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
547 "manual, and the\n"
548 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
549 "manual explains the query syntax.\n"
550 "</p>\n",
551 scriptname, *scriptname == '\0' ? "" : "/",
552 scriptname, *scriptname == '\0' ? "" : "/");
553 resp_end_html();
554 }
555
556 static void
557 pg_noresult(const struct req *req, int code, const char *http_msg,
558 const char *user_msg)
559 {
560 resp_begin_html(code, http_msg, NULL);
561 resp_searchform(req, FOCUS_QUERY);
562 puts("<p>");
563 puts(user_msg);
564 puts("</p>");
565 resp_end_html();
566 }
567
568 static void
569 pg_error_badrequest(const char *msg)
570 {
571
572 resp_begin_html(400, "Bad Request", NULL);
573 puts("<h1>Bad Request</h1>\n"
574 "<p>\n");
575 puts(msg);
576 printf("Try again from the\n"
577 "<a href=\"/%s\">main page</a>.\n"
578 "</p>", scriptname);
579 resp_end_html();
580 }
581
/*
 * Write a "500 Internal Server Error" page.
 * No details are given to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
589
590 static void
591 pg_redirect(const struct req *req, const char *name)
592 {
593 printf("Status: 303 See Other\r\n"
594 "Location: /");
595 if (*scriptname != '\0')
596 printf("%s/", scriptname);
597 if (strcmp(req->q.manpath, req->p[0]))
598 printf("%s/", req->q.manpath);
599 if (req->q.arch != NULL)
600 printf("%s/", req->q.arch);
601 http_encode(name);
602 if (req->q.sec != NULL) {
603 putchar('.');
604 http_encode(req->q.sec);
605 }
606 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
607 }
608
609 static void
610 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
611 {
612 char *arch, *archend;
613 const char *sec;
614 size_t i, iuse;
615 int archprio, archpriouse;
616 int prio, priouse;
617
618 for (i = 0; i < sz; i++) {
619 if (validate_filename(r[i].file))
620 continue;
621 warnx("invalid filename %s in %s database",
622 r[i].file, req->q.manpath);
623 pg_error_internal();
624 return;
625 }
626
627 if (req->isquery && sz == 1) {
628 /*
629 * If we have just one result, then jump there now
630 * without any delay.
631 */
632 printf("Status: 303 See Other\r\n"
633 "Location: /");
634 if (*scriptname != '\0')
635 printf("%s/", scriptname);
636 if (strcmp(req->q.manpath, req->p[0]))
637 printf("%s/", req->q.manpath);
638 printf("%s\r\n"
639 "Content-Type: text/html; charset=utf-8\r\n\r\n",
640 r[0].file);
641 return;
642 }
643
644 /*
645 * In man(1) mode, show one of the pages
646 * even if more than one is found.
647 */
648
649 iuse = 0;
650 if (req->q.equal || sz == 1) {
651 priouse = 20;
652 archpriouse = 3;
653 for (i = 0; i < sz; i++) {
654 sec = r[i].file;
655 sec += strcspn(sec, "123456789");
656 if (sec[0] == '\0')
657 continue;
658 prio = sec_prios[sec[0] - '1'];
659 if (sec[1] != '/')
660 prio += 10;
661 if (req->q.arch == NULL) {
662 archprio =
663 ((arch = strchr(sec + 1, '/'))
664 == NULL) ? 3 :
665 ((archend = strchr(arch + 1, '/'))
666 == NULL) ? 0 :
667 strncmp(arch, "amd64/",
668 archend - arch) ? 2 : 1;
669 if (archprio < archpriouse) {
670 archpriouse = archprio;
671 priouse = prio;
672 iuse = i;
673 continue;
674 }
675 if (archprio > archpriouse)
676 continue;
677 }
678 if (prio >= priouse)
679 continue;
680 priouse = prio;
681 iuse = i;
682 }
683 resp_begin_html(200, NULL, r[iuse].file);
684 } else
685 resp_begin_html(200, NULL, NULL);
686
687 resp_searchform(req,
688 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
689
690 if (sz > 1) {
691 puts("<table class=\"results\">");
692 for (i = 0; i < sz; i++) {
693 printf(" <tr>\n"
694 " <td>"
695 "<a class=\"Xr\" href=\"/");
696 if (*scriptname != '\0')
697 printf("%s/", scriptname);
698 if (strcmp(req->q.manpath, req->p[0]))
699 printf("%s/", req->q.manpath);
700 printf("%s\">", r[i].file);
701 html_print(r[i].names);
702 printf("</a></td>\n"
703 " <td><span class=\"Nd\">");
704 html_print(r[i].output);
705 puts("</span></td>\n"
706 " </tr>");
707 }
708 puts("</table>");
709 }
710
711 if (req->q.equal || sz == 1) {
712 puts("<hr>");
713 resp_show(req, r[iuse].file);
714 }
715
716 resp_end_html();
717 }
718
/*
 * Write a preformatted manual page as HTML.
 *
 * The file is read line by line.  Backspace overstriking is
 * translated as follows: an underscore, a backspace, and a
 * character produce that character in italics; certain pairs of
 * overstruck symbols produce a plain '*' or '+'; any other
 * overstrike produces the second character in bold.
 * All text is HTML-escaped.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair are compared,
			 * that is p[i] and p[i + 2]; comparing
			 * p[i + 1] would never match because it
			 * is known to be a backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
855
856 static void
857 resp_format(const struct req *req, const char *file)
858 {
859 struct manoutput conf;
860 struct mparse *mp;
861 struct roff_meta *meta;
862 void *vp;
863 int fd;
864 int usepath;
865
866 if (-1 == (fd = open(file, O_RDONLY, 0))) {
867 puts("<p>You specified an invalid manual file.</p>");
868 return;
869 }
870
871 mchars_alloc();
872 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
873 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
874 mparse_readfd(mp, fd, file);
875 close(fd);
876 meta = mparse_result(mp);
877
878 memset(&conf, 0, sizeof(conf));
879 conf.fragment = 1;
880 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
881 usepath = strcmp(req->q.manpath, req->p[0]);
882 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
883 scriptname, *scriptname == '\0' ? "" : "/",
884 usepath ? req->q.manpath : "", usepath ? "/" : "");
885
886 vp = html_alloc(&conf);
887 if (meta->macroset == MACROSET_MDOC)
888 html_mdoc(vp, meta);
889 else
890 html_man(vp, meta);
891
892 html_free(vp);
893 mparse_free(mp);
894 mchars_free();
895 free(conf.man);
896 free(conf.style);
897 }
898
/*
 * Write the content of one manual page file.
 * After an optional leading "./", a name starting with 'c' is
 * handled as a preformatted page and anything else as a
 * manual page source file.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (name[0] == 'c') {
		resp_catman(req, name);
		return;
	}
	resp_format(req, name);
}
911
912 static void
913 pg_show(struct req *req, const char *fullpath)
914 {
915 char *manpath;
916 const char *file;
917
918 if ((file = strchr(fullpath, '/')) == NULL) {
919 pg_error_badrequest(
920 "You did not specify a page to show.");
921 return;
922 }
923 manpath = mandoc_strndup(fullpath, file - fullpath);
924 file++;
925
926 if ( ! validate_manpath(req, manpath)) {
927 pg_error_badrequest(
928 "You specified an invalid manpath.");
929 free(manpath);
930 return;
931 }
932
933 /*
934 * Begin by chdir()ing into the manpath.
935 * This way we can pick up the database files, which are
936 * relative to the manpath root.
937 */
938
939 if (chdir(manpath) == -1) {
940 warn("chdir %s", manpath);
941 pg_error_internal();
942 free(manpath);
943 return;
944 }
945 free(manpath);
946
947 if ( ! validate_filename(file)) {
948 pg_error_badrequest(
949 "You specified an invalid manual file.");
950 return;
951 }
952
953 resp_begin_html(200, NULL, file);
954 resp_searchform(req, FOCUS_NONE);
955 resp_show(req, file);
956 resp_end_html();
957 }
958
959 static void
960 pg_search(const struct req *req)
961 {
962 struct mansearch search;
963 struct manpaths paths;
964 struct manpage *res;
965 char **argv;
966 char *query, *rp, *wp;
967 size_t ressz;
968 int argc;
969
970 /*
971 * Begin by chdir()ing into the root of the manpath.
972 * This way we can pick up the database files, which are
973 * relative to the manpath root.
974 */
975
976 if (chdir(req->q.manpath) == -1) {
977 warn("chdir %s", req->q.manpath);
978 pg_error_internal();
979 return;
980 }
981
982 search.arch = req->q.arch;
983 search.sec = req->q.sec;
984 search.outkey = "Nd";
985 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
986 search.firstmatch = 1;
987
988 paths.sz = 1;
989 paths.paths = mandoc_malloc(sizeof(char *));
990 paths.paths[0] = mandoc_strdup(".");
991
992 /*
993 * Break apart at spaces with backslash-escaping.
994 */
995
996 argc = 0;
997 argv = NULL;
998 rp = query = mandoc_strdup(req->q.query);
999 for (;;) {
1000 while (isspace((unsigned char)*rp))
1001 rp++;
1002 if (*rp == '\0')
1003 break;
1004 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1005 argv[argc++] = wp = rp;
1006 for (;;) {
1007 if (isspace((unsigned char)*rp)) {
1008 *wp = '\0';
1009 rp++;
1010 break;
1011 }
1012 if (rp[0] == '\\' && rp[1] != '\0')
1013 rp++;
1014 if (wp != rp)
1015 *wp = *rp;
1016 if (*rp == '\0')
1017 break;
1018 wp++;
1019 rp++;
1020 }
1021 }
1022
1023 res = NULL;
1024 ressz = 0;
1025 if (req->isquery && req->q.equal && argc == 1)
1026 pg_redirect(req, argv[0]);
1027 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1028 pg_noresult(req, 400, "Bad Request",
1029 "You entered an invalid query.");
1030 else if (ressz == 0)
1031 pg_noresult(req, 404, "Not Found", "No results found.");
1032 else
1033 pg_searchres(req, res, ressz);
1034
1035 free(query);
1036 mansearch_free(res, ressz);
1037 free(paths.paths[0]);
1038 free(paths.paths);
1039 }
1040
1041 int
1042 main(void)
1043 {
1044 struct req req;
1045 struct itimerval itimer;
1046 const char *path;
1047 const char *querystring;
1048 int i;
1049
1050 #if HAVE_PLEDGE
1051 /*
1052 * The "rpath" pledge could be revoked after mparse_readfd()
1053 * if the file desciptor to "/footer.html" would be opened
1054 * up front, but it's probably not worth the complication
1055 * of the code it would cause: it would require scattering
1056 * pledge() calls in multiple low-level resp_*() functions.
1057 */
1058
1059 if (pledge("stdio rpath", NULL) == -1) {
1060 warn("pledge");
1061 pg_error_internal();
1062 return EXIT_FAILURE;
1063 }
1064 #endif
1065
1066 /* Poor man's ReDoS mitigation. */
1067
1068 itimer.it_value.tv_sec = 2;
1069 itimer.it_value.tv_usec = 0;
1070 itimer.it_interval.tv_sec = 2;
1071 itimer.it_interval.tv_usec = 0;
1072 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1073 warn("setitimer");
1074 pg_error_internal();
1075 return EXIT_FAILURE;
1076 }
1077
1078 /*
1079 * First we change directory into the MAN_DIR so that
1080 * subsequent scanning for manpath directories is rooted
1081 * relative to the same position.
1082 */
1083
1084 if (chdir(MAN_DIR) == -1) {
1085 warn("MAN_DIR: %s", MAN_DIR);
1086 pg_error_internal();
1087 return EXIT_FAILURE;
1088 }
1089
1090 memset(&req, 0, sizeof(struct req));
1091 req.q.equal = 1;
1092 parse_manpath_conf(&req);
1093
1094 /* Parse the path info and the query string. */
1095
1096 if ((path = getenv("PATH_INFO")) == NULL)
1097 path = "";
1098 else if (*path == '/')
1099 path++;
1100
1101 if (*path != '\0') {
1102 parse_path_info(&req, path);
1103 if (req.q.manpath == NULL || req.q.sec == NULL ||
1104 *req.q.query == '\0' || access(path, F_OK) == -1)
1105 path = "";
1106 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1107 parse_query_string(&req, querystring);
1108
1109 /* Validate parsed data and add defaults. */
1110
1111 if (req.q.manpath == NULL)
1112 req.q.manpath = mandoc_strdup(req.p[0]);
1113 else if ( ! validate_manpath(&req, req.q.manpath)) {
1114 pg_error_badrequest(
1115 "You specified an invalid manpath.");
1116 return EXIT_FAILURE;
1117 }
1118
1119 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1120 pg_error_badrequest(
1121 "You specified an invalid architecture.");
1122 return EXIT_FAILURE;
1123 }
1124
1125 /* Dispatch to the three different pages. */
1126
1127 if ('\0' != *path)
1128 pg_show(&req, path);
1129 else if (NULL != req.q.query)
1130 pg_search(&req);
1131 else
1132 pg_index(&req);
1133
1134 free(req.q.manpath);
1135 free(req.q.arch);
1136 free(req.q.sec);
1137 free(req.q.query);
1138 for (i = 0; i < (int)req.psz; i++)
1139 free(req.p[i]);
1140 free(req.p);
1141 return EXIT_SUCCESS;
1142 }
1143
1144 /*
1145 * Translate PATH_INFO to a query.
1146 */
1147 static void
1148 parse_path_info(struct req *req, const char *path)
1149 {
1150 const char *name, *sec, *end;
1151
1152 req->isquery = 0;
1153 req->q.equal = 1;
1154 req->q.manpath = NULL;
1155 req->q.arch = NULL;
1156
1157 /* Mandatory manual page name. */
1158 if ((name = strrchr(path, '/')) == NULL)
1159 name = path;
1160 else
1161 name++;
1162
1163 /* Optional trailing section. */
1164 sec = strrchr(name, '.');
1165 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1166 req->q.query = mandoc_strndup(name, sec - name - 1);
1167 req->q.sec = mandoc_strdup(sec);
1168 } else {
1169 req->q.query = mandoc_strdup(name);
1170 req->q.sec = NULL;
1171 }
1172
1173 /* Handle the case of name[.section] only. */
1174 if (name == path)
1175 return;
1176
1177 /* Optional manpath. */
1178 end = strchr(path, '/');
1179 req->q.manpath = mandoc_strndup(path, end - path);
1180 if (validate_manpath(req, req->q.manpath)) {
1181 path = end + 1;
1182 if (name == path)
1183 return;
1184 } else {
1185 free(req->q.manpath);
1186 req->q.manpath = NULL;
1187 }
1188
1189 /* Optional section. */
1190 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1191 path += 3;
1192 end = strchr(path, '/');
1193 free(req->q.sec);
1194 req->q.sec = mandoc_strndup(path, end - path);
1195 path = end + 1;
1196 if (name == path)
1197 return;
1198 }
1199
1200 /* Optional architecture. */
1201 end = strchr(path, '/');
1202 if (end + 1 != name) {
1203 pg_error_badrequest(
1204 "You specified too many directory components.");
1205 exit(EXIT_FAILURE);
1206 }
1207 req->q.arch = mandoc_strndup(path, end - path);
1208 if (validate_arch(req->q.arch) == 0) {
1209 pg_error_badrequest(
1210 "You specified an invalid directory component.");
1211 exit(EXIT_FAILURE);
1212 }
1213 }
1214
1215 /*
1216 * Scan for indexable paths.
1217 */
1218 static void
1219 parse_manpath_conf(struct req *req)
1220 {
1221 FILE *fp;
1222 char *dp;
1223 size_t dpsz;
1224 ssize_t len;
1225
1226 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1227 warn("%s/manpath.conf", MAN_DIR);
1228 pg_error_internal();
1229 exit(EXIT_FAILURE);
1230 }
1231
1232 dp = NULL;
1233 dpsz = 0;
1234
1235 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1236 if (dp[len - 1] == '\n')
1237 dp[--len] = '\0';
1238 req->p = mandoc_realloc(req->p,
1239 (req->psz + 1) * sizeof(char *));
1240 if ( ! validate_urifrag(dp)) {
1241 warnx("%s/manpath.conf contains "
1242 "unsafe path \"%s\"", MAN_DIR, dp);
1243 pg_error_internal();
1244 exit(EXIT_FAILURE);
1245 }
1246 if (strchr(dp, '/') != NULL) {
1247 warnx("%s/manpath.conf contains "
1248 "path with slash \"%s\"", MAN_DIR, dp);
1249 pg_error_internal();
1250 exit(EXIT_FAILURE);
1251 }
1252 req->p[req->psz++] = dp;
1253 dp = NULL;
1254 dpsz = 0;
1255 }
1256 free(dp);
1257
1258 if (req->p == NULL) {
1259 warnx("%s/manpath.conf is empty", MAN_DIR);
1260 pg_error_internal();
1261 exit(EXIT_FAILURE);
1262 }
1263 }