]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
271f9b0a7ff4cc6d60661cc5fbc69f33122c6c76
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.178 2022/07/05 14:04:25 schwarze Exp $ */
2 /*
3 * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Implementation of the man.cgi(8) program.
20 */
21 #include "config.h"
22
23 #include <sys/types.h>
24 #include <sys/time.h>
25
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <limits.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc_aux.h"
40 #include "mandoc.h"
41 #include "roff.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "mandoc_parse.h"
45 #include "main.h"
46 #include "manconf.h"
47 #include "mansearch.h"
48 #include "cgi.h"
49
/*
 * The search parameters of one request,
 * in the form handed on to the search function.
 */
struct query {
	char	*manpath;	/* requested manual directory */
	char	*arch;		/* requested architecture */
	char	*sec;		/* requested manual section */
	char	*query;		/* query expression, still unparsed */
	int	 equal;		/* nonzero: whole names, not substrings */
};
60
61 struct req {
62 struct query q;
63 char **p; /* array of available manpaths */
64 size_t psz; /* number of available manpaths */
65 int isquery; /* QUERY_STRING used, not PATH_INFO */
66 };
67
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no element is focused */
	FOCUS_QUERY		/* focus the query input box */
};
72
/* Forward declarations of all file-local functions. */
static void	 html_print(const char *p);
static void	 html_putchar(char c);
static int	 http_decode(char *p);
static void	 http_encode(const char *p);
static void	 parse_manpath_conf(struct req *req);
static void	 parse_path_info(struct req *req, const char *path);
static void	 parse_query_string(struct req *req, const char *qs);
static void	 pg_error_badrequest(const char *msg);
static void	 pg_error_internal(void);
static void	 pg_index(const struct req *req);
static void	 pg_noresult(const struct req *req, int code,
			const char *http_msg, const char *user_msg);
static void	 pg_redirect(const struct req *req, const char *name);
static void	 pg_search(const struct req *req);
static void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static void	 pg_show(struct req *req, const char *fullpath);
static int	 resp_begin_html(int code, const char *msg,
			const char *file);
static void	 resp_begin_http(int code, const char *msg);
static void	 resp_catman(const struct req *req, const char *file);
static int	 resp_copy(const char *element, const char *filename);
static void	 resp_end_html(void);
static void	 resp_format(const struct req *req, const char *file);
static void	 resp_searchform(const struct req *req, enum focus focus);
static void	 resp_show(const struct req *req, const char *file);
static void	 set_query_attr(char **attr, char **val);
static int	 validate_arch(const char *arch);
static int	 validate_filename(const char *file);
static int	 validate_manpath(const struct req *req,
			const char *manpath);
static int	 validate_urifrag(const char *frag);
103
104 static const char *scriptname = SCRIPT_NAME;
105
106 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
107 static const char *const sec_numbers[] = {
108 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
109 };
110 static const char *const sec_names[] = {
111 "All Sections",
112 "1 - General Commands",
113 "2 - System Calls",
114 "3 - Library Functions",
115 "3p - Perl Library",
116 "4 - Device Drivers",
117 "5 - File Formats",
118 "6 - Games",
119 "7 - Miscellaneous Information",
120 "8 - System Manager\'s Manual",
121 "9 - Kernel Developer\'s Manual"
122 };
123 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
124
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",	"loongson",
	"luna88k",	"macppc",	"mips64",	"octeon",
	"powerpc64",	"riscv64",	"sparc64",

	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"sgi",		"socppc",
	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
142
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * Every other byte, including non-ASCII, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	entity = c == '"' ? "&quot;" :
	    c == '&' ? "&amp;" :
	    c == '>' ? "&gt;" :
	    c == '<' ? "&lt;" : NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
169
/*
 * Write a string to standard output, HTML-escaping each
 * character with html_putchar().  A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
183
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
200
201 /*
202 * Parse the QUERY_STRING for key-value pairs
203 * and store the values into the query structure.
204 */
205 static void
206 parse_query_string(struct req *req, const char *qs)
207 {
208 char *key, *val;
209 size_t keysz, valsz;
210
211 req->isquery = 1;
212 req->q.manpath = NULL;
213 req->q.arch = NULL;
214 req->q.sec = NULL;
215 req->q.query = NULL;
216 req->q.equal = 1;
217
218 key = val = NULL;
219 while (*qs != '\0') {
220
221 /* Parse one key. */
222
223 keysz = strcspn(qs, "=;&");
224 key = mandoc_strndup(qs, keysz);
225 qs += keysz;
226 if (*qs != '=')
227 goto next;
228
229 /* Parse one value. */
230
231 valsz = strcspn(++qs, ";&");
232 val = mandoc_strndup(qs, valsz);
233 qs += valsz;
234
235 /* Decode and catch encoding errors. */
236
237 if ( ! (http_decode(key) && http_decode(val)))
238 goto next;
239
240 /* Handle key-value pairs. */
241
242 if ( ! strcmp(key, "query"))
243 set_query_attr(&req->q.query, &val);
244
245 else if ( ! strcmp(key, "apropos"))
246 req->q.equal = !strcmp(val, "0");
247
248 else if ( ! strcmp(key, "manpath")) {
249 #ifdef COMPAT_OLDURI
250 if ( ! strncmp(val, "OpenBSD ", 8)) {
251 val[7] = '-';
252 if ('C' == val[8])
253 val[8] = 'c';
254 }
255 #endif
256 set_query_attr(&req->q.manpath, &val);
257 }
258
259 else if ( ! (strcmp(key, "sec")
260 #ifdef COMPAT_OLDURI
261 && strcmp(key, "sektion")
262 #endif
263 )) {
264 if ( ! strcmp(val, "0"))
265 *val = '\0';
266 set_query_attr(&req->q.sec, &val);
267 }
268
269 else if ( ! strcmp(key, "arch")) {
270 if ( ! strcmp(val, "default"))
271 *val = '\0';
272 set_query_attr(&req->q.arch, &val);
273 }
274
275 /*
276 * The key must be freed in any case.
277 * The val may have been handed over to the query
278 * structure, in which case it is now NULL.
279 */
280 next:
281 free(key);
282 key = NULL;
283 free(val);
284 val = NULL;
285
286 if (*qs != '\0')
287 qs++;
288 }
289 }
290
/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * Each '+' turns into a space and each "%XX" into the byte with
 * hexadecimal value XX.  The result is never longer than the input.
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if it encodes a NUL byte;
 * in that case, the content of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	long	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hex digits.  The short-circuit
		 * evaluation keeps us from reading past a terminating
		 * NUL, and signs, blanks and trailing garbage, which
		 * a scanf-style conversion would let through, are
		 * rejected.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;

		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
326
/*
 * Write a string to standard output, percent-encoding every byte
 * that is neither alphanumeric nor one of '-', '.', '_', '~'.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
338
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
355
/*
 * Copy the content of a file verbatim to standard output.
 * If element is not NULL, an opening tag of that name is written
 * first; closing it is the job of the caller.
 *
 * Returns 0 without writing anything if the file cannot be opened,
 * or 1 if it was opened.  Read and write errors end the copy early
 * but do not change the return value, because by then the opening
 * tag has already been sent.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* The data below bypasses stdio, so flush what is buffered. */

	fflush(stdout);

	for (;;) {
		nr = read(fd, buf, sizeof(buf));
		if (nr == -1 && errno == EINTR)
			continue;
		if (nr <= 0)
			break;

		/* write(2) may accept fewer bytes than requested. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno != EINTR)
					goto done;
				nw = 0;
			}
		}
	}
done:
	close(fd);
	return 1;
}
374
375 static int
376 resp_begin_html(int code, const char *msg, const char *file)
377 {
378 const char *name, *sec, *cp;
379 int namesz, secsz;
380
381 resp_begin_http(code, msg);
382
383 printf("<!DOCTYPE html>\n"
384 "<html>\n"
385 "<head>\n"
386 " <meta charset=\"UTF-8\"/>\n"
387 " <meta name=\"viewport\""
388 " content=\"width=device-width, initial-scale=1.0\">\n"
389 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
390 " type=\"text/css\" media=\"all\">\n"
391 " <title>",
392 CSS_DIR);
393 if (file != NULL) {
394 cp = strrchr(file, '/');
395 name = cp == NULL ? file : cp + 1;
396 cp = strrchr(name, '.');
397 namesz = cp == NULL ? strlen(name) : cp - name;
398 sec = NULL;
399 if (cp != NULL && cp[1] != '0') {
400 sec = cp + 1;
401 secsz = strlen(sec);
402 } else if (name - file > 1) {
403 for (cp = name - 2; cp >= file; cp--) {
404 if (*cp < '1' || *cp > '9')
405 continue;
406 sec = cp;
407 secsz = name - cp - 1;
408 break;
409 }
410 }
411 printf("%.*s", namesz, name);
412 if (sec != NULL)
413 printf("(%.*s)", secsz, sec);
414 fputs(" - ", stdout);
415 }
416 printf("%s</title>\n"
417 "</head>\n"
418 "<body>\n",
419 CUSTOMIZE_TITLE);
420
421 return resp_copy("header", MAN_DIR "/header.html");
422 }
423
424 static void
425 resp_end_html(void)
426 {
427 if (resp_copy("footer", MAN_DIR "/footer.html"))
428 puts("</footer>");
429
430 puts("</body>\n"
431 "</html>");
432 }
433
434 static void
435 resp_searchform(const struct req *req, enum focus focus)
436 {
437 int i;
438
439 printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
440 "autocomplete=\"off\" autocapitalize=\"none\">\n"
441 " <fieldset>\n"
442 " <legend>Manual Page Search Parameters</legend>\n",
443 scriptname);
444
445 /* Write query input box. */
446
447 printf(" <input type=\"search\" name=\"query\" value=\"");
448 if (req->q.query != NULL)
449 html_print(req->q.query);
450 printf( "\" size=\"40\"");
451 if (focus == FOCUS_QUERY)
452 printf(" autofocus");
453 puts(">");
454
455 /* Write submission buttons. */
456
457 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
458 "man</button>\n"
459 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
460 "apropos</button>\n"
461 " <br/>\n");
462
463 /* Write section selector. */
464
465 puts(" <select name=\"sec\" aria-label=\"manual section\">");
466 for (i = 0; i < sec_MAX; i++) {
467 printf(" <option value=\"%s\"", sec_numbers[i]);
468 if (NULL != req->q.sec &&
469 0 == strcmp(sec_numbers[i], req->q.sec))
470 printf(" selected=\"selected\"");
471 printf(">%s</option>\n", sec_names[i]);
472 }
473 puts(" </select>");
474
475 /* Write architecture selector. */
476
477 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n"
478 " <option value=\"default\"");
479 if (NULL == req->q.arch)
480 printf(" selected=\"selected\"");
481 puts(">All Architectures</option>");
482 for (i = 0; i < arch_MAX; i++) {
483 printf(" <option");
484 if (NULL != req->q.arch &&
485 0 == strcmp(arch_names[i], req->q.arch))
486 printf(" selected=\"selected\"");
487 printf(">%s</option>\n", arch_names[i]);
488 }
489 puts(" </select>");
490
491 /* Write manpath selector. */
492
493 if (req->psz > 1) {
494 puts(" <select name=\"manpath\">");
495 for (i = 0; i < (int)req->psz; i++) {
496 printf(" <option");
497 if (strcmp(req->q.manpath, req->p[i]) == 0)
498 printf(" selected=\"selected\"");
499 printf(">");
500 html_print(req->p[i]);
501 puts("</option>");
502 }
503 puts(" </select>");
504 }
505
506 puts(" </fieldset>\n"
507 "</form>");
508 }
509
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
523
524 static int
525 validate_manpath(const struct req *req, const char* manpath)
526 {
527 size_t i;
528
529 for (i = 0; i < req->psz; i++)
530 if ( ! strcmp(manpath, req->p[i]))
531 return 1;
532
533 return 0;
534 }
535
536 static int
537 validate_arch(const char *arch)
538 {
539 int i;
540
541 for (i = 0; i < arch_MAX; i++)
542 if (strcmp(arch, arch_names[i]) == 0)
543 return 1;
544
545 return 0;
546 }
547
/*
 * Check a manual page file name relative to the manpath.
 * After an optional leading "./", it has to start with "man"
 * or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
558
559 static void
560 pg_index(const struct req *req)
561 {
562 if (resp_begin_html(200, NULL, NULL) == 0)
563 puts("<header>");
564 resp_searchform(req, FOCUS_QUERY);
565 printf("</header>\n"
566 "<main>\n"
567 "<p role=\"doc-notice\" aria-label=\"usage\">\n"
568 "This web interface is documented in the\n"
569 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
570 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
571 "manual, and the\n"
572 "<a class=\"Xr\" href=\"/%s%sapropos.1\""
573 " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
574 "manual explains the query syntax.\n"
575 "</p>\n"
576 "</main>\n",
577 scriptname, *scriptname == '\0' ? "" : "/",
578 scriptname, *scriptname == '\0' ? "" : "/");
579 resp_end_html();
580 }
581
582 static void
583 pg_noresult(const struct req *req, int code, const char *http_msg,
584 const char *user_msg)
585 {
586 if (resp_begin_html(code, http_msg, NULL) == 0)
587 puts("<header>");
588 resp_searchform(req, FOCUS_QUERY);
589 puts("</header>");
590 puts("<main>");
591 puts("<p role=\"doc-notice\" aria-label=\"no result\">");
592 puts(user_msg);
593 puts("</p>");
594 puts("</main>");
595 resp_end_html();
596 }
597
598 static void
599 pg_error_badrequest(const char *msg)
600 {
601 if (resp_begin_html(400, "Bad Request", NULL))
602 puts("</header>");
603 puts("<main>\n"
604 "<h1>Bad Request</h1>\n"
605 "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
606 puts(msg);
607 printf("Try again from the\n"
608 "<a href=\"/%s\">main page</a>.\n"
609 "</p>\n"
610 "</main>\n", scriptname);
611 resp_end_html();
612 }
613
/*
 * Write a "500 Internal Server Error" page.
 * Details are not shown to the user.
 */
static void
pg_error_internal(void)
{
	if (resp_begin_html(500, "Internal Server Error", NULL) != 0)
		puts("</header>");
	puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
	resp_end_html();
}
622
623 static void
624 pg_redirect(const struct req *req, const char *name)
625 {
626 printf("Status: 303 See Other\r\n"
627 "Location: /");
628 if (*scriptname != '\0')
629 printf("%s/", scriptname);
630 if (strcmp(req->q.manpath, req->p[0]))
631 printf("%s/", req->q.manpath);
632 if (req->q.arch != NULL)
633 printf("%s/", req->q.arch);
634 http_encode(name);
635 if (req->q.sec != NULL) {
636 putchar('.');
637 http_encode(req->q.sec);
638 }
639 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
640 }
641
642 static void
643 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
644 {
645 char *arch, *archend;
646 const char *sec;
647 size_t i, iuse;
648 int archprio, archpriouse;
649 int prio, priouse;
650 int have_header;
651
652 for (i = 0; i < sz; i++) {
653 if (validate_filename(r[i].file))
654 continue;
655 warnx("invalid filename %s in %s database",
656 r[i].file, req->q.manpath);
657 pg_error_internal();
658 return;
659 }
660
661 if (req->isquery && sz == 1) {
662 /*
663 * If we have just one result, then jump there now
664 * without any delay.
665 */
666 printf("Status: 303 See Other\r\n"
667 "Location: /");
668 if (*scriptname != '\0')
669 printf("%s/", scriptname);
670 if (strcmp(req->q.manpath, req->p[0]))
671 printf("%s/", req->q.manpath);
672 printf("%s\r\n"
673 "Content-Type: text/html; charset=utf-8\r\n\r\n",
674 r[0].file);
675 return;
676 }
677
678 /*
679 * In man(1) mode, show one of the pages
680 * even if more than one is found.
681 */
682
683 iuse = 0;
684 if (req->q.equal || sz == 1) {
685 priouse = 20;
686 archpriouse = 3;
687 for (i = 0; i < sz; i++) {
688 sec = r[i].file;
689 sec += strcspn(sec, "123456789");
690 if (sec[0] == '\0')
691 continue;
692 prio = sec_prios[sec[0] - '1'];
693 if (sec[1] != '/')
694 prio += 10;
695 if (req->q.arch == NULL) {
696 archprio =
697 ((arch = strchr(sec + 1, '/'))
698 == NULL) ? 3 :
699 ((archend = strchr(arch + 1, '/'))
700 == NULL) ? 0 :
701 strncmp(arch, "amd64/",
702 archend - arch) ? 2 : 1;
703 if (archprio < archpriouse) {
704 archpriouse = archprio;
705 priouse = prio;
706 iuse = i;
707 continue;
708 }
709 if (archprio > archpriouse)
710 continue;
711 }
712 if (prio >= priouse)
713 continue;
714 priouse = prio;
715 iuse = i;
716 }
717 have_header = resp_begin_html(200, NULL, r[iuse].file);
718 } else
719 have_header = resp_begin_html(200, NULL, NULL);
720
721 if (have_header == 0)
722 puts("<header>");
723 resp_searchform(req,
724 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
725 puts("</header>");
726
727 if (sz > 1) {
728 puts("<nav>");
729 puts("<table class=\"results\">");
730 for (i = 0; i < sz; i++) {
731 printf(" <tr>\n"
732 " <td>"
733 "<a class=\"Xr\" href=\"/");
734 if (*scriptname != '\0')
735 printf("%s/", scriptname);
736 if (strcmp(req->q.manpath, req->p[0]))
737 printf("%s/", req->q.manpath);
738 printf("%s\">", r[i].file);
739 html_print(r[i].names);
740 printf("</a></td>\n"
741 " <td><span class=\"Nd\">");
742 html_print(r[i].output);
743 puts("</span></td>\n"
744 " </tr>");
745 }
746 puts("</table>");
747 puts("</nav>");
748 }
749
750 if (req->q.equal || sz == 1) {
751 puts("<hr>");
752 resp_show(req, r[iuse].file);
753 }
754
755 resp_end_html();
756 }
757
/*
 * Write a preformatted manual page as HTML inside <pre>.
 * Backspace overstrike sequences are translated: "_\bX" becomes
 * italic X, a few known two-character combinations become '*'
 * or '+', and any other "X\bY" becomes bold Y.
 * If the file cannot be opened, a notice is written instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		    " You specified an invalid manual file.\n"
		    "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * A backspace or newline at this position means
			 * that the catpage is malformed.
			 * Ignore it and keep going.
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair have to be compared
			 * around the backspace, that is, p[i] against
			 * p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
896
897 static void
898 resp_format(const struct req *req, const char *file)
899 {
900 struct manoutput conf;
901 struct mparse *mp;
902 struct roff_meta *meta;
903 void *vp;
904 int fd;
905 int usepath;
906
907 if (-1 == (fd = open(file, O_RDONLY))) {
908 puts("<p role=\"doc-notice\">\n"
909 " You specified an invalid manual file.\n"
910 "</p>");
911 return;
912 }
913
914 mchars_alloc();
915 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
916 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
917 mparse_readfd(mp, fd, file);
918 close(fd);
919 meta = mparse_result(mp);
920
921 memset(&conf, 0, sizeof(conf));
922 conf.fragment = 1;
923 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
924 usepath = strcmp(req->q.manpath, req->p[0]);
925 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
926 scriptname, *scriptname == '\0' ? "" : "/",
927 usepath ? req->q.manpath : "", usepath ? "/" : "");
928
929 vp = html_alloc(&conf);
930 if (meta->macroset == MACROSET_MDOC)
931 html_mdoc(vp, meta);
932 else
933 html_man(vp, meta);
934
935 html_free(vp);
936 mparse_free(mp);
937 mchars_free();
938 free(conf.man);
939 free(conf.style);
940 }
941
/*
 * Write one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are treated as preformatted pages
 * and all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel == 'c')
		resp_catman(req, rel);
	else
		resp_format(req, rel);
}
954
955 static void
956 pg_show(struct req *req, const char *fullpath)
957 {
958 char *manpath;
959 const char *file;
960
961 if ((file = strchr(fullpath, '/')) == NULL) {
962 pg_error_badrequest(
963 "You did not specify a page to show.");
964 return;
965 }
966 manpath = mandoc_strndup(fullpath, file - fullpath);
967 file++;
968
969 if ( ! validate_manpath(req, manpath)) {
970 pg_error_badrequest(
971 "You specified an invalid manpath.");
972 free(manpath);
973 return;
974 }
975
976 /*
977 * Begin by chdir()ing into the manpath.
978 * This way we can pick up the database files, which are
979 * relative to the manpath root.
980 */
981
982 if (chdir(manpath) == -1) {
983 warn("chdir %s", manpath);
984 pg_error_internal();
985 free(manpath);
986 return;
987 }
988 free(manpath);
989
990 if ( ! validate_filename(file)) {
991 pg_error_badrequest(
992 "You specified an invalid manual file.");
993 return;
994 }
995
996 if (resp_begin_html(200, NULL, file) == 0)
997 puts("<header>");
998 resp_searchform(req, FOCUS_NONE);
999 puts("</header>");
1000 resp_show(req, file);
1001 resp_end_html();
1002 }
1003
1004 static void
1005 pg_search(const struct req *req)
1006 {
1007 struct mansearch search;
1008 struct manpaths paths;
1009 struct manpage *res;
1010 char **argv;
1011 char *query, *rp, *wp;
1012 size_t ressz;
1013 int argc;
1014
1015 /*
1016 * Begin by chdir()ing into the root of the manpath.
1017 * This way we can pick up the database files, which are
1018 * relative to the manpath root.
1019 */
1020
1021 if (chdir(req->q.manpath) == -1) {
1022 warn("chdir %s", req->q.manpath);
1023 pg_error_internal();
1024 return;
1025 }
1026
1027 search.arch = req->q.arch;
1028 search.sec = req->q.sec;
1029 search.outkey = "Nd";
1030 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1031 search.firstmatch = 1;
1032
1033 paths.sz = 1;
1034 paths.paths = mandoc_malloc(sizeof(char *));
1035 paths.paths[0] = mandoc_strdup(".");
1036
1037 /*
1038 * Break apart at spaces with backslash-escaping.
1039 */
1040
1041 argc = 0;
1042 argv = NULL;
1043 rp = query = mandoc_strdup(req->q.query);
1044 for (;;) {
1045 while (isspace((unsigned char)*rp))
1046 rp++;
1047 if (*rp == '\0')
1048 break;
1049 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1050 argv[argc++] = wp = rp;
1051 for (;;) {
1052 if (isspace((unsigned char)*rp)) {
1053 *wp = '\0';
1054 rp++;
1055 break;
1056 }
1057 if (rp[0] == '\\' && rp[1] != '\0')
1058 rp++;
1059 if (wp != rp)
1060 *wp = *rp;
1061 if (*rp == '\0')
1062 break;
1063 wp++;
1064 rp++;
1065 }
1066 }
1067
1068 res = NULL;
1069 ressz = 0;
1070 if (req->isquery && req->q.equal && argc == 1)
1071 pg_redirect(req, argv[0]);
1072 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1073 pg_noresult(req, 400, "Bad Request",
1074 "You entered an invalid query.");
1075 else if (ressz == 0)
1076 pg_noresult(req, 404, "Not Found", "No results found.");
1077 else
1078 pg_searchres(req, res, ressz);
1079
1080 free(query);
1081 mansearch_free(res, ressz);
1082 free(paths.paths[0]);
1083 free(paths.paths);
1084 }
1085
1086 int
1087 main(void)
1088 {
1089 struct req req;
1090 struct itimerval itimer;
1091 const char *path;
1092 const char *querystring;
1093 int i;
1094
1095 #if HAVE_PLEDGE
1096 /*
1097 * The "rpath" pledge could be revoked after mparse_readfd()
1098 * if the file desciptor to "/footer.html" would be opened
1099 * up front, but it's probably not worth the complication
1100 * of the code it would cause: it would require scattering
1101 * pledge() calls in multiple low-level resp_*() functions.
1102 */
1103
1104 if (pledge("stdio rpath", NULL) == -1) {
1105 warn("pledge");
1106 pg_error_internal();
1107 return EXIT_FAILURE;
1108 }
1109 #endif
1110
1111 /* Poor man's ReDoS mitigation. */
1112
1113 itimer.it_value.tv_sec = 2;
1114 itimer.it_value.tv_usec = 0;
1115 itimer.it_interval.tv_sec = 2;
1116 itimer.it_interval.tv_usec = 0;
1117 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1118 warn("setitimer");
1119 pg_error_internal();
1120 return EXIT_FAILURE;
1121 }
1122
1123 /*
1124 * First we change directory into the MAN_DIR so that
1125 * subsequent scanning for manpath directories is rooted
1126 * relative to the same position.
1127 */
1128
1129 if (chdir(MAN_DIR) == -1) {
1130 warn("MAN_DIR: %s", MAN_DIR);
1131 pg_error_internal();
1132 return EXIT_FAILURE;
1133 }
1134
1135 memset(&req, 0, sizeof(struct req));
1136 req.q.equal = 1;
1137 parse_manpath_conf(&req);
1138
1139 /* Parse the path info and the query string. */
1140
1141 if ((path = getenv("PATH_INFO")) == NULL)
1142 path = "";
1143 else if (*path == '/')
1144 path++;
1145
1146 if (*path != '\0') {
1147 parse_path_info(&req, path);
1148 if (req.q.manpath == NULL || req.q.sec == NULL ||
1149 *req.q.query == '\0' || access(path, F_OK) == -1)
1150 path = "";
1151 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1152 parse_query_string(&req, querystring);
1153
1154 /* Validate parsed data and add defaults. */
1155
1156 if (req.q.manpath == NULL)
1157 req.q.manpath = mandoc_strdup(req.p[0]);
1158 else if ( ! validate_manpath(&req, req.q.manpath)) {
1159 pg_error_badrequest(
1160 "You specified an invalid manpath.");
1161 return EXIT_FAILURE;
1162 }
1163
1164 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1165 pg_error_badrequest(
1166 "You specified an invalid architecture.");
1167 return EXIT_FAILURE;
1168 }
1169
1170 /* Dispatch to the three different pages. */
1171
1172 if ('\0' != *path)
1173 pg_show(&req, path);
1174 else if (NULL != req.q.query)
1175 pg_search(&req);
1176 else
1177 pg_index(&req);
1178
1179 free(req.q.manpath);
1180 free(req.q.arch);
1181 free(req.q.sec);
1182 free(req.q.query);
1183 for (i = 0; i < (int)req.psz; i++)
1184 free(req.p[i]);
1185 free(req.p);
1186 return EXIT_SUCCESS;
1187 }
1188
1189 /*
1190 * Translate PATH_INFO to a query.
1191 */
1192 static void
1193 parse_path_info(struct req *req, const char *path)
1194 {
1195 const char *name, *sec, *end;
1196
1197 req->isquery = 0;
1198 req->q.equal = 1;
1199 req->q.manpath = NULL;
1200 req->q.arch = NULL;
1201
1202 /* Mandatory manual page name. */
1203 if ((name = strrchr(path, '/')) == NULL)
1204 name = path;
1205 else
1206 name++;
1207
1208 /* Optional trailing section. */
1209 sec = strrchr(name, '.');
1210 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1211 req->q.query = mandoc_strndup(name, sec - name - 1);
1212 req->q.sec = mandoc_strdup(sec);
1213 } else {
1214 req->q.query = mandoc_strdup(name);
1215 req->q.sec = NULL;
1216 }
1217
1218 /* Handle the case of name[.section] only. */
1219 if (name == path)
1220 return;
1221
1222 /* Optional manpath. */
1223 end = strchr(path, '/');
1224 req->q.manpath = mandoc_strndup(path, end - path);
1225 if (validate_manpath(req, req->q.manpath)) {
1226 path = end + 1;
1227 if (name == path)
1228 return;
1229 } else {
1230 free(req->q.manpath);
1231 req->q.manpath = NULL;
1232 }
1233
1234 /* Optional section. */
1235 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1236 path += 3;
1237 end = strchr(path, '/');
1238 free(req->q.sec);
1239 req->q.sec = mandoc_strndup(path, end - path);
1240 path = end + 1;
1241 if (name == path)
1242 return;
1243 }
1244
1245 /* Optional architecture. */
1246 end = strchr(path, '/');
1247 if (end + 1 != name) {
1248 pg_error_badrequest(
1249 "You specified too many directory components.");
1250 exit(EXIT_FAILURE);
1251 }
1252 req->q.arch = mandoc_strndup(path, end - path);
1253 if (validate_arch(req->q.arch) == 0) {
1254 pg_error_badrequest(
1255 "You specified an invalid directory component.");
1256 exit(EXIT_FAILURE);
1257 }
1258 }
1259
1260 /*
1261 * Scan for indexable paths.
1262 */
1263 static void
1264 parse_manpath_conf(struct req *req)
1265 {
1266 FILE *fp;
1267 char *dp;
1268 size_t dpsz;
1269 ssize_t len;
1270
1271 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1272 warn("%s/manpath.conf", MAN_DIR);
1273 pg_error_internal();
1274 exit(EXIT_FAILURE);
1275 }
1276
1277 dp = NULL;
1278 dpsz = 0;
1279
1280 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1281 if (dp[len - 1] == '\n')
1282 dp[--len] = '\0';
1283 req->p = mandoc_realloc(req->p,
1284 (req->psz + 1) * sizeof(char *));
1285 if ( ! validate_urifrag(dp)) {
1286 warnx("%s/manpath.conf contains "
1287 "unsafe path \"%s\"", MAN_DIR, dp);
1288 pg_error_internal();
1289 exit(EXIT_FAILURE);
1290 }
1291 if (strchr(dp, '/') != NULL) {
1292 warnx("%s/manpath.conf contains "
1293 "path with slash \"%s\"", MAN_DIR, dp);
1294 pg_error_internal();
1295 exit(EXIT_FAILURE);
1296 }
1297 req->p[req->psz++] = dp;
1298 dp = NULL;
1299 dpsz = 0;
1300 }
1301 free(dp);
1302
1303 if (req->p == NULL) {
1304 warnx("%s/manpath.conf is empty", MAN_DIR);
1305 pg_error_internal();
1306 exit(EXIT_FAILURE);
1307 }
1308 }