]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Manually tag the section option.
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.172 2020/04/03 11:35:01 schwarze Exp $ */
2 /*
3 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the man.cgi(8) program.
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23 #include <sys/time.h>
24
25 #include <ctype.h>
26 #if HAVE_ERR
27 #include <err.h>
28 #endif
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "mandoc_aux.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "mdoc.h"
42 #include "man.h"
43 #include "mandoc_parse.h"
44 #include "main.h"
45 #include "manconf.h"
46 #include "mansearch.h"
47 #include "cgi.h"
48
/*
 * Search parameters extracted from a request,
 * in the form that is handed to the search function.
 * A NULL string member means that the parameter was not given.
 */
struct query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
59
60 struct req {
61 struct query q;
62 char **p; /* array of available manpaths */
63 size_t psz; /* number of available manpaths */
64 int isquery; /* QUERY_STRING used, not PATH_INFO */
65 };
66
/*
 * Selects the element of the search form, if any,
 * that gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no element is focused */
	FOCUS_QUERY = 1		/* the query input box is focused */
};
71
/* Output encoding and input decoding. */
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_encode(const char *p);

/* Request parsing. */
static	void	 parse_manpath_conf(struct req *req);
static	void	 parse_path_info(struct req *req, const char *path);
static	void	 parse_query_string(struct req *req, const char *qs);
static	void	 set_query_attr(char **attr, char **val);

/* Input validation. */
static	int	 validate_arch(const char *arch);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);

/* Complete pages. */
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, int code,
			const char *http_msg, const char *user_msg);
static	void	 pg_redirect(const struct req *req, const char *name);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);

/* Parts of a response. */
static	void	 resp_begin_html(int code, const char *msg,
			const char *file);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_catman(const struct req *req, const char *file);
static	void	 resp_copy(const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_format(const struct req *req, const char *file);
static	void	 resp_searchform(const struct req *req, enum focus focus);
static	void	 resp_show(const struct req *req, const char *file);
102
103 static const char *scriptname = SCRIPT_NAME;
104
105 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
106 static const char *const sec_numbers[] = {
107 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
108 };
109 static const char *const sec_names[] = {
110 "All Sections",
111 "1 - General Commands",
112 "2 - System Calls",
113 "3 - Library Functions",
114 "3p - Perl Library",
115 "4 - Device Drivers",
116 "5 - File Formats",
117 "6 - Games",
118 "7 - Miscellaneous Information",
119 "8 - System Manager\'s Manual",
120 "9 - Kernel Developer\'s Manual"
121 };
122 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
123
124 static const char *const arch_names[] = {
125 "amd64", "alpha", "armv7", "arm64",
126 "hppa", "i386", "landisk",
127 "loongson", "luna88k", "macppc", "mips64",
128 "octeon", "sgi", "socppc", "sparc64",
129 "amiga", "arc", "armish", "arm32",
130 "atari", "aviion", "beagle", "cats",
131 "hppa64", "hp300",
132 "ia64", "mac68k", "mvme68k", "mvme88k",
133 "mvmeppc", "palm", "pc532", "pegasos",
134 "pmax", "powerpc", "solbourne", "sparc",
135 "sun3", "vax", "wgrisc", "x68k",
136 "zaurus"
137 };
138 static const int arch_MAX = sizeof(arch_names) / sizeof(char *);
139
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by character entities.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else {
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
166
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
180
/*
 * Move the allocated string *val into the query member *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In both cases,
 * *val is NULL on return because the caller no longer owns it.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
197
198 /*
199 * Parse the QUERY_STRING for key-value pairs
200 * and store the values into the query structure.
201 */
202 static void
203 parse_query_string(struct req *req, const char *qs)
204 {
205 char *key, *val;
206 size_t keysz, valsz;
207
208 req->isquery = 1;
209 req->q.manpath = NULL;
210 req->q.arch = NULL;
211 req->q.sec = NULL;
212 req->q.query = NULL;
213 req->q.equal = 1;
214
215 key = val = NULL;
216 while (*qs != '\0') {
217
218 /* Parse one key. */
219
220 keysz = strcspn(qs, "=;&");
221 key = mandoc_strndup(qs, keysz);
222 qs += keysz;
223 if (*qs != '=')
224 goto next;
225
226 /* Parse one value. */
227
228 valsz = strcspn(++qs, ";&");
229 val = mandoc_strndup(qs, valsz);
230 qs += valsz;
231
232 /* Decode and catch encoding errors. */
233
234 if ( ! (http_decode(key) && http_decode(val)))
235 goto next;
236
237 /* Handle key-value pairs. */
238
239 if ( ! strcmp(key, "query"))
240 set_query_attr(&req->q.query, &val);
241
242 else if ( ! strcmp(key, "apropos"))
243 req->q.equal = !strcmp(val, "0");
244
245 else if ( ! strcmp(key, "manpath")) {
246 #ifdef COMPAT_OLDURI
247 if ( ! strncmp(val, "OpenBSD ", 8)) {
248 val[7] = '-';
249 if ('C' == val[8])
250 val[8] = 'c';
251 }
252 #endif
253 set_query_attr(&req->q.manpath, &val);
254 }
255
256 else if ( ! (strcmp(key, "sec")
257 #ifdef COMPAT_OLDURI
258 && strcmp(key, "sektion")
259 #endif
260 )) {
261 if ( ! strcmp(val, "0"))
262 *val = '\0';
263 set_query_attr(&req->q.sec, &val);
264 }
265
266 else if ( ! strcmp(key, "arch")) {
267 if ( ! strcmp(val, "default"))
268 *val = '\0';
269 set_query_attr(&req->q.arch, &val);
270 }
271
272 /*
273 * The key must be freed in any case.
274 * The val may have been handed over to the query
275 * structure, in which case it is now NULL.
276 */
277 next:
278 free(key);
279 key = NULL;
280 free(val);
281 val = NULL;
282
283 if (*qs != '\0')
284 qs++;
285 }
286 }
287
/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * A '+' turns into a blank and "%XX" into the byte with the
 * hexadecimal value XX.
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if it encodes a NUL byte;
 * in that case, the content of the buffer is unspecified, but it
 * remains NUL-terminated.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	int	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hex digits.  Number parsers like
		 * sscanf("%x") also take signs, leading blanks, and
		 * a single digit followed by garbage.  Since isxdigit()
		 * rejects NUL, a truncated escape is caught here, too,
		 * and p[2] is never read past the terminator.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the decoded string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
323
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and "-._~" are written unchanged;
 * every other byte is written as '%' and two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
335
/*
 * Write the HTTP response header, including the empty line that
 * terminates it, and flush standard output.
 * The "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
352
/*
 * Copy the content of a file verbatim to standard output.
 * This is best effort: if the file cannot be opened, nothing is
 * written, and copying stops quietly at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Drain stdio first such that the raw writes below stay in order. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/* A single write(2) may accept less than was offered. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw <= 0) {
				close(fd);
				return;
			}
		}
	}
	close(fd);
}
367
368 static void
369 resp_begin_html(int code, const char *msg, const char *file)
370 {
371 char *cp;
372
373 resp_begin_http(code, msg);
374
375 printf("<!DOCTYPE html>\n"
376 "<html>\n"
377 "<head>\n"
378 " <meta charset=\"UTF-8\"/>\n"
379 " <meta name=\"viewport\""
380 " content=\"width=device-width, initial-scale=1.0\">\n"
381 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
382 " type=\"text/css\" media=\"all\">\n"
383 " <title>",
384 CSS_DIR);
385 if (file != NULL) {
386 if ((cp = strrchr(file, '/')) != NULL)
387 file = cp + 1;
388 if ((cp = strrchr(file, '.')) != NULL) {
389 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
390 } else
391 printf("%s - ", file);
392 }
393 printf("%s</title>\n"
394 "</head>\n"
395 "<body>\n",
396 CUSTOMIZE_TITLE);
397
398 resp_copy(MAN_DIR "/header.html");
399 }
400
401 static void
402 resp_end_html(void)
403 {
404
405 resp_copy(MAN_DIR "/footer.html");
406
407 puts("</body>\n"
408 "</html>");
409 }
410
411 static void
412 resp_searchform(const struct req *req, enum focus focus)
413 {
414 int i;
415
416 printf("<form action=\"/%s\" method=\"get\" "
417 "autocomplete=\"off\" autocapitalize=\"none\">\n"
418 " <fieldset>\n"
419 " <legend>Manual Page Search Parameters</legend>\n",
420 scriptname);
421
422 /* Write query input box. */
423
424 printf(" <input type=\"search\" name=\"query\" value=\"");
425 if (req->q.query != NULL)
426 html_print(req->q.query);
427 printf( "\" size=\"40\"");
428 if (focus == FOCUS_QUERY)
429 printf(" autofocus");
430 puts(">");
431
432 /* Write submission buttons. */
433
434 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
435 "man</button>\n"
436 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
437 "apropos</button>\n"
438 " <br/>\n");
439
440 /* Write section selector. */
441
442 puts(" <select name=\"sec\">");
443 for (i = 0; i < sec_MAX; i++) {
444 printf(" <option value=\"%s\"", sec_numbers[i]);
445 if (NULL != req->q.sec &&
446 0 == strcmp(sec_numbers[i], req->q.sec))
447 printf(" selected=\"selected\"");
448 printf(">%s</option>\n", sec_names[i]);
449 }
450 puts(" </select>");
451
452 /* Write architecture selector. */
453
454 printf( " <select name=\"arch\">\n"
455 " <option value=\"default\"");
456 if (NULL == req->q.arch)
457 printf(" selected=\"selected\"");
458 puts(">All Architectures</option>");
459 for (i = 0; i < arch_MAX; i++) {
460 printf(" <option");
461 if (NULL != req->q.arch &&
462 0 == strcmp(arch_names[i], req->q.arch))
463 printf(" selected=\"selected\"");
464 printf(">%s</option>\n", arch_names[i]);
465 }
466 puts(" </select>");
467
468 /* Write manpath selector. */
469
470 if (req->psz > 1) {
471 puts(" <select name=\"manpath\">");
472 for (i = 0; i < (int)req->psz; i++) {
473 printf(" <option");
474 if (strcmp(req->q.manpath, req->p[i]) == 0)
475 printf(" selected=\"selected\"");
476 printf(">");
477 html_print(req->p[i]);
478 puts("</option>");
479 }
480 puts(" </select>");
481 }
482
483 puts(" </fieldset>\n"
484 "</form>");
485 }
486
/*
 * Check that a string can be used in a URI without encoding.
 * Returns 1 if it consists of alphanumeric characters and
 * '-', '.', '/', '_' only (the empty string qualifies), or else 0.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
500
501 static int
502 validate_manpath(const struct req *req, const char* manpath)
503 {
504 size_t i;
505
506 for (i = 0; i < req->psz; i++)
507 if ( ! strcmp(manpath, req->p[i]))
508 return 1;
509
510 return 0;
511 }
512
513 static int
514 validate_arch(const char *arch)
515 {
516 int i;
517
518 for (i = 0; i < arch_MAX; i++)
519 if (strcmp(arch, arch_names[i]) == 0)
520 return 1;
521
522 return 0;
523 }
524
/*
 * Check a manual page file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, or else 0.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could climb out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
535
536 static void
537 pg_index(const struct req *req)
538 {
539
540 resp_begin_html(200, NULL, NULL);
541 resp_searchform(req, FOCUS_QUERY);
542 printf("<p>\n"
543 "This web interface is documented in the\n"
544 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
545 "manual, and the\n"
546 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
547 "manual explains the query syntax.\n"
548 "</p>\n",
549 scriptname, *scriptname == '\0' ? "" : "/",
550 scriptname, *scriptname == '\0' ? "" : "/");
551 resp_end_html();
552 }
553
554 static void
555 pg_noresult(const struct req *req, int code, const char *http_msg,
556 const char *user_msg)
557 {
558 resp_begin_html(code, http_msg, NULL);
559 resp_searchform(req, FOCUS_QUERY);
560 puts("<p>");
561 puts(user_msg);
562 puts("</p>");
563 resp_end_html();
564 }
565
566 static void
567 pg_error_badrequest(const char *msg)
568 {
569
570 resp_begin_html(400, "Bad Request", NULL);
571 puts("<h1>Bad Request</h1>\n"
572 "<p>\n");
573 puts(msg);
574 printf("Try again from the\n"
575 "<a href=\"/%s\">main page</a>.\n"
576 "</p>", scriptname);
577 resp_end_html();
578 }
579
/*
 * Write a complete "500 Internal Server Error" page.
 * No details are revealed to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);

	puts("<p>Internal Server Error</p>");

	resp_end_html();
}
587
588 static void
589 pg_redirect(const struct req *req, const char *name)
590 {
591 printf("Status: 303 See Other\r\n"
592 "Location: /");
593 if (*scriptname != '\0')
594 printf("%s/", scriptname);
595 if (strcmp(req->q.manpath, req->p[0]))
596 printf("%s/", req->q.manpath);
597 if (req->q.arch != NULL)
598 printf("%s/", req->q.arch);
599 http_encode(name);
600 if (req->q.sec != NULL) {
601 putchar('.');
602 http_encode(req->q.sec);
603 }
604 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
605 }
606
607 static void
608 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
609 {
610 char *arch, *archend;
611 const char *sec;
612 size_t i, iuse;
613 int archprio, archpriouse;
614 int prio, priouse;
615
616 for (i = 0; i < sz; i++) {
617 if (validate_filename(r[i].file))
618 continue;
619 warnx("invalid filename %s in %s database",
620 r[i].file, req->q.manpath);
621 pg_error_internal();
622 return;
623 }
624
625 if (req->isquery && sz == 1) {
626 /*
627 * If we have just one result, then jump there now
628 * without any delay.
629 */
630 printf("Status: 303 See Other\r\n"
631 "Location: /");
632 if (*scriptname != '\0')
633 printf("%s/", scriptname);
634 if (strcmp(req->q.manpath, req->p[0]))
635 printf("%s/", req->q.manpath);
636 printf("%s\r\n"
637 "Content-Type: text/html; charset=utf-8\r\n\r\n",
638 r[0].file);
639 return;
640 }
641
642 /*
643 * In man(1) mode, show one of the pages
644 * even if more than one is found.
645 */
646
647 iuse = 0;
648 if (req->q.equal || sz == 1) {
649 priouse = 20;
650 archpriouse = 3;
651 for (i = 0; i < sz; i++) {
652 sec = r[i].file;
653 sec += strcspn(sec, "123456789");
654 if (sec[0] == '\0')
655 continue;
656 prio = sec_prios[sec[0] - '1'];
657 if (sec[1] != '/')
658 prio += 10;
659 if (req->q.arch == NULL) {
660 archprio =
661 ((arch = strchr(sec + 1, '/'))
662 == NULL) ? 3 :
663 ((archend = strchr(arch + 1, '/'))
664 == NULL) ? 0 :
665 strncmp(arch, "amd64/",
666 archend - arch) ? 2 : 1;
667 if (archprio < archpriouse) {
668 archpriouse = archprio;
669 priouse = prio;
670 iuse = i;
671 continue;
672 }
673 if (archprio > archpriouse)
674 continue;
675 }
676 if (prio >= priouse)
677 continue;
678 priouse = prio;
679 iuse = i;
680 }
681 resp_begin_html(200, NULL, r[iuse].file);
682 } else
683 resp_begin_html(200, NULL, NULL);
684
685 resp_searchform(req,
686 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
687
688 if (sz > 1) {
689 puts("<table class=\"results\">");
690 for (i = 0; i < sz; i++) {
691 printf(" <tr>\n"
692 " <td>"
693 "<a class=\"Xr\" href=\"/");
694 if (*scriptname != '\0')
695 printf("%s/", scriptname);
696 if (strcmp(req->q.manpath, req->p[0]))
697 printf("%s/", req->q.manpath);
698 printf("%s\">", r[i].file);
699 html_print(r[i].names);
700 printf("</a></td>\n"
701 " <td><span class=\"Nd\">");
702 html_print(r[i].output);
703 puts("</span></td>\n"
704 " </tr>");
705 }
706 puts("</table>");
707 }
708
709 if (req->q.equal || sz == 1) {
710 puts("<hr>");
711 resp_show(req, r[iuse].file);
712 }
713
714 resp_end_html();
715 }
716
/*
 * Write a preformatted manual page ("cat page") as HTML.
 * Overstrike sequences using backspace are translated:
 * "_\bX" becomes italic X, a few troff overstrike combinations
 * become '*' or '+', and any other "Y\bX" becomes bold X.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair have to be
			 * compared, so the second one is p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
853
854 static void
855 resp_format(const struct req *req, const char *file)
856 {
857 struct manoutput conf;
858 struct mparse *mp;
859 struct roff_meta *meta;
860 void *vp;
861 int fd;
862 int usepath;
863
864 if (-1 == (fd = open(file, O_RDONLY, 0))) {
865 puts("<p>You specified an invalid manual file.</p>");
866 return;
867 }
868
869 mchars_alloc();
870 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
871 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
872 mparse_readfd(mp, fd, file);
873 close(fd);
874 meta = mparse_result(mp);
875
876 memset(&conf, 0, sizeof(conf));
877 conf.fragment = 1;
878 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
879 usepath = strcmp(req->q.manpath, req->p[0]);
880 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
881 scriptname, *scriptname == '\0' ? "" : "/",
882 usepath ? req->q.manpath : "", usepath ? "/" : "");
883
884 vp = html_alloc(&conf);
885 if (meta->macroset == MACROSET_MDOC)
886 html_mdoc(vp, meta);
887 else
888 html_man(vp, meta);
889
890 html_free(vp);
891 mparse_free(mp);
892 mchars_free();
893 free(conf.man);
894 free(conf.style);
895 }
896
/*
 * Write one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are treated as preformatted pages
 * and all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
909
910 static void
911 pg_show(struct req *req, const char *fullpath)
912 {
913 char *manpath;
914 const char *file;
915
916 if ((file = strchr(fullpath, '/')) == NULL) {
917 pg_error_badrequest(
918 "You did not specify a page to show.");
919 return;
920 }
921 manpath = mandoc_strndup(fullpath, file - fullpath);
922 file++;
923
924 if ( ! validate_manpath(req, manpath)) {
925 pg_error_badrequest(
926 "You specified an invalid manpath.");
927 free(manpath);
928 return;
929 }
930
931 /*
932 * Begin by chdir()ing into the manpath.
933 * This way we can pick up the database files, which are
934 * relative to the manpath root.
935 */
936
937 if (chdir(manpath) == -1) {
938 warn("chdir %s", manpath);
939 pg_error_internal();
940 free(manpath);
941 return;
942 }
943 free(manpath);
944
945 if ( ! validate_filename(file)) {
946 pg_error_badrequest(
947 "You specified an invalid manual file.");
948 return;
949 }
950
951 resp_begin_html(200, NULL, file);
952 resp_searchform(req, FOCUS_NONE);
953 resp_show(req, file);
954 resp_end_html();
955 }
956
957 static void
958 pg_search(const struct req *req)
959 {
960 struct mansearch search;
961 struct manpaths paths;
962 struct manpage *res;
963 char **argv;
964 char *query, *rp, *wp;
965 size_t ressz;
966 int argc;
967
968 /*
969 * Begin by chdir()ing into the root of the manpath.
970 * This way we can pick up the database files, which are
971 * relative to the manpath root.
972 */
973
974 if (chdir(req->q.manpath) == -1) {
975 warn("chdir %s", req->q.manpath);
976 pg_error_internal();
977 return;
978 }
979
980 search.arch = req->q.arch;
981 search.sec = req->q.sec;
982 search.outkey = "Nd";
983 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
984 search.firstmatch = 1;
985
986 paths.sz = 1;
987 paths.paths = mandoc_malloc(sizeof(char *));
988 paths.paths[0] = mandoc_strdup(".");
989
990 /*
991 * Break apart at spaces with backslash-escaping.
992 */
993
994 argc = 0;
995 argv = NULL;
996 rp = query = mandoc_strdup(req->q.query);
997 for (;;) {
998 while (isspace((unsigned char)*rp))
999 rp++;
1000 if (*rp == '\0')
1001 break;
1002 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1003 argv[argc++] = wp = rp;
1004 for (;;) {
1005 if (isspace((unsigned char)*rp)) {
1006 *wp = '\0';
1007 rp++;
1008 break;
1009 }
1010 if (rp[0] == '\\' && rp[1] != '\0')
1011 rp++;
1012 if (wp != rp)
1013 *wp = *rp;
1014 if (*rp == '\0')
1015 break;
1016 wp++;
1017 rp++;
1018 }
1019 }
1020
1021 res = NULL;
1022 ressz = 0;
1023 if (req->isquery && req->q.equal && argc == 1)
1024 pg_redirect(req, argv[0]);
1025 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1026 pg_noresult(req, 400, "Bad Request",
1027 "You entered an invalid query.");
1028 else if (ressz == 0)
1029 pg_noresult(req, 404, "Not Found", "No results found.");
1030 else
1031 pg_searchres(req, res, ressz);
1032
1033 free(query);
1034 mansearch_free(res, ressz);
1035 free(paths.paths[0]);
1036 free(paths.paths);
1037 }
1038
1039 int
1040 main(void)
1041 {
1042 struct req req;
1043 struct itimerval itimer;
1044 const char *path;
1045 const char *querystring;
1046 int i;
1047
1048 #if HAVE_PLEDGE
1049 /*
1050 * The "rpath" pledge could be revoked after mparse_readfd()
1051 * if the file desciptor to "/footer.html" would be opened
1052 * up front, but it's probably not worth the complication
1053 * of the code it would cause: it would require scattering
1054 * pledge() calls in multiple low-level resp_*() functions.
1055 */
1056
1057 if (pledge("stdio rpath", NULL) == -1) {
1058 warn("pledge");
1059 pg_error_internal();
1060 return EXIT_FAILURE;
1061 }
1062 #endif
1063
1064 /* Poor man's ReDoS mitigation. */
1065
1066 itimer.it_value.tv_sec = 2;
1067 itimer.it_value.tv_usec = 0;
1068 itimer.it_interval.tv_sec = 2;
1069 itimer.it_interval.tv_usec = 0;
1070 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1071 warn("setitimer");
1072 pg_error_internal();
1073 return EXIT_FAILURE;
1074 }
1075
1076 /*
1077 * First we change directory into the MAN_DIR so that
1078 * subsequent scanning for manpath directories is rooted
1079 * relative to the same position.
1080 */
1081
1082 if (chdir(MAN_DIR) == -1) {
1083 warn("MAN_DIR: %s", MAN_DIR);
1084 pg_error_internal();
1085 return EXIT_FAILURE;
1086 }
1087
1088 memset(&req, 0, sizeof(struct req));
1089 req.q.equal = 1;
1090 parse_manpath_conf(&req);
1091
1092 /* Parse the path info and the query string. */
1093
1094 if ((path = getenv("PATH_INFO")) == NULL)
1095 path = "";
1096 else if (*path == '/')
1097 path++;
1098
1099 if (*path != '\0') {
1100 parse_path_info(&req, path);
1101 if (req.q.manpath == NULL || req.q.sec == NULL ||
1102 *req.q.query == '\0' || access(path, F_OK) == -1)
1103 path = "";
1104 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1105 parse_query_string(&req, querystring);
1106
1107 /* Validate parsed data and add defaults. */
1108
1109 if (req.q.manpath == NULL)
1110 req.q.manpath = mandoc_strdup(req.p[0]);
1111 else if ( ! validate_manpath(&req, req.q.manpath)) {
1112 pg_error_badrequest(
1113 "You specified an invalid manpath.");
1114 return EXIT_FAILURE;
1115 }
1116
1117 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1118 pg_error_badrequest(
1119 "You specified an invalid architecture.");
1120 return EXIT_FAILURE;
1121 }
1122
1123 /* Dispatch to the three different pages. */
1124
1125 if ('\0' != *path)
1126 pg_show(&req, path);
1127 else if (NULL != req.q.query)
1128 pg_search(&req);
1129 else
1130 pg_index(&req);
1131
1132 free(req.q.manpath);
1133 free(req.q.arch);
1134 free(req.q.sec);
1135 free(req.q.query);
1136 for (i = 0; i < (int)req.psz; i++)
1137 free(req.p[i]);
1138 free(req.p);
1139 return EXIT_SUCCESS;
1140 }
1141
1142 /*
1143 * Translate PATH_INFO to a query.
1144 */
1145 static void
1146 parse_path_info(struct req *req, const char *path)
1147 {
1148 const char *name, *sec, *end;
1149
1150 req->isquery = 0;
1151 req->q.equal = 1;
1152 req->q.manpath = NULL;
1153 req->q.arch = NULL;
1154
1155 /* Mandatory manual page name. */
1156 if ((name = strrchr(path, '/')) == NULL)
1157 name = path;
1158 else
1159 name++;
1160
1161 /* Optional trailing section. */
1162 sec = strrchr(name, '.');
1163 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1164 req->q.query = mandoc_strndup(name, sec - name - 1);
1165 req->q.sec = mandoc_strdup(sec);
1166 } else {
1167 req->q.query = mandoc_strdup(name);
1168 req->q.sec = NULL;
1169 }
1170
1171 /* Handle the case of name[.section] only. */
1172 if (name == path)
1173 return;
1174
1175 /* Optional manpath. */
1176 end = strchr(path, '/');
1177 req->q.manpath = mandoc_strndup(path, end - path);
1178 if (validate_manpath(req, req->q.manpath)) {
1179 path = end + 1;
1180 if (name == path)
1181 return;
1182 } else {
1183 free(req->q.manpath);
1184 req->q.manpath = NULL;
1185 }
1186
1187 /* Optional section. */
1188 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1189 path += 3;
1190 end = strchr(path, '/');
1191 free(req->q.sec);
1192 req->q.sec = mandoc_strndup(path, end - path);
1193 path = end + 1;
1194 if (name == path)
1195 return;
1196 }
1197
1198 /* Optional architecture. */
1199 end = strchr(path, '/');
1200 if (end + 1 != name) {
1201 pg_error_badrequest(
1202 "You specified too many directory components.");
1203 exit(EXIT_FAILURE);
1204 }
1205 req->q.arch = mandoc_strndup(path, end - path);
1206 if (validate_arch(req->q.arch) == 0) {
1207 pg_error_badrequest(
1208 "You specified an invalid directory component.");
1209 exit(EXIT_FAILURE);
1210 }
1211 }
1212
1213 /*
1214 * Scan for indexable paths.
1215 */
1216 static void
1217 parse_manpath_conf(struct req *req)
1218 {
1219 FILE *fp;
1220 char *dp;
1221 size_t dpsz;
1222 ssize_t len;
1223
1224 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1225 warn("%s/manpath.conf", MAN_DIR);
1226 pg_error_internal();
1227 exit(EXIT_FAILURE);
1228 }
1229
1230 dp = NULL;
1231 dpsz = 0;
1232
1233 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1234 if (dp[len - 1] == '\n')
1235 dp[--len] = '\0';
1236 req->p = mandoc_realloc(req->p,
1237 (req->psz + 1) * sizeof(char *));
1238 if ( ! validate_urifrag(dp)) {
1239 warnx("%s/manpath.conf contains "
1240 "unsafe path \"%s\"", MAN_DIR, dp);
1241 pg_error_internal();
1242 exit(EXIT_FAILURE);
1243 }
1244 if (strchr(dp, '/') != NULL) {
1245 warnx("%s/manpath.conf contains "
1246 "path with slash \"%s\"", MAN_DIR, dp);
1247 pg_error_internal();
1248 exit(EXIT_FAILURE);
1249 }
1250 req->p[req->psz++] = dp;
1251 dp = NULL;
1252 dpsz = 0;
1253 }
1254 free(dp);
1255
1256 if (req->p == NULL) {
1257 warnx("%s/manpath.conf is empty", MAN_DIR);
1258 pg_error_internal();
1259 exit(EXIT_FAILURE);
1260 }
1261 }