]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Cleanup, no functional change:
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.162 2018/12/13 11:55:46 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "mandoc_parse.h"
42 #include "main.h"
43 #include "manconf.h"
44 #include "mansearch.h"
45 #include "cgi.h"
46
/*
 * The search parameters extracted from one request,
 * in the form handed on to the search function.
 */
struct query {
	char		*manpath; /* manual tree to look in */
	char		*arch; /* machine architecture */
	char		*sec; /* section of the manual */
	char		*query; /* query expression, not yet parsed */
	int		 equal; /* nonzero: whole names only, no substrings */
};
57
58 struct req {
59 struct query q;
60 char **p; /* array of available manpaths */
61 size_t psz; /* number of available manpaths */
62 int isquery; /* QUERY_STRING used, not PATH_INFO */
63 };
64
/*
 * Selects whether the search form asks the browser
 * to put the input focus on the query field.
 */
enum focus {
	FOCUS_NONE = 0,	/* do not request autofocus */
	FOCUS_QUERY	/* autofocus the query input box */
};
69
/*
 * Forward declarations of the file-local functions,
 * listed alphabetically.
 */
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_encode(const char *p);
static	void	 parse_manpath_conf(struct req *req);
static	void	 parse_path_info(struct req *req, const char *path);
static	void	 parse_query_string(struct req *req, const char *qs);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, const char *msg);
static	void	 pg_redirect(const struct req *req, const char *name);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	void	 resp_begin_html(int code, const char *msg,
			const char *file);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_catman(const struct req *req, const char *file);
static	void	 resp_copy(const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_format(const struct req *req, const char *file);
static	void	 resp_searchform(const struct req *req, enum focus focus);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_arch(const char *arch);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);
99
100 static const char *scriptname = SCRIPT_NAME;
101
/*
 * Preference of the sections 1 to 9, indexed by section digit
 * minus one; a smaller number wins when several pages match.
 */
static	const int	 sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; "0" stands for all sections. */
static	const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int	 sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
120
/*
 * The architecture names accepted in requests
 * and offered by the architecture selector.
 */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc64",
	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
136
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by their entities.
 * Anything else, non-ASCII bytes included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		entity = NULL;
		break;
	}

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
163
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time via html_putchar().
 * A NULL pointer is allowed and prints nothing.
 */
static void
html_print(const char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++)
		html_putchar(*p);
}
177
/*
 * Move ownership of the allocated string *val into the
 * query field *attr.  Whatever *attr held before is freed.
 * An empty string is not stored: it is freed and the field
 * becomes NULL.  In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
194
195 /*
196 * Parse the QUERY_STRING for key-value pairs
197 * and store the values into the query structure.
198 */
199 static void
200 parse_query_string(struct req *req, const char *qs)
201 {
202 char *key, *val;
203 size_t keysz, valsz;
204
205 req->isquery = 1;
206 req->q.manpath = NULL;
207 req->q.arch = NULL;
208 req->q.sec = NULL;
209 req->q.query = NULL;
210 req->q.equal = 1;
211
212 key = val = NULL;
213 while (*qs != '\0') {
214
215 /* Parse one key. */
216
217 keysz = strcspn(qs, "=;&");
218 key = mandoc_strndup(qs, keysz);
219 qs += keysz;
220 if (*qs != '=')
221 goto next;
222
223 /* Parse one value. */
224
225 valsz = strcspn(++qs, ";&");
226 val = mandoc_strndup(qs, valsz);
227 qs += valsz;
228
229 /* Decode and catch encoding errors. */
230
231 if ( ! (http_decode(key) && http_decode(val)))
232 goto next;
233
234 /* Handle key-value pairs. */
235
236 if ( ! strcmp(key, "query"))
237 set_query_attr(&req->q.query, &val);
238
239 else if ( ! strcmp(key, "apropos"))
240 req->q.equal = !strcmp(val, "0");
241
242 else if ( ! strcmp(key, "manpath")) {
243 #ifdef COMPAT_OLDURI
244 if ( ! strncmp(val, "OpenBSD ", 8)) {
245 val[7] = '-';
246 if ('C' == val[8])
247 val[8] = 'c';
248 }
249 #endif
250 set_query_attr(&req->q.manpath, &val);
251 }
252
253 else if ( ! (strcmp(key, "sec")
254 #ifdef COMPAT_OLDURI
255 && strcmp(key, "sektion")
256 #endif
257 )) {
258 if ( ! strcmp(val, "0"))
259 *val = '\0';
260 set_query_attr(&req->q.sec, &val);
261 }
262
263 else if ( ! strcmp(key, "arch")) {
264 if ( ! strcmp(val, "default"))
265 *val = '\0';
266 set_query_attr(&req->q.arch, &val);
267 }
268
269 /*
270 * The key must be freed in any case.
271 * The val may have been handed over to the query
272 * structure, in which case it is now NULL.
273 */
274 next:
275 free(key);
276 key = NULL;
277 free(val);
278 val = NULL;
279
280 if (*qs != '\0')
281 qs++;
282 }
283 }
284
/*
 * HTTP-decode a string in place: "%6e+foo" becomes "n foo".
 * A '+' turns into a blank and "%XX" into the byte with the
 * hexadecimal value XX.  The result is never longer than the
 * input, so the decoded text is written over the original.
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if an escape decodes to a
 * NUL byte; the content of the buffer is then unspecified
 * apart from still being NUL-terminated.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	int	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hex digits.  A scanf-style
		 * conversion would also take signs, blanks, and
		 * a single digit followed by garbage.
		 * The second test is only reached if p[1] is a
		 * digit, so it never reads beyond the terminator.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		     ! isxdigit((unsigned char)p[2]))
			return 0;

		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
320
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and "-._~" are written as they are;
 * every other byte becomes '%' plus two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) != 0 || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%02.2X", uc);
	}
}
332
/*
 * Write the HTTP response header and flush it.
 * A Status line is only written for codes other than 200;
 * msg is the reason phrase that goes with the code.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
347
/*
 * Copy the content of a file to standard output verbatim.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data bypasses stdio, so anything still
	 * buffered has to go out first to keep the order.
	 */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may accept less than it was given. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw != -1)
				continue;
			if (errno != EINTR)
				goto out;
			nw = 0;		/* Interrupted: try again. */
		}
	}
out:
	close(fd);
}
362
363 static void
364 resp_begin_html(int code, const char *msg, const char *file)
365 {
366 char *cp;
367
368 resp_begin_http(code, msg);
369
370 printf("<!DOCTYPE html>\n"
371 "<html>\n"
372 "<head>\n"
373 " <meta charset=\"UTF-8\"/>\n"
374 " <meta name=\"viewport\""
375 " content=\"width=device-width, initial-scale=1.0\">\n"
376 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
377 " type=\"text/css\" media=\"all\">\n"
378 " <title>",
379 CSS_DIR);
380 if (file != NULL) {
381 if ((cp = strrchr(file, '/')) != NULL)
382 file = cp + 1;
383 if ((cp = strrchr(file, '.')) != NULL) {
384 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
385 } else
386 printf("%s - ", file);
387 }
388 printf("%s</title>\n"
389 "</head>\n"
390 "<body>\n",
391 CUSTOMIZE_TITLE);
392
393 resp_copy(MAN_DIR "/header.html");
394 }
395
396 static void
397 resp_end_html(void)
398 {
399
400 resp_copy(MAN_DIR "/footer.html");
401
402 puts("</body>\n"
403 "</html>");
404 }
405
406 static void
407 resp_searchform(const struct req *req, enum focus focus)
408 {
409 int i;
410
411 printf("<form action=\"/%s\" method=\"get\">\n"
412 " <fieldset>\n"
413 " <legend>Manual Page Search Parameters</legend>\n",
414 scriptname);
415
416 /* Write query input box. */
417
418 printf(" <input type=\"search\" name=\"query\" value=\"");
419 if (req->q.query != NULL)
420 html_print(req->q.query);
421 printf( "\" size=\"40\"");
422 if (focus == FOCUS_QUERY)
423 printf(" autofocus");
424 puts(">");
425
426 /* Write submission buttons. */
427
428 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
429 "man</button>\n"
430 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
431 "apropos</button>\n"
432 " <br/>\n");
433
434 /* Write section selector. */
435
436 puts(" <select name=\"sec\">");
437 for (i = 0; i < sec_MAX; i++) {
438 printf(" <option value=\"%s\"", sec_numbers[i]);
439 if (NULL != req->q.sec &&
440 0 == strcmp(sec_numbers[i], req->q.sec))
441 printf(" selected=\"selected\"");
442 printf(">%s</option>\n", sec_names[i]);
443 }
444 puts(" </select>");
445
446 /* Write architecture selector. */
447
448 printf( " <select name=\"arch\">\n"
449 " <option value=\"default\"");
450 if (NULL == req->q.arch)
451 printf(" selected=\"selected\"");
452 puts(">All Architectures</option>");
453 for (i = 0; i < arch_MAX; i++) {
454 printf(" <option");
455 if (NULL != req->q.arch &&
456 0 == strcmp(arch_names[i], req->q.arch))
457 printf(" selected=\"selected\"");
458 printf(">%s</option>\n", arch_names[i]);
459 }
460 puts(" </select>");
461
462 /* Write manpath selector. */
463
464 if (req->psz > 1) {
465 puts(" <select name=\"manpath\">");
466 for (i = 0; i < (int)req->psz; i++) {
467 printf(" <option");
468 if (strcmp(req->q.manpath, req->p[i]) == 0)
469 printf(" selected=\"selected\"");
470 printf(">");
471 html_print(req->p[i]);
472 puts("</option>");
473 }
474 puts(" </select>");
475 }
476
477 puts(" </fieldset>\n"
478 "</form>");
479 }
480
/*
 * Check that a string consists of nothing but alphanumeric
 * characters, dashes, dots, slashes, and underscores.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++)
		if (isalnum(*cp) == 0 && strchr("-./_", *cp) == NULL)
			return 0;

	return 1;
}
494
495 static int
496 validate_manpath(const struct req *req, const char* manpath)
497 {
498 size_t i;
499
500 for (i = 0; i < req->psz; i++)
501 if ( ! strcmp(manpath, req->p[i]))
502 return 1;
503
504 return 0;
505 }
506
507 static int
508 validate_arch(const char *arch)
509 {
510 int i;
511
512 for (i = 0; i < arch_MAX; i++)
513 if (strcmp(arch, arch_names[i]) == 0)
514 return 1;
515
516 return 0;
517 }
518
/*
 * Check a manual page file name relative to a manpath.
 * After skipping an optional leading "./", the name must
 * begin with "man" or "cat" and must not contain "../"
 * or "/..".  Returns 1 if the name is acceptable, else 0.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that might climb out of the tree. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
529
530 static void
531 pg_index(const struct req *req)
532 {
533
534 resp_begin_html(200, NULL, NULL);
535 resp_searchform(req, FOCUS_QUERY);
536 printf("<p>\n"
537 "This web interface is documented in the\n"
538 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
539 "manual, and the\n"
540 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
541 "manual explains the query syntax.\n"
542 "</p>\n",
543 scriptname, *scriptname == '\0' ? "" : "/",
544 scriptname, *scriptname == '\0' ? "" : "/");
545 resp_end_html();
546 }
547
548 static void
549 pg_noresult(const struct req *req, const char *msg)
550 {
551 resp_begin_html(200, NULL, NULL);
552 resp_searchform(req, FOCUS_QUERY);
553 puts("<p>");
554 puts(msg);
555 puts("</p>");
556 resp_end_html();
557 }
558
559 static void
560 pg_error_badrequest(const char *msg)
561 {
562
563 resp_begin_html(400, "Bad Request", NULL);
564 puts("<h1>Bad Request</h1>\n"
565 "<p>\n");
566 puts(msg);
567 printf("Try again from the\n"
568 "<a href=\"/%s\">main page</a>.\n"
569 "</p>", scriptname);
570 resp_end_html();
571 }
572
/*
 * Write a "500 Internal Server Error" page.
 * No details are disclosed to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	puts("<p>Internal Server Error</p>");
	resp_end_html();
}
580
581 static void
582 pg_redirect(const struct req *req, const char *name)
583 {
584 printf("Status: 303 See Other\r\n"
585 "Location: /");
586 if (*scriptname != '\0')
587 printf("%s/", scriptname);
588 if (strcmp(req->q.manpath, req->p[0]))
589 printf("%s/", req->q.manpath);
590 if (req->q.arch != NULL)
591 printf("%s/", req->q.arch);
592 http_encode(name);
593 if (req->q.sec != NULL) {
594 putchar('.');
595 http_encode(req->q.sec);
596 }
597 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
598 }
599
600 static void
601 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
602 {
603 char *arch, *archend;
604 const char *sec;
605 size_t i, iuse;
606 int archprio, archpriouse;
607 int prio, priouse;
608
609 for (i = 0; i < sz; i++) {
610 if (validate_filename(r[i].file))
611 continue;
612 warnx("invalid filename %s in %s database",
613 r[i].file, req->q.manpath);
614 pg_error_internal();
615 return;
616 }
617
618 if (req->isquery && sz == 1) {
619 /*
620 * If we have just one result, then jump there now
621 * without any delay.
622 */
623 printf("Status: 303 See Other\r\n"
624 "Location: /");
625 if (*scriptname != '\0')
626 printf("%s/", scriptname);
627 if (strcmp(req->q.manpath, req->p[0]))
628 printf("%s/", req->q.manpath);
629 printf("%s\r\n"
630 "Content-Type: text/html; charset=utf-8\r\n\r\n",
631 r[0].file);
632 return;
633 }
634
635 /*
636 * In man(1) mode, show one of the pages
637 * even if more than one is found.
638 */
639
640 iuse = 0;
641 if (req->q.equal || sz == 1) {
642 priouse = 20;
643 archpriouse = 3;
644 for (i = 0; i < sz; i++) {
645 sec = r[i].file;
646 sec += strcspn(sec, "123456789");
647 if (sec[0] == '\0')
648 continue;
649 prio = sec_prios[sec[0] - '1'];
650 if (sec[1] != '/')
651 prio += 10;
652 if (req->q.arch == NULL) {
653 archprio =
654 ((arch = strchr(sec + 1, '/'))
655 == NULL) ? 3 :
656 ((archend = strchr(arch + 1, '/'))
657 == NULL) ? 0 :
658 strncmp(arch, "amd64/",
659 archend - arch) ? 2 : 1;
660 if (archprio < archpriouse) {
661 archpriouse = archprio;
662 priouse = prio;
663 iuse = i;
664 continue;
665 }
666 if (archprio > archpriouse)
667 continue;
668 }
669 if (prio >= priouse)
670 continue;
671 priouse = prio;
672 iuse = i;
673 }
674 resp_begin_html(200, NULL, r[iuse].file);
675 } else
676 resp_begin_html(200, NULL, NULL);
677
678 resp_searchform(req,
679 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
680
681 if (sz > 1) {
682 puts("<table class=\"results\">");
683 for (i = 0; i < sz; i++) {
684 printf(" <tr>\n"
685 " <td>"
686 "<a class=\"Xr\" href=\"/");
687 if (*scriptname != '\0')
688 printf("%s/", scriptname);
689 if (strcmp(req->q.manpath, req->p[0]))
690 printf("%s/", req->q.manpath);
691 printf("%s\">", r[i].file);
692 html_print(r[i].names);
693 printf("</a></td>\n"
694 " <td><span class=\"Nd\">");
695 html_print(r[i].output);
696 puts("</span></td>\n"
697 " </tr>");
698 }
699 puts("</table>");
700 }
701
702 if (req->q.equal || sz == 1) {
703 puts("<hr>");
704 resp_show(req, r[iuse].file);
705 }
706
707 resp_end_html();
708 }
709
/*
 * Convert a preformatted manual page ("cat page") to HTML.
 *
 * The file is read line by line.  Backspace overstrike
 * sequences are translated: "_\bX" becomes italic X, a few
 * character pairs known from man2html become '*' or '+', and
 * any other "X\bY" becomes bold Y.  All text is HTML-escaped.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
846
847 static void
848 resp_format(const struct req *req, const char *file)
849 {
850 struct manoutput conf;
851 struct mparse *mp;
852 struct roff_man *man;
853 void *vp;
854 int fd;
855 int usepath;
856
857 if (-1 == (fd = open(file, O_RDONLY, 0))) {
858 puts("<p>You specified an invalid manual file.</p>");
859 return;
860 }
861
862 mchars_alloc();
863 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
864 MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
865 mparse_readfd(mp, fd, file);
866 close(fd);
867
868 memset(&conf, 0, sizeof(conf));
869 conf.fragment = 1;
870 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
871 conf.toc = 1;
872 usepath = strcmp(req->q.manpath, req->p[0]);
873 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
874 scriptname, *scriptname == '\0' ? "" : "/",
875 usepath ? req->q.manpath : "", usepath ? "/" : "");
876
877 mparse_result(mp, &man, NULL);
878 if (man == NULL) {
879 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
880 pg_error_internal();
881 mparse_free(mp);
882 mchars_free();
883 return;
884 }
885
886 vp = html_alloc(&conf);
887
888 if (man->macroset == MACROSET_MDOC) {
889 mdoc_validate(man);
890 html_mdoc(vp, man);
891 } else {
892 man_validate(man);
893 html_man(vp, man);
894 }
895
896 html_free(vp);
897 mparse_free(mp);
898 mchars_free();
899 free(conf.man);
900 free(conf.style);
901 }
902
/*
 * Write one manual page as HTML.
 * After skipping an optional leading "./", file names
 * starting with 'c' are handed to resp_catman() and
 * all others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
915
916 static void
917 pg_show(struct req *req, const char *fullpath)
918 {
919 char *manpath;
920 const char *file;
921
922 if ((file = strchr(fullpath, '/')) == NULL) {
923 pg_error_badrequest(
924 "You did not specify a page to show.");
925 return;
926 }
927 manpath = mandoc_strndup(fullpath, file - fullpath);
928 file++;
929
930 if ( ! validate_manpath(req, manpath)) {
931 pg_error_badrequest(
932 "You specified an invalid manpath.");
933 free(manpath);
934 return;
935 }
936
937 /*
938 * Begin by chdir()ing into the manpath.
939 * This way we can pick up the database files, which are
940 * relative to the manpath root.
941 */
942
943 if (chdir(manpath) == -1) {
944 warn("chdir %s", manpath);
945 pg_error_internal();
946 free(manpath);
947 return;
948 }
949 free(manpath);
950
951 if ( ! validate_filename(file)) {
952 pg_error_badrequest(
953 "You specified an invalid manual file.");
954 return;
955 }
956
957 resp_begin_html(200, NULL, file);
958 resp_searchform(req, FOCUS_NONE);
959 resp_show(req, file);
960 resp_end_html();
961 }
962
963 static void
964 pg_search(const struct req *req)
965 {
966 struct mansearch search;
967 struct manpaths paths;
968 struct manpage *res;
969 char **argv;
970 char *query, *rp, *wp;
971 size_t ressz;
972 int argc;
973
974 /*
975 * Begin by chdir()ing into the root of the manpath.
976 * This way we can pick up the database files, which are
977 * relative to the manpath root.
978 */
979
980 if (chdir(req->q.manpath) == -1) {
981 warn("chdir %s", req->q.manpath);
982 pg_error_internal();
983 return;
984 }
985
986 search.arch = req->q.arch;
987 search.sec = req->q.sec;
988 search.outkey = "Nd";
989 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
990 search.firstmatch = 1;
991
992 paths.sz = 1;
993 paths.paths = mandoc_malloc(sizeof(char *));
994 paths.paths[0] = mandoc_strdup(".");
995
996 /*
997 * Break apart at spaces with backslash-escaping.
998 */
999
1000 argc = 0;
1001 argv = NULL;
1002 rp = query = mandoc_strdup(req->q.query);
1003 for (;;) {
1004 while (isspace((unsigned char)*rp))
1005 rp++;
1006 if (*rp == '\0')
1007 break;
1008 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1009 argv[argc++] = wp = rp;
1010 for (;;) {
1011 if (isspace((unsigned char)*rp)) {
1012 *wp = '\0';
1013 rp++;
1014 break;
1015 }
1016 if (rp[0] == '\\' && rp[1] != '\0')
1017 rp++;
1018 if (wp != rp)
1019 *wp = *rp;
1020 if (*rp == '\0')
1021 break;
1022 wp++;
1023 rp++;
1024 }
1025 }
1026
1027 res = NULL;
1028 ressz = 0;
1029 if (req->isquery && req->q.equal && argc == 1)
1030 pg_redirect(req, argv[0]);
1031 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1032 pg_noresult(req, "You entered an invalid query.");
1033 else if (ressz == 0)
1034 pg_noresult(req, "No results found.");
1035 else
1036 pg_searchres(req, res, ressz);
1037
1038 free(query);
1039 mansearch_free(res, ressz);
1040 free(paths.paths[0]);
1041 free(paths.paths);
1042 }
1043
1044 int
1045 main(void)
1046 {
1047 struct req req;
1048 struct itimerval itimer;
1049 const char *path;
1050 const char *querystring;
1051 int i;
1052
1053 #if HAVE_PLEDGE
1054 /*
1055 * The "rpath" pledge could be revoked after mparse_readfd()
1056 * if the file desciptor to "/footer.html" would be opened
1057 * up front, but it's probably not worth the complication
1058 * of the code it would cause: it would require scattering
1059 * pledge() calls in multiple low-level resp_*() functions.
1060 */
1061
1062 if (pledge("stdio rpath", NULL) == -1) {
1063 warn("pledge");
1064 pg_error_internal();
1065 return EXIT_FAILURE;
1066 }
1067 #endif
1068
1069 /* Poor man's ReDoS mitigation. */
1070
1071 itimer.it_value.tv_sec = 2;
1072 itimer.it_value.tv_usec = 0;
1073 itimer.it_interval.tv_sec = 2;
1074 itimer.it_interval.tv_usec = 0;
1075 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1076 warn("setitimer");
1077 pg_error_internal();
1078 return EXIT_FAILURE;
1079 }
1080
1081 /*
1082 * First we change directory into the MAN_DIR so that
1083 * subsequent scanning for manpath directories is rooted
1084 * relative to the same position.
1085 */
1086
1087 if (chdir(MAN_DIR) == -1) {
1088 warn("MAN_DIR: %s", MAN_DIR);
1089 pg_error_internal();
1090 return EXIT_FAILURE;
1091 }
1092
1093 memset(&req, 0, sizeof(struct req));
1094 req.q.equal = 1;
1095 parse_manpath_conf(&req);
1096
1097 /* Parse the path info and the query string. */
1098
1099 if ((path = getenv("PATH_INFO")) == NULL)
1100 path = "";
1101 else if (*path == '/')
1102 path++;
1103
1104 if (*path != '\0') {
1105 parse_path_info(&req, path);
1106 if (req.q.manpath == NULL || req.q.sec == NULL ||
1107 *req.q.query == '\0' || access(path, F_OK) == -1)
1108 path = "";
1109 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1110 parse_query_string(&req, querystring);
1111
1112 /* Validate parsed data and add defaults. */
1113
1114 if (req.q.manpath == NULL)
1115 req.q.manpath = mandoc_strdup(req.p[0]);
1116 else if ( ! validate_manpath(&req, req.q.manpath)) {
1117 pg_error_badrequest(
1118 "You specified an invalid manpath.");
1119 return EXIT_FAILURE;
1120 }
1121
1122 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1123 pg_error_badrequest(
1124 "You specified an invalid architecture.");
1125 return EXIT_FAILURE;
1126 }
1127
1128 /* Dispatch to the three different pages. */
1129
1130 if ('\0' != *path)
1131 pg_show(&req, path);
1132 else if (NULL != req.q.query)
1133 pg_search(&req);
1134 else
1135 pg_index(&req);
1136
1137 free(req.q.manpath);
1138 free(req.q.arch);
1139 free(req.q.sec);
1140 free(req.q.query);
1141 for (i = 0; i < (int)req.psz; i++)
1142 free(req.p[i]);
1143 free(req.p);
1144 return EXIT_SUCCESS;
1145 }
1146
1147 /*
1148 * Translate PATH_INFO to a query.
1149 */
1150 static void
1151 parse_path_info(struct req *req, const char *path)
1152 {
1153 const char *name, *sec, *end;
1154
1155 req->isquery = 0;
1156 req->q.equal = 1;
1157 req->q.manpath = NULL;
1158 req->q.arch = NULL;
1159
1160 /* Mandatory manual page name. */
1161 if ((name = strrchr(path, '/')) == NULL)
1162 name = path;
1163 else
1164 name++;
1165
1166 /* Optional trailing section. */
1167 sec = strrchr(name, '.');
1168 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1169 req->q.query = mandoc_strndup(name, sec - name - 1);
1170 req->q.sec = mandoc_strdup(sec);
1171 } else {
1172 req->q.query = mandoc_strdup(name);
1173 req->q.sec = NULL;
1174 }
1175
1176 /* Handle the case of name[.section] only. */
1177 if (name == path)
1178 return;
1179
1180 /* Optional manpath. */
1181 end = strchr(path, '/');
1182 req->q.manpath = mandoc_strndup(path, end - path);
1183 if (validate_manpath(req, req->q.manpath)) {
1184 path = end + 1;
1185 if (name == path)
1186 return;
1187 } else {
1188 free(req->q.manpath);
1189 req->q.manpath = NULL;
1190 }
1191
1192 /* Optional section. */
1193 if (strncmp(path, "man", 3) == 0) {
1194 path += 3;
1195 end = strchr(path, '/');
1196 free(req->q.sec);
1197 req->q.sec = mandoc_strndup(path, end - path);
1198 path = end + 1;
1199 if (name == path)
1200 return;
1201 }
1202
1203 /* Optional architecture. */
1204 end = strchr(path, '/');
1205 if (end + 1 != name) {
1206 pg_error_badrequest(
1207 "You specified too many directory components.");
1208 exit(EXIT_FAILURE);
1209 }
1210 req->q.arch = mandoc_strndup(path, end - path);
1211 if (validate_arch(req->q.arch) == 0) {
1212 pg_error_badrequest(
1213 "You specified an invalid directory component.");
1214 exit(EXIT_FAILURE);
1215 }
1216 }
1217
1218 /*
1219 * Scan for indexable paths.
1220 */
1221 static void
1222 parse_manpath_conf(struct req *req)
1223 {
1224 FILE *fp;
1225 char *dp;
1226 size_t dpsz;
1227 ssize_t len;
1228
1229 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1230 warn("%s/manpath.conf", MAN_DIR);
1231 pg_error_internal();
1232 exit(EXIT_FAILURE);
1233 }
1234
1235 dp = NULL;
1236 dpsz = 0;
1237
1238 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1239 if (dp[len - 1] == '\n')
1240 dp[--len] = '\0';
1241 req->p = mandoc_realloc(req->p,
1242 (req->psz + 1) * sizeof(char *));
1243 if ( ! validate_urifrag(dp)) {
1244 warnx("%s/manpath.conf contains "
1245 "unsafe path \"%s\"", MAN_DIR, dp);
1246 pg_error_internal();
1247 exit(EXIT_FAILURE);
1248 }
1249 if (strchr(dp, '/') != NULL) {
1250 warnx("%s/manpath.conf contains "
1251 "path with slash \"%s\"", MAN_DIR, dp);
1252 pg_error_internal();
1253 exit(EXIT_FAILURE);
1254 }
1255 req->p[req->psz++] = dp;
1256 dp = NULL;
1257 dpsz = 0;
1258 }
1259 free(dp);
1260
1261 if (req->p == NULL) {
1262 warnx("%s/manpath.conf is empty", MAN_DIR);
1263 pg_error_internal();
1264 exit(EXIT_FAILURE);
1265 }
1266 }