]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
During validation, drop .br before a text line starting with a
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.161 2018/10/19 21:10:56 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * The search parameters of one request, as handed to the search
 * function.  String members are either NULL or owned by this
 * structure; main() releases them with free(3).
 */
struct query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
56
57 struct req {
58 struct query q;
59 char **p; /* array of available manpaths */
60 size_t psz; /* number of available manpaths */
61 int isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/*
 * Selects whether resp_searchform() marks the query input box
 * with the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no element requests the focus */
	FOCUS_QUERY		/* the query input box requests the focus */
};
68
/* Forward declarations of all file-local functions, sorted by name. */
static void html_print(const char *p);
static void html_putchar(char c);
static int http_decode(char *p);
static void http_encode(const char *p);
static void parse_manpath_conf(struct req *req);
static void parse_path_info(struct req *req, const char *path);
static void parse_query_string(struct req *req, const char *qs);
static void pg_error_badrequest(const char *msg);
static void pg_error_internal(void);
static void pg_index(const struct req *req);
static void pg_noresult(const struct req *req, const char *msg);
static void pg_redirect(const struct req *req, const char *name);
static void pg_search(const struct req *req);
static void pg_searchres(const struct req *req,
		struct manpage *r, size_t sz);
static void pg_show(struct req *req, const char *fullpath);
static void resp_begin_html(int code, const char *msg, const char *file);
static void resp_begin_http(int code, const char *msg);
static void resp_catman(const struct req *req, const char *file);
static void resp_copy(const char *filename);
static void resp_end_html(void);
static void resp_format(const struct req *req, const char *file);
static void resp_searchform(const struct req *req, enum focus focus);
static void resp_show(const struct req *req, const char *file);
static void set_query_attr(char **attr, char **val);
static int validate_arch(const char *arch);
static int validate_filename(const char *file);
static int validate_manpath(const struct req *req, const char *manpath);
static int validate_urifrag(const char *frag);
98
99 static const char *scriptname = SCRIPT_NAME;
100
/*
 * Display priority of the sections 1 to 9, indexed by section digit
 * minus one; pg_searchres() prefers the page with the lowest value.
 */
static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; parallel to sec_names[]. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector; parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
119
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64",
	"hppa", "i386", "landisk",
	"loongson", "luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc", "sparc64",
	"amiga", "arc", "armish", "arm32",
	"atari", "aviion", "beagle", "cats",
	"hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos",
	"pmax", "powerpc", "solbourne", "sparc",
	"sun3", "vax", "wgrisc", "x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
135
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by their entities.
 * Bytes outside ASCII are passed through unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		entity = NULL;
		break;
	}

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
162
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
176
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming == '\0') {
		free(incoming);
		incoming = NULL;
	}
	*attr = incoming;
}
193
194 /*
195 * Parse the QUERY_STRING for key-value pairs
196 * and store the values into the query structure.
197 */
198 static void
199 parse_query_string(struct req *req, const char *qs)
200 {
201 char *key, *val;
202 size_t keysz, valsz;
203
204 req->isquery = 1;
205 req->q.manpath = NULL;
206 req->q.arch = NULL;
207 req->q.sec = NULL;
208 req->q.query = NULL;
209 req->q.equal = 1;
210
211 key = val = NULL;
212 while (*qs != '\0') {
213
214 /* Parse one key. */
215
216 keysz = strcspn(qs, "=;&");
217 key = mandoc_strndup(qs, keysz);
218 qs += keysz;
219 if (*qs != '=')
220 goto next;
221
222 /* Parse one value. */
223
224 valsz = strcspn(++qs, ";&");
225 val = mandoc_strndup(qs, valsz);
226 qs += valsz;
227
228 /* Decode and catch encoding errors. */
229
230 if ( ! (http_decode(key) && http_decode(val)))
231 goto next;
232
233 /* Handle key-value pairs. */
234
235 if ( ! strcmp(key, "query"))
236 set_query_attr(&req->q.query, &val);
237
238 else if ( ! strcmp(key, "apropos"))
239 req->q.equal = !strcmp(val, "0");
240
241 else if ( ! strcmp(key, "manpath")) {
242 #ifdef COMPAT_OLDURI
243 if ( ! strncmp(val, "OpenBSD ", 8)) {
244 val[7] = '-';
245 if ('C' == val[8])
246 val[8] = 'c';
247 }
248 #endif
249 set_query_attr(&req->q.manpath, &val);
250 }
251
252 else if ( ! (strcmp(key, "sec")
253 #ifdef COMPAT_OLDURI
254 && strcmp(key, "sektion")
255 #endif
256 )) {
257 if ( ! strcmp(val, "0"))
258 *val = '\0';
259 set_query_attr(&req->q.sec, &val);
260 }
261
262 else if ( ! strcmp(key, "arch")) {
263 if ( ! strcmp(val, "default"))
264 *val = '\0';
265 set_query_attr(&req->q.arch, &val);
266 }
267
268 /*
269 * The key must be freed in any case.
270 * The val may have been handed over to the query
271 * structure, in which case it is now NULL.
272 */
273 next:
274 free(key);
275 key = NULL;
276 free(val);
277 val = NULL;
278
279 if (*qs != '\0')
280 qs++;
281 }
282 }
283
284 /*
285 * HTTP-decode a string. The standard explanation is that this turns
286 * "%4e+foo" into "n foo" in the regular way. This is done in-place
287 * over the allocated string.
288 */
289 static int
290 http_decode(char *p)
291 {
292 char hex[3];
293 char *q;
294 int c;
295
296 hex[2] = '\0';
297
298 q = p;
299 for ( ; '\0' != *p; p++, q++) {
300 if ('%' == *p) {
301 if ('\0' == (hex[0] = *(p + 1)))
302 return 0;
303 if ('\0' == (hex[1] = *(p + 2)))
304 return 0;
305 if (1 != sscanf(hex, "%x", &c))
306 return 0;
307 if ('\0' == c)
308 return 0;
309
310 *q = (char)c;
311 p += 2;
312 } else
313 *q = '+' == *p ? ' ' : *p;
314 }
315
316 *q = '\0';
317 return 1;
318 }
319
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and "-._~" are copied verbatim;
 * every other byte is written as '%' and two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%02.2X", uc);
	}
}
331
/*
 * Write the CGI response headers and flush them.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
346
/*
 * Copy the content of a file verbatim to standard output.
 * This is best effort: a file that cannot be opened is silently
 * skipped, and copying stops quietly on the first read or write
 * error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep stdio output ordered before the raw writes below. */
	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may accept fewer bytes than requested,
		 * in particular on pipes, so keep going until the
		 * whole chunk is out.
		 */
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				goto out;
			}
			off += nw;
		}
	}
out:
	close(fd);
}
361
362 static void
363 resp_begin_html(int code, const char *msg, const char *file)
364 {
365 char *cp;
366
367 resp_begin_http(code, msg);
368
369 printf("<!DOCTYPE html>\n"
370 "<html>\n"
371 "<head>\n"
372 " <meta charset=\"UTF-8\"/>\n"
373 " <meta name=\"viewport\""
374 " content=\"width=device-width, initial-scale=1.0\">\n"
375 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
376 " type=\"text/css\" media=\"all\">\n"
377 " <title>",
378 CSS_DIR);
379 if (file != NULL) {
380 if ((cp = strrchr(file, '/')) != NULL)
381 file = cp + 1;
382 if ((cp = strrchr(file, '.')) != NULL) {
383 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
384 } else
385 printf("%s - ", file);
386 }
387 printf("%s</title>\n"
388 "</head>\n"
389 "<body>\n",
390 CUSTOMIZE_TITLE);
391
392 resp_copy(MAN_DIR "/header.html");
393 }
394
395 static void
396 resp_end_html(void)
397 {
398
399 resp_copy(MAN_DIR "/footer.html");
400
401 puts("</body>\n"
402 "</html>");
403 }
404
405 static void
406 resp_searchform(const struct req *req, enum focus focus)
407 {
408 int i;
409
410 printf("<form action=\"/%s\" method=\"get\">\n"
411 " <fieldset>\n"
412 " <legend>Manual Page Search Parameters</legend>\n",
413 scriptname);
414
415 /* Write query input box. */
416
417 printf(" <input type=\"search\" name=\"query\" value=\"");
418 if (req->q.query != NULL)
419 html_print(req->q.query);
420 printf( "\" size=\"40\"");
421 if (focus == FOCUS_QUERY)
422 printf(" autofocus");
423 puts(">");
424
425 /* Write submission buttons. */
426
427 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
428 "man</button>\n"
429 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
430 "apropos</button>\n"
431 " <br/>\n");
432
433 /* Write section selector. */
434
435 puts(" <select name=\"sec\">");
436 for (i = 0; i < sec_MAX; i++) {
437 printf(" <option value=\"%s\"", sec_numbers[i]);
438 if (NULL != req->q.sec &&
439 0 == strcmp(sec_numbers[i], req->q.sec))
440 printf(" selected=\"selected\"");
441 printf(">%s</option>\n", sec_names[i]);
442 }
443 puts(" </select>");
444
445 /* Write architecture selector. */
446
447 printf( " <select name=\"arch\">\n"
448 " <option value=\"default\"");
449 if (NULL == req->q.arch)
450 printf(" selected=\"selected\"");
451 puts(">All Architectures</option>");
452 for (i = 0; i < arch_MAX; i++) {
453 printf(" <option");
454 if (NULL != req->q.arch &&
455 0 == strcmp(arch_names[i], req->q.arch))
456 printf(" selected=\"selected\"");
457 printf(">%s</option>\n", arch_names[i]);
458 }
459 puts(" </select>");
460
461 /* Write manpath selector. */
462
463 if (req->psz > 1) {
464 puts(" <select name=\"manpath\">");
465 for (i = 0; i < (int)req->psz; i++) {
466 printf(" <option");
467 if (strcmp(req->q.manpath, req->p[i]) == 0)
468 printf(" selected=\"selected\"");
469 printf(">");
470 html_print(req->p[i]);
471 puts("</option>");
472 }
473 puts(" </select>");
474 }
475
476 puts(" </fieldset>\n"
477 "</form>");
478 }
479
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if (isalnum((unsigned char)*cp) == 0)
			return 0;
	}
	return 1;
}
493
494 static int
495 validate_manpath(const struct req *req, const char* manpath)
496 {
497 size_t i;
498
499 for (i = 0; i < req->psz; i++)
500 if ( ! strcmp(manpath, req->p[i]))
501 return 1;
502
503 return 0;
504 }
505
506 static int
507 validate_arch(const char *arch)
508 {
509 int i;
510
511 for (i = 0; i < arch_MAX; i++)
512 if (strcmp(arch, arch_names[i]) == 0)
513 return 1;
514
515 return 0;
516 }
517
/*
 * Check that a file name is acceptable for display.
 * After an optional leading "./", the name must begin with "man"
 * or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Reject any attempt to step into a parent directory. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
528
529 static void
530 pg_index(const struct req *req)
531 {
532
533 resp_begin_html(200, NULL, NULL);
534 resp_searchform(req, FOCUS_QUERY);
535 printf("<p>\n"
536 "This web interface is documented in the\n"
537 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
538 "manual, and the\n"
539 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
540 "manual explains the query syntax.\n"
541 "</p>\n",
542 scriptname, *scriptname == '\0' ? "" : "/",
543 scriptname, *scriptname == '\0' ? "" : "/");
544 resp_end_html();
545 }
546
547 static void
548 pg_noresult(const struct req *req, const char *msg)
549 {
550 resp_begin_html(200, NULL, NULL);
551 resp_searchform(req, FOCUS_QUERY);
552 puts("<p>");
553 puts(msg);
554 puts("</p>");
555 resp_end_html();
556 }
557
558 static void
559 pg_error_badrequest(const char *msg)
560 {
561
562 resp_begin_html(400, "Bad Request", NULL);
563 puts("<h1>Bad Request</h1>\n"
564 "<p>\n");
565 puts(msg);
566 printf("Try again from the\n"
567 "<a href=\"/%s\">main page</a>.\n"
568 "</p>", scriptname);
569 resp_end_html();
570 }
571
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
579
580 static void
581 pg_redirect(const struct req *req, const char *name)
582 {
583 printf("Status: 303 See Other\r\n"
584 "Location: /");
585 if (*scriptname != '\0')
586 printf("%s/", scriptname);
587 if (strcmp(req->q.manpath, req->p[0]))
588 printf("%s/", req->q.manpath);
589 if (req->q.arch != NULL)
590 printf("%s/", req->q.arch);
591 http_encode(name);
592 if (req->q.sec != NULL) {
593 putchar('.');
594 http_encode(req->q.sec);
595 }
596 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
597 }
598
599 static void
600 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
601 {
602 char *arch, *archend;
603 const char *sec;
604 size_t i, iuse;
605 int archprio, archpriouse;
606 int prio, priouse;
607
608 for (i = 0; i < sz; i++) {
609 if (validate_filename(r[i].file))
610 continue;
611 warnx("invalid filename %s in %s database",
612 r[i].file, req->q.manpath);
613 pg_error_internal();
614 return;
615 }
616
617 if (req->isquery && sz == 1) {
618 /*
619 * If we have just one result, then jump there now
620 * without any delay.
621 */
622 printf("Status: 303 See Other\r\n"
623 "Location: /");
624 if (*scriptname != '\0')
625 printf("%s/", scriptname);
626 if (strcmp(req->q.manpath, req->p[0]))
627 printf("%s/", req->q.manpath);
628 printf("%s\r\n"
629 "Content-Type: text/html; charset=utf-8\r\n\r\n",
630 r[0].file);
631 return;
632 }
633
634 /*
635 * In man(1) mode, show one of the pages
636 * even if more than one is found.
637 */
638
639 iuse = 0;
640 if (req->q.equal || sz == 1) {
641 priouse = 20;
642 archpriouse = 3;
643 for (i = 0; i < sz; i++) {
644 sec = r[i].file;
645 sec += strcspn(sec, "123456789");
646 if (sec[0] == '\0')
647 continue;
648 prio = sec_prios[sec[0] - '1'];
649 if (sec[1] != '/')
650 prio += 10;
651 if (req->q.arch == NULL) {
652 archprio =
653 ((arch = strchr(sec + 1, '/'))
654 == NULL) ? 3 :
655 ((archend = strchr(arch + 1, '/'))
656 == NULL) ? 0 :
657 strncmp(arch, "amd64/",
658 archend - arch) ? 2 : 1;
659 if (archprio < archpriouse) {
660 archpriouse = archprio;
661 priouse = prio;
662 iuse = i;
663 continue;
664 }
665 if (archprio > archpriouse)
666 continue;
667 }
668 if (prio >= priouse)
669 continue;
670 priouse = prio;
671 iuse = i;
672 }
673 resp_begin_html(200, NULL, r[iuse].file);
674 } else
675 resp_begin_html(200, NULL, NULL);
676
677 resp_searchform(req,
678 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
679
680 if (sz > 1) {
681 puts("<table class=\"results\">");
682 for (i = 0; i < sz; i++) {
683 printf(" <tr>\n"
684 " <td>"
685 "<a class=\"Xr\" href=\"/");
686 if (*scriptname != '\0')
687 printf("%s/", scriptname);
688 if (strcmp(req->q.manpath, req->p[0]))
689 printf("%s/", req->q.manpath);
690 printf("%s\">", r[i].file);
691 html_print(r[i].names);
692 printf("</a></td>\n"
693 " <td><span class=\"Nd\">");
694 html_print(r[i].output);
695 puts("</span></td>\n"
696 " </tr>");
697 }
698 puts("</table>");
699 }
700
701 if (req->q.equal || sz == 1) {
702 puts("<hr>");
703 resp_show(req, r[iuse].file);
704 }
705
706 resp_end_html();
707 }
708
/*
 * Write a preformatted manual page as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, certain pairs of
 * punctuation characters become '*' or '+', and any other "X\bY"
 * becomes bold Y.  All text is HTML-escaped.
 * If the file cannot be opened, a short message is written instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
845
846 static void
847 resp_format(const struct req *req, const char *file)
848 {
849 struct manoutput conf;
850 struct mparse *mp;
851 struct roff_man *man;
852 void *vp;
853 int fd;
854 int usepath;
855
856 if (-1 == (fd = open(file, O_RDONLY, 0))) {
857 puts("<p>You specified an invalid manual file.</p>");
858 return;
859 }
860
861 mchars_alloc();
862 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
863 MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
864 mparse_readfd(mp, fd, file);
865 close(fd);
866
867 memset(&conf, 0, sizeof(conf));
868 conf.fragment = 1;
869 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
870 conf.toc = 1;
871 usepath = strcmp(req->q.manpath, req->p[0]);
872 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
873 scriptname, *scriptname == '\0' ? "" : "/",
874 usepath ? req->q.manpath : "", usepath ? "/" : "");
875
876 mparse_result(mp, &man, NULL);
877 if (man == NULL) {
878 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
879 pg_error_internal();
880 mparse_free(mp);
881 mchars_free();
882 return;
883 }
884
885 vp = html_alloc(&conf);
886
887 if (man->macroset == MACROSET_MDOC) {
888 mdoc_validate(man);
889 html_mdoc(vp, man);
890 } else {
891 man_validate(man);
892 html_man(vp, man);
893 }
894
895 html_free(vp);
896 mparse_free(mp);
897 mchars_free();
898 free(conf.man);
899 free(conf.style);
900 }
901
/*
 * Write one manual page as HTML.  After skipping an optional
 * leading "./", files whose name starts with 'c' are handed to
 * resp_catman(), all others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
914
915 static void
916 pg_show(struct req *req, const char *fullpath)
917 {
918 char *manpath;
919 const char *file;
920
921 if ((file = strchr(fullpath, '/')) == NULL) {
922 pg_error_badrequest(
923 "You did not specify a page to show.");
924 return;
925 }
926 manpath = mandoc_strndup(fullpath, file - fullpath);
927 file++;
928
929 if ( ! validate_manpath(req, manpath)) {
930 pg_error_badrequest(
931 "You specified an invalid manpath.");
932 free(manpath);
933 return;
934 }
935
936 /*
937 * Begin by chdir()ing into the manpath.
938 * This way we can pick up the database files, which are
939 * relative to the manpath root.
940 */
941
942 if (chdir(manpath) == -1) {
943 warn("chdir %s", manpath);
944 pg_error_internal();
945 free(manpath);
946 return;
947 }
948 free(manpath);
949
950 if ( ! validate_filename(file)) {
951 pg_error_badrequest(
952 "You specified an invalid manual file.");
953 return;
954 }
955
956 resp_begin_html(200, NULL, file);
957 resp_searchform(req, FOCUS_NONE);
958 resp_show(req, file);
959 resp_end_html();
960 }
961
962 static void
963 pg_search(const struct req *req)
964 {
965 struct mansearch search;
966 struct manpaths paths;
967 struct manpage *res;
968 char **argv;
969 char *query, *rp, *wp;
970 size_t ressz;
971 int argc;
972
973 /*
974 * Begin by chdir()ing into the root of the manpath.
975 * This way we can pick up the database files, which are
976 * relative to the manpath root.
977 */
978
979 if (chdir(req->q.manpath) == -1) {
980 warn("chdir %s", req->q.manpath);
981 pg_error_internal();
982 return;
983 }
984
985 search.arch = req->q.arch;
986 search.sec = req->q.sec;
987 search.outkey = "Nd";
988 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
989 search.firstmatch = 1;
990
991 paths.sz = 1;
992 paths.paths = mandoc_malloc(sizeof(char *));
993 paths.paths[0] = mandoc_strdup(".");
994
995 /*
996 * Break apart at spaces with backslash-escaping.
997 */
998
999 argc = 0;
1000 argv = NULL;
1001 rp = query = mandoc_strdup(req->q.query);
1002 for (;;) {
1003 while (isspace((unsigned char)*rp))
1004 rp++;
1005 if (*rp == '\0')
1006 break;
1007 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1008 argv[argc++] = wp = rp;
1009 for (;;) {
1010 if (isspace((unsigned char)*rp)) {
1011 *wp = '\0';
1012 rp++;
1013 break;
1014 }
1015 if (rp[0] == '\\' && rp[1] != '\0')
1016 rp++;
1017 if (wp != rp)
1018 *wp = *rp;
1019 if (*rp == '\0')
1020 break;
1021 wp++;
1022 rp++;
1023 }
1024 }
1025
1026 res = NULL;
1027 ressz = 0;
1028 if (req->isquery && req->q.equal && argc == 1)
1029 pg_redirect(req, argv[0]);
1030 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1031 pg_noresult(req, "You entered an invalid query.");
1032 else if (ressz == 0)
1033 pg_noresult(req, "No results found.");
1034 else
1035 pg_searchres(req, res, ressz);
1036
1037 free(query);
1038 mansearch_free(res, ressz);
1039 free(paths.paths[0]);
1040 free(paths.paths);
1041 }
1042
1043 int
1044 main(void)
1045 {
1046 struct req req;
1047 struct itimerval itimer;
1048 const char *path;
1049 const char *querystring;
1050 int i;
1051
1052 #if HAVE_PLEDGE
1053 /*
1054 * The "rpath" pledge could be revoked after mparse_readfd()
1055 * if the file desciptor to "/footer.html" would be opened
1056 * up front, but it's probably not worth the complication
1057 * of the code it would cause: it would require scattering
1058 * pledge() calls in multiple low-level resp_*() functions.
1059 */
1060
1061 if (pledge("stdio rpath", NULL) == -1) {
1062 warn("pledge");
1063 pg_error_internal();
1064 return EXIT_FAILURE;
1065 }
1066 #endif
1067
1068 /* Poor man's ReDoS mitigation. */
1069
1070 itimer.it_value.tv_sec = 2;
1071 itimer.it_value.tv_usec = 0;
1072 itimer.it_interval.tv_sec = 2;
1073 itimer.it_interval.tv_usec = 0;
1074 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1075 warn("setitimer");
1076 pg_error_internal();
1077 return EXIT_FAILURE;
1078 }
1079
1080 /*
1081 * First we change directory into the MAN_DIR so that
1082 * subsequent scanning for manpath directories is rooted
1083 * relative to the same position.
1084 */
1085
1086 if (chdir(MAN_DIR) == -1) {
1087 warn("MAN_DIR: %s", MAN_DIR);
1088 pg_error_internal();
1089 return EXIT_FAILURE;
1090 }
1091
1092 memset(&req, 0, sizeof(struct req));
1093 req.q.equal = 1;
1094 parse_manpath_conf(&req);
1095
1096 /* Parse the path info and the query string. */
1097
1098 if ((path = getenv("PATH_INFO")) == NULL)
1099 path = "";
1100 else if (*path == '/')
1101 path++;
1102
1103 if (*path != '\0') {
1104 parse_path_info(&req, path);
1105 if (req.q.manpath == NULL || req.q.sec == NULL ||
1106 *req.q.query == '\0' || access(path, F_OK) == -1)
1107 path = "";
1108 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1109 parse_query_string(&req, querystring);
1110
1111 /* Validate parsed data and add defaults. */
1112
1113 if (req.q.manpath == NULL)
1114 req.q.manpath = mandoc_strdup(req.p[0]);
1115 else if ( ! validate_manpath(&req, req.q.manpath)) {
1116 pg_error_badrequest(
1117 "You specified an invalid manpath.");
1118 return EXIT_FAILURE;
1119 }
1120
1121 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1122 pg_error_badrequest(
1123 "You specified an invalid architecture.");
1124 return EXIT_FAILURE;
1125 }
1126
1127 /* Dispatch to the three different pages. */
1128
1129 if ('\0' != *path)
1130 pg_show(&req, path);
1131 else if (NULL != req.q.query)
1132 pg_search(&req);
1133 else
1134 pg_index(&req);
1135
1136 free(req.q.manpath);
1137 free(req.q.arch);
1138 free(req.q.sec);
1139 free(req.q.query);
1140 for (i = 0; i < (int)req.psz; i++)
1141 free(req.p[i]);
1142 free(req.p);
1143 return EXIT_SUCCESS;
1144 }
1145
1146 /*
1147 * Translate PATH_INFO to a query.
1148 */
1149 static void
1150 parse_path_info(struct req *req, const char *path)
1151 {
1152 const char *name, *sec, *end;
1153
1154 req->isquery = 0;
1155 req->q.equal = 1;
1156 req->q.manpath = NULL;
1157 req->q.arch = NULL;
1158
1159 /* Mandatory manual page name. */
1160 if ((name = strrchr(path, '/')) == NULL)
1161 name = path;
1162 else
1163 name++;
1164
1165 /* Optional trailing section. */
1166 sec = strrchr(name, '.');
1167 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1168 req->q.query = mandoc_strndup(name, sec - name - 1);
1169 req->q.sec = mandoc_strdup(sec);
1170 } else {
1171 req->q.query = mandoc_strdup(name);
1172 req->q.sec = NULL;
1173 }
1174
1175 /* Handle the case of name[.section] only. */
1176 if (name == path)
1177 return;
1178
1179 /* Optional manpath. */
1180 end = strchr(path, '/');
1181 req->q.manpath = mandoc_strndup(path, end - path);
1182 if (validate_manpath(req, req->q.manpath)) {
1183 path = end + 1;
1184 if (name == path)
1185 return;
1186 } else {
1187 free(req->q.manpath);
1188 req->q.manpath = NULL;
1189 }
1190
1191 /* Optional section. */
1192 if (strncmp(path, "man", 3) == 0) {
1193 path += 3;
1194 end = strchr(path, '/');
1195 free(req->q.sec);
1196 req->q.sec = mandoc_strndup(path, end - path);
1197 path = end + 1;
1198 if (name == path)
1199 return;
1200 }
1201
1202 /* Optional architecture. */
1203 end = strchr(path, '/');
1204 if (end + 1 != name) {
1205 pg_error_badrequest(
1206 "You specified too many directory components.");
1207 exit(EXIT_FAILURE);
1208 }
1209 req->q.arch = mandoc_strndup(path, end - path);
1210 if (validate_arch(req->q.arch) == 0) {
1211 pg_error_badrequest(
1212 "You specified an invalid directory component.");
1213 exit(EXIT_FAILURE);
1214 }
1215 }
1216
1217 /*
1218 * Scan for indexable paths.
1219 */
1220 static void
1221 parse_manpath_conf(struct req *req)
1222 {
1223 FILE *fp;
1224 char *dp;
1225 size_t dpsz;
1226 ssize_t len;
1227
1228 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1229 warn("%s/manpath.conf", MAN_DIR);
1230 pg_error_internal();
1231 exit(EXIT_FAILURE);
1232 }
1233
1234 dp = NULL;
1235 dpsz = 0;
1236
1237 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1238 if (dp[len - 1] == '\n')
1239 dp[--len] = '\0';
1240 req->p = mandoc_realloc(req->p,
1241 (req->psz + 1) * sizeof(char *));
1242 if ( ! validate_urifrag(dp)) {
1243 warnx("%s/manpath.conf contains "
1244 "unsafe path \"%s\"", MAN_DIR, dp);
1245 pg_error_internal();
1246 exit(EXIT_FAILURE);
1247 }
1248 if (strchr(dp, '/') != NULL) {
1249 warnx("%s/manpath.conf contains "
1250 "path with slash \"%s\"", MAN_DIR, dp);
1251 pg_error_internal();
1252 exit(EXIT_FAILURE);
1253 }
1254 req->p[req->psz++] = dp;
1255 dp = NULL;
1256 dpsz = 0;
1257 }
1258 free(dp);
1259
1260 if (req->p == NULL) {
1261 warnx("%s/manpath.conf is empty", MAN_DIR);
1262 pg_error_internal();
1263 exit(EXIT_FAILURE);
1264 }
1265 }