/*
 * Extracted from the gitweb blob view at git.cameronkatri.com:
 * mandoc.git / cgi.c
 * Commit subject (truncated by the extraction):
 * "When finding a bogus database entry,"
 */
1 /* $Id: cgi.c,v 1.156 2017/06/24 14:38:32 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * Search parameters, in the form handed to the search function.
 */
struct query {
	/* desired manual directory */
	char	*manpath;
	/* architecture */
	char	*arch;
	/* manual section */
	char	*sec;
	/* query expression, still unparsed */
	char	*query;
	/* nonzero: match whole names rather than substrings */
	int	 equal;
};
56
57 struct req {
58 struct query q;
59 char **p; /* array of available manpaths */
60 size_t psz; /* number of available manpaths */
61 int isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/* Selects whether the query input box of the search form is autofocused. */
enum focus {
	FOCUS_NONE	= 0,
	FOCUS_QUERY	= 1
};
68
69 static void html_print(const char *);
70 static void html_putchar(char);
71 static int http_decode(char *);
72 static void parse_manpath_conf(struct req *);
73 static void parse_path_info(struct req *req, const char *path);
74 static void parse_query_string(struct req *, const char *);
75 static void pg_error_badrequest(const char *);
76 static void pg_error_internal(void);
77 static void pg_index(const struct req *);
78 static void pg_noresult(const struct req *, const char *);
79 static void pg_redirect(const struct req *, const char *);
80 static void pg_search(const struct req *);
81 static void pg_searchres(const struct req *,
82 struct manpage *, size_t);
83 static void pg_show(struct req *, const char *);
84 static void resp_begin_html(int, const char *, const char *);
85 static void resp_begin_http(int, const char *);
86 static void resp_catman(const struct req *, const char *);
87 static void resp_copy(const char *);
88 static void resp_end_html(void);
89 static void resp_format(const struct req *, const char *);
90 static void resp_searchform(const struct req *, enum focus);
91 static void resp_show(const struct req *, const char *);
92 static void set_query_attr(char **, char **);
93 static int validate_filename(const char *);
94 static int validate_manpath(const struct req *, const char *);
95 static int validate_urifrag(const char *);
96
97 static const char *scriptname = SCRIPT_NAME;
98
/*
 * Priority per section digit, indexed by (digit - '1');
 * pg_searchres() prefers the page with the smallest value.
 */
static const int sec_prios[] = { 1, 4, 5, 8, 6, 3, 7, 2, 9 };

/* Option values of the section selector, parallel to sec_names. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered by the architecture selector. */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc64",
	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133
/*
 * Write one character to standard output, replacing the four
 * HTML-special characters (double quote, ampersand, greater-than,
 * less-than) by their entities.
 * Non-ASCII bytes are passed through unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
160
/*
 * Print a string through html_putchar(), one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
174
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	free(*attr);

	if (**val != '\0')
		*attr = *val;
	else {
		free(*val);
		*attr = NULL;
	}

	*val = NULL;
}
191
192 /*
193 * Parse the QUERY_STRING for key-value pairs
194 * and store the values into the query structure.
195 */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 char *key, *val;
200 size_t keysz, valsz;
201
202 req->isquery = 1;
203 req->q.manpath = NULL;
204 req->q.arch = NULL;
205 req->q.sec = NULL;
206 req->q.query = NULL;
207 req->q.equal = 1;
208
209 key = val = NULL;
210 while (*qs != '\0') {
211
212 /* Parse one key. */
213
214 keysz = strcspn(qs, "=;&");
215 key = mandoc_strndup(qs, keysz);
216 qs += keysz;
217 if (*qs != '=')
218 goto next;
219
220 /* Parse one value. */
221
222 valsz = strcspn(++qs, ";&");
223 val = mandoc_strndup(qs, valsz);
224 qs += valsz;
225
226 /* Decode and catch encoding errors. */
227
228 if ( ! (http_decode(key) && http_decode(val)))
229 goto next;
230
231 /* Handle key-value pairs. */
232
233 if ( ! strcmp(key, "query"))
234 set_query_attr(&req->q.query, &val);
235
236 else if ( ! strcmp(key, "apropos"))
237 req->q.equal = !strcmp(val, "0");
238
239 else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 if ( ! strncmp(val, "OpenBSD ", 8)) {
242 val[7] = '-';
243 if ('C' == val[8])
244 val[8] = 'c';
245 }
246 #endif
247 set_query_attr(&req->q.manpath, &val);
248 }
249
250 else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 && strcmp(key, "sektion")
253 #endif
254 )) {
255 if ( ! strcmp(val, "0"))
256 *val = '\0';
257 set_query_attr(&req->q.sec, &val);
258 }
259
260 else if ( ! strcmp(key, "arch")) {
261 if ( ! strcmp(val, "default"))
262 *val = '\0';
263 set_query_attr(&req->q.arch, &val);
264 }
265
266 /*
267 * The key must be freed in any case.
268 * The val may have been handed over to the query
269 * structure, in which case it is now NULL.
270 */
271 next:
272 free(key);
273 key = NULL;
274 free(val);
275 val = NULL;
276
277 if (*qs != '\0')
278 qs++;
279 }
280 }
281
282 /*
283 * HTTP-decode a string. The standard explanation is that this turns
284 * "%4e+foo" into "n foo" in the regular way. This is done in-place
285 * over the allocated string.
286 */
287 static int
288 http_decode(char *p)
289 {
290 char hex[3];
291 char *q;
292 int c;
293
294 hex[2] = '\0';
295
296 q = p;
297 for ( ; '\0' != *p; p++, q++) {
298 if ('%' == *p) {
299 if ('\0' == (hex[0] = *(p + 1)))
300 return 0;
301 if ('\0' == (hex[1] = *(p + 2)))
302 return 0;
303 if (1 != sscanf(hex, "%x", &c))
304 return 0;
305 if ('\0' == c)
306 return 0;
307
308 *q = (char)c;
309 p += 2;
310 } else
311 *q = '+' == *p ? ' ' : *p;
312 }
313
314 *q = '\0';
315 return 1;
316 }
317
/*
 * Emit the CGI response header and flush standard output.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
332
/*
 * Copy the content of a file verbatim to standard output.
 * This is best effort: a file that cannot be opened is silently
 * skipped, and copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep stdio output ordered before the raw write(2) calls. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may be short; push out the whole chunk. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw != -1)
				continue;
			if (errno != EINTR) {
				close(fd);
				return;
			}
			nw = 0;		/* interrupted: retry */
		}
	}
	close(fd);
}
347
348 static void
349 resp_begin_html(int code, const char *msg, const char *file)
350 {
351 char *cp;
352
353 resp_begin_http(code, msg);
354
355 printf("<!DOCTYPE html>\n"
356 "<html>\n"
357 "<head>\n"
358 " <meta charset=\"UTF-8\"/>\n"
359 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
360 " type=\"text/css\" media=\"all\">\n"
361 " <title>",
362 CSS_DIR);
363 if (file != NULL) {
364 if ((cp = strrchr(file, '/')) != NULL)
365 file = cp + 1;
366 if ((cp = strrchr(file, '.')) != NULL) {
367 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
368 } else
369 printf("%s - ", file);
370 }
371 printf("%s</title>\n"
372 "</head>\n"
373 "<body>\n",
374 CUSTOMIZE_TITLE);
375
376 resp_copy(MAN_DIR "/header.html");
377 }
378
379 static void
380 resp_end_html(void)
381 {
382
383 resp_copy(MAN_DIR "/footer.html");
384
385 puts("</body>\n"
386 "</html>");
387 }
388
389 static void
390 resp_searchform(const struct req *req, enum focus focus)
391 {
392 int i;
393
394 printf("<form action=\"/%s\" method=\"get\">\n"
395 " <fieldset>\n"
396 " <legend>Manual Page Search Parameters</legend>\n",
397 scriptname);
398
399 /* Write query input box. */
400
401 printf(" <input type=\"text\" name=\"query\" value=\"");
402 if (req->q.query != NULL)
403 html_print(req->q.query);
404 printf( "\" size=\"40\"");
405 if (focus == FOCUS_QUERY)
406 printf(" autofocus");
407 puts(">");
408
409 /* Write submission buttons. */
410
411 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
412 "man</button>\n"
413 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
414 "apropos</button>\n"
415 " <br/>\n");
416
417 /* Write section selector. */
418
419 puts(" <select name=\"sec\">");
420 for (i = 0; i < sec_MAX; i++) {
421 printf(" <option value=\"%s\"", sec_numbers[i]);
422 if (NULL != req->q.sec &&
423 0 == strcmp(sec_numbers[i], req->q.sec))
424 printf(" selected=\"selected\"");
425 printf(">%s</option>\n", sec_names[i]);
426 }
427 puts(" </select>");
428
429 /* Write architecture selector. */
430
431 printf( " <select name=\"arch\">\n"
432 " <option value=\"default\"");
433 if (NULL == req->q.arch)
434 printf(" selected=\"selected\"");
435 puts(">All Architectures</option>");
436 for (i = 0; i < arch_MAX; i++) {
437 printf(" <option value=\"%s\"", arch_names[i]);
438 if (NULL != req->q.arch &&
439 0 == strcmp(arch_names[i], req->q.arch))
440 printf(" selected=\"selected\"");
441 printf(">%s</option>\n", arch_names[i]);
442 }
443 puts(" </select>");
444
445 /* Write manpath selector. */
446
447 if (req->psz > 1) {
448 puts(" <select name=\"manpath\">");
449 for (i = 0; i < (int)req->psz; i++) {
450 printf(" <option ");
451 if (strcmp(req->q.manpath, req->p[i]) == 0)
452 printf("selected=\"selected\" ");
453 printf("value=\"");
454 html_print(req->p[i]);
455 printf("\">");
456 html_print(req->p[i]);
457 puts("</option>");
458 }
459 puts(" </select>");
460 }
461
462 puts(" </fieldset>\n"
463 "</form>");
464 }
465
/*
 * Check that a URI fragment consists of alphanumeric characters
 * and the characters '-', '.', '/', and '_' only.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
479
480 static int
481 validate_manpath(const struct req *req, const char* manpath)
482 {
483 size_t i;
484
485 for (i = 0; i < req->psz; i++)
486 if ( ! strcmp(manpath, req->p[i]))
487 return 1;
488
489 return 0;
490 }
491
/*
 * Check a manual page file name relative to the manpath root.
 * After an optional leading "./", the name must start with "man"
 * or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	return strncmp(file, "cat", 3) == 0;
}
502
503 static void
504 pg_index(const struct req *req)
505 {
506
507 resp_begin_html(200, NULL, NULL);
508 resp_searchform(req, FOCUS_QUERY);
509 printf("<p>\n"
510 "This web interface is documented in the\n"
511 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
512 "manual, and the\n"
513 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
514 "manual explains the query syntax.\n"
515 "</p>\n",
516 scriptname, *scriptname == '\0' ? "" : "/",
517 scriptname, *scriptname == '\0' ? "" : "/");
518 resp_end_html();
519 }
520
521 static void
522 pg_noresult(const struct req *req, const char *msg)
523 {
524 resp_begin_html(200, NULL, NULL);
525 resp_searchform(req, FOCUS_QUERY);
526 puts("<p>");
527 puts(msg);
528 puts("</p>");
529 resp_end_html();
530 }
531
532 static void
533 pg_error_badrequest(const char *msg)
534 {
535
536 resp_begin_html(400, "Bad Request", NULL);
537 puts("<h1>Bad Request</h1>\n"
538 "<p>\n");
539 puts(msg);
540 printf("Try again from the\n"
541 "<a href=\"/%s\">main page</a>.\n"
542 "</p>", scriptname);
543 resp_end_html();
544 }
545
/*
 * Error page with status 500 and a fixed message.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
553
554 static void
555 pg_redirect(const struct req *req, const char *name)
556 {
557 printf("Status: 303 See Other\r\n"
558 "Location: /");
559 if (*scriptname != '\0')
560 printf("%s/", scriptname);
561 if (strcmp(req->q.manpath, req->p[0]))
562 printf("%s/", req->q.manpath);
563 if (req->q.arch != NULL)
564 printf("%s/", req->q.arch);
565 printf("%s", name);
566 if (req->q.sec != NULL)
567 printf(".%s", req->q.sec);
568 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
569 }
570
571 static void
572 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
573 {
574 char *arch, *archend;
575 const char *sec;
576 size_t i, iuse;
577 int archprio, archpriouse;
578 int prio, priouse;
579
580 for (i = 0; i < sz; i++) {
581 if (validate_filename(r[i].file))
582 continue;
583 warnx("invalid filename %s in %s database",
584 r[i].file, req->q.manpath);
585 pg_error_internal();
586 return;
587 }
588
589 if (req->isquery && sz == 1) {
590 /*
591 * If we have just one result, then jump there now
592 * without any delay.
593 */
594 printf("Status: 303 See Other\r\n"
595 "Location: /");
596 if (*scriptname != '\0')
597 printf("%s/", scriptname);
598 if (strcmp(req->q.manpath, req->p[0]))
599 printf("%s/", req->q.manpath);
600 printf("%s\r\n"
601 "Content-Type: text/html; charset=utf-8\r\n\r\n",
602 r[0].file);
603 return;
604 }
605
606 /*
607 * In man(1) mode, show one of the pages
608 * even if more than one is found.
609 */
610
611 iuse = 0;
612 if (req->q.equal || sz == 1) {
613 priouse = 20;
614 archpriouse = 3;
615 for (i = 0; i < sz; i++) {
616 sec = r[i].file;
617 sec += strcspn(sec, "123456789");
618 if (sec[0] == '\0')
619 continue;
620 prio = sec_prios[sec[0] - '1'];
621 if (sec[1] != '/')
622 prio += 10;
623 if (req->q.arch == NULL) {
624 archprio =
625 ((arch = strchr(sec + 1, '/'))
626 == NULL) ? 3 :
627 ((archend = strchr(arch + 1, '/'))
628 == NULL) ? 0 :
629 strncmp(arch, "amd64/",
630 archend - arch) ? 2 : 1;
631 if (archprio < archpriouse) {
632 archpriouse = archprio;
633 priouse = prio;
634 iuse = i;
635 continue;
636 }
637 if (archprio > archpriouse)
638 continue;
639 }
640 if (prio >= priouse)
641 continue;
642 priouse = prio;
643 iuse = i;
644 }
645 resp_begin_html(200, NULL, r[iuse].file);
646 } else
647 resp_begin_html(200, NULL, NULL);
648
649 resp_searchform(req,
650 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
651
652 if (sz > 1) {
653 puts("<table class=\"results\">");
654 for (i = 0; i < sz; i++) {
655 printf(" <tr>\n"
656 " <td>"
657 "<a class=\"Xr\" href=\"/");
658 if (*scriptname != '\0')
659 printf("%s/", scriptname);
660 if (strcmp(req->q.manpath, req->p[0]))
661 printf("%s/", req->q.manpath);
662 printf("%s\">", r[i].file);
663 html_print(r[i].names);
664 printf("</a></td>\n"
665 " <td><span class=\"Nd\">");
666 html_print(r[i].output);
667 puts("</span></td>\n"
668 " </tr>");
669 }
670 puts("</table>");
671 }
672
673 if (req->q.equal || sz == 1) {
674 puts("<hr>");
675 resp_show(req, r[iuse].file);
676 }
677
678 resp_end_html();
679 }
680
/*
 * Render a preformatted (cat) page as HTML inside <pre>.
 * Backspace overstrike sequences are translated: "_\bX" becomes
 * italic X, a few two-character overstrike combinations become
 * '*' or '+', and any other "X\bY" becomes bold Y.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE	*f;
	char	*p;
	size_t	 sz;
	ssize_t	 len;
	int	 i;
	int	 italic, bold;

	(void)req;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
817
818 static void
819 resp_format(const struct req *req, const char *file)
820 {
821 struct manoutput conf;
822 struct mparse *mp;
823 struct roff_man *man;
824 void *vp;
825 int fd;
826 int usepath;
827
828 if (-1 == (fd = open(file, O_RDONLY, 0))) {
829 puts("<p>You specified an invalid manual file.</p>");
830 return;
831 }
832
833 mchars_alloc();
834 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
835 MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
836 mparse_readfd(mp, fd, file);
837 close(fd);
838
839 memset(&conf, 0, sizeof(conf));
840 conf.fragment = 1;
841 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
842 usepath = strcmp(req->q.manpath, req->p[0]);
843 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
844 scriptname, *scriptname == '\0' ? "" : "/",
845 usepath ? req->q.manpath : "", usepath ? "/" : "");
846
847 mparse_result(mp, &man, NULL);
848 if (man == NULL) {
849 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
850 pg_error_internal();
851 mparse_free(mp);
852 mchars_free();
853 return;
854 }
855
856 vp = html_alloc(&conf);
857
858 if (man->macroset == MACROSET_MDOC) {
859 mdoc_validate(man);
860 html_mdoc(vp, man);
861 } else {
862 man_validate(man);
863 html_man(vp, man);
864 }
865
866 html_free(vp);
867 mparse_free(mp);
868 mchars_free();
869 free(conf.man);
870 free(conf.style);
871 }
872
/*
 * Show one manual page file.  After dropping an optional leading
 * "./", names starting with 'c' are treated as preformatted pages,
 * all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = (file[0] == '.' && file[1] == '/') ? file + 2 : file;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
885
886 static void
887 pg_show(struct req *req, const char *fullpath)
888 {
889 char *manpath;
890 const char *file;
891
892 if ((file = strchr(fullpath, '/')) == NULL) {
893 pg_error_badrequest(
894 "You did not specify a page to show.");
895 return;
896 }
897 manpath = mandoc_strndup(fullpath, file - fullpath);
898 file++;
899
900 if ( ! validate_manpath(req, manpath)) {
901 pg_error_badrequest(
902 "You specified an invalid manpath.");
903 free(manpath);
904 return;
905 }
906
907 /*
908 * Begin by chdir()ing into the manpath.
909 * This way we can pick up the database files, which are
910 * relative to the manpath root.
911 */
912
913 if (chdir(manpath) == -1) {
914 warn("chdir %s", manpath);
915 pg_error_internal();
916 free(manpath);
917 return;
918 }
919 free(manpath);
920
921 if ( ! validate_filename(file)) {
922 pg_error_badrequest(
923 "You specified an invalid manual file.");
924 return;
925 }
926
927 resp_begin_html(200, NULL, file);
928 resp_searchform(req, FOCUS_NONE);
929 resp_show(req, file);
930 resp_end_html();
931 }
932
933 static void
934 pg_search(const struct req *req)
935 {
936 struct mansearch search;
937 struct manpaths paths;
938 struct manpage *res;
939 char **argv;
940 char *query, *rp, *wp;
941 size_t ressz;
942 int argc;
943
944 /*
945 * Begin by chdir()ing into the root of the manpath.
946 * This way we can pick up the database files, which are
947 * relative to the manpath root.
948 */
949
950 if (chdir(req->q.manpath) == -1) {
951 warn("chdir %s", req->q.manpath);
952 pg_error_internal();
953 return;
954 }
955
956 search.arch = req->q.arch;
957 search.sec = req->q.sec;
958 search.outkey = "Nd";
959 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
960 search.firstmatch = 1;
961
962 paths.sz = 1;
963 paths.paths = mandoc_malloc(sizeof(char *));
964 paths.paths[0] = mandoc_strdup(".");
965
966 /*
967 * Break apart at spaces with backslash-escaping.
968 */
969
970 argc = 0;
971 argv = NULL;
972 rp = query = mandoc_strdup(req->q.query);
973 for (;;) {
974 while (isspace((unsigned char)*rp))
975 rp++;
976 if (*rp == '\0')
977 break;
978 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
979 argv[argc++] = wp = rp;
980 for (;;) {
981 if (isspace((unsigned char)*rp)) {
982 *wp = '\0';
983 rp++;
984 break;
985 }
986 if (rp[0] == '\\' && rp[1] != '\0')
987 rp++;
988 if (wp != rp)
989 *wp = *rp;
990 if (*rp == '\0')
991 break;
992 wp++;
993 rp++;
994 }
995 }
996
997 res = NULL;
998 ressz = 0;
999 if (req->isquery && req->q.equal && argc == 1)
1000 pg_redirect(req, argv[0]);
1001 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1002 pg_noresult(req, "You entered an invalid query.");
1003 else if (ressz == 0)
1004 pg_noresult(req, "No results found.");
1005 else
1006 pg_searchres(req, res, ressz);
1007
1008 free(query);
1009 mansearch_free(res, ressz);
1010 free(paths.paths[0]);
1011 free(paths.paths);
1012 }
1013
1014 int
1015 main(void)
1016 {
1017 struct req req;
1018 struct itimerval itimer;
1019 const char *path;
1020 const char *querystring;
1021 int i;
1022
1023 #if HAVE_PLEDGE
1024 /*
1025 * The "rpath" pledge could be revoked after mparse_readfd()
1026 * if the file desciptor to "/footer.html" would be opened
1027 * up front, but it's probably not worth the complication
1028 * of the code it would cause: it would require scattering
1029 * pledge() calls in multiple low-level resp_*() functions.
1030 */
1031
1032 if (pledge("stdio rpath", NULL) == -1) {
1033 warn("pledge");
1034 pg_error_internal();
1035 return EXIT_FAILURE;
1036 }
1037 #endif
1038
1039 /* Poor man's ReDoS mitigation. */
1040
1041 itimer.it_value.tv_sec = 2;
1042 itimer.it_value.tv_usec = 0;
1043 itimer.it_interval.tv_sec = 2;
1044 itimer.it_interval.tv_usec = 0;
1045 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1046 warn("setitimer");
1047 pg_error_internal();
1048 return EXIT_FAILURE;
1049 }
1050
1051 /*
1052 * First we change directory into the MAN_DIR so that
1053 * subsequent scanning for manpath directories is rooted
1054 * relative to the same position.
1055 */
1056
1057 if (chdir(MAN_DIR) == -1) {
1058 warn("MAN_DIR: %s", MAN_DIR);
1059 pg_error_internal();
1060 return EXIT_FAILURE;
1061 }
1062
1063 memset(&req, 0, sizeof(struct req));
1064 req.q.equal = 1;
1065 parse_manpath_conf(&req);
1066
1067 /* Parse the path info and the query string. */
1068
1069 if ((path = getenv("PATH_INFO")) == NULL)
1070 path = "";
1071 else if (*path == '/')
1072 path++;
1073
1074 if (*path != '\0') {
1075 parse_path_info(&req, path);
1076 if (req.q.manpath == NULL || req.q.sec == NULL ||
1077 *req.q.query == '\0' || access(path, F_OK) == -1)
1078 path = "";
1079 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1080 parse_query_string(&req, querystring);
1081
1082 /* Validate parsed data and add defaults. */
1083
1084 if (req.q.manpath == NULL)
1085 req.q.manpath = mandoc_strdup(req.p[0]);
1086 else if ( ! validate_manpath(&req, req.q.manpath)) {
1087 pg_error_badrequest(
1088 "You specified an invalid manpath.");
1089 return EXIT_FAILURE;
1090 }
1091
1092 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1093 pg_error_badrequest(
1094 "You specified an invalid architecture.");
1095 return EXIT_FAILURE;
1096 }
1097
1098 /* Dispatch to the three different pages. */
1099
1100 if ('\0' != *path)
1101 pg_show(&req, path);
1102 else if (NULL != req.q.query)
1103 pg_search(&req);
1104 else
1105 pg_index(&req);
1106
1107 free(req.q.manpath);
1108 free(req.q.arch);
1109 free(req.q.sec);
1110 free(req.q.query);
1111 for (i = 0; i < (int)req.psz; i++)
1112 free(req.p[i]);
1113 free(req.p);
1114 return EXIT_SUCCESS;
1115 }
1116
1117 /*
1118 * If PATH_INFO is not a file name, translate it to a query.
1119 */
1120 static void
1121 parse_path_info(struct req *req, const char *path)
1122 {
1123 char *dir[4];
1124 int i;
1125
1126 req->isquery = 0;
1127 req->q.equal = 1;
1128 req->q.manpath = mandoc_strdup(path);
1129 req->q.arch = NULL;
1130
1131 /* Mandatory manual page name. */
1132 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1133 req->q.query = req->q.manpath;
1134 req->q.manpath = NULL;
1135 } else
1136 *req->q.query++ = '\0';
1137
1138 /* Optional trailing section. */
1139 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1140 if(isdigit((unsigned char)req->q.sec[1])) {
1141 *req->q.sec++ = '\0';
1142 req->q.sec = mandoc_strdup(req->q.sec);
1143 } else
1144 req->q.sec = NULL;
1145 }
1146
1147 /* Handle the case of name[.section] only. */
1148 if (req->q.manpath == NULL)
1149 return;
1150 req->q.query = mandoc_strdup(req->q.query);
1151
1152 /* Split directory components. */
1153 dir[i = 0] = req->q.manpath;
1154 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1155 if (++i == 3) {
1156 pg_error_badrequest(
1157 "You specified too many directory components.");
1158 exit(EXIT_FAILURE);
1159 }
1160 *dir[i]++ = '\0';
1161 }
1162
1163 /* Optional manpath. */
1164 if ((i = validate_manpath(req, req->q.manpath)) == 0)
1165 req->q.manpath = NULL;
1166 else if (dir[1] == NULL)
1167 return;
1168
1169 /* Optional section. */
1170 if (strncmp(dir[i], "man", 3) == 0) {
1171 free(req->q.sec);
1172 req->q.sec = mandoc_strdup(dir[i++] + 3);
1173 }
1174 if (dir[i] == NULL) {
1175 if (req->q.manpath == NULL)
1176 free(dir[0]);
1177 return;
1178 }
1179 if (dir[i + 1] != NULL) {
1180 pg_error_badrequest(
1181 "You specified an invalid directory component.");
1182 exit(EXIT_FAILURE);
1183 }
1184
1185 /* Optional architecture. */
1186 if (i) {
1187 req->q.arch = mandoc_strdup(dir[i]);
1188 if (req->q.manpath == NULL)
1189 free(dir[0]);
1190 } else
1191 req->q.arch = dir[0];
1192 }
1193
1194 /*
1195 * Scan for indexable paths.
1196 */
1197 static void
1198 parse_manpath_conf(struct req *req)
1199 {
1200 FILE *fp;
1201 char *dp;
1202 size_t dpsz;
1203 ssize_t len;
1204
1205 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1206 warn("%s/manpath.conf", MAN_DIR);
1207 pg_error_internal();
1208 exit(EXIT_FAILURE);
1209 }
1210
1211 dp = NULL;
1212 dpsz = 0;
1213
1214 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1215 if (dp[len - 1] == '\n')
1216 dp[--len] = '\0';
1217 req->p = mandoc_realloc(req->p,
1218 (req->psz + 1) * sizeof(char *));
1219 if ( ! validate_urifrag(dp)) {
1220 warnx("%s/manpath.conf contains "
1221 "unsafe path \"%s\"", MAN_DIR, dp);
1222 pg_error_internal();
1223 exit(EXIT_FAILURE);
1224 }
1225 if (strchr(dp, '/') != NULL) {
1226 warnx("%s/manpath.conf contains "
1227 "path with slash \"%s\"", MAN_DIR, dp);
1228 pg_error_internal();
1229 exit(EXIT_FAILURE);
1230 }
1231 req->p[req->psz++] = dp;
1232 dp = NULL;
1233 dpsz = 0;
1234 }
1235 free(dp);
1236
1237 if (req->p == NULL) {
1238 warnx("%s/manpath.conf is empty", MAN_DIR);
1239 pg_error_internal();
1240 exit(EXIT_FAILURE);
1241 }
1242 }