]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
It's annoying that people keep writing URIs including redundant parts
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.149 2017/03/15 10:17:29 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * The search parameters of one request,
 * in the form handed to the search and display functions.
 */
struct query {
	char		*manpath;	/* manual tree to look in */
	char		*arch;		/* machine architecture to restrict to */
	char		*sec;		/* manual section to restrict to */
	char		*query;		/* search expression, not yet parsed */
	int		 equal;		/* non-zero: names must match completely */
};
56
57 struct req {
58 struct query q;
59 char **p; /* array of available manpaths */
60 size_t psz; /* number of available manpaths */
61 int isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/*
 * Selects which element of the search form, if any,
 * is printed with the autofocus attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no element is focused */
	FOCUS_QUERY		/* the query input box is focused */
};
68
/* Forward declarations of all file-local functions. */
static void	 html_print(const char *p);
static void	 html_putchar(char c);
static int	 http_decode(char *p);
static void	 parse_manpath_conf(struct req *req);
static void	 parse_path_info(struct req *req, const char *path);
static void	 parse_query_string(struct req *req, const char *qs);
static void	 pg_error_badrequest(const char *msg);
static void	 pg_error_internal(void);
static void	 pg_index(const struct req *req);
static void	 pg_noresult(const struct req *req, const char *msg);
static void	 pg_redirect(const struct req *req, const char *name);
static void	 pg_search(const struct req *req);
static void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static void	 pg_show(struct req *req, const char *fullpath);
static void	 resp_begin_html(int code, const char *msg);
static void	 resp_begin_http(int code, const char *msg);
static void	 resp_catman(const struct req *req, const char *file);
static void	 resp_copy(const char *filename);
static void	 resp_end_html(void);
static void	 resp_format(const struct req *req, const char *file);
static void	 resp_searchform(const struct req *req, enum focus focus);
static void	 resp_show(const struct req *req, const char *file);
static void	 set_query_attr(char **attr, char **val);
static int	 validate_filename(const char *file);
static int	 validate_manpath(const struct req *req,
			const char *manpath);
static int	 validate_urifrag(const char *frag);
96
97 static const char *scriptname = SCRIPT_NAME;
98
/*
 * Rank of the manual sections 1 to 9, indexed by the section digit
 * minus one.  pg_searchres() prefers the page with the smallest rank.
 */
static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values submitted by the section selector, parallel to sec_names[]. */
static const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};

/* Labels shown by the section selector, parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries offered by the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered by the architecture selector. */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc64",
	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * Everything else, including non-ASCII bytes, is passed through as is.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
160
/*
 * Write a whole string through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
174
/*
 * Hand the allocated string *val over to the query member *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
191
192 /*
193 * Parse the QUERY_STRING for key-value pairs
194 * and store the values into the query structure.
195 */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 char *key, *val;
200 size_t keysz, valsz;
201
202 req->isquery = 1;
203 req->q.manpath = NULL;
204 req->q.arch = NULL;
205 req->q.sec = NULL;
206 req->q.query = NULL;
207 req->q.equal = 1;
208
209 key = val = NULL;
210 while (*qs != '\0') {
211
212 /* Parse one key. */
213
214 keysz = strcspn(qs, "=;&");
215 key = mandoc_strndup(qs, keysz);
216 qs += keysz;
217 if (*qs != '=')
218 goto next;
219
220 /* Parse one value. */
221
222 valsz = strcspn(++qs, ";&");
223 val = mandoc_strndup(qs, valsz);
224 qs += valsz;
225
226 /* Decode and catch encoding errors. */
227
228 if ( ! (http_decode(key) && http_decode(val)))
229 goto next;
230
231 /* Handle key-value pairs. */
232
233 if ( ! strcmp(key, "query"))
234 set_query_attr(&req->q.query, &val);
235
236 else if ( ! strcmp(key, "apropos"))
237 req->q.equal = !strcmp(val, "0");
238
239 else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 if ( ! strncmp(val, "OpenBSD ", 8)) {
242 val[7] = '-';
243 if ('C' == val[8])
244 val[8] = 'c';
245 }
246 #endif
247 set_query_attr(&req->q.manpath, &val);
248 }
249
250 else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 && strcmp(key, "sektion")
253 #endif
254 )) {
255 if ( ! strcmp(val, "0"))
256 *val = '\0';
257 set_query_attr(&req->q.sec, &val);
258 }
259
260 else if ( ! strcmp(key, "arch")) {
261 if ( ! strcmp(val, "default"))
262 *val = '\0';
263 set_query_attr(&req->q.arch, &val);
264 }
265
266 /*
267 * The key must be freed in any case.
268 * The val may have been handed over to the query
269 * structure, in which case it is now NULL.
270 */
271 next:
272 free(key);
273 key = NULL;
274 free(val);
275 val = NULL;
276
277 if (*qs != '\0')
278 qs++;
279 }
280 }
281
282 /*
283 * HTTP-decode a string. The standard explanation is that this turns
284 * "%4e+foo" into "n foo" in the regular way. This is done in-place
285 * over the allocated string.
286 */
287 static int
288 http_decode(char *p)
289 {
290 char hex[3];
291 char *q;
292 int c;
293
294 hex[2] = '\0';
295
296 q = p;
297 for ( ; '\0' != *p; p++, q++) {
298 if ('%' == *p) {
299 if ('\0' == (hex[0] = *(p + 1)))
300 return 0;
301 if ('\0' == (hex[1] = *(p + 2)))
302 return 0;
303 if (1 != sscanf(hex, "%x", &c))
304 return 0;
305 if ('\0' == c)
306 return 0;
307
308 *q = (char)c;
309 p += 2;
310 } else
311 *q = '+' == *p ? ' ' : *p;
312 }
313
314 *q = '\0';
315 return 1;
316 }
317
/*
 * Print the HTTP response header and flush it.
 * A Status line containing code and msg is only printed for codes
 * other than 200, so msg is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	const int	 success = (code == 200);

	if ( ! success)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
332
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped, so callers
 * can use this for optional page fragments.
 * Buffered stdio output is flushed first because the copy bypasses
 * stdio, and short writes are continued until each chunk is
 * written completely.  Copying stops at the first real I/O error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, done;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	for (;;) {
		nread = read(fd, buf, sizeof(buf));
		if (nread == -1 && errno == EINTR)
			continue;
		if (nread <= 0)
			break;

		/* write(2) may accept less than requested. */

		done = 0;
		while (done < nread) {
			nwritten = write(STDOUT_FILENO, buf + done,
			    (size_t)(nread - done));
			if (nwritten == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			done += nwritten;
		}
	}
	close(fd);
}
347
348 static void
349 resp_begin_html(int code, const char *msg)
350 {
351
352 resp_begin_http(code, msg);
353
354 printf("<!DOCTYPE html>\n"
355 "<html>\n"
356 "<head>\n"
357 " <meta charset=\"UTF-8\"/>\n"
358 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
359 " type=\"text/css\" media=\"all\">\n"
360 " <title>%s</title>\n"
361 "</head>\n"
362 "<body>\n",
363 CSS_DIR, CUSTOMIZE_TITLE);
364
365 resp_copy(MAN_DIR "/header.html");
366 }
367
368 static void
369 resp_end_html(void)
370 {
371
372 resp_copy(MAN_DIR "/footer.html");
373
374 puts("</body>\n"
375 "</html>");
376 }
377
378 static void
379 resp_searchform(const struct req *req, enum focus focus)
380 {
381 int i;
382
383 printf("<form action=\"/%s\" method=\"get\">\n"
384 " <fieldset>\n"
385 " <legend>Manual Page Search Parameters</legend>\n",
386 scriptname);
387
388 /* Write query input box. */
389
390 printf(" <input type=\"text\" name=\"query\" value=\"");
391 if (req->q.query != NULL)
392 html_print(req->q.query);
393 printf( "\" size=\"40\"");
394 if (focus == FOCUS_QUERY)
395 printf(" autofocus");
396 puts(">");
397
398 /* Write submission buttons. */
399
400 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
401 "man</button>\n"
402 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
403 "apropos</button>\n"
404 " <br/>\n");
405
406 /* Write section selector. */
407
408 puts(" <select name=\"sec\">");
409 for (i = 0; i < sec_MAX; i++) {
410 printf(" <option value=\"%s\"", sec_numbers[i]);
411 if (NULL != req->q.sec &&
412 0 == strcmp(sec_numbers[i], req->q.sec))
413 printf(" selected=\"selected\"");
414 printf(">%s</option>\n", sec_names[i]);
415 }
416 puts(" </select>");
417
418 /* Write architecture selector. */
419
420 printf( " <select name=\"arch\">\n"
421 " <option value=\"default\"");
422 if (NULL == req->q.arch)
423 printf(" selected=\"selected\"");
424 puts(">All Architectures</option>");
425 for (i = 0; i < arch_MAX; i++) {
426 printf(" <option value=\"%s\"", arch_names[i]);
427 if (NULL != req->q.arch &&
428 0 == strcmp(arch_names[i], req->q.arch))
429 printf(" selected=\"selected\"");
430 printf(">%s</option>\n", arch_names[i]);
431 }
432 puts(" </select>");
433
434 /* Write manpath selector. */
435
436 if (req->psz > 1) {
437 puts(" <select name=\"manpath\">");
438 for (i = 0; i < (int)req->psz; i++) {
439 printf(" <option ");
440 if (strcmp(req->q.manpath, req->p[i]) == 0)
441 printf("selected=\"selected\" ");
442 printf("value=\"");
443 html_print(req->p[i]);
444 printf("\">");
445 html_print(req->p[i]);
446 puts("</option>");
447 }
448 puts(" </select>");
449 }
450
451 puts(" </fieldset>\n"
452 "</form>");
453 }
454
/*
 * Check that a string consists exclusively of alphanumeric
 * characters and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
468
469 static int
470 validate_manpath(const struct req *req, const char* manpath)
471 {
472 size_t i;
473
474 for (i = 0; i < req->psz; i++)
475 if ( ! strcmp(manpath, req->p[i]))
476 return 1;
477
478 return 0;
479 }
480
/*
 * Check a manual page file name relative to a manpath.
 * After skipping an optional leading "./", the name must begin
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 for an acceptable name or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that might climb out of the tree. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
491
492 static void
493 pg_index(const struct req *req)
494 {
495
496 resp_begin_html(200, NULL);
497 resp_searchform(req, FOCUS_QUERY);
498 printf("<p>\n"
499 "This web interface is documented in the\n"
500 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
501 "manual, and the\n"
502 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
503 "manual explains the query syntax.\n"
504 "</p>\n",
505 scriptname, *scriptname == '\0' ? "" : "/",
506 scriptname, *scriptname == '\0' ? "" : "/");
507 resp_end_html();
508 }
509
510 static void
511 pg_noresult(const struct req *req, const char *msg)
512 {
513 resp_begin_html(200, NULL);
514 resp_searchform(req, FOCUS_QUERY);
515 puts("<p>");
516 puts(msg);
517 puts("</p>");
518 resp_end_html();
519 }
520
521 static void
522 pg_error_badrequest(const char *msg)
523 {
524
525 resp_begin_html(400, "Bad Request");
526 puts("<h1>Bad Request</h1>\n"
527 "<p>\n");
528 puts(msg);
529 printf("Try again from the\n"
530 "<a href=\"/%s\">main page</a>.\n"
531 "</p>", scriptname);
532 resp_end_html();
533 }
534
/*
 * Send a complete "500 Internal Server Error" page.
 * No details are shown to the client.
 */
static void
pg_error_internal(void)
{
	static const char	 reason[] = "Internal Server Error";

	resp_begin_html(500, reason);
	puts("<p>Internal Server Error</p>");
	resp_end_html();
}
542
543 static void
544 pg_redirect(const struct req *req, const char *name)
545 {
546 printf("Status: 303 See Other\r\n");
547 printf("Location: http://%s/", HTTP_HOST);
548 if (*scriptname != '\0')
549 printf("%s/", scriptname);
550 if (strcmp(req->q.manpath, req->p[0]))
551 printf("%s/", req->q.manpath);
552 if (req->q.arch != NULL)
553 printf("%s/", req->q.arch);
554 printf("%s", name);
555 if (req->q.sec != NULL)
556 printf(".%s", req->q.sec);
557 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
558 }
559
560 static void
561 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
562 {
563 char *arch, *archend;
564 const char *sec;
565 size_t i, iuse;
566 int archprio, archpriouse;
567 int prio, priouse;
568
569 for (i = 0; i < sz; i++) {
570 if (validate_filename(r[i].file))
571 continue;
572 warnx("invalid filename %s in %s database",
573 r[i].file, req->q.manpath);
574 pg_error_internal();
575 return;
576 }
577
578 if (req->isquery && sz == 1) {
579 /*
580 * If we have just one result, then jump there now
581 * without any delay.
582 */
583 printf("Status: 303 See Other\r\n");
584 printf("Location: http://%s/%s%s%s/%s",
585 HTTP_HOST, scriptname,
586 *scriptname == '\0' ? "" : "/",
587 req->q.manpath, r[0].file);
588 printf("\r\n"
589 "Content-Type: text/html; charset=utf-8\r\n"
590 "\r\n");
591 return;
592 }
593
594 resp_begin_html(200, NULL);
595 resp_searchform(req,
596 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
597
598 if (sz > 1) {
599 puts("<table class=\"results\">");
600 for (i = 0; i < sz; i++) {
601 printf(" <tr>\n"
602 " <td>"
603 "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
604 scriptname, *scriptname == '\0' ? "" : "/",
605 req->q.manpath, r[i].file);
606 html_print(r[i].names);
607 printf("</a></td>\n"
608 " <td><span class=\"Nd\">");
609 html_print(r[i].output);
610 puts("</span></td>\n"
611 " </tr>");
612 }
613 puts("</table>");
614 }
615
616 /*
617 * In man(1) mode, show one of the pages
618 * even if more than one is found.
619 */
620
621 if (req->q.equal || sz == 1) {
622 puts("<hr>");
623 iuse = 0;
624 priouse = 20;
625 archpriouse = 3;
626 for (i = 0; i < sz; i++) {
627 sec = r[i].file;
628 sec += strcspn(sec, "123456789");
629 if (sec[0] == '\0')
630 continue;
631 prio = sec_prios[sec[0] - '1'];
632 if (sec[1] != '/')
633 prio += 10;
634 if (req->q.arch == NULL) {
635 archprio =
636 ((arch = strchr(sec + 1, '/'))
637 == NULL) ? 3 :
638 ((archend = strchr(arch + 1, '/'))
639 == NULL) ? 0 :
640 strncmp(arch, "amd64/",
641 archend - arch) ? 2 : 1;
642 if (archprio < archpriouse) {
643 archpriouse = archprio;
644 priouse = prio;
645 iuse = i;
646 continue;
647 }
648 if (archprio > archpriouse)
649 continue;
650 }
651 if (prio >= priouse)
652 continue;
653 priouse = prio;
654 iuse = i;
655 }
656 resp_show(req, r[iuse].file);
657 }
658
659 resp_end_html();
660 }
661
/*
 * Print a preformatted (cat) manual page as HTML.
 * The file is read line by line, and backspace overstrike
 * sequences are translated: "_\bX" becomes italic X, a few
 * two-character combinations become '*' or '+', and any other
 * "Y\bX" becomes bold X.  All text is HTML-escaped.
 * A file that cannot be opened yields an error paragraph.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is inside the line.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The overstruck character is p[i + 2];
			 * p[i + 1] is always the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
798
799 static void
800 resp_format(const struct req *req, const char *file)
801 {
802 struct manoutput conf;
803 struct mparse *mp;
804 struct roff_man *man;
805 void *vp;
806 int fd;
807 int usepath;
808
809 if (-1 == (fd = open(file, O_RDONLY, 0))) {
810 puts("<p>You specified an invalid manual file.</p>");
811 return;
812 }
813
814 mchars_alloc();
815 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
816 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
817 mparse_readfd(mp, fd, file);
818 close(fd);
819
820 memset(&conf, 0, sizeof(conf));
821 conf.fragment = 1;
822 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
823 usepath = strcmp(req->q.manpath, req->p[0]);
824 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
825 usepath ? req->q.manpath : "", usepath ? "/" : "");
826
827 mparse_result(mp, &man, NULL);
828 if (man == NULL) {
829 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
830 pg_error_internal();
831 mparse_free(mp);
832 mchars_free();
833 return;
834 }
835
836 vp = html_alloc(&conf);
837
838 if (man->macroset == MACROSET_MDOC) {
839 mdoc_validate(man);
840 html_mdoc(vp, man);
841 } else {
842 man_validate(man);
843 html_man(vp, man);
844 }
845
846 html_free(vp);
847 mparse_free(mp);
848 mchars_free();
849 free(conf.man);
850 free(conf.style);
851 }
852
/*
 * Print one manual page file.  After skipping an optional leading
 * "./", names beginning with 'c' are treated as preformatted pages
 * and all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
865
866 static void
867 pg_show(struct req *req, const char *fullpath)
868 {
869 char *manpath;
870 const char *file;
871
872 if ((file = strchr(fullpath, '/')) == NULL) {
873 pg_error_badrequest(
874 "You did not specify a page to show.");
875 return;
876 }
877 manpath = mandoc_strndup(fullpath, file - fullpath);
878 file++;
879
880 if ( ! validate_manpath(req, manpath)) {
881 pg_error_badrequest(
882 "You specified an invalid manpath.");
883 free(manpath);
884 return;
885 }
886
887 /*
888 * Begin by chdir()ing into the manpath.
889 * This way we can pick up the database files, which are
890 * relative to the manpath root.
891 */
892
893 if (chdir(manpath) == -1) {
894 warn("chdir %s", manpath);
895 pg_error_internal();
896 free(manpath);
897 return;
898 }
899 free(manpath);
900
901 if ( ! validate_filename(file)) {
902 pg_error_badrequest(
903 "You specified an invalid manual file.");
904 return;
905 }
906
907 resp_begin_html(200, NULL);
908 resp_searchform(req, FOCUS_NONE);
909 resp_show(req, file);
910 resp_end_html();
911 }
912
913 static void
914 pg_search(const struct req *req)
915 {
916 struct mansearch search;
917 struct manpaths paths;
918 struct manpage *res;
919 char **argv;
920 char *query, *rp, *wp;
921 size_t ressz;
922 int argc;
923
924 /*
925 * Begin by chdir()ing into the root of the manpath.
926 * This way we can pick up the database files, which are
927 * relative to the manpath root.
928 */
929
930 if (chdir(req->q.manpath) == -1) {
931 warn("chdir %s", req->q.manpath);
932 pg_error_internal();
933 return;
934 }
935
936 search.arch = req->q.arch;
937 search.sec = req->q.sec;
938 search.outkey = "Nd";
939 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
940 search.firstmatch = 1;
941
942 paths.sz = 1;
943 paths.paths = mandoc_malloc(sizeof(char *));
944 paths.paths[0] = mandoc_strdup(".");
945
946 /*
947 * Break apart at spaces with backslash-escaping.
948 */
949
950 argc = 0;
951 argv = NULL;
952 rp = query = mandoc_strdup(req->q.query);
953 for (;;) {
954 while (isspace((unsigned char)*rp))
955 rp++;
956 if (*rp == '\0')
957 break;
958 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
959 argv[argc++] = wp = rp;
960 for (;;) {
961 if (isspace((unsigned char)*rp)) {
962 *wp = '\0';
963 rp++;
964 break;
965 }
966 if (rp[0] == '\\' && rp[1] != '\0')
967 rp++;
968 if (wp != rp)
969 *wp = *rp;
970 if (*rp == '\0')
971 break;
972 wp++;
973 rp++;
974 }
975 }
976
977 res = NULL;
978 ressz = 0;
979 if (req->isquery && req->q.equal && argc == 1)
980 pg_redirect(req, argv[0]);
981 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
982 pg_noresult(req, "You entered an invalid query.");
983 else if (ressz == 0)
984 pg_noresult(req, "No results found.");
985 else
986 pg_searchres(req, res, ressz);
987
988 free(query);
989 mansearch_free(res, ressz);
990 free(paths.paths[0]);
991 free(paths.paths);
992 }
993
994 int
995 main(void)
996 {
997 struct req req;
998 struct itimerval itimer;
999 const char *path;
1000 const char *querystring;
1001 int i;
1002
1003 #if HAVE_PLEDGE
1004 /*
1005 * The "rpath" pledge could be revoked after mparse_readfd()
1006 * if the file desciptor to "/footer.html" would be opened
1007 * up front, but it's probably not worth the complication
1008 * of the code it would cause: it would require scattering
1009 * pledge() calls in multiple low-level resp_*() functions.
1010 */
1011
1012 if (pledge("stdio rpath", NULL) == -1) {
1013 warn("pledge");
1014 pg_error_internal();
1015 return EXIT_FAILURE;
1016 }
1017 #endif
1018
1019 /* Poor man's ReDoS mitigation. */
1020
1021 itimer.it_value.tv_sec = 2;
1022 itimer.it_value.tv_usec = 0;
1023 itimer.it_interval.tv_sec = 2;
1024 itimer.it_interval.tv_usec = 0;
1025 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1026 warn("setitimer");
1027 pg_error_internal();
1028 return EXIT_FAILURE;
1029 }
1030
1031 /*
1032 * First we change directory into the MAN_DIR so that
1033 * subsequent scanning for manpath directories is rooted
1034 * relative to the same position.
1035 */
1036
1037 if (chdir(MAN_DIR) == -1) {
1038 warn("MAN_DIR: %s", MAN_DIR);
1039 pg_error_internal();
1040 return EXIT_FAILURE;
1041 }
1042
1043 memset(&req, 0, sizeof(struct req));
1044 req.q.equal = 1;
1045 parse_manpath_conf(&req);
1046
1047 /* Parse the path info and the query string. */
1048
1049 if ((path = getenv("PATH_INFO")) == NULL)
1050 path = "";
1051 else if (*path == '/')
1052 path++;
1053
1054 if (*path != '\0') {
1055 parse_path_info(&req, path);
1056 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1057 path = "";
1058 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1059 parse_query_string(&req, querystring);
1060
1061 /* Validate parsed data and add defaults. */
1062
1063 if (req.q.manpath == NULL)
1064 req.q.manpath = mandoc_strdup(req.p[0]);
1065 else if ( ! validate_manpath(&req, req.q.manpath)) {
1066 pg_error_badrequest(
1067 "You specified an invalid manpath.");
1068 return EXIT_FAILURE;
1069 }
1070
1071 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1072 pg_error_badrequest(
1073 "You specified an invalid architecture.");
1074 return EXIT_FAILURE;
1075 }
1076
1077 /* Dispatch to the three different pages. */
1078
1079 if ('\0' != *path)
1080 pg_show(&req, path);
1081 else if (NULL != req.q.query)
1082 pg_search(&req);
1083 else
1084 pg_index(&req);
1085
1086 free(req.q.manpath);
1087 free(req.q.arch);
1088 free(req.q.sec);
1089 free(req.q.query);
1090 for (i = 0; i < (int)req.psz; i++)
1091 free(req.p[i]);
1092 free(req.p);
1093 return EXIT_SUCCESS;
1094 }
1095
1096 /*
1097 * If PATH_INFO is not a file name, translate it to a query.
1098 */
1099 static void
1100 parse_path_info(struct req *req, const char *path)
1101 {
1102 char *dir[4];
1103 int i;
1104
1105 req->isquery = 0;
1106 req->q.equal = 1;
1107 req->q.manpath = mandoc_strdup(path);
1108 req->q.arch = NULL;
1109
1110 /* Mandatory manual page name. */
1111 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1112 req->q.query = req->q.manpath;
1113 req->q.manpath = NULL;
1114 } else
1115 *req->q.query++ = '\0';
1116
1117 /* Optional trailing section. */
1118 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1119 if(isdigit((unsigned char)req->q.sec[1])) {
1120 *req->q.sec++ = '\0';
1121 req->q.sec = mandoc_strdup(req->q.sec);
1122 } else
1123 req->q.sec = NULL;
1124 }
1125
1126 /* Handle the case of name[.section] only. */
1127 if (req->q.manpath == NULL)
1128 return;
1129 req->q.query = mandoc_strdup(req->q.query);
1130
1131 /* Split directory components. */
1132 dir[i = 0] = req->q.manpath;
1133 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1134 if (++i == 3) {
1135 pg_error_badrequest(
1136 "You specified too many directory components.");
1137 exit(EXIT_FAILURE);
1138 }
1139 *dir[i]++ = '\0';
1140 }
1141
1142 /* Optional manpath. */
1143 if ((i = validate_manpath(req, req->q.manpath)) == 0)
1144 req->q.manpath = NULL;
1145 else if (dir[1] == NULL)
1146 return;
1147
1148 /* Optional section. */
1149 if (strncmp(dir[i], "man", 3) == 0) {
1150 free(req->q.sec);
1151 req->q.sec = mandoc_strdup(dir[i++] + 3);
1152 }
1153 if (dir[i] == NULL) {
1154 if (req->q.manpath == NULL)
1155 free(dir[0]);
1156 return;
1157 }
1158 if (dir[i + 1] != NULL) {
1159 pg_error_badrequest(
1160 "You specified an invalid directory component.");
1161 exit(EXIT_FAILURE);
1162 }
1163
1164 /* Optional architecture. */
1165 if (i) {
1166 req->q.arch = mandoc_strdup(dir[i]);
1167 if (req->q.manpath == NULL)
1168 free(dir[0]);
1169 } else
1170 req->q.arch = dir[0];
1171 }
1172
1173 /*
1174 * Scan for indexable paths.
1175 */
1176 static void
1177 parse_manpath_conf(struct req *req)
1178 {
1179 FILE *fp;
1180 char *dp;
1181 size_t dpsz;
1182 ssize_t len;
1183
1184 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1185 warn("%s/manpath.conf", MAN_DIR);
1186 pg_error_internal();
1187 exit(EXIT_FAILURE);
1188 }
1189
1190 dp = NULL;
1191 dpsz = 0;
1192
1193 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1194 if (dp[len - 1] == '\n')
1195 dp[--len] = '\0';
1196 req->p = mandoc_realloc(req->p,
1197 (req->psz + 1) * sizeof(char *));
1198 if ( ! validate_urifrag(dp)) {
1199 warnx("%s/manpath.conf contains "
1200 "unsafe path \"%s\"", MAN_DIR, dp);
1201 pg_error_internal();
1202 exit(EXIT_FAILURE);
1203 }
1204 if (strchr(dp, '/') != NULL) {
1205 warnx("%s/manpath.conf contains "
1206 "path with slash \"%s\"", MAN_DIR, dp);
1207 pg_error_internal();
1208 exit(EXIT_FAILURE);
1209 }
1210 req->p[req->psz++] = dp;
1211 dp = NULL;
1212 dpsz = 0;
1213 }
1214 free(dp);
1215
1216 if (req->p == NULL) {
1217 warnx("%s/manpath.conf is empty", MAN_DIR);
1218 pg_error_internal();
1219 exit(EXIT_FAILURE);
1220 }
1221 }