]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Document man(1) section selection priority,
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.151 2017/03/15 13:49:50 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * Search parameters of one request,
 * in the form handed on to the search function.
 */
struct query {
	char		*manpath;	/* manual tree to look in */
	char		*arch;		/* machine architecture */
	char		*sec;		/* section of the manual */
	char		*query;		/* search expression, still unparsed */
	int		 equal;		/* whole-name match instead of substring */
};
56
57 struct req {
58 struct query q;
59 char **p; /* array of available manpaths */
60 size_t psz; /* number of available manpaths */
61 int isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no autofocus */
	FOCUS_QUERY		/* autofocus the query input box */
};
68
69 static void html_print(const char *);
70 static void html_putchar(char);
71 static int http_decode(char *);
72 static void parse_manpath_conf(struct req *);
73 static void parse_path_info(struct req *req, const char *path);
74 static void parse_query_string(struct req *, const char *);
75 static void pg_error_badrequest(const char *);
76 static void pg_error_internal(void);
77 static void pg_index(const struct req *);
78 static void pg_noresult(const struct req *, const char *);
79 static void pg_redirect(const struct req *, const char *);
80 static void pg_search(const struct req *);
81 static void pg_searchres(const struct req *,
82 struct manpage *, size_t);
83 static void pg_show(struct req *, const char *);
84 static void resp_begin_html(int, const char *, const char *);
85 static void resp_begin_http(int, const char *);
86 static void resp_catman(const struct req *, const char *);
87 static void resp_copy(const char *);
88 static void resp_end_html(void);
89 static void resp_format(const struct req *, const char *);
90 static void resp_searchform(const struct req *, enum focus);
91 static void resp_show(const struct req *, const char *);
92 static void set_query_attr(char **, char **);
93 static int validate_filename(const char *);
94 static int validate_manpath(const struct req *, const char *);
95 static int validate_urifrag(const char *);
96
97 static const char *scriptname = SCRIPT_NAME;
98
/*
 * Display priority of sections 1 to 9, indexed by the section digit
 * minus one; pg_searchres() prefers the page with the lowest value.
 */
static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Option values of the section selector; "0" stands for all sections. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
117
/* Architectures offered by the architecture selector, in display order. */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",
	"arm64",	"hppa",		"i386",
	"landisk",	"loongson",	"luna88k",
	"macppc",	"mips64",	"octeon",
	"sgi",		"socppc",	"sparc64",
	"amiga",	"arc",		"armish",
	"arm32",	"atari",	"aviion",
	"beagle",	"cats",		"hppa64",
	"hp300",	"ia64",		"mac68k",
	"mvme68k",	"mvme88k",	"mvmeppc",
	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",
	"sparc",	"sun3",		"vax",
	"wgrisc",	"x68k",		"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * Anything else, non-ASCII bytes included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		entity = NULL;
		break;
	}

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
160
/*
 * Write a whole string through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
174
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
191
192 /*
193 * Parse the QUERY_STRING for key-value pairs
194 * and store the values into the query structure.
195 */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 char *key, *val;
200 size_t keysz, valsz;
201
202 req->isquery = 1;
203 req->q.manpath = NULL;
204 req->q.arch = NULL;
205 req->q.sec = NULL;
206 req->q.query = NULL;
207 req->q.equal = 1;
208
209 key = val = NULL;
210 while (*qs != '\0') {
211
212 /* Parse one key. */
213
214 keysz = strcspn(qs, "=;&");
215 key = mandoc_strndup(qs, keysz);
216 qs += keysz;
217 if (*qs != '=')
218 goto next;
219
220 /* Parse one value. */
221
222 valsz = strcspn(++qs, ";&");
223 val = mandoc_strndup(qs, valsz);
224 qs += valsz;
225
226 /* Decode and catch encoding errors. */
227
228 if ( ! (http_decode(key) && http_decode(val)))
229 goto next;
230
231 /* Handle key-value pairs. */
232
233 if ( ! strcmp(key, "query"))
234 set_query_attr(&req->q.query, &val);
235
236 else if ( ! strcmp(key, "apropos"))
237 req->q.equal = !strcmp(val, "0");
238
239 else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 if ( ! strncmp(val, "OpenBSD ", 8)) {
242 val[7] = '-';
243 if ('C' == val[8])
244 val[8] = 'c';
245 }
246 #endif
247 set_query_attr(&req->q.manpath, &val);
248 }
249
250 else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 && strcmp(key, "sektion")
253 #endif
254 )) {
255 if ( ! strcmp(val, "0"))
256 *val = '\0';
257 set_query_attr(&req->q.sec, &val);
258 }
259
260 else if ( ! strcmp(key, "arch")) {
261 if ( ! strcmp(val, "default"))
262 *val = '\0';
263 set_query_attr(&req->q.arch, &val);
264 }
265
266 /*
267 * The key must be freed in any case.
268 * The val may have been handed over to the query
269 * structure, in which case it is now NULL.
270 */
271 next:
272 free(key);
273 key = NULL;
274 free(val);
275 val = NULL;
276
277 if (*qs != '\0')
278 qs++;
279 }
280 }
281
282 /*
283 * HTTP-decode a string. The standard explanation is that this turns
284 * "%4e+foo" into "n foo" in the regular way. This is done in-place
285 * over the allocated string.
286 */
287 static int
288 http_decode(char *p)
289 {
290 char hex[3];
291 char *q;
292 int c;
293
294 hex[2] = '\0';
295
296 q = p;
297 for ( ; '\0' != *p; p++, q++) {
298 if ('%' == *p) {
299 if ('\0' == (hex[0] = *(p + 1)))
300 return 0;
301 if ('\0' == (hex[1] = *(p + 2)))
302 return 0;
303 if (1 != sscanf(hex, "%x", &c))
304 return 0;
305 if ('\0' == c)
306 return 0;
307
308 *q = (char)c;
309 p += 2;
310 } else
311 *q = '+' == *p ? ' ' : *p;
312 }
313
314 *q = '\0';
315 return 1;
316 }
317
/*
 * Emit the CGI response headers and flush standard output.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
332
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep already buffered stdio output ahead of the raw writes. */

	fflush(stdout);

	for (;;) {
		nr = read(fd, buf, sizeof(buf));
		if (nr == -1 && errno == EINTR)
			continue;
		if (nr <= 0)
			break;

		/* write(2) may be partial; push out the whole chunk. */

		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				goto done;
			}
			off += nw;
		}
	}
done:
	close(fd);
}
347
348 static void
349 resp_begin_html(int code, const char *msg, const char *file)
350 {
351 char *cp;
352
353 resp_begin_http(code, msg);
354
355 printf("<!DOCTYPE html>\n"
356 "<html>\n"
357 "<head>\n"
358 " <meta charset=\"UTF-8\"/>\n"
359 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
360 " type=\"text/css\" media=\"all\">\n"
361 " <title>",
362 CSS_DIR);
363 if (file != NULL) {
364 if ((cp = strrchr(file, '/')) != NULL)
365 file = cp + 1;
366 if ((cp = strrchr(file, '.')) != NULL) {
367 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
368 } else
369 printf("%s - ", file);
370 }
371 printf("%s</title>\n"
372 "</head>\n"
373 "<body>\n",
374 CUSTOMIZE_TITLE);
375
376 resp_copy(MAN_DIR "/header.html");
377 }
378
379 static void
380 resp_end_html(void)
381 {
382
383 resp_copy(MAN_DIR "/footer.html");
384
385 puts("</body>\n"
386 "</html>");
387 }
388
389 static void
390 resp_searchform(const struct req *req, enum focus focus)
391 {
392 int i;
393
394 printf("<form action=\"/%s\" method=\"get\">\n"
395 " <fieldset>\n"
396 " <legend>Manual Page Search Parameters</legend>\n",
397 scriptname);
398
399 /* Write query input box. */
400
401 printf(" <input type=\"text\" name=\"query\" value=\"");
402 if (req->q.query != NULL)
403 html_print(req->q.query);
404 printf( "\" size=\"40\"");
405 if (focus == FOCUS_QUERY)
406 printf(" autofocus");
407 puts(">");
408
409 /* Write submission buttons. */
410
411 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
412 "man</button>\n"
413 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
414 "apropos</button>\n"
415 " <br/>\n");
416
417 /* Write section selector. */
418
419 puts(" <select name=\"sec\">");
420 for (i = 0; i < sec_MAX; i++) {
421 printf(" <option value=\"%s\"", sec_numbers[i]);
422 if (NULL != req->q.sec &&
423 0 == strcmp(sec_numbers[i], req->q.sec))
424 printf(" selected=\"selected\"");
425 printf(">%s</option>\n", sec_names[i]);
426 }
427 puts(" </select>");
428
429 /* Write architecture selector. */
430
431 printf( " <select name=\"arch\">\n"
432 " <option value=\"default\"");
433 if (NULL == req->q.arch)
434 printf(" selected=\"selected\"");
435 puts(">All Architectures</option>");
436 for (i = 0; i < arch_MAX; i++) {
437 printf(" <option value=\"%s\"", arch_names[i]);
438 if (NULL != req->q.arch &&
439 0 == strcmp(arch_names[i], req->q.arch))
440 printf(" selected=\"selected\"");
441 printf(">%s</option>\n", arch_names[i]);
442 }
443 puts(" </select>");
444
445 /* Write manpath selector. */
446
447 if (req->psz > 1) {
448 puts(" <select name=\"manpath\">");
449 for (i = 0; i < (int)req->psz; i++) {
450 printf(" <option ");
451 if (strcmp(req->q.manpath, req->p[i]) == 0)
452 printf("selected=\"selected\" ");
453 printf("value=\"");
454 html_print(req->p[i]);
455 printf("\">");
456 html_print(req->p[i]);
457 puts("</option>");
458 }
459 puts(" </select>");
460 }
461
462 puts(" </fieldset>\n"
463 "</form>");
464 }
465
/*
 * Check that a string consists of alphanumeric characters
 * and the characters '-', '.', '/' and '_' only.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
479
480 static int
481 validate_manpath(const struct req *req, const char* manpath)
482 {
483 size_t i;
484
485 for (i = 0; i < req->psz; i++)
486 if ( ! strcmp(manpath, req->p[i]))
487 return 1;
488
489 return 0;
490 }
491
/*
 * Check a file name relative to a manpath root.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
502
503 static void
504 pg_index(const struct req *req)
505 {
506
507 resp_begin_html(200, NULL, NULL);
508 resp_searchform(req, FOCUS_QUERY);
509 printf("<p>\n"
510 "This web interface is documented in the\n"
511 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
512 "manual, and the\n"
513 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
514 "manual explains the query syntax.\n"
515 "</p>\n",
516 scriptname, *scriptname == '\0' ? "" : "/",
517 scriptname, *scriptname == '\0' ? "" : "/");
518 resp_end_html();
519 }
520
521 static void
522 pg_noresult(const struct req *req, const char *msg)
523 {
524 resp_begin_html(200, NULL, NULL);
525 resp_searchform(req, FOCUS_QUERY);
526 puts("<p>");
527 puts(msg);
528 puts("</p>");
529 resp_end_html();
530 }
531
532 static void
533 pg_error_badrequest(const char *msg)
534 {
535
536 resp_begin_html(400, "Bad Request", NULL);
537 puts("<h1>Bad Request</h1>\n"
538 "<p>\n");
539 puts(msg);
540 printf("Try again from the\n"
541 "<a href=\"/%s\">main page</a>.\n"
542 "</p>", scriptname);
543 resp_end_html();
544 }
545
/*
 * Error page for status 500; deliberately gives no details.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
553
554 static void
555 pg_redirect(const struct req *req, const char *name)
556 {
557 printf("Status: 303 See Other\r\n");
558 printf("Location: http://%s/", HTTP_HOST);
559 if (*scriptname != '\0')
560 printf("%s/", scriptname);
561 if (strcmp(req->q.manpath, req->p[0]))
562 printf("%s/", req->q.manpath);
563 if (req->q.arch != NULL)
564 printf("%s/", req->q.arch);
565 printf("%s", name);
566 if (req->q.sec != NULL)
567 printf(".%s", req->q.sec);
568 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
569 }
570
571 static void
572 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
573 {
574 char *arch, *archend;
575 const char *sec;
576 size_t i, iuse;
577 int archprio, archpriouse;
578 int prio, priouse;
579
580 for (i = 0; i < sz; i++) {
581 if (validate_filename(r[i].file))
582 continue;
583 warnx("invalid filename %s in %s database",
584 r[i].file, req->q.manpath);
585 pg_error_internal();
586 return;
587 }
588
589 if (req->isquery && sz == 1) {
590 /*
591 * If we have just one result, then jump there now
592 * without any delay.
593 */
594 printf("Status: 303 See Other\r\n");
595 printf("Location: http://%s/%s%s%s/%s",
596 HTTP_HOST, scriptname,
597 *scriptname == '\0' ? "" : "/",
598 req->q.manpath, r[0].file);
599 printf("\r\n"
600 "Content-Type: text/html; charset=utf-8\r\n"
601 "\r\n");
602 return;
603 }
604
605 /*
606 * In man(1) mode, show one of the pages
607 * even if more than one is found.
608 */
609
610 iuse = 0;
611 if (req->q.equal || sz == 1) {
612 priouse = 20;
613 archpriouse = 3;
614 for (i = 0; i < sz; i++) {
615 sec = r[i].file;
616 sec += strcspn(sec, "123456789");
617 if (sec[0] == '\0')
618 continue;
619 prio = sec_prios[sec[0] - '1'];
620 if (sec[1] != '/')
621 prio += 10;
622 if (req->q.arch == NULL) {
623 archprio =
624 ((arch = strchr(sec + 1, '/'))
625 == NULL) ? 3 :
626 ((archend = strchr(arch + 1, '/'))
627 == NULL) ? 0 :
628 strncmp(arch, "amd64/",
629 archend - arch) ? 2 : 1;
630 if (archprio < archpriouse) {
631 archpriouse = archprio;
632 priouse = prio;
633 iuse = i;
634 continue;
635 }
636 if (archprio > archpriouse)
637 continue;
638 }
639 if (prio >= priouse)
640 continue;
641 priouse = prio;
642 iuse = i;
643 }
644 resp_begin_html(200, NULL, r[iuse].file);
645 } else
646 resp_begin_html(200, NULL, NULL);
647
648 resp_searchform(req,
649 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
650
651 if (sz > 1) {
652 puts("<table class=\"results\">");
653 for (i = 0; i < sz; i++) {
654 printf(" <tr>\n"
655 " <td>"
656 "<a class=\"Xr\" href=\"/");
657 if (*scriptname != '\0')
658 printf("%s/", scriptname);
659 if (strcmp(req->q.manpath, req->p[0]))
660 printf("%s/", req->q.manpath);
661 printf("%s\">", r[i].file);
662 html_print(r[i].names);
663 printf("</a></td>\n"
664 " <td><span class=\"Nd\">");
665 html_print(r[i].output);
666 puts("</span></td>\n"
667 " </tr>");
668 }
669 puts("</table>");
670 }
671
672 if (req->q.equal || sz == 1) {
673 puts("<hr>");
674 resp_show(req, r[iuse].file);
675 }
676
677 resp_end_html();
678 }
679
/*
 * Decide whether overstriking the character a with the character b
 * (the sequence a, backspace, b) is one of the known troff idioms.
 * Returns the replacement character '*' or '+', or '\0' if the
 * pair is not special.
 */
static char
catman_overstrike(char a, char b)
{
	if ((a == '+' && b == 'o') || (a == 'o' && b == '+') ||
	    (a == '|' && b == '=') || (a == '=' && b == '|') ||
	    (a == '*' && b == '=') || (a == '=' && b == '*') ||
	    (a == '*' && b == '|') || (a == '|' && b == '*'))
		return '*';
	if ((a == '|' && b == '-') || (a == '-' && b == '|') ||
	    (a == '+' && b == '-') || (a == '-' && b == '+') ||
	    (a == '+' && b == '|') || (a == '|' && b == '+'))
		return '+';
	return '\0';
}

/*
 * Render a preformatted (cat) page as HTML inside <pre>.
 * Backspace overstrike sequences are translated: "_\bX" becomes
 * italic X, the troff idioms recognized by catman_overstrike()
 * become a single '*' or '+', and any other "X\bY" becomes bold Y.
 * All text is HTML-escaped.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 subst;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 */

			if (p[i + 1] != '\b') {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * From here on, p[i + 1] is a backspace.
			 * Make sure there is a character after it.
			 */

			if (i + 2 >= len)
				continue;

			/* Italic mode. */

			if (p[i] == '_') {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The partner character is at p[i + 2].
			 */

			subst = catman_overstrike(p[i], p[i + 2]);
			if (subst != '\0') {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar(subst);
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
816
817 static void
818 resp_format(const struct req *req, const char *file)
819 {
820 struct manoutput conf;
821 struct mparse *mp;
822 struct roff_man *man;
823 void *vp;
824 int fd;
825 int usepath;
826
827 if (-1 == (fd = open(file, O_RDONLY, 0))) {
828 puts("<p>You specified an invalid manual file.</p>");
829 return;
830 }
831
832 mchars_alloc();
833 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
834 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
835 mparse_readfd(mp, fd, file);
836 close(fd);
837
838 memset(&conf, 0, sizeof(conf));
839 conf.fragment = 1;
840 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
841 usepath = strcmp(req->q.manpath, req->p[0]);
842 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
843 usepath ? req->q.manpath : "", usepath ? "/" : "");
844
845 mparse_result(mp, &man, NULL);
846 if (man == NULL) {
847 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
848 pg_error_internal();
849 mparse_free(mp);
850 mchars_free();
851 return;
852 }
853
854 vp = html_alloc(&conf);
855
856 if (man->macroset == MACROSET_MDOC) {
857 mdoc_validate(man);
858 html_mdoc(vp, man);
859 } else {
860 man_validate(man);
861 html_man(vp, man);
862 }
863
864 html_free(vp);
865 mparse_free(mp);
866 mchars_free();
867 free(conf.man);
868 free(conf.style);
869 }
870
/*
 * Show one manual file.  After skipping an optional leading "./",
 * names starting with 'c' are handed to resp_catman() and all
 * others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel == 'c') {
		resp_catman(req, rel);
		return;
	}
	resp_format(req, rel);
}
883
884 static void
885 pg_show(struct req *req, const char *fullpath)
886 {
887 char *manpath;
888 const char *file;
889
890 if ((file = strchr(fullpath, '/')) == NULL) {
891 pg_error_badrequest(
892 "You did not specify a page to show.");
893 return;
894 }
895 manpath = mandoc_strndup(fullpath, file - fullpath);
896 file++;
897
898 if ( ! validate_manpath(req, manpath)) {
899 pg_error_badrequest(
900 "You specified an invalid manpath.");
901 free(manpath);
902 return;
903 }
904
905 /*
906 * Begin by chdir()ing into the manpath.
907 * This way we can pick up the database files, which are
908 * relative to the manpath root.
909 */
910
911 if (chdir(manpath) == -1) {
912 warn("chdir %s", manpath);
913 pg_error_internal();
914 free(manpath);
915 return;
916 }
917 free(manpath);
918
919 if ( ! validate_filename(file)) {
920 pg_error_badrequest(
921 "You specified an invalid manual file.");
922 return;
923 }
924
925 resp_begin_html(200, NULL, file);
926 resp_searchform(req, FOCUS_NONE);
927 resp_show(req, file);
928 resp_end_html();
929 }
930
931 static void
932 pg_search(const struct req *req)
933 {
934 struct mansearch search;
935 struct manpaths paths;
936 struct manpage *res;
937 char **argv;
938 char *query, *rp, *wp;
939 size_t ressz;
940 int argc;
941
942 /*
943 * Begin by chdir()ing into the root of the manpath.
944 * This way we can pick up the database files, which are
945 * relative to the manpath root.
946 */
947
948 if (chdir(req->q.manpath) == -1) {
949 warn("chdir %s", req->q.manpath);
950 pg_error_internal();
951 return;
952 }
953
954 search.arch = req->q.arch;
955 search.sec = req->q.sec;
956 search.outkey = "Nd";
957 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
958 search.firstmatch = 1;
959
960 paths.sz = 1;
961 paths.paths = mandoc_malloc(sizeof(char *));
962 paths.paths[0] = mandoc_strdup(".");
963
964 /*
965 * Break apart at spaces with backslash-escaping.
966 */
967
968 argc = 0;
969 argv = NULL;
970 rp = query = mandoc_strdup(req->q.query);
971 for (;;) {
972 while (isspace((unsigned char)*rp))
973 rp++;
974 if (*rp == '\0')
975 break;
976 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
977 argv[argc++] = wp = rp;
978 for (;;) {
979 if (isspace((unsigned char)*rp)) {
980 *wp = '\0';
981 rp++;
982 break;
983 }
984 if (rp[0] == '\\' && rp[1] != '\0')
985 rp++;
986 if (wp != rp)
987 *wp = *rp;
988 if (*rp == '\0')
989 break;
990 wp++;
991 rp++;
992 }
993 }
994
995 res = NULL;
996 ressz = 0;
997 if (req->isquery && req->q.equal && argc == 1)
998 pg_redirect(req, argv[0]);
999 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1000 pg_noresult(req, "You entered an invalid query.");
1001 else if (ressz == 0)
1002 pg_noresult(req, "No results found.");
1003 else
1004 pg_searchres(req, res, ressz);
1005
1006 free(query);
1007 mansearch_free(res, ressz);
1008 free(paths.paths[0]);
1009 free(paths.paths);
1010 }
1011
1012 int
1013 main(void)
1014 {
1015 struct req req;
1016 struct itimerval itimer;
1017 const char *path;
1018 const char *querystring;
1019 int i;
1020
1021 #if HAVE_PLEDGE
1022 /*
1023 * The "rpath" pledge could be revoked after mparse_readfd()
1024 * if the file desciptor to "/footer.html" would be opened
1025 * up front, but it's probably not worth the complication
1026 * of the code it would cause: it would require scattering
1027 * pledge() calls in multiple low-level resp_*() functions.
1028 */
1029
1030 if (pledge("stdio rpath", NULL) == -1) {
1031 warn("pledge");
1032 pg_error_internal();
1033 return EXIT_FAILURE;
1034 }
1035 #endif
1036
1037 /* Poor man's ReDoS mitigation. */
1038
1039 itimer.it_value.tv_sec = 2;
1040 itimer.it_value.tv_usec = 0;
1041 itimer.it_interval.tv_sec = 2;
1042 itimer.it_interval.tv_usec = 0;
1043 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1044 warn("setitimer");
1045 pg_error_internal();
1046 return EXIT_FAILURE;
1047 }
1048
1049 /*
1050 * First we change directory into the MAN_DIR so that
1051 * subsequent scanning for manpath directories is rooted
1052 * relative to the same position.
1053 */
1054
1055 if (chdir(MAN_DIR) == -1) {
1056 warn("MAN_DIR: %s", MAN_DIR);
1057 pg_error_internal();
1058 return EXIT_FAILURE;
1059 }
1060
1061 memset(&req, 0, sizeof(struct req));
1062 req.q.equal = 1;
1063 parse_manpath_conf(&req);
1064
1065 /* Parse the path info and the query string. */
1066
1067 if ((path = getenv("PATH_INFO")) == NULL)
1068 path = "";
1069 else if (*path == '/')
1070 path++;
1071
1072 if (*path != '\0') {
1073 parse_path_info(&req, path);
1074 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1075 path = "";
1076 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1077 parse_query_string(&req, querystring);
1078
1079 /* Validate parsed data and add defaults. */
1080
1081 if (req.q.manpath == NULL)
1082 req.q.manpath = mandoc_strdup(req.p[0]);
1083 else if ( ! validate_manpath(&req, req.q.manpath)) {
1084 pg_error_badrequest(
1085 "You specified an invalid manpath.");
1086 return EXIT_FAILURE;
1087 }
1088
1089 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1090 pg_error_badrequest(
1091 "You specified an invalid architecture.");
1092 return EXIT_FAILURE;
1093 }
1094
1095 /* Dispatch to the three different pages. */
1096
1097 if ('\0' != *path)
1098 pg_show(&req, path);
1099 else if (NULL != req.q.query)
1100 pg_search(&req);
1101 else
1102 pg_index(&req);
1103
1104 free(req.q.manpath);
1105 free(req.q.arch);
1106 free(req.q.sec);
1107 free(req.q.query);
1108 for (i = 0; i < (int)req.psz; i++)
1109 free(req.p[i]);
1110 free(req.p);
1111 return EXIT_SUCCESS;
1112 }
1113
1114 /*
1115 * If PATH_INFO is not a file name, translate it to a query.
1116 */
1117 static void
1118 parse_path_info(struct req *req, const char *path)
1119 {
1120 char *dir[4];
1121 int i;
1122
1123 req->isquery = 0;
1124 req->q.equal = 1;
1125 req->q.manpath = mandoc_strdup(path);
1126 req->q.arch = NULL;
1127
1128 /* Mandatory manual page name. */
1129 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1130 req->q.query = req->q.manpath;
1131 req->q.manpath = NULL;
1132 } else
1133 *req->q.query++ = '\0';
1134
1135 /* Optional trailing section. */
1136 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1137 if(isdigit((unsigned char)req->q.sec[1])) {
1138 *req->q.sec++ = '\0';
1139 req->q.sec = mandoc_strdup(req->q.sec);
1140 } else
1141 req->q.sec = NULL;
1142 }
1143
1144 /* Handle the case of name[.section] only. */
1145 if (req->q.manpath == NULL)
1146 return;
1147 req->q.query = mandoc_strdup(req->q.query);
1148
1149 /* Split directory components. */
1150 dir[i = 0] = req->q.manpath;
1151 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1152 if (++i == 3) {
1153 pg_error_badrequest(
1154 "You specified too many directory components.");
1155 exit(EXIT_FAILURE);
1156 }
1157 *dir[i]++ = '\0';
1158 }
1159
1160 /* Optional manpath. */
1161 if ((i = validate_manpath(req, req->q.manpath)) == 0)
1162 req->q.manpath = NULL;
1163 else if (dir[1] == NULL)
1164 return;
1165
1166 /* Optional section. */
1167 if (strncmp(dir[i], "man", 3) == 0) {
1168 free(req->q.sec);
1169 req->q.sec = mandoc_strdup(dir[i++] + 3);
1170 }
1171 if (dir[i] == NULL) {
1172 if (req->q.manpath == NULL)
1173 free(dir[0]);
1174 return;
1175 }
1176 if (dir[i + 1] != NULL) {
1177 pg_error_badrequest(
1178 "You specified an invalid directory component.");
1179 exit(EXIT_FAILURE);
1180 }
1181
1182 /* Optional architecture. */
1183 if (i) {
1184 req->q.arch = mandoc_strdup(dir[i]);
1185 if (req->q.manpath == NULL)
1186 free(dir[0]);
1187 } else
1188 req->q.arch = dir[0];
1189 }
1190
1191 /*
1192 * Scan for indexable paths.
1193 */
1194 static void
1195 parse_manpath_conf(struct req *req)
1196 {
1197 FILE *fp;
1198 char *dp;
1199 size_t dpsz;
1200 ssize_t len;
1201
1202 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1203 warn("%s/manpath.conf", MAN_DIR);
1204 pg_error_internal();
1205 exit(EXIT_FAILURE);
1206 }
1207
1208 dp = NULL;
1209 dpsz = 0;
1210
1211 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1212 if (dp[len - 1] == '\n')
1213 dp[--len] = '\0';
1214 req->p = mandoc_realloc(req->p,
1215 (req->psz + 1) * sizeof(char *));
1216 if ( ! validate_urifrag(dp)) {
1217 warnx("%s/manpath.conf contains "
1218 "unsafe path \"%s\"", MAN_DIR, dp);
1219 pg_error_internal();
1220 exit(EXIT_FAILURE);
1221 }
1222 if (strchr(dp, '/') != NULL) {
1223 warnx("%s/manpath.conf contains "
1224 "path with slash \"%s\"", MAN_DIR, dp);
1225 pg_error_internal();
1226 exit(EXIT_FAILURE);
1227 }
1228 req->p[req->psz++] = dp;
1229 dp = NULL;
1230 dpsz = 0;
1231 }
1232 free(dp);
1233
1234 if (req->p == NULL) {
1235 warnx("%s/manpath.conf is empty", MAN_DIR);
1236 pg_error_internal();
1237 exit(EXIT_FAILURE);
1238 }
1239 }