]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
use the new function man_validate() here, too
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.112 2015/10/22 22:06:43 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc.h"
35 #include "roff.h"
36 #include "mdoc.h"
37 #include "man.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct	query {
	char		*manpath;	/* manual directory to look in */
	char		*arch;		/* machine architecture */
	char		*sec;		/* section of the manual */
	char		*query;		/* query expression, not yet parsed */
	int		 equal;		/* non-zero: names must match entirely,
					 * zero: substring matches suffice */
};
53
54 struct req {
55 struct query q;
56 char **p; /* array of available manpaths */
57 size_t psz; /* number of available manpaths */
58 };
59
/* Forward declarations of all file-local functions. */
static	void	 catman(const struct req *req, const char *file);
static	void	 format(const struct req *req, const char *file);
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_parse(struct req *req, const char *qs);
static	void	 http_print(const char *p);
static	void	 http_putchar(char c);
static	void	 http_printquery(const struct req *req, const char *sep);
static	void	 pathgen(struct req *req);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, const char *msg);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	void	 resp_begin_html(int code, const char *msg);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_end_html(void);
static	void	 resp_searchform(const struct req *req);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req, const char *manpath);
static	int	 validate_urifrag(const char *frag);
87
static	const char	*scriptname;	/* CGI script name */

/*
 * Display priority of the sections 1 to 9, indexed by the
 * section digit minus '1'; smaller values are preferred.
 */
static	const int	 sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the section selector; sec_names[] runs in parallel. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown for the entries of sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int	 sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
108
/* Architectures offered by the architecture selector. */
static	const char *const arch_names[] = {
	"amd64",
	"alpha",
	"armish",
	"armv7",
	"aviion",
	"hppa",
	"hppa64",
	"i386",
	"ia64",
	"landisk",
	"loongson",
	"luna88k",
	"macppc",
	"mips64",
	"octeon",
	"sgi",
	"socppc",
	"solbourne",
	"sparc",
	"sparc64",
	"vax",
	"zaurus",
	"amiga",
	"arc",
	"arm32",
	"atari",
	"beagle",
	"cats",
	"hp300",
	"mac68k",
	"mvme68k",
	"mvme88k",
	"mvmeppc",
	"palm",
	"pc532",
	"pegasos",
	"pmax",
	"powerpc",
	"sun3",
	"wgrisc",
	"x68k"
};

/* Number of entries in arch_names[]. */
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
123
/*
 * Write one character to standard output, replacing the HTML
 * metacharacters '"', '&', '<' and '>' by named character references.
 * All other bytes, including non-ASCII ones, are passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The reference is "&quot;"; HTML defines no "&quote;". */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
150
151 static void
152 http_printquery(const struct req *req, const char *sep)
153 {
154
155 if (NULL != req->q.query) {
156 printf("query=");
157 http_print(req->q.query);
158 }
159 if (0 == req->q.equal)
160 printf("%sapropos=1", sep);
161 if (NULL != req->q.sec) {
162 printf("%ssec=", sep);
163 http_print(req->q.sec);
164 }
165 if (NULL != req->q.arch) {
166 printf("%sarch=", sep);
167 http_print(req->q.arch);
168 }
169 if (strcmp(req->q.manpath, req->p[0])) {
170 printf("%smanpath=", sep);
171 http_print(req->q.manpath);
172 }
173 }
174
/*
 * Pass every character of the string to http_putchar().
 * A NULL string prints nothing.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		http_putchar(*cp);
}
184
/*
 * Pass every character of the string to html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
198
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of the attribute is freed.  An empty string is
 * freed as well and leaves the attribute NULL.  In either case,
 * *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	newval = *val;
	*val = NULL;

	free(*attr);
	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
215
216 /*
217 * Parse the QUERY_STRING for key-value pairs
218 * and store the values into the query structure.
219 */
220 static void
221 http_parse(struct req *req, const char *qs)
222 {
223 char *key, *val;
224 size_t keysz, valsz;
225
226 req->q.manpath = NULL;
227 req->q.arch = NULL;
228 req->q.sec = NULL;
229 req->q.query = NULL;
230 req->q.equal = 1;
231
232 key = val = NULL;
233 while (*qs != '\0') {
234
235 /* Parse one key. */
236
237 keysz = strcspn(qs, "=;&");
238 key = mandoc_strndup(qs, keysz);
239 qs += keysz;
240 if (*qs != '=')
241 goto next;
242
243 /* Parse one value. */
244
245 valsz = strcspn(++qs, ";&");
246 val = mandoc_strndup(qs, valsz);
247 qs += valsz;
248
249 /* Decode and catch encoding errors. */
250
251 if ( ! (http_decode(key) && http_decode(val)))
252 goto next;
253
254 /* Handle key-value pairs. */
255
256 if ( ! strcmp(key, "query"))
257 set_query_attr(&req->q.query, &val);
258
259 else if ( ! strcmp(key, "apropos"))
260 req->q.equal = !strcmp(val, "0");
261
262 else if ( ! strcmp(key, "manpath")) {
263 #ifdef COMPAT_OLDURI
264 if ( ! strncmp(val, "OpenBSD ", 8)) {
265 val[7] = '-';
266 if ('C' == val[8])
267 val[8] = 'c';
268 }
269 #endif
270 set_query_attr(&req->q.manpath, &val);
271 }
272
273 else if ( ! (strcmp(key, "sec")
274 #ifdef COMPAT_OLDURI
275 && strcmp(key, "sektion")
276 #endif
277 )) {
278 if ( ! strcmp(val, "0"))
279 *val = '\0';
280 set_query_attr(&req->q.sec, &val);
281 }
282
283 else if ( ! strcmp(key, "arch")) {
284 if ( ! strcmp(val, "default"))
285 *val = '\0';
286 set_query_attr(&req->q.arch, &val);
287 }
288
289 /*
290 * The key must be freed in any case.
291 * The val may have been handed over to the query
292 * structure, in which case it is now NULL.
293 */
294 next:
295 free(key);
296 key = NULL;
297 free(val);
298 val = NULL;
299
300 if (*qs != '\0')
301 qs++;
302 }
303 }
304
/*
 * Write one character to standard output in URI-encoded form:
 * alphanumeric characters verbatim, a blank as '+', and every
 * other byte as '%' followed by two hexadecimal digits.
 */
static void
http_putchar(char c)
{
	unsigned char	 uc;

	/*
	 * Work on the unsigned byte value: where char is signed,
	 * passing a negative char to "%x" sign-extends it and prints
	 * eight hex digits instead of two.
	 */
	uc = (unsigned char)c;

	if (isalnum(uc))
		putchar(uc);
	else if (uc == ' ')
		putchar('+');
	else
		printf("%%%.2x", (unsigned int)uc);
}
318
319 /*
320 * HTTP-decode a string. The standard explanation is that this turns
321 * "%4e+foo" into "n foo" in the regular way. This is done in-place
322 * over the allocated string.
323 */
324 static int
325 http_decode(char *p)
326 {
327 char hex[3];
328 char *q;
329 int c;
330
331 hex[2] = '\0';
332
333 q = p;
334 for ( ; '\0' != *p; p++, q++) {
335 if ('%' == *p) {
336 if ('\0' == (hex[0] = *(p + 1)))
337 return 0;
338 if ('\0' == (hex[1] = *(p + 2)))
339 return 0;
340 if (1 != sscanf(hex, "%x", &c))
341 return 0;
342 if ('\0' == c)
343 return 0;
344
345 *q = (char)c;
346 p += 2;
347 } else
348 *q = '+' == *p ? ' ' : *p;
349 }
350
351 *q = '\0';
352 return 1;
353 }
354
/*
 * Write the HTTP response header block and flush standard output.
 * A "Status:" line with the given code and message is only written
 * for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	     "Cache-Control: no-cache\r\n"
	     "Pragma: no-cache\r\n"
	     "\r\n", stdout);

	fflush(stdout);
}
369
370 static void
371 resp_begin_html(int code, const char *msg)
372 {
373
374 resp_begin_http(code, msg);
375
376 printf("<!DOCTYPE html>\n"
377 "<HTML>\n"
378 "<HEAD>\n"
379 "<META CHARSET=\"UTF-8\" />\n"
380 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
381 " TYPE=\"text/css\" media=\"all\">\n"
382 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
383 " TYPE=\"text/css\" media=\"all\">\n"
384 "<TITLE>%s</TITLE>\n"
385 "</HEAD>\n"
386 "<BODY>\n"
387 "<!-- Begin page content. //-->\n",
388 CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
389 }
390
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	puts("</BODY>\n</HTML>");
}
398
399 static void
400 resp_searchform(const struct req *req)
401 {
402 int i;
403
404 puts(CUSTOMIZE_BEGIN);
405 puts("<!-- Begin search form. //-->");
406 printf("<DIV ID=\"mancgi\">\n"
407 "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
408 "<FIELDSET>\n"
409 "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
410 scriptname);
411
412 /* Write query input box. */
413
414 printf( "<TABLE><TR><TD>\n"
415 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
416 if (NULL != req->q.query)
417 html_print(req->q.query);
418 puts("\" SIZE=\"40\">");
419
420 /* Write submission and reset buttons. */
421
422 printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
423 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
424
425 /* Write show radio button */
426
427 printf( "</TD><TD>\n"
428 "<INPUT TYPE=\"radio\" ");
429 if (req->q.equal)
430 printf("CHECKED=\"checked\" ");
431 printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
432 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
433
434 /* Write section selector. */
435
436 puts( "</TD></TR><TR><TD>\n"
437 "<SELECT NAME=\"sec\">");
438 for (i = 0; i < sec_MAX; i++) {
439 printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
440 if (NULL != req->q.sec &&
441 0 == strcmp(sec_numbers[i], req->q.sec))
442 printf(" SELECTED=\"selected\"");
443 printf(">%s</OPTION>\n", sec_names[i]);
444 }
445 puts("</SELECT>");
446
447 /* Write architecture selector. */
448
449 printf( "<SELECT NAME=\"arch\">\n"
450 "<OPTION VALUE=\"default\"");
451 if (NULL == req->q.arch)
452 printf(" SELECTED=\"selected\"");
453 puts(">All Architectures</OPTION>");
454 for (i = 0; i < arch_MAX; i++) {
455 printf("<OPTION VALUE=\"%s\"", arch_names[i]);
456 if (NULL != req->q.arch &&
457 0 == strcmp(arch_names[i], req->q.arch))
458 printf(" SELECTED=\"selected\"");
459 printf(">%s</OPTION>\n", arch_names[i]);
460 }
461 puts("</SELECT>");
462
463 /* Write manpath selector. */
464
465 if (req->psz > 1) {
466 puts("<SELECT NAME=\"manpath\">");
467 for (i = 0; i < (int)req->psz; i++) {
468 printf("<OPTION ");
469 if (strcmp(req->q.manpath, req->p[i]) == 0)
470 printf("SELECTED=\"selected\" ");
471 printf("VALUE=\"");
472 html_print(req->p[i]);
473 printf("\">");
474 html_print(req->p[i]);
475 puts("</OPTION>");
476 }
477 puts("</SELECT>");
478 }
479
480 /* Write search radio button */
481
482 printf( "</TD><TD>\n"
483 "<INPUT TYPE=\"radio\" ");
484 if (0 == req->q.equal)
485 printf("CHECKED=\"checked\" ");
486 printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
487 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
488
489 puts("</TD></TR></TABLE>\n"
490 "</FIELDSET>\n"
491 "</FORM>\n"
492 "</DIV>");
493 puts("<!-- End search form. //-->");
494 }
495
/*
 * Check that a string consists of nothing but alphanumeric
 * characters, '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
509
510 static int
511 validate_manpath(const struct req *req, const char* manpath)
512 {
513 size_t i;
514
515 if ( ! strcmp(manpath, "mandoc"))
516 return 1;
517
518 for (i = 0; i < req->psz; i++)
519 if ( ! strcmp(manpath, req->p[i]))
520 return 1;
521
522 return 0;
523 }
524
/*
 * Check a file name relative to the manpath root.
 * After skipping an optional leading "./", the name must not
 * contain "../" or "/.." and must start with "man" or "cat".
 * Returns 1 if acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
535
536 static void
537 pg_index(const struct req *req)
538 {
539
540 resp_begin_html(200, NULL);
541 resp_searchform(req);
542 printf("<P>\n"
543 "This web interface is documented in the\n"
544 "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
545 "manual, and the\n"
546 "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
547 "manual explains the query syntax.\n"
548 "</P>\n",
549 scriptname, scriptname);
550 resp_end_html();
551 }
552
/*
 * Write a page consisting of the search form
 * and a single paragraph containing msg verbatim.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);

	puts("<P>");
	puts(msg);
	puts("</P>");

	resp_end_html();
}
563
564 static void
565 pg_error_badrequest(const char *msg)
566 {
567
568 resp_begin_html(400, "Bad Request");
569 puts("<H1>Bad Request</H1>\n"
570 "<P>\n");
571 puts(msg);
572 printf("Try again from the\n"
573 "<A HREF=\"%s\">main page</A>.\n"
574 "</P>", scriptname);
575 resp_end_html();
576 }
577
/*
 * Write a "500 Internal Server Error" page without any details.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	puts("<P>Internal Server Error</P>");
	resp_end_html();
}
585
586 static void
587 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
588 {
589 char *arch, *archend;
590 size_t i, iuse, isec;
591 int archprio, archpriouse;
592 int prio, priouse;
593 char sec;
594
595 for (i = 0; i < sz; i++) {
596 if (validate_filename(r[i].file))
597 continue;
598 fprintf(stderr, "invalid filename %s in %s database\n",
599 r[i].file, req->q.manpath);
600 pg_error_internal();
601 return;
602 }
603
604 if (1 == sz) {
605 /*
606 * If we have just one result, then jump there now
607 * without any delay.
608 */
609 printf("Status: 303 See Other\r\n");
610 printf("Location: http://%s%s/%s/%s?",
611 HTTP_HOST, scriptname, req->q.manpath, r[0].file);
612 http_printquery(req, "&");
613 printf("\r\n"
614 "Content-Type: text/html; charset=utf-8\r\n"
615 "\r\n");
616 return;
617 }
618
619 resp_begin_html(200, NULL);
620 resp_searchform(req);
621 puts("<DIV CLASS=\"results\">");
622 puts("<TABLE>");
623
624 for (i = 0; i < sz; i++) {
625 printf("<TR>\n"
626 "<TD CLASS=\"title\">\n"
627 "<A HREF=\"%s/%s/%s?",
628 scriptname, req->q.manpath, r[i].file);
629 http_printquery(req, "&amp;");
630 printf("\">");
631 html_print(r[i].names);
632 printf("</A>\n"
633 "</TD>\n"
634 "<TD CLASS=\"desc\">");
635 html_print(r[i].output);
636 puts("</TD>\n"
637 "</TR>");
638 }
639
640 puts("</TABLE>\n"
641 "</DIV>");
642
643 /*
644 * In man(1) mode, show one of the pages
645 * even if more than one is found.
646 */
647
648 if (req->q.equal) {
649 puts("<HR>");
650 iuse = 0;
651 priouse = 10;
652 archpriouse = 3;
653 for (i = 0; i < sz; i++) {
654 isec = strcspn(r[i].file, "123456789");
655 sec = r[i].file[isec];
656 if ('\0' == sec)
657 continue;
658 prio = sec_prios[sec - '1'];
659 if (NULL == req->q.arch) {
660 archprio =
661 (NULL == (arch = strchr(
662 r[i].file + isec, '/'))) ? 3 :
663 (NULL == (archend = strchr(
664 arch + 1, '/'))) ? 0 :
665 strncmp(arch, "amd64/",
666 archend - arch) ? 2 : 1;
667 if (archprio < archpriouse) {
668 archpriouse = archprio;
669 priouse = prio;
670 iuse = i;
671 continue;
672 }
673 if (archprio > archpriouse)
674 continue;
675 }
676 if (prio >= priouse)
677 continue;
678 priouse = prio;
679 iuse = i;
680 }
681 resp_show(req, r[iuse].file);
682 }
683
684 resp_end_html();
685 }
686
/*
 * Write a preformatted manual page as HTML inside a PRE element.
 * Backspace overstrike sequences are translated: "_\bX" becomes
 * italic X, a few known overstrike pairs become '*' or '+', and
 * any other "X\bY" becomes bold Y.  All text goes through
 * html_putchar().
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while ((p = fgetln(f, &len)) != NULL) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if (p[i + 1] != '\b') {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/* From here on, p[i + 1] is a backspace. */

			/* Italic mode. */

			if (p[i] == '_') {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The overstruck partner is p[i + 2], because
			 * p[i + 1] is the backspace itself.
			 */

			if ((p[i] == '+' && p[i + 2] == 'o') ||
			    (p[i] == 'o' && p[i + 2] == '+') ||
			    (p[i] == '|' && p[i + 2] == '=') ||
			    (p[i] == '=' && p[i + 2] == '|') ||
			    (p[i] == '*' && p[i + 2] == '=') ||
			    (p[i] == '=' && p[i + 2] == '*') ||
			    (p[i] == '*' && p[i + 2] == '|') ||
			    (p[i] == '|' && p[i + 2] == '*')) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if ((p[i] == '|' && p[i + 2] == '-') ||
			    (p[i] == '-' && p[i + 2] == '|') ||
			    (p[i] == '+' && p[i + 2] == '-') ||
			    (p[i] == '-' && p[i + 2] == '+') ||
			    (p[i] == '+' && p[i + 2] == '|') ||
			    (p[i] == '|' && p[i + 2] == '+')) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
818
819 static void
820 format(const struct req *req, const char *file)
821 {
822 struct manoutput conf;
823 struct mparse *mp;
824 struct roff_man *man;
825 void *vp;
826 int fd;
827 int usepath;
828
829 if (-1 == (fd = open(file, O_RDONLY, 0))) {
830 puts("<P>You specified an invalid manual file.</P>");
831 return;
832 }
833
834 mchars_alloc();
835 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
836 mparse_readfd(mp, fd, file);
837 close(fd);
838
839 memset(&conf, 0, sizeof(conf));
840 conf.fragment = 1;
841 usepath = strcmp(req->q.manpath, req->p[0]);
842 mandoc_asprintf(&conf.man, "%s?query=%%N&sec=%%S%s%s%s%s",
843 scriptname,
844 req->q.arch ? "&arch=" : "",
845 req->q.arch ? req->q.arch : "",
846 usepath ? "&manpath=" : "",
847 usepath ? req->q.manpath : "");
848
849 mparse_result(mp, &man, NULL);
850 if (man == NULL) {
851 fprintf(stderr, "fatal mandoc error: %s/%s\n",
852 req->q.manpath, file);
853 pg_error_internal();
854 mparse_free(mp);
855 mchars_free();
856 return;
857 }
858
859 vp = html_alloc(&conf);
860
861 if (man->macroset == MACROSET_MDOC) {
862 mdoc_validate(man);
863 html_mdoc(vp, man);
864 } else {
865 man_validate(man);
866 html_man(vp, man);
867 }
868
869 html_free(vp);
870 mparse_free(mp);
871 mchars_free();
872 free(conf.man);
873 }
874
/*
 * Show one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are treated as preformatted pages
 * and all others as manual sources.
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		format(req, file);
		return;
	}
	catman(req, file);
}
887
888 static void
889 pg_show(struct req *req, const char *fullpath)
890 {
891 char *manpath;
892 const char *file;
893
894 if ((file = strchr(fullpath, '/')) == NULL) {
895 pg_error_badrequest(
896 "You did not specify a page to show.");
897 return;
898 }
899 manpath = mandoc_strndup(fullpath, file - fullpath);
900 file++;
901
902 if ( ! validate_manpath(req, manpath)) {
903 pg_error_badrequest(
904 "You specified an invalid manpath.");
905 free(manpath);
906 return;
907 }
908
909 /*
910 * Begin by chdir()ing into the manpath.
911 * This way we can pick up the database files, which are
912 * relative to the manpath root.
913 */
914
915 if (chdir(manpath) == -1) {
916 fprintf(stderr, "chdir %s: %s\n",
917 manpath, strerror(errno));
918 pg_error_internal();
919 free(manpath);
920 return;
921 }
922
923 if (strcmp(manpath, "mandoc")) {
924 free(req->q.manpath);
925 req->q.manpath = manpath;
926 } else
927 free(manpath);
928
929 if ( ! validate_filename(file)) {
930 pg_error_badrequest(
931 "You specified an invalid manual file.");
932 return;
933 }
934
935 resp_begin_html(200, NULL);
936 resp_searchform(req);
937 resp_show(req, file);
938 resp_end_html();
939 }
940
941 static void
942 pg_search(const struct req *req)
943 {
944 struct mansearch search;
945 struct manpaths paths;
946 struct manpage *res;
947 char **argv;
948 char *query, *rp, *wp;
949 size_t ressz;
950 int argc;
951
952 /*
953 * Begin by chdir()ing into the root of the manpath.
954 * This way we can pick up the database files, which are
955 * relative to the manpath root.
956 */
957
958 if (-1 == (chdir(req->q.manpath))) {
959 fprintf(stderr, "chdir %s: %s\n",
960 req->q.manpath, strerror(errno));
961 pg_error_internal();
962 return;
963 }
964
965 search.arch = req->q.arch;
966 search.sec = req->q.sec;
967 search.outkey = "Nd";
968 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
969 search.firstmatch = 1;
970
971 paths.sz = 1;
972 paths.paths = mandoc_malloc(sizeof(char *));
973 paths.paths[0] = mandoc_strdup(".");
974
975 /*
976 * Break apart at spaces with backslash-escaping.
977 */
978
979 argc = 0;
980 argv = NULL;
981 rp = query = mandoc_strdup(req->q.query);
982 for (;;) {
983 while (isspace((unsigned char)*rp))
984 rp++;
985 if (*rp == '\0')
986 break;
987 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
988 argv[argc++] = wp = rp;
989 for (;;) {
990 if (isspace((unsigned char)*rp)) {
991 *wp = '\0';
992 rp++;
993 break;
994 }
995 if (rp[0] == '\\' && rp[1] != '\0')
996 rp++;
997 if (wp != rp)
998 *wp = *rp;
999 if (*rp == '\0')
1000 break;
1001 wp++;
1002 rp++;
1003 }
1004 }
1005
1006 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
1007 pg_noresult(req, "You entered an invalid query.");
1008 else if (0 == ressz)
1009 pg_noresult(req, "No results found.");
1010 else
1011 pg_searchres(req, res, ressz);
1012
1013 free(query);
1014 mansearch_free(res, ressz);
1015 free(paths.paths[0]);
1016 free(paths.paths);
1017 }
1018
1019 int
1020 main(void)
1021 {
1022 struct req req;
1023 struct itimerval itimer;
1024 const char *path;
1025 const char *querystring;
1026 int i;
1027
1028 /* Poor man's ReDoS mitigation. */
1029
1030 itimer.it_value.tv_sec = 2;
1031 itimer.it_value.tv_usec = 0;
1032 itimer.it_interval.tv_sec = 2;
1033 itimer.it_interval.tv_usec = 0;
1034 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1035 fprintf(stderr, "setitimer: %s\n", strerror(errno));
1036 pg_error_internal();
1037 return EXIT_FAILURE;
1038 }
1039
1040 /* Scan our run-time environment. */
1041
1042 if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1043 scriptname = "";
1044
1045 if ( ! validate_urifrag(scriptname)) {
1046 fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1047 scriptname);
1048 pg_error_internal();
1049 return EXIT_FAILURE;
1050 }
1051
1052 /*
1053 * First we change directory into the MAN_DIR so that
1054 * subsequent scanning for manpath directories is rooted
1055 * relative to the same position.
1056 */
1057
1058 if (-1 == chdir(MAN_DIR)) {
1059 fprintf(stderr, "MAN_DIR: %s: %s\n",
1060 MAN_DIR, strerror(errno));
1061 pg_error_internal();
1062 return EXIT_FAILURE;
1063 }
1064
1065 memset(&req, 0, sizeof(struct req));
1066 pathgen(&req);
1067
1068 /* Next parse out the query string. */
1069
1070 if (NULL != (querystring = getenv("QUERY_STRING")))
1071 http_parse(&req, querystring);
1072
1073 if (req.q.manpath == NULL)
1074 req.q.manpath = mandoc_strdup(req.p[0]);
1075 else if ( ! validate_manpath(&req, req.q.manpath)) {
1076 pg_error_badrequest(
1077 "You specified an invalid manpath.");
1078 return EXIT_FAILURE;
1079 }
1080
1081 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1082 pg_error_badrequest(
1083 "You specified an invalid architecture.");
1084 return EXIT_FAILURE;
1085 }
1086
1087 /* Dispatch to the three different pages. */
1088
1089 path = getenv("PATH_INFO");
1090 if (NULL == path)
1091 path = "";
1092 else if ('/' == *path)
1093 path++;
1094
1095 if ('\0' != *path)
1096 pg_show(&req, path);
1097 else if (NULL != req.q.query)
1098 pg_search(&req);
1099 else
1100 pg_index(&req);
1101
1102 free(req.q.manpath);
1103 free(req.q.arch);
1104 free(req.q.sec);
1105 free(req.q.query);
1106 for (i = 0; i < (int)req.psz; i++)
1107 free(req.p[i]);
1108 free(req.p);
1109 return EXIT_SUCCESS;
1110 }
1111
1112 /*
1113 * Scan for indexable paths.
1114 */
1115 static void
1116 pathgen(struct req *req)
1117 {
1118 FILE *fp;
1119 char *dp;
1120 size_t dpsz;
1121
1122 if (NULL == (fp = fopen("manpath.conf", "r"))) {
1123 fprintf(stderr, "%s/manpath.conf: %s\n",
1124 MAN_DIR, strerror(errno));
1125 pg_error_internal();
1126 exit(EXIT_FAILURE);
1127 }
1128
1129 while (NULL != (dp = fgetln(fp, &dpsz))) {
1130 if ('\n' == dp[dpsz - 1])
1131 dpsz--;
1132 req->p = mandoc_realloc(req->p,
1133 (req->psz + 1) * sizeof(char *));
1134 dp = mandoc_strndup(dp, dpsz);
1135 if ( ! validate_urifrag(dp)) {
1136 fprintf(stderr, "%s/manpath.conf contains "
1137 "unsafe path \"%s\"\n", MAN_DIR, dp);
1138 pg_error_internal();
1139 exit(EXIT_FAILURE);
1140 }
1141 if (NULL != strchr(dp, '/')) {
1142 fprintf(stderr, "%s/manpath.conf contains "
1143 "path with slash \"%s\"\n", MAN_DIR, dp);
1144 pg_error_internal();
1145 exit(EXIT_FAILURE);
1146 }
1147 req->p[req->psz++] = dp;
1148 }
1149
1150 if ( req->p == NULL ) {
1151 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1152 pg_error_internal();
1153 exit(EXIT_FAILURE);
1154 }
1155 }