]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Improve -Tascii output for Unicode escape sequences: For the first 512
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.99 2014/10/07 18:20:06 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc.h"
34 #include "mandoc_aux.h"
35 #include "main.h"
36 #include "manpath.h"
37 #include "mansearch.h"
38 #include "cgi.h"
39
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 * String members may be NULL.
 */
struct	query {
	char		*manpath;	/* manual directory to search */
	char		*arch;		/* architecture restriction */
	char		*sec;		/* manual section restriction */
	char		*query;		/* query expression, not yet parsed */
	int		 equal;		/* non-zero: names must match
					 * completely, not as substrings */
};
50
51 struct req {
52 struct query q;
53 char **p; /* array of available manpaths */
54 size_t psz; /* number of available manpaths */
55 };
56
/* Forward declarations of all file-local functions. */
static	void	 catman(const struct req *req, const char *file);
static	void	 format(const struct req *req, const char *file);
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_parse(struct req *req, const char *qs);
static	void	 http_print(const char *p);
static	void	 http_putchar(char c);
static	void	 http_printquery(const struct req *req, const char *sep);
static	void	 pathgen(struct req *req);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, const char *msg);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	void	 resp_begin_html(int code, const char *msg);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_end_html(void);
static	void	 resp_searchform(const struct req *req);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);
84
/* CGI script name, taken from the environment in main(). */
static	const char	*scriptname;

/*
 * Preference value for each section digit '1' to '9', indexed by
 * (digit - '1'); pg_searchres() shows the page with the lowest value.
 */
static	const int	 sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; parallel to sec_names. */
static	const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};

/* Labels of the section selector; parallel to sec_numbers. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};
static	const int	 sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered in the architecture selector. */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armish",	"armv7",
	"aviion",	"hppa",		"hppa64",	"i386",
	"ia64",		"landisk",	"loongson",	"luna88k",
	"macppc",	"mips64",	"octeon",	"sgi",
	"socppc",	"solbourne",	"sparc",	"sparc64",
	"vax",		"zaurus",
	"amiga",	"arc",		"arm32",	"atari",
	"beagle",	"cats",		"hp300",	"mac68k",
	"mvme68k",	"mvme88k",	"mvmeppc",	"palm",
	"pc532",	"pegasos",	"pmax",		"powerpc",
	"sun3",		"wgrisc",	"x68k"
};
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
120
/*
 * Write one character to standard output, escaping HTML along the way:
 * the characters '"', '&', '>' and '<' are replaced by the character
 * references &quot;, &amp;, &gt; and &lt;.
 * Every other byte is written unchanged, so non-ASCII input
 * goes straight to the output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is "quot"; "&quote;" is not defined in HTML. */
		printf("&quot;");
		break;
	case '&':
		printf("&amp;");
		break;
	case '>':
		printf("&gt;");
		break;
	case '<':
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
147
148 static void
149 http_printquery(const struct req *req, const char *sep)
150 {
151
152 if (NULL != req->q.query) {
153 printf("query=");
154 http_print(req->q.query);
155 }
156 if (0 == req->q.equal)
157 printf("%sapropos=1", sep);
158 if (NULL != req->q.sec) {
159 printf("%ssec=", sep);
160 http_print(req->q.sec);
161 }
162 if (NULL != req->q.arch) {
163 printf("%sarch=", sep);
164 http_print(req->q.arch);
165 }
166 if (NULL != req->q.manpath &&
167 strcmp(req->q.manpath, req->p[0])) {
168 printf("%smanpath=", sep);
169 http_print(req->q.manpath);
170 }
171 }
172
/*
 * Feed a string through http_putchar(), one character at a time.
 * A NULL string prints nothing.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		http_putchar(*cp);
}
182
/*
 * Feed a string through html_putchar(), one character at a time.
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
196
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
213
214 /*
215 * Parse the QUERY_STRING for key-value pairs
216 * and store the values into the query structure.
217 */
218 static void
219 http_parse(struct req *req, const char *qs)
220 {
221 char *key, *val;
222 size_t keysz, valsz;
223
224 req->q.manpath = NULL;
225 req->q.arch = NULL;
226 req->q.sec = NULL;
227 req->q.query = NULL;
228 req->q.equal = 1;
229
230 key = val = NULL;
231 while (*qs != '\0') {
232
233 /* Parse one key. */
234
235 keysz = strcspn(qs, "=;&");
236 key = mandoc_strndup(qs, keysz);
237 qs += keysz;
238 if (*qs != '=')
239 goto next;
240
241 /* Parse one value. */
242
243 valsz = strcspn(++qs, ";&");
244 val = mandoc_strndup(qs, valsz);
245 qs += valsz;
246
247 /* Decode and catch encoding errors. */
248
249 if ( ! (http_decode(key) && http_decode(val)))
250 goto next;
251
252 /* Handle key-value pairs. */
253
254 if ( ! strcmp(key, "query"))
255 set_query_attr(&req->q.query, &val);
256
257 else if ( ! strcmp(key, "apropos"))
258 req->q.equal = !strcmp(val, "0");
259
260 else if ( ! strcmp(key, "manpath")) {
261 #ifdef COMPAT_OLDURI
262 if ( ! strncmp(val, "OpenBSD ", 8)) {
263 val[7] = '-';
264 if ('C' == val[8])
265 val[8] = 'c';
266 }
267 #endif
268 set_query_attr(&req->q.manpath, &val);
269 }
270
271 else if ( ! (strcmp(key, "sec")
272 #ifdef COMPAT_OLDURI
273 && strcmp(key, "sektion")
274 #endif
275 )) {
276 if ( ! strcmp(val, "0"))
277 *val = '\0';
278 set_query_attr(&req->q.sec, &val);
279 }
280
281 else if ( ! strcmp(key, "arch")) {
282 if ( ! strcmp(val, "default"))
283 *val = '\0';
284 set_query_attr(&req->q.arch, &val);
285 }
286
287 /*
288 * The key must be freed in any case.
289 * The val may have been handed over to the query
290 * structure, in which case it is now NULL.
291 */
292 next:
293 free(key);
294 key = NULL;
295 free(val);
296 val = NULL;
297
298 if (*qs != '\0')
299 qs++;
300 }
301
302 /* Fall back to the default manpath. */
303
304 if (req->q.manpath == NULL)
305 req->q.manpath = mandoc_strdup(req->p[0]);
306 }
307
/*
 * Write one character to standard output, encoded for use
 * in a URI query: alphanumeric characters are written as they are,
 * a blank is written as '+', and every other byte is written as '%'
 * followed by exactly two lower-case hexadecimal digits.
 */
static void
http_putchar(char c)
{
	unsigned char	 uc;

	/*
	 * Work on the unsigned value of the byte.  Where plain char
	 * is signed, passing c itself to printf would sign-extend
	 * bytes above 0x7f and print eight hex digits instead of two.
	 */
	uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}
	printf("%%%.2x", (unsigned int)uc);
}
321
/*
 * HTTP-decode a string in place: '+' becomes a blank and "%XX",
 * with XX being two hexadecimal digits, becomes the byte with that
 * value, such that "%4e+foo" turns into "N foo".
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if the escape encodes a NUL
 * byte; in that case, the content of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	long	 c;

	hex[2] = '\0';

	for (q = p; '\0' != *p; p++, q++) {
		if ('+' == *p) {
			*q = ' ';
			continue;
		}
		if ('%' != *p) {
			*q = *p;
			continue;
		}

		/*
		 * Insist on two hex digits.  The terminating NUL is
		 * not a hex digit, so a truncated escape is rejected
		 * by the first test before p[2] is looked at.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return(0);

		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (0 == c)
			return(0);

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return(1);
}
357
/*
 * Write the CGI response header block, including the empty line
 * terminating it, and flush standard output.
 * A Status: line is only written when the code is not 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 fields[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(fields, stdout);
	fflush(stdout);
}
372
373 static void
374 resp_begin_html(int code, const char *msg)
375 {
376
377 resp_begin_http(code, msg);
378
379 printf("<!DOCTYPE html>\n"
380 "<HTML>\n"
381 "<HEAD>\n"
382 "<META CHARSET=\"UTF-8\" />\n"
383 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
384 " TYPE=\"text/css\" media=\"all\">\n"
385 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
386 " TYPE=\"text/css\" media=\"all\">\n"
387 "<TITLE>%s</TITLE>\n"
388 "</HEAD>\n"
389 "<BODY>\n"
390 "<!-- Begin page content. //-->\n",
391 CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
392 }
393
/*
 * Finish an HTML response by closing the BODY and HTML elements.
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n"
	    "</HTML>\n", stdout);
}
401
402 static void
403 resp_searchform(const struct req *req)
404 {
405 int i;
406
407 puts(CUSTOMIZE_BEGIN);
408 puts("<!-- Begin search form. //-->");
409 printf("<DIV ID=\"mancgi\">\n"
410 "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
411 "<FIELDSET>\n"
412 "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
413 scriptname);
414
415 /* Write query input box. */
416
417 printf( "<TABLE><TR><TD>\n"
418 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
419 if (NULL != req->q.query)
420 html_print(req->q.query);
421 puts("\" SIZE=\"40\">");
422
423 /* Write submission and reset buttons. */
424
425 printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
426 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
427
428 /* Write show radio button */
429
430 printf( "</TD><TD>\n"
431 "<INPUT TYPE=\"radio\" ");
432 if (req->q.equal)
433 printf("CHECKED=\"checked\" ");
434 printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
435 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
436
437 /* Write section selector. */
438
439 puts( "</TD></TR><TR><TD>\n"
440 "<SELECT NAME=\"sec\">");
441 for (i = 0; i < sec_MAX; i++) {
442 printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
443 if (NULL != req->q.sec &&
444 0 == strcmp(sec_numbers[i], req->q.sec))
445 printf(" SELECTED=\"selected\"");
446 printf(">%s</OPTION>\n", sec_names[i]);
447 }
448 puts("</SELECT>");
449
450 /* Write architecture selector. */
451
452 printf( "<SELECT NAME=\"arch\">\n"
453 "<OPTION VALUE=\"default\"");
454 if (NULL == req->q.arch)
455 printf(" SELECTED=\"selected\"");
456 puts(">All Architectures</OPTION>");
457 for (i = 0; i < arch_MAX; i++) {
458 printf("<OPTION VALUE=\"%s\"", arch_names[i]);
459 if (NULL != req->q.arch &&
460 0 == strcmp(arch_names[i], req->q.arch))
461 printf(" SELECTED=\"selected\"");
462 printf(">%s</OPTION>\n", arch_names[i]);
463 }
464 puts("</SELECT>");
465
466 /* Write manpath selector. */
467
468 if (req->psz > 1) {
469 puts("<SELECT NAME=\"manpath\">");
470 for (i = 0; i < (int)req->psz; i++) {
471 printf("<OPTION ");
472 if (NULL == req->q.manpath ? 0 == i :
473 0 == strcmp(req->q.manpath, req->p[i]))
474 printf("SELECTED=\"selected\" ");
475 printf("VALUE=\"");
476 html_print(req->p[i]);
477 printf("\">");
478 html_print(req->p[i]);
479 puts("</OPTION>");
480 }
481 puts("</SELECT>");
482 }
483
484 /* Write search radio button */
485
486 printf( "</TD><TD>\n"
487 "<INPUT TYPE=\"radio\" ");
488 if (0 == req->q.equal)
489 printf("CHECKED=\"checked\" ");
490 printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
491 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
492
493 puts("</TD></TR></TABLE>\n"
494 "</FIELDSET>\n"
495 "</FORM>\n"
496 "</DIV>");
497 puts("<!-- End search form. //-->");
498 }
499
/*
 * Check that a string consists of nothing but alphanumeric
 * characters and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (in particular for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return(0);
		}
	}
	return(1);
}
513
514 static int
515 validate_manpath(const struct req *req, const char* manpath)
516 {
517 size_t i;
518
519 if ( ! strcmp(manpath, "mandoc"))
520 return(1);
521
522 for (i = 0; i < req->psz; i++)
523 if ( ! strcmp(manpath, req->p[i]))
524 return(1);
525
526 return(0);
527 }
528
/*
 * Check a file name relative to a manpath.  After skipping
 * an optional leading "./", the name must start with "man" or "cat"
 * and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return(0);
	if (strstr(file, "/..") != NULL)
		return(0);
	if (strncmp(file, "man", 3) == 0)
		return(1);
	if (strncmp(file, "cat", 3) == 0)
		return(1);
	return(0);
}
539
540 static void
541 pg_index(const struct req *req)
542 {
543
544 resp_begin_html(200, NULL);
545 resp_searchform(req);
546 printf("<P>\n"
547 "This web interface is documented in the\n"
548 "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
549 "manual, and the\n"
550 "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
551 "manual explains the query syntax.\n"
552 "</P>\n",
553 scriptname, scriptname);
554 resp_end_html();
555 }
556
/*
 * A page for searches that yield nothing to display:
 * the search form followed by a paragraph containing msg.
 * The message is written as it is, without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
567
568 static void
569 pg_error_badrequest(const char *msg)
570 {
571
572 resp_begin_html(400, "Bad Request");
573 puts("<H1>Bad Request</H1>\n"
574 "<P>\n");
575 puts(msg);
576 printf("Try again from the\n"
577 "<A HREF=\"%s\">main page</A>.\n"
578 "</P>", scriptname);
579 resp_end_html();
580 }
581
/*
 * A complete error page with status 500.
 * It gives no details; callers report those on standard error.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
589
590 static void
591 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
592 {
593 char *arch, *archend;
594 size_t i, iuse, isec;
595 int archprio, archpriouse;
596 int prio, priouse;
597 char sec;
598
599 for (i = 0; i < sz; i++) {
600 if (validate_filename(r[i].file))
601 continue;
602 fprintf(stderr, "invalid filename %s in %s database\n",
603 r[i].file, req->q.manpath);
604 pg_error_internal();
605 return;
606 }
607
608 if (1 == sz) {
609 /*
610 * If we have just one result, then jump there now
611 * without any delay.
612 */
613 printf("Status: 303 See Other\r\n");
614 printf("Location: http://%s%s/%s/%s?",
615 HTTP_HOST, scriptname, req->q.manpath, r[0].file);
616 http_printquery(req, "&");
617 printf("\r\n"
618 "Content-Type: text/html; charset=utf-8\r\n"
619 "\r\n");
620 return;
621 }
622
623 resp_begin_html(200, NULL);
624 resp_searchform(req);
625 puts("<DIV CLASS=\"results\">");
626 puts("<TABLE>");
627
628 for (i = 0; i < sz; i++) {
629 printf("<TR>\n"
630 "<TD CLASS=\"title\">\n"
631 "<A HREF=\"%s/%s/%s?",
632 scriptname, req->q.manpath, r[i].file);
633 http_printquery(req, "&amp;");
634 printf("\">");
635 html_print(r[i].names);
636 printf("</A>\n"
637 "</TD>\n"
638 "<TD CLASS=\"desc\">");
639 html_print(r[i].output);
640 puts("</TD>\n"
641 "</TR>");
642 }
643
644 puts("</TABLE>\n"
645 "</DIV>");
646
647 /*
648 * In man(1) mode, show one of the pages
649 * even if more than one is found.
650 */
651
652 if (req->q.equal) {
653 puts("<HR>");
654 iuse = 0;
655 priouse = 10;
656 archpriouse = 3;
657 for (i = 0; i < sz; i++) {
658 isec = strcspn(r[i].file, "123456789");
659 sec = r[i].file[isec];
660 if ('\0' == sec)
661 continue;
662 prio = sec_prios[sec - '1'];
663 if (NULL == req->q.arch) {
664 archprio =
665 (NULL == (arch = strchr(
666 r[i].file + isec, '/'))) ? 3 :
667 (NULL == (archend = strchr(
668 arch + 1, '/'))) ? 0 :
669 strncmp(arch, "amd64/",
670 archend - arch) ? 2 : 1;
671 if (archprio < archpriouse) {
672 archpriouse = archprio;
673 priouse = prio;
674 iuse = i;
675 continue;
676 }
677 if (archprio > archpriouse)
678 continue;
679 }
680 if (prio >= priouse)
681 continue;
682 priouse = prio;
683 iuse = i;
684 }
685 resp_show(req, r[iuse].file);
686 }
687
688 resp_end_html();
689 }
690
/*
 * Write a preformatted manual page as HTML.
 * The file is read line by line.  Overstrike sequences of the form
 * "x\bx" are rendered as bold text and sequences of the form "_\bx"
 * as italic text, a few special overstrike combinations are replaced
 * by '*' or '+', and all other characters are written HTML-escaped.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * From here on, p[i + 1] is a backspace,
			 * so the overstruck character is p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
822
823 static void
824 format(const struct req *req, const char *file)
825 {
826 struct mparse *mp;
827 struct mdoc *mdoc;
828 struct man *man;
829 void *vp;
830 char *opts;
831 enum mandoclevel rc;
832 int fd;
833 int usepath;
834
835 if (-1 == (fd = open(file, O_RDONLY, 0))) {
836 puts("<P>You specified an invalid manual file.</P>");
837 return;
838 }
839
840 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_FATAL, NULL,
841 req->q.manpath);
842 rc = mparse_readfd(mp, fd, file);
843 close(fd);
844
845 if (rc >= MANDOCLEVEL_FATAL) {
846 fprintf(stderr, "fatal mandoc error: %s/%s\n",
847 req->q.manpath, file);
848 pg_error_internal();
849 return;
850 }
851
852 usepath = strcmp(req->q.manpath, req->p[0]);
853 mandoc_asprintf(&opts,
854 "fragment,man=%s?query=%%N&sec=%%S%s%s%s%s",
855 scriptname,
856 req->q.arch ? "&arch=" : "",
857 req->q.arch ? req->q.arch : "",
858 usepath ? "&manpath=" : "",
859 usepath ? req->q.manpath : "");
860
861 mparse_result(mp, &mdoc, &man, NULL);
862 if (NULL == man && NULL == mdoc) {
863 fprintf(stderr, "fatal mandoc error: %s/%s\n",
864 req->q.manpath, file);
865 pg_error_internal();
866 mparse_free(mp);
867 return;
868 }
869
870 vp = html_alloc(opts);
871
872 if (NULL != mdoc)
873 html_mdoc(vp, mdoc);
874 else
875 html_man(vp, man);
876
877 html_free(vp);
878 mparse_free(mp);
879 free(opts);
880 }
881
/*
 * Write the content of one manual page.  After skipping an optional
 * leading "./", file names starting with the letter 'c' are handled
 * by catman(), all others by format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		format(req, name);
		return;
	}
	catman(req, name);
}
894
895 static void
896 pg_show(struct req *req, const char *fullpath)
897 {
898 char *manpath;
899 const char *file;
900
901 if ((file = strchr(fullpath, '/')) == NULL) {
902 pg_error_badrequest(
903 "You did not specify a page to show.");
904 return;
905 }
906 manpath = mandoc_strndup(fullpath, file - fullpath);
907 file++;
908
909 if ( ! validate_manpath(req, manpath)) {
910 pg_error_badrequest(
911 "You specified an invalid manpath.");
912 free(manpath);
913 return;
914 }
915
916 /*
917 * Begin by chdir()ing into the manpath.
918 * This way we can pick up the database files, which are
919 * relative to the manpath root.
920 */
921
922 if (chdir(manpath) == -1) {
923 fprintf(stderr, "chdir %s: %s\n",
924 manpath, strerror(errno));
925 pg_error_internal();
926 free(manpath);
927 return;
928 }
929
930 if (strcmp(manpath, "mandoc")) {
931 free(req->q.manpath);
932 req->q.manpath = manpath;
933 } else
934 free(manpath);
935
936 if ( ! validate_filename(file)) {
937 pg_error_badrequest(
938 "You specified an invalid manual file.");
939 return;
940 }
941
942 resp_begin_html(200, NULL);
943 resp_searchform(req);
944 resp_show(req, file);
945 resp_end_html();
946 }
947
948 static void
949 pg_search(const struct req *req)
950 {
951 struct mansearch search;
952 struct manpaths paths;
953 struct manpage *res;
954 char **argv;
955 char *query, *rp, *wp;
956 size_t ressz;
957 int argc;
958
959 /*
960 * Begin by chdir()ing into the root of the manpath.
961 * This way we can pick up the database files, which are
962 * relative to the manpath root.
963 */
964
965 if (-1 == (chdir(req->q.manpath))) {
966 fprintf(stderr, "chdir %s: %s\n",
967 req->q.manpath, strerror(errno));
968 pg_error_internal();
969 return;
970 }
971
972 search.arch = req->q.arch;
973 search.sec = req->q.sec;
974 search.outkey = "Nd";
975 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
976
977 paths.sz = 1;
978 paths.paths = mandoc_malloc(sizeof(char *));
979 paths.paths[0] = mandoc_strdup(".");
980
981 /*
982 * Break apart at spaces with backslash-escaping.
983 */
984
985 argc = 0;
986 argv = NULL;
987 rp = query = mandoc_strdup(req->q.query);
988 for (;;) {
989 while (isspace((unsigned char)*rp))
990 rp++;
991 if (*rp == '\0')
992 break;
993 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
994 argv[argc++] = wp = rp;
995 for (;;) {
996 if (isspace((unsigned char)*rp)) {
997 *wp = '\0';
998 rp++;
999 break;
1000 }
1001 if (rp[0] == '\\' && rp[1] != '\0')
1002 rp++;
1003 if (wp != rp)
1004 *wp = *rp;
1005 if (*rp == '\0')
1006 break;
1007 wp++;
1008 rp++;
1009 }
1010 }
1011
1012 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
1013 pg_noresult(req, "You entered an invalid query.");
1014 else if (0 == ressz)
1015 pg_noresult(req, "No results found.");
1016 else
1017 pg_searchres(req, res, ressz);
1018
1019 free(query);
1020 mansearch_free(res, ressz);
1021 free(paths.paths[0]);
1022 free(paths.paths);
1023 }
1024
1025 int
1026 main(void)
1027 {
1028 struct req req;
1029 struct itimerval itimer;
1030 const char *path;
1031 const char *querystring;
1032 int i;
1033
1034 /* Poor man's ReDoS mitigation. */
1035
1036 itimer.it_value.tv_sec = 2;
1037 itimer.it_value.tv_usec = 0;
1038 itimer.it_interval.tv_sec = 2;
1039 itimer.it_interval.tv_usec = 0;
1040 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1041 fprintf(stderr, "setitimer: %s\n", strerror(errno));
1042 pg_error_internal();
1043 return(EXIT_FAILURE);
1044 }
1045
1046 /* Scan our run-time environment. */
1047
1048 if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1049 scriptname = "";
1050
1051 if ( ! validate_urifrag(scriptname)) {
1052 fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1053 scriptname);
1054 pg_error_internal();
1055 return(EXIT_FAILURE);
1056 }
1057
1058 /*
1059 * First we change directory into the MAN_DIR so that
1060 * subsequent scanning for manpath directories is rooted
1061 * relative to the same position.
1062 */
1063
1064 if (-1 == chdir(MAN_DIR)) {
1065 fprintf(stderr, "MAN_DIR: %s: %s\n",
1066 MAN_DIR, strerror(errno));
1067 pg_error_internal();
1068 return(EXIT_FAILURE);
1069 }
1070
1071 memset(&req, 0, sizeof(struct req));
1072 pathgen(&req);
1073
1074 /* Next parse out the query string. */
1075
1076 if (NULL != (querystring = getenv("QUERY_STRING")))
1077 http_parse(&req, querystring);
1078
1079 if ( ! (NULL == req.q.manpath ||
1080 validate_manpath(&req, req.q.manpath))) {
1081 pg_error_badrequest(
1082 "You specified an invalid manpath.");
1083 return(EXIT_FAILURE);
1084 }
1085
1086 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1087 pg_error_badrequest(
1088 "You specified an invalid architecture.");
1089 return(EXIT_FAILURE);
1090 }
1091
1092 /* Dispatch to the three different pages. */
1093
1094 path = getenv("PATH_INFO");
1095 if (NULL == path)
1096 path = "";
1097 else if ('/' == *path)
1098 path++;
1099
1100 if ('\0' != *path)
1101 pg_show(&req, path);
1102 else if (NULL != req.q.query)
1103 pg_search(&req);
1104 else
1105 pg_index(&req);
1106
1107 free(req.q.manpath);
1108 free(req.q.arch);
1109 free(req.q.sec);
1110 free(req.q.query);
1111 for (i = 0; i < (int)req.psz; i++)
1112 free(req.p[i]);
1113 free(req.p);
1114 return(EXIT_SUCCESS);
1115 }
1116
1117 /*
1118 * Scan for indexable paths.
1119 */
1120 static void
1121 pathgen(struct req *req)
1122 {
1123 FILE *fp;
1124 char *dp;
1125 size_t dpsz;
1126
1127 if (NULL == (fp = fopen("manpath.conf", "r"))) {
1128 fprintf(stderr, "%s/manpath.conf: %s\n",
1129 MAN_DIR, strerror(errno));
1130 pg_error_internal();
1131 exit(EXIT_FAILURE);
1132 }
1133
1134 while (NULL != (dp = fgetln(fp, &dpsz))) {
1135 if ('\n' == dp[dpsz - 1])
1136 dpsz--;
1137 req->p = mandoc_realloc(req->p,
1138 (req->psz + 1) * sizeof(char *));
1139 dp = mandoc_strndup(dp, dpsz);
1140 if ( ! validate_urifrag(dp)) {
1141 fprintf(stderr, "%s/manpath.conf contains "
1142 "unsafe path \"%s\"\n", MAN_DIR, dp);
1143 pg_error_internal();
1144 exit(EXIT_FAILURE);
1145 }
1146 if (NULL != strchr(dp, '/')) {
1147 fprintf(stderr, "%s/manpath.conf contains "
1148 "path with slash \"%s\"\n", MAN_DIR, dp);
1149 pg_error_internal();
1150 exit(EXIT_FAILURE);
1151 }
1152 req->p[req->psz++] = dp;
1153 }
1154
1155 if ( req->p == NULL ) {
1156 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1157 pg_error_internal();
1158 exit(EXIT_FAILURE);
1159 }
1160 }