]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Make the SCRIPT_NAME logic simpler, safer, and make it actually work;
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.119 2016/03/18 13:22:27 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc.h"
35 #include "roff.h"
36 #include "mdoc.h"
37 #include "man.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42
/*
 * A query as passed to the search function.
 * The string members are either NULL or point to allocated memory;
 * main() releases them with free(3) before exiting.
 */
struct query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
53
/*
 * State of one request: the query parameters plus the list of
 * manpaths that pathgen() read from manpath.conf.
 */
struct req {
	struct query	  q; /* query parameters of this request */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
};
59
/* Forward declarations of the functions local to this file. */
static	void		 catman(const struct req *, const char *);
static	void		 format(const struct req *, const char *);
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_parse(struct req *, const char *);
static	void		 pathgen(struct req *);
static	void		 path_parse(struct req *req, const char *path);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
86
/*
 * Name of this CGI script as it appears in generated URIs,
 * without a leading slash; taken from the compile-time SCRIPT_NAME.
 */
static	const char	*scriptname = SCRIPT_NAME;

/*
 * Display priority of the sections 1 to 9, indexed by the section
 * digit minus '1'.  pg_searchres() prefers the smallest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Parallel arrays for the section selector of the search form:
 * sec_numbers[] holds the OPTION values, sec_names[] the labels.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);

/* Architecture names offered by the selector of the search form. */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armish",      "armv7",
    "aviion",      "hppa",        "hppa64",      "i386",
    "ia64",        "landisk",     "loongson",    "luna88k",
    "macppc",      "mips64",      "octeon",      "sgi",
    "socppc",      "solbourne",   "sparc",       "sparc64",
    "vax",         "zaurus",
    "amiga",       "arc",         "arm32",       "atari",
    "beagle",      "cats",        "hp300",       "mac68k",
    "mvme68k",     "mvme88k",     "mvmeppc",     "palm",
    "pc532",       "pegasos",     "pmax",        "powerpc",
    "sun3",        "wgrisc",      "x68k"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
122
/*
 * Print a character to standard output, escaping HTML along the way:
 * the characters '"', '&', '>' and '<' are replaced by the
 * corresponding character entity references.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is called "quot"; "&quote;" does not exist. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
149
/*
 * Print a NUL-terminated string to standard output, passing
 * each byte through html_putchar() for HTML escaping.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
163
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored but freed, leaving *attr NULL.  In both cases, *val is
 * NULL on return, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*old;

	old = *attr;
	free(old);

	if (**val != '\0') {
		*attr = *val;
		*val = NULL;
		return;
	}

	/* Empty value: the attribute counts as unset. */
	*attr = NULL;
	free(*val);
	*val = NULL;
}
180
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 * Pairs are separated by ';' or '&'.  Keys without a '=', pairs
 * that fail HTTP decoding, and unknown keys are silently skipped.
 * A later occurrence of a key replaces the value of an earlier one.
 */
static void
http_parse(struct req *req, const char *qs)
{
	char		*key, *val;
	size_t		 keysz, valsz;

	/* Reset to the defaults; old values are not freed here. */
	req->q.manpath	= NULL;
	req->q.arch	= NULL;
	req->q.sec	= NULL;
	req->q.query	= NULL;
	req->q.equal	= 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* Only "apropos=0" selects matching of whole names. */
		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/*
			 * Replace the blank after "OpenBSD" by a dash
			 * and lower-case a 'C' following it.
			 */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/* "0" means all sections: store NULL. */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/* "default" means all architectures: store NULL. */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Step over the ';' or '&' separating two pairs. */
		if (*qs != '\0')
			qs++;
	}
}
269
/*
 * HTTP-decode a string in place: "+" becomes a blank, and "%XX",
 * where XX are exactly two hexadecimal digits, becomes the byte
 * with that value, such that "%4e+foo" turns into "N foo".
 * The result is never longer than the input.
 * Returns 1 on success or 0 on an encoding error, that is, for a
 * '%' not followed by two hexadecimal digits or for an encoded NUL
 * byte; after an error, the content of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	unsigned int	 c;

	hex[2] = '\0';

	q = p;
	for ( ; '\0' != *p; p++, q++) {
		if ('%' == *p) {
			/*
			 * Insist on two hexadecimal digits.
			 * Relying on sscanf(3) alone would also accept
			 * signs, white space, and trailing garbage,
			 * for example "%-1" or "%1z".
			 * Since isxdigit(3) rejects NUL, this also
			 * catches a string ending prematurely.
			 */
			hex[0] = *(p + 1);
			if ( ! isxdigit((unsigned char)hex[0]))
				return 0;
			hex[1] = *(p + 2);
			if ( ! isxdigit((unsigned char)hex[1]))
				return 0;
			if (1 != sscanf(hex, "%x", &c))
				return 0;
			/* An embedded NUL would truncate the string. */
			if (0 == c)
				return 0;

			*q = (char)c;
			p += 2;
		} else
			*q = '+' == *p ? ' ' : *p;
	}

	*q = '\0';
	return 1;
}
305
/*
 * Print the CGI response header to standard output and flush it.
 * A "Status:" line built from code and msg is only sent for codes
 * other than 200; msg is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);

	/* An empty line ends the header. */
	fputs("\r\n", stdout);
	fflush(stdout);
}
320
/*
 * Copy the content of the given file verbatim to standard output.
 * If the file cannot be opened, nothing is printed and no error is
 * reported.  Copying stops silently at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, wsz, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* The data below bypasses stdio, so flush pending output first. */
	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may write less than requested: loop. */
		for (off = 0; off < sz; off += wsz) {
			wsz = write(STDOUT_FILENO, buf + off,
			    (size_t)(sz - off));
			if (wsz < 1) {
				close(fd);
				return;
			}
		}
	}

	/* Do not leak the descriptor. */
	close(fd);
}
334
335 static void
336 resp_begin_html(int code, const char *msg)
337 {
338
339 resp_begin_http(code, msg);
340
341 printf("<!DOCTYPE html>\n"
342 "<HTML>\n"
343 "<HEAD>\n"
344 "<META CHARSET=\"UTF-8\" />\n"
345 "<LINK REL=\"stylesheet\" HREF=\"%s/mandoc.css\""
346 " TYPE=\"text/css\" media=\"all\">\n"
347 "<TITLE>%s</TITLE>\n"
348 "</HEAD>\n"
349 "<BODY>\n"
350 "<!-- Begin page content. //-->\n",
351 CSS_DIR, CUSTOMIZE_TITLE);
352
353 resp_copy(MAN_DIR "/header.html");
354 }
355
356 static void
357 resp_end_html(void)
358 {
359
360 resp_copy(MAN_DIR "/footer.html");
361
362 puts("</BODY>\n"
363 "</HTML>");
364 }
365
366 static void
367 resp_searchform(const struct req *req)
368 {
369 int i;
370
371 puts("<!-- Begin search form. //-->");
372 printf("<DIV ID=\"mancgi\">\n"
373 "<FORM ACTION=\"/%s\" METHOD=\"get\">\n"
374 "<FIELDSET>\n"
375 "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
376 scriptname);
377
378 /* Write query input box. */
379
380 printf( "<TABLE><TR><TD>\n"
381 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
382 if (NULL != req->q.query)
383 html_print(req->q.query);
384 puts("\" SIZE=\"40\">");
385
386 /* Write submission and reset buttons. */
387
388 printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
389 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
390
391 /* Write show radio button */
392
393 printf( "</TD><TD>\n"
394 "<INPUT TYPE=\"radio\" ");
395 if (req->q.equal)
396 printf("CHECKED=\"checked\" ");
397 printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
398 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
399
400 /* Write section selector. */
401
402 puts( "</TD></TR><TR><TD>\n"
403 "<SELECT NAME=\"sec\">");
404 for (i = 0; i < sec_MAX; i++) {
405 printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
406 if (NULL != req->q.sec &&
407 0 == strcmp(sec_numbers[i], req->q.sec))
408 printf(" SELECTED=\"selected\"");
409 printf(">%s</OPTION>\n", sec_names[i]);
410 }
411 puts("</SELECT>");
412
413 /* Write architecture selector. */
414
415 printf( "<SELECT NAME=\"arch\">\n"
416 "<OPTION VALUE=\"default\"");
417 if (NULL == req->q.arch)
418 printf(" SELECTED=\"selected\"");
419 puts(">All Architectures</OPTION>");
420 for (i = 0; i < arch_MAX; i++) {
421 printf("<OPTION VALUE=\"%s\"", arch_names[i]);
422 if (NULL != req->q.arch &&
423 0 == strcmp(arch_names[i], req->q.arch))
424 printf(" SELECTED=\"selected\"");
425 printf(">%s</OPTION>\n", arch_names[i]);
426 }
427 puts("</SELECT>");
428
429 /* Write manpath selector. */
430
431 if (req->psz > 1) {
432 puts("<SELECT NAME=\"manpath\">");
433 for (i = 0; i < (int)req->psz; i++) {
434 printf("<OPTION ");
435 if (strcmp(req->q.manpath, req->p[i]) == 0)
436 printf("SELECTED=\"selected\" ");
437 printf("VALUE=\"");
438 html_print(req->p[i]);
439 printf("\">");
440 html_print(req->p[i]);
441 puts("</OPTION>");
442 }
443 puts("</SELECT>");
444 }
445
446 /* Write search radio button */
447
448 printf( "</TD><TD>\n"
449 "<INPUT TYPE=\"radio\" ");
450 if (0 == req->q.equal)
451 printf("CHECKED=\"checked\" ");
452 printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
453 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
454
455 puts("</TD></TR></TABLE>\n"
456 "</FIELDSET>\n"
457 "</FORM>\n"
458 "</DIV>");
459 puts("<!-- End search form. //-->");
460 }
461
/*
 * Check that a string is safe for use as part of a URI:
 * it may only contain ASCII letters and digits (as classified by
 * isalnum(3)) and the characters '-', '.', '/' and '_'.
 * Returns 1 if the string is acceptable (the empty string is),
 * or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
475
476 static int
477 validate_manpath(const struct req *req, const char* manpath)
478 {
479 size_t i;
480
481 if ( ! strcmp(manpath, "mandoc"))
482 return 1;
483
484 for (i = 0; i < req->psz; i++)
485 if ( ! strcmp(manpath, req->p[i]))
486 return 1;
487
488 return 0;
489 }
490
/*
 * Check a file name relative to a manpath: after an optional
 * leading "./", it must begin with "man" or "cat" and must not
 * contain "../" or "/..".
 * Returns 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
501
502 static void
503 pg_index(const struct req *req)
504 {
505
506 resp_begin_html(200, NULL);
507 resp_searchform(req);
508 printf("<P>\n"
509 "This web interface is documented in the\n"
510 "<A HREF=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</A>\n"
511 "manual, and the\n"
512 "<A HREF=\"/%s%smandoc/man1/apropos.1\">apropos</A>\n"
513 "manual explains the query syntax.\n"
514 "</P>\n",
515 scriptname, *scriptname == '\0' ? "" : "/",
516 scriptname, *scriptname == '\0' ? "" : "/");
517 resp_end_html();
518 }
519
/*
 * Print a page consisting of the search form and a paragraph
 * containing msg.  The message is printed verbatim, without
 * HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{
	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
530
531 static void
532 pg_error_badrequest(const char *msg)
533 {
534
535 resp_begin_html(400, "Bad Request");
536 puts("<H1>Bad Request</H1>\n"
537 "<P>\n");
538 puts(msg);
539 printf("Try again from the\n"
540 "<A HREF=\"/%s\">main page</A>.\n"
541 "</P>", scriptname);
542 resp_end_html();
543 }
544
/*
 * Print a "500 Internal Server Error" page.
 * Details are not shown to the client; callers report them
 * on standard error before calling this function.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
552
553 static void
554 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
555 {
556 char *arch, *archend;
557 size_t i, iuse, isec;
558 int archprio, archpriouse;
559 int prio, priouse;
560 char sec;
561
562 for (i = 0; i < sz; i++) {
563 if (validate_filename(r[i].file))
564 continue;
565 fprintf(stderr, "invalid filename %s in %s database\n",
566 r[i].file, req->q.manpath);
567 pg_error_internal();
568 return;
569 }
570
571 if (1 == sz) {
572 /*
573 * If we have just one result, then jump there now
574 * without any delay.
575 */
576 printf("Status: 303 See Other\r\n");
577 printf("Location: http://%s/%s%s%s/%s",
578 HTTP_HOST, scriptname,
579 *scriptname == '\0' ? "" : "/",
580 req->q.manpath, r[0].file);
581 printf("\r\n"
582 "Content-Type: text/html; charset=utf-8\r\n"
583 "\r\n");
584 return;
585 }
586
587 resp_begin_html(200, NULL);
588 resp_searchform(req);
589 puts("<DIV CLASS=\"results\">");
590 puts("<TABLE>");
591
592 for (i = 0; i < sz; i++) {
593 printf("<TR>\n"
594 "<TD CLASS=\"title\">\n"
595 "<A HREF=\"/%s%s%s/%s",
596 scriptname, *scriptname == '\0' ? "" : "/",
597 req->q.manpath, r[i].file);
598 printf("\">");
599 html_print(r[i].names);
600 printf("</A>\n"
601 "</TD>\n"
602 "<TD CLASS=\"desc\">");
603 html_print(r[i].output);
604 puts("</TD>\n"
605 "</TR>");
606 }
607
608 puts("</TABLE>\n"
609 "</DIV>");
610
611 /*
612 * In man(1) mode, show one of the pages
613 * even if more than one is found.
614 */
615
616 if (req->q.equal) {
617 puts("<HR>");
618 iuse = 0;
619 priouse = 10;
620 archpriouse = 3;
621 for (i = 0; i < sz; i++) {
622 isec = strcspn(r[i].file, "123456789");
623 sec = r[i].file[isec];
624 if ('\0' == sec)
625 continue;
626 prio = sec_prios[sec - '1'];
627 if (NULL == req->q.arch) {
628 archprio =
629 (NULL == (arch = strchr(
630 r[i].file + isec, '/'))) ? 3 :
631 (NULL == (archend = strchr(
632 arch + 1, '/'))) ? 0 :
633 strncmp(arch, "amd64/",
634 archend - arch) ? 2 : 1;
635 if (archprio < archpriouse) {
636 archpriouse = archprio;
637 priouse = prio;
638 iuse = i;
639 continue;
640 }
641 if (archprio > archpriouse)
642 continue;
643 }
644 if (prio >= priouse)
645 continue;
646 priouse = prio;
647 iuse = i;
648 }
649 resp_show(req, r[iuse].file);
650 }
651
652 resp_end_html();
653 }
654
/*
 * Print a preformatted manual page as HTML inside a PRE element.
 * Backspace overstrike sequences are translated: an underscore
 * followed by backspace and a character prints that character in
 * italics, some pairs of overstruck characters print as '*' or '+',
 * and any other overstruck pair prints the second character in bold.
 * If the file cannot be opened, an error paragraph is printed instead.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * All tests compare p[i] with p[i + 2]:
			 * testing p[i + 1], the backspace, against
			 * a printable character could never match.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
791
792 static void
793 format(const struct req *req, const char *file)
794 {
795 struct manoutput conf;
796 struct mparse *mp;
797 struct roff_man *man;
798 void *vp;
799 int fd;
800 int usepath;
801
802 if (-1 == (fd = open(file, O_RDONLY, 0))) {
803 puts("<P>You specified an invalid manual file.</P>");
804 return;
805 }
806
807 mchars_alloc();
808 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
809 mparse_readfd(mp, fd, file);
810 close(fd);
811
812 memset(&conf, 0, sizeof(conf));
813 conf.fragment = 1;
814 usepath = strcmp(req->q.manpath, req->p[0]);
815 mandoc_asprintf(&conf.man, "/%s?query=%%N&sec=%%S%s%s%s%s",
816 scriptname,
817 req->q.arch ? "&arch=" : "",
818 req->q.arch ? req->q.arch : "",
819 usepath ? "&manpath=" : "",
820 usepath ? req->q.manpath : "");
821
822 mparse_result(mp, &man, NULL);
823 if (man == NULL) {
824 fprintf(stderr, "fatal mandoc error: %s/%s\n",
825 req->q.manpath, file);
826 pg_error_internal();
827 mparse_free(mp);
828 mchars_free();
829 return;
830 }
831
832 vp = html_alloc(&conf);
833
834 if (man->macroset == MACROSET_MDOC) {
835 mdoc_validate(man);
836 html_mdoc(vp, man);
837 } else {
838 man_validate(man);
839 html_man(vp, man);
840 }
841
842 html_free(vp);
843 mparse_free(mp);
844 mchars_free();
845 free(conf.man);
846 }
847
/*
 * Print one manual page as HTML.  After skipping an optional
 * leading "./", a file name starting with 'c' is treated as a
 * preformatted page and handed to catman(); anything else is
 * handed to format() for parsing.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		format(req, rel);
		return;
	}
	catman(req, rel);
}
860
861 static void
862 pg_show(struct req *req, const char *fullpath)
863 {
864 char *manpath;
865 const char *file;
866
867 if ((file = strchr(fullpath, '/')) == NULL) {
868 pg_error_badrequest(
869 "You did not specify a page to show.");
870 return;
871 }
872 manpath = mandoc_strndup(fullpath, file - fullpath);
873 file++;
874
875 if ( ! validate_manpath(req, manpath)) {
876 pg_error_badrequest(
877 "You specified an invalid manpath.");
878 free(manpath);
879 return;
880 }
881
882 /*
883 * Begin by chdir()ing into the manpath.
884 * This way we can pick up the database files, which are
885 * relative to the manpath root.
886 */
887
888 if (chdir(manpath) == -1) {
889 fprintf(stderr, "chdir %s: %s\n",
890 manpath, strerror(errno));
891 pg_error_internal();
892 free(manpath);
893 return;
894 }
895
896 if (strcmp(manpath, "mandoc")) {
897 free(req->q.manpath);
898 req->q.manpath = manpath;
899 } else
900 free(manpath);
901
902 if ( ! validate_filename(file)) {
903 pg_error_badrequest(
904 "You specified an invalid manual file.");
905 return;
906 }
907
908 resp_begin_html(200, NULL);
909 resp_searchform(req);
910 resp_show(req, file);
911 resp_end_html();
912 }
913
914 static void
915 pg_search(const struct req *req)
916 {
917 struct mansearch search;
918 struct manpaths paths;
919 struct manpage *res;
920 char **argv;
921 char *query, *rp, *wp;
922 size_t ressz;
923 int argc;
924
925 /*
926 * Begin by chdir()ing into the root of the manpath.
927 * This way we can pick up the database files, which are
928 * relative to the manpath root.
929 */
930
931 if (-1 == (chdir(req->q.manpath))) {
932 fprintf(stderr, "chdir %s: %s\n",
933 req->q.manpath, strerror(errno));
934 pg_error_internal();
935 return;
936 }
937
938 search.arch = req->q.arch;
939 search.sec = req->q.sec;
940 search.outkey = "Nd";
941 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
942 search.firstmatch = 1;
943
944 paths.sz = 1;
945 paths.paths = mandoc_malloc(sizeof(char *));
946 paths.paths[0] = mandoc_strdup(".");
947
948 /*
949 * Break apart at spaces with backslash-escaping.
950 */
951
952 argc = 0;
953 argv = NULL;
954 rp = query = mandoc_strdup(req->q.query);
955 for (;;) {
956 while (isspace((unsigned char)*rp))
957 rp++;
958 if (*rp == '\0')
959 break;
960 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
961 argv[argc++] = wp = rp;
962 for (;;) {
963 if (isspace((unsigned char)*rp)) {
964 *wp = '\0';
965 rp++;
966 break;
967 }
968 if (rp[0] == '\\' && rp[1] != '\0')
969 rp++;
970 if (wp != rp)
971 *wp = *rp;
972 if (*rp == '\0')
973 break;
974 wp++;
975 rp++;
976 }
977 }
978
979 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
980 pg_noresult(req, "You entered an invalid query.");
981 else if (0 == ressz)
982 pg_noresult(req, "No results found.");
983 else
984 pg_searchres(req, res, ressz);
985
986 free(query);
987 mansearch_free(res, ressz);
988 free(paths.paths[0]);
989 free(paths.paths);
990 }
991
992 int
993 main(void)
994 {
995 struct req req;
996 struct itimerval itimer;
997 const char *path;
998 const char *querystring;
999 int i;
1000
1001 /* Poor man's ReDoS mitigation. */
1002
1003 itimer.it_value.tv_sec = 2;
1004 itimer.it_value.tv_usec = 0;
1005 itimer.it_interval.tv_sec = 2;
1006 itimer.it_interval.tv_usec = 0;
1007 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1008 fprintf(stderr, "setitimer: %s\n", strerror(errno));
1009 pg_error_internal();
1010 return EXIT_FAILURE;
1011 }
1012
1013 /*
1014 * First we change directory into the MAN_DIR so that
1015 * subsequent scanning for manpath directories is rooted
1016 * relative to the same position.
1017 */
1018
1019 if (-1 == chdir(MAN_DIR)) {
1020 fprintf(stderr, "MAN_DIR: %s: %s\n",
1021 MAN_DIR, strerror(errno));
1022 pg_error_internal();
1023 return EXIT_FAILURE;
1024 }
1025
1026 memset(&req, 0, sizeof(struct req));
1027 req.q.equal = 1;
1028 pathgen(&req);
1029
1030 /* Parse the path info and the query string. */
1031
1032 if ((path = getenv("PATH_INFO")) == NULL)
1033 path = "";
1034 else if (*path == '/')
1035 path++;
1036
1037 if (*path != '\0' && access(path, F_OK) == -1) {
1038 path_parse(&req, path);
1039 path = "";
1040 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1041 http_parse(&req, querystring);
1042
1043 /* Validate parsed data and add defaults. */
1044
1045 if (req.q.manpath == NULL)
1046 req.q.manpath = mandoc_strdup(req.p[0]);
1047 else if ( ! validate_manpath(&req, req.q.manpath)) {
1048 pg_error_badrequest(
1049 "You specified an invalid manpath.");
1050 return EXIT_FAILURE;
1051 }
1052
1053 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1054 pg_error_badrequest(
1055 "You specified an invalid architecture.");
1056 return EXIT_FAILURE;
1057 }
1058
1059 /* Dispatch to the three different pages. */
1060
1061 if ('\0' != *path)
1062 pg_show(&req, path);
1063 else if (NULL != req.q.query)
1064 pg_search(&req);
1065 else
1066 pg_index(&req);
1067
1068 free(req.q.manpath);
1069 free(req.q.arch);
1070 free(req.q.sec);
1071 free(req.q.query);
1072 for (i = 0; i < (int)req.psz; i++)
1073 free(req.p[i]);
1074 free(req.p);
1075 return EXIT_SUCCESS;
1076 }
1077
/*
 * If PATH_INFO is not a file name, translate it to a query.
 * The path is taken apart from the right, accepting the form
 * [manpath/[arch/][man[section]/]]name[.section]
 * and the parts found are stored into req->q as allocated strings.
 * The old content of req->q is overwritten without being freed.
 */
static void
path_parse(struct req *req, const char *path)
{
	int	 dir_done;

	req->q.equal = 1;
	/* Work on a copy that can be cut into pieces in place. */
	req->q.manpath = mandoc_strdup(path);

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No slash: the whole copy is the name. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		/* Only a suffix starting with a digit is a section. */
		if(isdigit((unsigned char)req->q.sec[1])) {
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL) {
		/* Here, query is the start of the copy made above. */
		req->q.arch = NULL;
		return;
	}
	/* So far, query points into the copy owned by manpath. */
	req->q.query = mandoc_strdup(req->q.query);

	/* Optional architecture. */
	dir_done = 0;
	for (;;) {
		/* Nothing left to cut off: there is no architecture. */
		if ((req->q.arch = strrchr(req->q.manpath, '/')) == NULL)
			break;
		*req->q.arch++ = '\0';
		if (dir_done || strncmp(req->q.arch, "man", 3)) {
			req->q.arch = mandoc_strdup(req->q.arch);
			break;
		}

		/* Optional directory name. */
		req->q.arch += 3;
		/* The section in the directory name overrides the suffix. */
		if (*req->q.arch != '\0') {
			free(req->q.sec);
			req->q.sec = mandoc_strdup(req->q.arch);
		}
		dir_done = 1;
	}
}
1132
1133 /*
1134 * Scan for indexable paths.
1135 */
1136 static void
1137 pathgen(struct req *req)
1138 {
1139 FILE *fp;
1140 char *dp;
1141 size_t dpsz;
1142 ssize_t len;
1143
1144 if (NULL == (fp = fopen("manpath.conf", "r"))) {
1145 fprintf(stderr, "%s/manpath.conf: %s\n",
1146 MAN_DIR, strerror(errno));
1147 pg_error_internal();
1148 exit(EXIT_FAILURE);
1149 }
1150
1151 dp = NULL;
1152 dpsz = 0;
1153
1154 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1155 if (dp[len - 1] == '\n')
1156 dp[--len] = '\0';
1157 req->p = mandoc_realloc(req->p,
1158 (req->psz + 1) * sizeof(char *));
1159 if ( ! validate_urifrag(dp)) {
1160 fprintf(stderr, "%s/manpath.conf contains "
1161 "unsafe path \"%s\"\n", MAN_DIR, dp);
1162 pg_error_internal();
1163 exit(EXIT_FAILURE);
1164 }
1165 if (NULL != strchr(dp, '/')) {
1166 fprintf(stderr, "%s/manpath.conf contains "
1167 "path with slash \"%s\"\n", MAN_DIR, dp);
1168 pg_error_internal();
1169 exit(EXIT_FAILURE);
1170 }
1171 req->p[req->psz++] = dp;
1172 dp = NULL;
1173 dpsz = 0;
1174 }
1175 free(dp);
1176
1177 if ( req->p == NULL ) {
1178 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1179 pg_error_internal();
1180 exit(EXIT_FAILURE);
1181 }
1182 }