]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Don't retain the search query in the resulting manual links.
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.116 2016/01/04 12:36:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc.h"
35 #include "roff.h"
36 #include "mdoc.h"
37 #include "man.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42
/*
 * Search parameters extracted from the request,
 * in the form handed on to the search function.
 * The string members are heap-allocated; http_parse() leaves
 * a member NULL when the request does not supply a value.
 */
struct query {
	char		*manpath;	/* manual directory to use */
	char		*arch;		/* architecture filter */
	char		*sec;		/* manual section filter */
	char		*query;		/* query expression, still unparsed */
	int		 equal;		/* nonzero: match whole names, */
					/* zero: match substrings */
};

/*
 * One CGI request: the parsed query plus the list of
 * manpaths this installation offers.
 */
struct req {
	struct query	  q;	/* parameters taken from the request */
	char		**p;	/* names of the available manpaths */
	size_t		  psz;	/* number of entries in p */
};
59
/* Forward declarations of all file-local functions. */
static	void	 catman(const struct req *req, const char *file);
static	void	 format(const struct req *req, const char *file);
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_parse(struct req *req, const char *qs);
static	void	 pathgen(struct req *req);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, const char *msg);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	void	 resp_begin_html(int code, const char *msg);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_copy(const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_searchform(const struct req *req);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);
85
/* CGI script name; assigned once in main() from SCRIPT_NAME. */
static	const char	*scriptname;

/*
 * Display priority of the sections 1 to 9, indexed by section
 * digit minus one; pg_searchres() prefers the smallest value.
 */
static	const int	 sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; parallel to sec_names[]. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector; parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};
static	const int	 sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered by the architecture selector. */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armish",	"armv7",
	"aviion",	"hppa",		"hppa64",	"i386",
	"ia64",		"landisk",	"loongson",	"luna88k",
	"macppc",	"mips64",	"octeon",	"sgi",
	"socppc",	"solbourne",	"sparc",	"sparc64",
	"vax",		"zaurus",
	"amiga",	"arc",		"arm32",	"atari",
	"beagle",	"cats",		"hp300",	"mac68k",
	"mvme68k",	"mvme88k",	"mvmeppc",	"palm",
	"pc532",	"pegasos",	"pmax",		"powerpc",
	"sun3",		"wgrisc",	"x68k"
};
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
121
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML text and in double-quoted
 * attribute values by named character references.
 * Every other byte, including non-ASCII, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The HTML entity is "quot"; "quote" is not defined. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
148
/*
 * Print a string with HTML escaping by feeding it to
 * html_putchar() one byte at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
162
/*
 * Hand the allocated string *val over to the query member *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL on return, so the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
179
180 /*
181 * Parse the QUERY_STRING for key-value pairs
182 * and store the values into the query structure.
183 */
184 static void
185 http_parse(struct req *req, const char *qs)
186 {
187 char *key, *val;
188 size_t keysz, valsz;
189
190 req->q.manpath = NULL;
191 req->q.arch = NULL;
192 req->q.sec = NULL;
193 req->q.query = NULL;
194 req->q.equal = 1;
195
196 key = val = NULL;
197 while (*qs != '\0') {
198
199 /* Parse one key. */
200
201 keysz = strcspn(qs, "=;&");
202 key = mandoc_strndup(qs, keysz);
203 qs += keysz;
204 if (*qs != '=')
205 goto next;
206
207 /* Parse one value. */
208
209 valsz = strcspn(++qs, ";&");
210 val = mandoc_strndup(qs, valsz);
211 qs += valsz;
212
213 /* Decode and catch encoding errors. */
214
215 if ( ! (http_decode(key) && http_decode(val)))
216 goto next;
217
218 /* Handle key-value pairs. */
219
220 if ( ! strcmp(key, "query"))
221 set_query_attr(&req->q.query, &val);
222
223 else if ( ! strcmp(key, "apropos"))
224 req->q.equal = !strcmp(val, "0");
225
226 else if ( ! strcmp(key, "manpath")) {
227 #ifdef COMPAT_OLDURI
228 if ( ! strncmp(val, "OpenBSD ", 8)) {
229 val[7] = '-';
230 if ('C' == val[8])
231 val[8] = 'c';
232 }
233 #endif
234 set_query_attr(&req->q.manpath, &val);
235 }
236
237 else if ( ! (strcmp(key, "sec")
238 #ifdef COMPAT_OLDURI
239 && strcmp(key, "sektion")
240 #endif
241 )) {
242 if ( ! strcmp(val, "0"))
243 *val = '\0';
244 set_query_attr(&req->q.sec, &val);
245 }
246
247 else if ( ! strcmp(key, "arch")) {
248 if ( ! strcmp(val, "default"))
249 *val = '\0';
250 set_query_attr(&req->q.arch, &val);
251 }
252
253 /*
254 * The key must be freed in any case.
255 * The val may have been handed over to the query
256 * structure, in which case it is now NULL.
257 */
258 next:
259 free(key);
260 key = NULL;
261 free(val);
262 val = NULL;
263
264 if (*qs != '\0')
265 qs++;
266 }
267 }
268
269 /*
270 * HTTP-decode a string. The standard explanation is that this turns
271 * "%4e+foo" into "n foo" in the regular way. This is done in-place
272 * over the allocated string.
273 */
274 static int
275 http_decode(char *p)
276 {
277 char hex[3];
278 char *q;
279 int c;
280
281 hex[2] = '\0';
282
283 q = p;
284 for ( ; '\0' != *p; p++, q++) {
285 if ('%' == *p) {
286 if ('\0' == (hex[0] = *(p + 1)))
287 return 0;
288 if ('\0' == (hex[1] = *(p + 2)))
289 return 0;
290 if (1 != sscanf(hex, "%x", &c))
291 return 0;
292 if ('\0' == c)
293 return 0;
294
295 *q = (char)c;
296 p += 2;
297 } else
298 *q = '+' == *p ? ' ' : *p;
299 }
300
301 *q = '\0';
302 return 1;
303 }
304
/*
 * Emit the CGI response header block and flush it.
 * A Status line is only sent for codes other than 200;
 * msg is used on that line only.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
319
/*
 * Copy the content of a file verbatim to standard output.
 * This is best effort: a file that cannot be opened is silently
 * skipped, and copying stops at the first read or write failure.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, off, wr;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * We write to the descriptor directly, so anything still
	 * buffered in stdio has to go out first to keep the order.
	 */

	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may accept less than requested. */

		for (off = 0; off < sz; off += wr) {
			wr = write(STDOUT_FILENO, buf + off,
			    (size_t)(sz - off));
			if (wr <= 0)
				break;
		}
		if (off < sz)
			break;
	}

	/* Do not leak the descriptor. */

	close(fd);
}
333
334 static void
335 resp_begin_html(int code, const char *msg)
336 {
337
338 resp_begin_http(code, msg);
339
340 printf("<!DOCTYPE html>\n"
341 "<HTML>\n"
342 "<HEAD>\n"
343 "<META CHARSET=\"UTF-8\" />\n"
344 "<LINK REL=\"stylesheet\" HREF=\"%s/mandoc.css\""
345 " TYPE=\"text/css\" media=\"all\">\n"
346 "<TITLE>%s</TITLE>\n"
347 "</HEAD>\n"
348 "<BODY>\n"
349 "<!-- Begin page content. //-->\n",
350 CSS_DIR, CUSTOMIZE_TITLE);
351
352 resp_copy(MAN_DIR "/header.html");
353 }
354
355 static void
356 resp_end_html(void)
357 {
358
359 resp_copy(MAN_DIR "/footer.html");
360
361 puts("</BODY>\n"
362 "</HTML>");
363 }
364
365 static void
366 resp_searchform(const struct req *req)
367 {
368 int i;
369
370 puts("<!-- Begin search form. //-->");
371 printf("<DIV ID=\"mancgi\">\n"
372 "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
373 "<FIELDSET>\n"
374 "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
375 scriptname);
376
377 /* Write query input box. */
378
379 printf( "<TABLE><TR><TD>\n"
380 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
381 if (NULL != req->q.query)
382 html_print(req->q.query);
383 puts("\" SIZE=\"40\">");
384
385 /* Write submission and reset buttons. */
386
387 printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
388 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
389
390 /* Write show radio button */
391
392 printf( "</TD><TD>\n"
393 "<INPUT TYPE=\"radio\" ");
394 if (req->q.equal)
395 printf("CHECKED=\"checked\" ");
396 printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
397 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
398
399 /* Write section selector. */
400
401 puts( "</TD></TR><TR><TD>\n"
402 "<SELECT NAME=\"sec\">");
403 for (i = 0; i < sec_MAX; i++) {
404 printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
405 if (NULL != req->q.sec &&
406 0 == strcmp(sec_numbers[i], req->q.sec))
407 printf(" SELECTED=\"selected\"");
408 printf(">%s</OPTION>\n", sec_names[i]);
409 }
410 puts("</SELECT>");
411
412 /* Write architecture selector. */
413
414 printf( "<SELECT NAME=\"arch\">\n"
415 "<OPTION VALUE=\"default\"");
416 if (NULL == req->q.arch)
417 printf(" SELECTED=\"selected\"");
418 puts(">All Architectures</OPTION>");
419 for (i = 0; i < arch_MAX; i++) {
420 printf("<OPTION VALUE=\"%s\"", arch_names[i]);
421 if (NULL != req->q.arch &&
422 0 == strcmp(arch_names[i], req->q.arch))
423 printf(" SELECTED=\"selected\"");
424 printf(">%s</OPTION>\n", arch_names[i]);
425 }
426 puts("</SELECT>");
427
428 /* Write manpath selector. */
429
430 if (req->psz > 1) {
431 puts("<SELECT NAME=\"manpath\">");
432 for (i = 0; i < (int)req->psz; i++) {
433 printf("<OPTION ");
434 if (strcmp(req->q.manpath, req->p[i]) == 0)
435 printf("SELECTED=\"selected\" ");
436 printf("VALUE=\"");
437 html_print(req->p[i]);
438 printf("\">");
439 html_print(req->p[i]);
440 puts("</OPTION>");
441 }
442 puts("</SELECT>");
443 }
444
445 /* Write search radio button */
446
447 printf( "</TD><TD>\n"
448 "<INPUT TYPE=\"radio\" ");
449 if (0 == req->q.equal)
450 printf("CHECKED=\"checked\" ");
451 printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
452 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
453
454 puts("</TD></TR></TABLE>\n"
455 "</FIELDSET>\n"
456 "</FORM>\n"
457 "</DIV>");
458 puts("<!-- End search form. //-->");
459 }
460
/*
 * Check that a string is safe to embed into a URI unescaped:
 * it may only consist of alphanumeric characters and of
 * '-', '.', '/', and '_'.  The empty string is accepted.
 * Returns 1 if the string is safe, 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
474
475 static int
476 validate_manpath(const struct req *req, const char* manpath)
477 {
478 size_t i;
479
480 if ( ! strcmp(manpath, "mandoc"))
481 return 1;
482
483 for (i = 0; i < req->psz; i++)
484 if ( ! strcmp(manpath, req->p[i]))
485 return 1;
486
487 return 0;
488 }
489
/*
 * Check a manual page file name relative to a manpath.
 * After skipping one optional leading "./", the name must start
 * with "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 for an acceptable name, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse any attempt to step up in the directory tree. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
500
501 static void
502 pg_index(const struct req *req)
503 {
504
505 resp_begin_html(200, NULL);
506 resp_searchform(req);
507 printf("<P>\n"
508 "This web interface is documented in the\n"
509 "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
510 "manual, and the\n"
511 "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
512 "manual explains the query syntax.\n"
513 "</P>\n",
514 scriptname, scriptname);
515 resp_end_html();
516 }
517
/*
 * A page for searches that did not produce any result:
 * the search form followed by a paragraph containing msg.
 * The message is printed verbatim, without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
528
529 static void
530 pg_error_badrequest(const char *msg)
531 {
532
533 resp_begin_html(400, "Bad Request");
534 puts("<H1>Bad Request</H1>\n"
535 "<P>\n");
536 puts(msg);
537 printf("Try again from the\n"
538 "<A HREF=\"%s\">main page</A>.\n"
539 "</P>", scriptname);
540 resp_end_html();
541 }
542
/*
 * A minimal "500 Internal Server Error" page.
 * Details are expected to be logged by the caller.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
550
551 static void
552 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
553 {
554 char *arch, *archend;
555 size_t i, iuse, isec;
556 int archprio, archpriouse;
557 int prio, priouse;
558 char sec;
559
560 for (i = 0; i < sz; i++) {
561 if (validate_filename(r[i].file))
562 continue;
563 fprintf(stderr, "invalid filename %s in %s database\n",
564 r[i].file, req->q.manpath);
565 pg_error_internal();
566 return;
567 }
568
569 if (1 == sz) {
570 /*
571 * If we have just one result, then jump there now
572 * without any delay.
573 */
574 printf("Status: 303 See Other\r\n");
575 printf("Location: http://%s%s/%s/%s",
576 HTTP_HOST, scriptname, req->q.manpath, r[0].file);
577 printf("\r\n"
578 "Content-Type: text/html; charset=utf-8\r\n"
579 "\r\n");
580 return;
581 }
582
583 resp_begin_html(200, NULL);
584 resp_searchform(req);
585 puts("<DIV CLASS=\"results\">");
586 puts("<TABLE>");
587
588 for (i = 0; i < sz; i++) {
589 printf("<TR>\n"
590 "<TD CLASS=\"title\">\n"
591 "<A HREF=\"%s/%s/%s",
592 scriptname, req->q.manpath, r[i].file);
593 printf("\">");
594 html_print(r[i].names);
595 printf("</A>\n"
596 "</TD>\n"
597 "<TD CLASS=\"desc\">");
598 html_print(r[i].output);
599 puts("</TD>\n"
600 "</TR>");
601 }
602
603 puts("</TABLE>\n"
604 "</DIV>");
605
606 /*
607 * In man(1) mode, show one of the pages
608 * even if more than one is found.
609 */
610
611 if (req->q.equal) {
612 puts("<HR>");
613 iuse = 0;
614 priouse = 10;
615 archpriouse = 3;
616 for (i = 0; i < sz; i++) {
617 isec = strcspn(r[i].file, "123456789");
618 sec = r[i].file[isec];
619 if ('\0' == sec)
620 continue;
621 prio = sec_prios[sec - '1'];
622 if (NULL == req->q.arch) {
623 archprio =
624 (NULL == (arch = strchr(
625 r[i].file + isec, '/'))) ? 3 :
626 (NULL == (archend = strchr(
627 arch + 1, '/'))) ? 0 :
628 strncmp(arch, "amd64/",
629 archend - arch) ? 2 : 1;
630 if (archprio < archpriouse) {
631 archpriouse = archprio;
632 priouse = prio;
633 iuse = i;
634 continue;
635 }
636 if (archprio > archpriouse)
637 continue;
638 }
639 if (prio >= priouse)
640 continue;
641 priouse = prio;
642 iuse = i;
643 }
644 resp_show(req, r[iuse].file);
645 }
646
647 resp_end_html();
648 }
649
/*
 * Close the italic and bold scopes that are currently open
 * and mark both as closed.
 */
static void
catman_closefont(int *italic, int *bold)
{

	if (*italic)
		printf("</I>");
	if (*bold)
		printf("</B>");
	*italic = *bold = 0;
}

/*
 * Check whether the characters a and b, printed on top
 * of each other, are the pair x and y in either order.
 */
static int
catman_overstrike(char a, char b, char x, char y)
{

	return (a == x && b == y) || (a == y && b == x);
}

/*
 * Print a preformatted manual page (a "cat page") as HTML.
 * Backspace overstriking is translated to markup: an underscore
 * overstruck with a character is italic, any other overstruck
 * character is bold, and a few special overstrike pairs are
 * rendered as '*' or '+'.  All text is HTML-escaped.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of sync:
			 * a backspace with nothing to overstrike.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if (p[i + 1] != '\b') {
				catman_closefont(&italic, &bold);
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if (p[i] == '_') {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both sides of each pair are taken from
			 * around the backspace, never from p[i + 1].
			 */

			if (catman_overstrike(p[i], p[i + 2], '+', 'o') ||
			    catman_overstrike(p[i], p[i + 2], '|', '=') ||
			    catman_overstrike(p[i], p[i + 2], '*', '=') ||
			    catman_overstrike(p[i], p[i + 2], '*', '|')) {
				catman_closefont(&italic, &bold);
				putchar('*');
				i += 2;
				continue;
			} else if (
			    catman_overstrike(p[i], p[i + 2], '|', '-') ||
			    catman_overstrike(p[i], p[i + 2], '+', '-') ||
			    catman_overstrike(p[i], p[i + 2], '+', '|')) {
				catman_closefont(&italic, &bold);
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		catman_closefont(&italic, &bold);

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
786
787 static void
788 format(const struct req *req, const char *file)
789 {
790 struct manoutput conf;
791 struct mparse *mp;
792 struct roff_man *man;
793 void *vp;
794 int fd;
795 int usepath;
796
797 if (-1 == (fd = open(file, O_RDONLY, 0))) {
798 puts("<P>You specified an invalid manual file.</P>");
799 return;
800 }
801
802 mchars_alloc();
803 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
804 mparse_readfd(mp, fd, file);
805 close(fd);
806
807 memset(&conf, 0, sizeof(conf));
808 conf.fragment = 1;
809 usepath = strcmp(req->q.manpath, req->p[0]);
810 mandoc_asprintf(&conf.man, "%s?query=%%N&sec=%%S%s%s%s%s",
811 scriptname,
812 req->q.arch ? "&arch=" : "",
813 req->q.arch ? req->q.arch : "",
814 usepath ? "&manpath=" : "",
815 usepath ? req->q.manpath : "");
816
817 mparse_result(mp, &man, NULL);
818 if (man == NULL) {
819 fprintf(stderr, "fatal mandoc error: %s/%s\n",
820 req->q.manpath, file);
821 pg_error_internal();
822 mparse_free(mp);
823 mchars_free();
824 return;
825 }
826
827 vp = html_alloc(&conf);
828
829 if (man->macroset == MACROSET_MDOC) {
830 mdoc_validate(man);
831 html_mdoc(vp, man);
832 } else {
833 man_validate(man);
834 html_man(vp, man);
835 }
836
837 html_free(vp);
838 mparse_free(mp);
839 mchars_free();
840 free(conf.man);
841 }
842
/*
 * Print one manual page, choosing the renderer by file name.
 * After skipping one optional leading "./", names starting with
 * 'c' go to catman(), all others to format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		format(req, rel);
		return;
	}
	catman(req, rel);
}
855
856 static void
857 pg_show(struct req *req, const char *fullpath)
858 {
859 char *manpath;
860 const char *file;
861
862 if ((file = strchr(fullpath, '/')) == NULL) {
863 pg_error_badrequest(
864 "You did not specify a page to show.");
865 return;
866 }
867 manpath = mandoc_strndup(fullpath, file - fullpath);
868 file++;
869
870 if ( ! validate_manpath(req, manpath)) {
871 pg_error_badrequest(
872 "You specified an invalid manpath.");
873 free(manpath);
874 return;
875 }
876
877 /*
878 * Begin by chdir()ing into the manpath.
879 * This way we can pick up the database files, which are
880 * relative to the manpath root.
881 */
882
883 if (chdir(manpath) == -1) {
884 fprintf(stderr, "chdir %s: %s\n",
885 manpath, strerror(errno));
886 pg_error_internal();
887 free(manpath);
888 return;
889 }
890
891 if (strcmp(manpath, "mandoc")) {
892 free(req->q.manpath);
893 req->q.manpath = manpath;
894 } else
895 free(manpath);
896
897 if ( ! validate_filename(file)) {
898 pg_error_badrequest(
899 "You specified an invalid manual file.");
900 return;
901 }
902
903 resp_begin_html(200, NULL);
904 resp_searchform(req);
905 resp_show(req, file);
906 resp_end_html();
907 }
908
909 static void
910 pg_search(const struct req *req)
911 {
912 struct mansearch search;
913 struct manpaths paths;
914 struct manpage *res;
915 char **argv;
916 char *query, *rp, *wp;
917 size_t ressz;
918 int argc;
919
920 /*
921 * Begin by chdir()ing into the root of the manpath.
922 * This way we can pick up the database files, which are
923 * relative to the manpath root.
924 */
925
926 if (-1 == (chdir(req->q.manpath))) {
927 fprintf(stderr, "chdir %s: %s\n",
928 req->q.manpath, strerror(errno));
929 pg_error_internal();
930 return;
931 }
932
933 search.arch = req->q.arch;
934 search.sec = req->q.sec;
935 search.outkey = "Nd";
936 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
937 search.firstmatch = 1;
938
939 paths.sz = 1;
940 paths.paths = mandoc_malloc(sizeof(char *));
941 paths.paths[0] = mandoc_strdup(".");
942
943 /*
944 * Break apart at spaces with backslash-escaping.
945 */
946
947 argc = 0;
948 argv = NULL;
949 rp = query = mandoc_strdup(req->q.query);
950 for (;;) {
951 while (isspace((unsigned char)*rp))
952 rp++;
953 if (*rp == '\0')
954 break;
955 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
956 argv[argc++] = wp = rp;
957 for (;;) {
958 if (isspace((unsigned char)*rp)) {
959 *wp = '\0';
960 rp++;
961 break;
962 }
963 if (rp[0] == '\\' && rp[1] != '\0')
964 rp++;
965 if (wp != rp)
966 *wp = *rp;
967 if (*rp == '\0')
968 break;
969 wp++;
970 rp++;
971 }
972 }
973
974 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
975 pg_noresult(req, "You entered an invalid query.");
976 else if (0 == ressz)
977 pg_noresult(req, "No results found.");
978 else
979 pg_searchres(req, res, ressz);
980
981 free(query);
982 mansearch_free(res, ressz);
983 free(paths.paths[0]);
984 free(paths.paths);
985 }
986
987 int
988 main(void)
989 {
990 struct req req;
991 struct itimerval itimer;
992 const char *path;
993 const char *querystring;
994 int i;
995
996 /* Poor man's ReDoS mitigation. */
997
998 itimer.it_value.tv_sec = 2;
999 itimer.it_value.tv_usec = 0;
1000 itimer.it_interval.tv_sec = 2;
1001 itimer.it_interval.tv_usec = 0;
1002 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1003 fprintf(stderr, "setitimer: %s\n", strerror(errno));
1004 pg_error_internal();
1005 return EXIT_FAILURE;
1006 }
1007
1008 /* Scan our run-time environment. */
1009
1010 if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1011 scriptname = "";
1012
1013 if ( ! validate_urifrag(scriptname)) {
1014 fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1015 scriptname);
1016 pg_error_internal();
1017 return EXIT_FAILURE;
1018 }
1019
1020 /*
1021 * First we change directory into the MAN_DIR so that
1022 * subsequent scanning for manpath directories is rooted
1023 * relative to the same position.
1024 */
1025
1026 if (-1 == chdir(MAN_DIR)) {
1027 fprintf(stderr, "MAN_DIR: %s: %s\n",
1028 MAN_DIR, strerror(errno));
1029 pg_error_internal();
1030 return EXIT_FAILURE;
1031 }
1032
1033 memset(&req, 0, sizeof(struct req));
1034 pathgen(&req);
1035
1036 /* Next parse out the query string. */
1037
1038 if (NULL != (querystring = getenv("QUERY_STRING")))
1039 http_parse(&req, querystring);
1040
1041 if (req.q.manpath == NULL)
1042 req.q.manpath = mandoc_strdup(req.p[0]);
1043 else if ( ! validate_manpath(&req, req.q.manpath)) {
1044 pg_error_badrequest(
1045 "You specified an invalid manpath.");
1046 return EXIT_FAILURE;
1047 }
1048
1049 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1050 pg_error_badrequest(
1051 "You specified an invalid architecture.");
1052 return EXIT_FAILURE;
1053 }
1054
1055 /* Dispatch to the three different pages. */
1056
1057 path = getenv("PATH_INFO");
1058 if (NULL == path)
1059 path = "";
1060 else if ('/' == *path)
1061 path++;
1062
1063 if ('\0' != *path)
1064 pg_show(&req, path);
1065 else if (NULL != req.q.query)
1066 pg_search(&req);
1067 else
1068 pg_index(&req);
1069
1070 free(req.q.manpath);
1071 free(req.q.arch);
1072 free(req.q.sec);
1073 free(req.q.query);
1074 for (i = 0; i < (int)req.psz; i++)
1075 free(req.p[i]);
1076 free(req.p);
1077 return EXIT_SUCCESS;
1078 }
1079
1080 /*
1081 * Scan for indexable paths.
1082 */
1083 static void
1084 pathgen(struct req *req)
1085 {
1086 FILE *fp;
1087 char *dp;
1088 size_t dpsz;
1089 ssize_t len;
1090
1091 if (NULL == (fp = fopen("manpath.conf", "r"))) {
1092 fprintf(stderr, "%s/manpath.conf: %s\n",
1093 MAN_DIR, strerror(errno));
1094 pg_error_internal();
1095 exit(EXIT_FAILURE);
1096 }
1097
1098 dp = NULL;
1099 dpsz = 0;
1100
1101 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1102 if (dp[len - 1] == '\n')
1103 dp[--len] = '\0';
1104 req->p = mandoc_realloc(req->p,
1105 (req->psz + 1) * sizeof(char *));
1106 if ( ! validate_urifrag(dp)) {
1107 fprintf(stderr, "%s/manpath.conf contains "
1108 "unsafe path \"%s\"\n", MAN_DIR, dp);
1109 pg_error_internal();
1110 exit(EXIT_FAILURE);
1111 }
1112 if (NULL != strchr(dp, '/')) {
1113 fprintf(stderr, "%s/manpath.conf contains "
1114 "path with slash \"%s\"\n", MAN_DIR, dp);
1115 pg_error_internal();
1116 exit(EXIT_FAILURE);
1117 }
1118 req->p[req->psz++] = dp;
1119 dp = NULL;
1120 dpsz = 0;
1121 }
1122 free(dp);
1123
1124 if ( req->p == NULL ) {
1125 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1126 pg_error_internal();
1127 exit(EXIT_FAILURE);
1128 }
1129 }