]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
0x%llu is a bad idea, make that 0x%llx;
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.141 2016/09/12 00:06:20 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "main.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "cgi.h"
43
/*
 * A query as passed to the search function.
 * Each string member is either NULL or points to an allocated
 * string; main() releases all four with free() before exiting.
 */
struct query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
54
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths that parse_manpath_conf() read from manpath.conf.
 */
struct req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
61
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,	/* no element is focused */
	FOCUS_QUERY	/* focus the query input box */
};
66
/*
 * Forward declarations of the file-local functions.
 * html_*: HTML-escaped output; parse_*: request parsing;
 * pg_*: complete pages; resp_*: pieces of a response;
 * validate_*: checks on user-supplied or configured strings.
 */
static	void	 html_print(const char *);
static	void	 html_putchar(char);
static	int	 http_decode(char *);
static	void	 parse_manpath_conf(struct req *);
static	void	 parse_path_info(struct req *req, const char *path);
static	void	 parse_query_string(struct req *, const char *);
static	void	 pg_error_badrequest(const char *);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *);
static	void	 pg_noresult(const struct req *, const char *);
static	void	 pg_search(const struct req *);
static	void	 pg_searchres(const struct req *,
			struct manpage *, size_t);
static	void	 pg_show(struct req *, const char *);
static	void	 resp_begin_html(int, const char *);
static	void	 resp_begin_http(int, const char *);
static	void	 resp_catman(const struct req *, const char *);
static	void	 resp_copy(const char *);
static	void	 resp_end_html(void);
static	void	 resp_format(const struct req *, const char *);
static	void	 resp_searchform(const struct req *, enum focus);
static	void	 resp_show(const struct req *, const char *);
static	void	 set_query_attr(char **, char **);
static	int	 validate_filename(const char *);
static	int	 validate_manpath(const struct req *, const char *);
static	int	 validate_urifrag(const char *);
93
/* Path of the CGI script as used in generated links (build-time macro). */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Display priority of the sections 1 to 9, indexed by the section
 * digit minus one.  pg_searchres() prefers the lowest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Section selector of the search form: sec_numbers[] holds the option
 * values and sec_names[] the matching labels, at the same index.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);

/* Architecture names offered in the search form, besides "default". */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
130
/*
 * Write one character to standard output, replacing the double
 * quote, ampersand, and angle brackets by their HTML entities.
 * Every other byte, including non-ASCII, is passed through as is.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
157
/*
 * Print a string with HTML escaping by passing it to html_putchar()
 * one character at a time.  A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
171
/*
 * Move ownership of the allocated string *val into the query
 * attribute *attr.  The previous content of *attr is freed.
 * An empty string is not stored: it is freed and *attr becomes NULL.
 * Either way, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*str;

	free(*attr);

	str = *val;
	*val = NULL;

	if (*str != '\0') {
		*attr = str;
		return;
	}
	free(str);
	*attr = NULL;
}
188
189 /*
190 * Parse the QUERY_STRING for key-value pairs
191 * and store the values into the query structure.
192 */
193 static void
194 parse_query_string(struct req *req, const char *qs)
195 {
196 char *key, *val;
197 size_t keysz, valsz;
198
199 req->isquery = 1;
200 req->q.manpath = NULL;
201 req->q.arch = NULL;
202 req->q.sec = NULL;
203 req->q.query = NULL;
204 req->q.equal = 1;
205
206 key = val = NULL;
207 while (*qs != '\0') {
208
209 /* Parse one key. */
210
211 keysz = strcspn(qs, "=;&");
212 key = mandoc_strndup(qs, keysz);
213 qs += keysz;
214 if (*qs != '=')
215 goto next;
216
217 /* Parse one value. */
218
219 valsz = strcspn(++qs, ";&");
220 val = mandoc_strndup(qs, valsz);
221 qs += valsz;
222
223 /* Decode and catch encoding errors. */
224
225 if ( ! (http_decode(key) && http_decode(val)))
226 goto next;
227
228 /* Handle key-value pairs. */
229
230 if ( ! strcmp(key, "query"))
231 set_query_attr(&req->q.query, &val);
232
233 else if ( ! strcmp(key, "apropos"))
234 req->q.equal = !strcmp(val, "0");
235
236 else if ( ! strcmp(key, "manpath")) {
237 #ifdef COMPAT_OLDURI
238 if ( ! strncmp(val, "OpenBSD ", 8)) {
239 val[7] = '-';
240 if ('C' == val[8])
241 val[8] = 'c';
242 }
243 #endif
244 set_query_attr(&req->q.manpath, &val);
245 }
246
247 else if ( ! (strcmp(key, "sec")
248 #ifdef COMPAT_OLDURI
249 && strcmp(key, "sektion")
250 #endif
251 )) {
252 if ( ! strcmp(val, "0"))
253 *val = '\0';
254 set_query_attr(&req->q.sec, &val);
255 }
256
257 else if ( ! strcmp(key, "arch")) {
258 if ( ! strcmp(val, "default"))
259 *val = '\0';
260 set_query_attr(&req->q.arch, &val);
261 }
262
263 /*
264 * The key must be freed in any case.
265 * The val may have been handed over to the query
266 * structure, in which case it is now NULL.
267 */
268 next:
269 free(key);
270 key = NULL;
271 free(val);
272 val = NULL;
273
274 if (*qs != '\0')
275 qs++;
276 }
277 }
278
/*
 * HTTP-decode a string in place: '+' becomes a space, and "%XX",
 * where XX are exactly two hexadecimal digits, becomes the byte
 * with that value.  For example, "%4e+foo" turns into "N foo".
 * The result is never longer than the input.
 *
 * Returns 1 on success.  Returns 0 if an escape is truncated,
 * contains a character that is not a hexadecimal digit, or encodes
 * a NUL byte; the buffer may then be partially decoded.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	int		 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two hex digits.  The terminating NUL is
		 * not a hex digit, so this also rejects a truncated
		 * escape without reading past the end of the string.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		     ! isxdigit((unsigned char)p[2]))
			return 0;

		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
314
/*
 * Write the CGI response headers and flush them.
 * A "Status:" line built from code and msg is only sent when
 * the code differs from 200; msg is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
329
/*
 * Copy the content of a file verbatim to standard output.
 * Buffered stdio output is flushed first because the copy writes
 * to the file descriptor directly.  This is best effort: a file
 * that cannot be opened is skipped, and the copy stops quietly
 * at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) != 0) {
		if (nr == -1) {
			if (errno == EINTR)
				continue;
			break;
		}

		/* write(2) may be short; keep going until all is out. */

		for (off = 0; off < nr; ) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;
			goto out;
		}
	}
out:
	close(fd);
}
344
345 static void
346 resp_begin_html(int code, const char *msg)
347 {
348
349 resp_begin_http(code, msg);
350
351 printf("<!DOCTYPE html>\n"
352 "<html>\n"
353 "<head>\n"
354 "<meta charset=\"UTF-8\"/>\n"
355 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
356 " type=\"text/css\" media=\"all\">\n"
357 "<title>%s</title>\n"
358 "</head>\n"
359 "<body>\n"
360 "<!-- Begin page content. //-->\n",
361 CSS_DIR, CUSTOMIZE_TITLE);
362
363 resp_copy(MAN_DIR "/header.html");
364 }
365
366 static void
367 resp_end_html(void)
368 {
369
370 resp_copy(MAN_DIR "/footer.html");
371
372 puts("</body>\n"
373 "</html>");
374 }
375
376 static void
377 resp_searchform(const struct req *req, enum focus focus)
378 {
379 int i;
380
381 puts("<!-- Begin search form. //-->");
382 printf("<div id=\"mancgi\">\n"
383 "<form action=\"/%s\" method=\"get\">\n"
384 "<fieldset>\n"
385 "<legend>Manual Page Search Parameters</legend>\n",
386 scriptname);
387
388 /* Write query input box. */
389
390 printf("<input type=\"text\" name=\"query\" value=\"");
391 if (req->q.query != NULL)
392 html_print(req->q.query);
393 printf( "\" size=\"40\"");
394 if (focus == FOCUS_QUERY)
395 printf(" autofocus");
396 puts(">");
397
398 /* Write submission buttons. */
399
400 printf( "<button type=\"submit\" name=\"apropos\" value=\"0\">"
401 "man</button>\n"
402 "<button type=\"submit\" name=\"apropos\" value=\"1\">"
403 "apropos</button>\n<br/>\n");
404
405 /* Write section selector. */
406
407 puts("<select name=\"sec\">");
408 for (i = 0; i < sec_MAX; i++) {
409 printf("<option value=\"%s\"", sec_numbers[i]);
410 if (NULL != req->q.sec &&
411 0 == strcmp(sec_numbers[i], req->q.sec))
412 printf(" selected=\"selected\"");
413 printf(">%s</option>\n", sec_names[i]);
414 }
415 puts("</select>");
416
417 /* Write architecture selector. */
418
419 printf( "<select name=\"arch\">\n"
420 "<option value=\"default\"");
421 if (NULL == req->q.arch)
422 printf(" selected=\"selected\"");
423 puts(">All Architectures</option>");
424 for (i = 0; i < arch_MAX; i++) {
425 printf("<option value=\"%s\"", arch_names[i]);
426 if (NULL != req->q.arch &&
427 0 == strcmp(arch_names[i], req->q.arch))
428 printf(" selected=\"selected\"");
429 printf(">%s</option>\n", arch_names[i]);
430 }
431 puts("</select>");
432
433 /* Write manpath selector. */
434
435 if (req->psz > 1) {
436 puts("<select name=\"manpath\">");
437 for (i = 0; i < (int)req->psz; i++) {
438 printf("<option ");
439 if (strcmp(req->q.manpath, req->p[i]) == 0)
440 printf("selected=\"selected\" ");
441 printf("value=\"");
442 html_print(req->p[i]);
443 printf("\">");
444 html_print(req->p[i]);
445 puts("</option>");
446 }
447 puts("</select>");
448 }
449
450 puts("</fieldset>\n"
451 "</form>\n"
452 "</div>");
453 puts("<!-- End search form. //-->");
454 }
455
/*
 * Check that a string is safe to use as part of a URI:
 * it may only contain alphanumeric characters and "-./_".
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if ( ! isalnum(*cp))
			return 0;
	}
	return 1;
}
469
470 static int
471 validate_manpath(const struct req *req, const char* manpath)
472 {
473 size_t i;
474
475 for (i = 0; i < req->psz; i++)
476 if ( ! strcmp(manpath, req->p[i]))
477 return 1;
478
479 return 0;
480 }
481
/*
 * Check that a file name relative to a manpath is acceptable:
 * after an optional leading "./", it must start with "man" or "cat"
 * and must not contain "../" or "/..".
 * Returns 1 if acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No stepping out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
492
493 static void
494 pg_index(const struct req *req)
495 {
496
497 resp_begin_html(200, NULL);
498 resp_searchform(req, FOCUS_QUERY);
499 printf("<p>\n"
500 "This web interface is documented in the\n"
501 "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
502 "manual, and the\n"
503 "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
504 "manual explains the query syntax.\n"
505 "</p>\n",
506 scriptname, *scriptname == '\0' ? "" : "/",
507 scriptname, *scriptname == '\0' ? "" : "/");
508 resp_end_html();
509 }
510
511 static void
512 pg_noresult(const struct req *req, const char *msg)
513 {
514 resp_begin_html(200, NULL);
515 resp_searchform(req, FOCUS_QUERY);
516 puts("<p>");
517 puts(msg);
518 puts("</p>");
519 resp_end_html();
520 }
521
522 static void
523 pg_error_badrequest(const char *msg)
524 {
525
526 resp_begin_html(400, "Bad Request");
527 puts("<h1>Bad Request</h1>\n"
528 "<p>\n");
529 puts(msg);
530 printf("Try again from the\n"
531 "<a href=\"/%s\">main page</a>.\n"
532 "</p>", scriptname);
533 resp_end_html();
534 }
535
/*
 * A "500 Internal Server Error" page without any details;
 * callers log the cause separately.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
543
544 static void
545 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
546 {
547 char *arch, *archend;
548 const char *sec;
549 size_t i, iuse;
550 int archprio, archpriouse;
551 int prio, priouse;
552
553 for (i = 0; i < sz; i++) {
554 if (validate_filename(r[i].file))
555 continue;
556 warnx("invalid filename %s in %s database",
557 r[i].file, req->q.manpath);
558 pg_error_internal();
559 return;
560 }
561
562 if (req->isquery && sz == 1) {
563 /*
564 * If we have just one result, then jump there now
565 * without any delay.
566 */
567 printf("Status: 303 See Other\r\n");
568 printf("Location: http://%s/%s%s%s/%s",
569 HTTP_HOST, scriptname,
570 *scriptname == '\0' ? "" : "/",
571 req->q.manpath, r[0].file);
572 printf("\r\n"
573 "Content-Type: text/html; charset=utf-8\r\n"
574 "\r\n");
575 return;
576 }
577
578 resp_begin_html(200, NULL);
579 resp_searchform(req,
580 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
581
582 if (sz > 1) {
583 puts("<div class=\"results\">");
584 puts("<table>");
585
586 for (i = 0; i < sz; i++) {
587 printf("<tr>\n"
588 "<td class=\"title\">\n"
589 "<a href=\"/%s%s%s/%s",
590 scriptname, *scriptname == '\0' ? "" : "/",
591 req->q.manpath, r[i].file);
592 printf("\">");
593 html_print(r[i].names);
594 printf("</a>\n"
595 "</td>\n"
596 "<td class=\"desc\">");
597 html_print(r[i].output);
598 puts("</td>\n"
599 "</tr>");
600 }
601
602 puts("</table>\n"
603 "</div>");
604 }
605
606 /*
607 * In man(1) mode, show one of the pages
608 * even if more than one is found.
609 */
610
611 if (req->q.equal || sz == 1) {
612 puts("<hr>");
613 iuse = 0;
614 priouse = 20;
615 archpriouse = 3;
616 for (i = 0; i < sz; i++) {
617 sec = r[i].file;
618 sec += strcspn(sec, "123456789");
619 if (sec[0] == '\0')
620 continue;
621 prio = sec_prios[sec[0] - '1'];
622 if (sec[1] != '/')
623 prio += 10;
624 if (req->q.arch == NULL) {
625 archprio =
626 ((arch = strchr(sec + 1, '/'))
627 == NULL) ? 3 :
628 ((archend = strchr(arch + 1, '/'))
629 == NULL) ? 0 :
630 strncmp(arch, "amd64/",
631 archend - arch) ? 2 : 1;
632 if (archprio < archpriouse) {
633 archpriouse = archprio;
634 priouse = prio;
635 iuse = i;
636 continue;
637 }
638 if (archprio > archpriouse)
639 continue;
640 }
641 if (prio >= priouse)
642 continue;
643 priouse = prio;
644 iuse = i;
645 }
646 resp_show(req, r[iuse].file);
647 }
648
649 resp_end_html();
650 }
651
/*
 * Render a preformatted ("cat") manual page as HTML inside <pre>.
 * Backspace overstrike sequences are translated: "_\bX" becomes
 * italic X, certain two-character combinations become "*" or "+",
 * and any other "X\bY" becomes bold Y.  All text is HTML-escaped.
 * If the file cannot be opened, an error paragraph is written
 * instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
788
789 static void
790 resp_format(const struct req *req, const char *file)
791 {
792 struct manoutput conf;
793 struct mparse *mp;
794 struct roff_man *man;
795 void *vp;
796 int fd;
797 int usepath;
798
799 if (-1 == (fd = open(file, O_RDONLY, 0))) {
800 puts("<p>You specified an invalid manual file.</p>");
801 return;
802 }
803
804 mchars_alloc();
805 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
806 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
807 mparse_readfd(mp, fd, file);
808 close(fd);
809
810 memset(&conf, 0, sizeof(conf));
811 conf.fragment = 1;
812 usepath = strcmp(req->q.manpath, req->p[0]);
813 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
814 usepath ? req->q.manpath : "", usepath ? "/" : "");
815
816 mparse_result(mp, &man, NULL);
817 if (man == NULL) {
818 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
819 pg_error_internal();
820 mparse_free(mp);
821 mchars_free();
822 return;
823 }
824
825 vp = html_alloc(&conf);
826
827 if (man->macroset == MACROSET_MDOC) {
828 mdoc_validate(man);
829 html_mdoc(vp, man);
830 } else {
831 man_validate(man);
832 html_man(vp, man);
833 }
834
835 html_free(vp);
836 mparse_free(mp);
837 mchars_free();
838 free(conf.man);
839 }
840
/*
 * Write the content of one manual page file.  After skipping an
 * optional leading "./", names starting with 'c' are treated as
 * preformatted pages and everything else as manual page source.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		resp_format(req, name);
		return;
	}
	resp_catman(req, name);
}
853
854 static void
855 pg_show(struct req *req, const char *fullpath)
856 {
857 char *manpath;
858 const char *file;
859
860 if ((file = strchr(fullpath, '/')) == NULL) {
861 pg_error_badrequest(
862 "You did not specify a page to show.");
863 return;
864 }
865 manpath = mandoc_strndup(fullpath, file - fullpath);
866 file++;
867
868 if ( ! validate_manpath(req, manpath)) {
869 pg_error_badrequest(
870 "You specified an invalid manpath.");
871 free(manpath);
872 return;
873 }
874
875 /*
876 * Begin by chdir()ing into the manpath.
877 * This way we can pick up the database files, which are
878 * relative to the manpath root.
879 */
880
881 if (chdir(manpath) == -1) {
882 warn("chdir %s", manpath);
883 pg_error_internal();
884 free(manpath);
885 return;
886 }
887 free(manpath);
888
889 if ( ! validate_filename(file)) {
890 pg_error_badrequest(
891 "You specified an invalid manual file.");
892 return;
893 }
894
895 resp_begin_html(200, NULL);
896 resp_searchform(req, FOCUS_NONE);
897 resp_show(req, file);
898 resp_end_html();
899 }
900
901 static void
902 pg_search(const struct req *req)
903 {
904 struct mansearch search;
905 struct manpaths paths;
906 struct manpage *res;
907 char **argv;
908 char *query, *rp, *wp;
909 size_t ressz;
910 int argc;
911
912 /*
913 * Begin by chdir()ing into the root of the manpath.
914 * This way we can pick up the database files, which are
915 * relative to the manpath root.
916 */
917
918 if (chdir(req->q.manpath) == -1) {
919 warn("chdir %s", req->q.manpath);
920 pg_error_internal();
921 return;
922 }
923
924 search.arch = req->q.arch;
925 search.sec = req->q.sec;
926 search.outkey = "Nd";
927 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
928 search.firstmatch = 1;
929
930 paths.sz = 1;
931 paths.paths = mandoc_malloc(sizeof(char *));
932 paths.paths[0] = mandoc_strdup(".");
933
934 /*
935 * Break apart at spaces with backslash-escaping.
936 */
937
938 argc = 0;
939 argv = NULL;
940 rp = query = mandoc_strdup(req->q.query);
941 for (;;) {
942 while (isspace((unsigned char)*rp))
943 rp++;
944 if (*rp == '\0')
945 break;
946 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
947 argv[argc++] = wp = rp;
948 for (;;) {
949 if (isspace((unsigned char)*rp)) {
950 *wp = '\0';
951 rp++;
952 break;
953 }
954 if (rp[0] == '\\' && rp[1] != '\0')
955 rp++;
956 if (wp != rp)
957 *wp = *rp;
958 if (*rp == '\0')
959 break;
960 wp++;
961 rp++;
962 }
963 }
964
965 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
966 pg_noresult(req, "You entered an invalid query.");
967 else if (0 == ressz)
968 pg_noresult(req, "No results found.");
969 else
970 pg_searchres(req, res, ressz);
971
972 free(query);
973 mansearch_free(res, ressz);
974 free(paths.paths[0]);
975 free(paths.paths);
976 }
977
978 int
979 main(void)
980 {
981 struct req req;
982 struct itimerval itimer;
983 const char *path;
984 const char *querystring;
985 int i;
986
987 /* Poor man's ReDoS mitigation. */
988
989 itimer.it_value.tv_sec = 2;
990 itimer.it_value.tv_usec = 0;
991 itimer.it_interval.tv_sec = 2;
992 itimer.it_interval.tv_usec = 0;
993 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
994 warn("setitimer");
995 pg_error_internal();
996 return EXIT_FAILURE;
997 }
998
999 /*
1000 * First we change directory into the MAN_DIR so that
1001 * subsequent scanning for manpath directories is rooted
1002 * relative to the same position.
1003 */
1004
1005 if (chdir(MAN_DIR) == -1) {
1006 warn("MAN_DIR: %s", MAN_DIR);
1007 pg_error_internal();
1008 return EXIT_FAILURE;
1009 }
1010
1011 memset(&req, 0, sizeof(struct req));
1012 req.q.equal = 1;
1013 parse_manpath_conf(&req);
1014
1015 /* Parse the path info and the query string. */
1016
1017 if ((path = getenv("PATH_INFO")) == NULL)
1018 path = "";
1019 else if (*path == '/')
1020 path++;
1021
1022 if (*path != '\0') {
1023 parse_path_info(&req, path);
1024 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1025 path = "";
1026 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1027 parse_query_string(&req, querystring);
1028
1029 /* Validate parsed data and add defaults. */
1030
1031 if (req.q.manpath == NULL)
1032 req.q.manpath = mandoc_strdup(req.p[0]);
1033 else if ( ! validate_manpath(&req, req.q.manpath)) {
1034 pg_error_badrequest(
1035 "You specified an invalid manpath.");
1036 return EXIT_FAILURE;
1037 }
1038
1039 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1040 pg_error_badrequest(
1041 "You specified an invalid architecture.");
1042 return EXIT_FAILURE;
1043 }
1044
1045 /* Dispatch to the three different pages. */
1046
1047 if ('\0' != *path)
1048 pg_show(&req, path);
1049 else if (NULL != req.q.query)
1050 pg_search(&req);
1051 else
1052 pg_index(&req);
1053
1054 free(req.q.manpath);
1055 free(req.q.arch);
1056 free(req.q.sec);
1057 free(req.q.query);
1058 for (i = 0; i < (int)req.psz; i++)
1059 free(req.p[i]);
1060 free(req.p);
1061 return EXIT_SUCCESS;
1062 }
1063
/*
 * If PATH_INFO is not a file name, translate it to a query.
 *
 * The path has the form [manpath/][manN/][arch/]name[.section].
 * It is copied once and split in place, so several pointers
 * temporarily refer into the same allocation; before returning,
 * every field of req->q that is set owns an allocation of its own.
 * Writes a bad request page and exits if there are too many
 * directory components or an unexpected one.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];	/* starts of the directory components */
	int	 i;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No directory part: the whole copy is the name. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		/* Only a suffix starting with a digit is a section. */
		if(isdigit((unsigned char)req->q.sec[1])) {
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;
	/* The name gets its own copy; the original now holds dirs only. */
	req->q.query = mandoc_strdup(req->q.query);

	/* Split directory components. */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * Optional manpath.
	 * Afterwards, i is 1 if dir[0] is a valid manpath, which
	 * req->q.manpath keeps owning, or 0 if it is not, in which
	 * case dir[0] is the only pointer left to the allocation.
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		/* A section directory overrides a name suffix. */
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/* Optional architecture. */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		/* dir[0] is the start of the allocation: hand it over. */
		req->q.arch = dir[0];
}
1140
1141 /*
1142 * Scan for indexable paths.
1143 */
1144 static void
1145 parse_manpath_conf(struct req *req)
1146 {
1147 FILE *fp;
1148 char *dp;
1149 size_t dpsz;
1150 ssize_t len;
1151
1152 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1153 warn("%s/manpath.conf", MAN_DIR);
1154 pg_error_internal();
1155 exit(EXIT_FAILURE);
1156 }
1157
1158 dp = NULL;
1159 dpsz = 0;
1160
1161 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1162 if (dp[len - 1] == '\n')
1163 dp[--len] = '\0';
1164 req->p = mandoc_realloc(req->p,
1165 (req->psz + 1) * sizeof(char *));
1166 if ( ! validate_urifrag(dp)) {
1167 warnx("%s/manpath.conf contains "
1168 "unsafe path \"%s\"", MAN_DIR, dp);
1169 pg_error_internal();
1170 exit(EXIT_FAILURE);
1171 }
1172 if (strchr(dp, '/') != NULL) {
1173 warnx("%s/manpath.conf contains "
1174 "path with slash \"%s\"", MAN_DIR, dp);
1175 pg_error_internal();
1176 exit(EXIT_FAILURE);
1177 }
1178 req->p[req->psz++] = dp;
1179 dp = NULL;
1180 dpsz = 0;
1181 }
1182 free(dp);
1183
1184 if (req->p == NULL) {
1185 warnx("%s/manpath.conf is empty", MAN_DIR);
1186 pg_error_internal();
1187 exit(EXIT_FAILURE);
1188 }
1189 }