]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
fix a typo that prevented names from .Dt from getting priority
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.136 2016/07/31 23:37:23 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "main.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "cgi.h"
43
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct query {
	char	*manpath;	/* manual directory to look in */
	char	*arch;		/* machine architecture */
	char	*sec;		/* section of the manual */
	char	*query;		/* query expression, still unparsed */
	int	 equal;		/* whole-name match rather than substring */
};
54
55 struct req {
56 struct query q;
57 char **p; /* array of available manpaths */
58 size_t psz; /* number of available manpaths */
59 int isquery; /* QUERY_STRING used, not PATH_INFO */
60 };
61
/* Selects whether the query text box of the search form is autofocused. */
enum focus {
	FOCUS_NONE = 0,		/* do not request input focus */
	FOCUS_QUERY		/* put the focus on the query box */
};
66
67 static void html_print(const char *);
68 static void html_putchar(char);
69 static int http_decode(char *);
70 static void parse_manpath_conf(struct req *);
71 static void parse_path_info(struct req *req, const char *path);
72 static void parse_query_string(struct req *, const char *);
73 static void pg_error_badrequest(const char *);
74 static void pg_error_internal(void);
75 static void pg_index(const struct req *);
76 static void pg_noresult(const struct req *, const char *);
77 static void pg_search(const struct req *);
78 static void pg_searchres(const struct req *,
79 struct manpage *, size_t);
80 static void pg_show(struct req *, const char *);
81 static void resp_begin_html(int, const char *);
82 static void resp_begin_http(int, const char *);
83 static void resp_catman(const struct req *, const char *);
84 static void resp_copy(const char *);
85 static void resp_end_html(void);
86 static void resp_format(const struct req *, const char *);
87 static void resp_searchform(const struct req *, enum focus);
88 static void resp_show(const struct req *, const char *);
89 static void set_query_attr(char **, char **);
90 static int validate_filename(const char *);
91 static int validate_manpath(const struct req *, const char *);
92 static int validate_urifrag(const char *);
93
94 static const char *scriptname = SCRIPT_NAME;
95
/*
 * Priority of sections 1 through 9, indexed by the section digit
 * minus one.  When one page has to be picked among several results,
 * the smallest value wins.
 */
static const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the section selector; parallel to sec_names[]. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector; parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered by the architecture selector. */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armish",	"armv7",
	"hppa",		"hppa64",	"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc",
	"sparc64",	"zaurus",
	"amiga",	"arc",		"arm32",	"atari",
	"aviion",	"beagle",	"cats",		"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sun3",
	"vax",		"wgrisc",	"x68k"
};
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
129
/*
 * Write one character to standard output, replacing the characters
 * that are special in HTML text and attribute values (", &, >, <)
 * by named character references.
 * Every other byte, including non-ASCII, is passed straight through
 * to the output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The reference is named "quot"; "&quote;" is undefined. */
		printf("&quot;");
		break;
	case '&':
		printf("&amp;");
		break;
	case '>':
		printf("&gt;");
		break;
	case '<':
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
156
/*
 * Print a string with HTML escaping by feeding it to html_putchar()
 * one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
170
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming == '\0') {
		free(incoming);
		incoming = NULL;
	}
	*attr = incoming;
}
187
188 /*
189 * Parse the QUERY_STRING for key-value pairs
190 * and store the values into the query structure.
191 */
192 static void
193 parse_query_string(struct req *req, const char *qs)
194 {
195 char *key, *val;
196 size_t keysz, valsz;
197
198 req->isquery = 1;
199 req->q.manpath = NULL;
200 req->q.arch = NULL;
201 req->q.sec = NULL;
202 req->q.query = NULL;
203 req->q.equal = 1;
204
205 key = val = NULL;
206 while (*qs != '\0') {
207
208 /* Parse one key. */
209
210 keysz = strcspn(qs, "=;&");
211 key = mandoc_strndup(qs, keysz);
212 qs += keysz;
213 if (*qs != '=')
214 goto next;
215
216 /* Parse one value. */
217
218 valsz = strcspn(++qs, ";&");
219 val = mandoc_strndup(qs, valsz);
220 qs += valsz;
221
222 /* Decode and catch encoding errors. */
223
224 if ( ! (http_decode(key) && http_decode(val)))
225 goto next;
226
227 /* Handle key-value pairs. */
228
229 if ( ! strcmp(key, "query"))
230 set_query_attr(&req->q.query, &val);
231
232 else if ( ! strcmp(key, "apropos"))
233 req->q.equal = !strcmp(val, "0");
234
235 else if ( ! strcmp(key, "manpath")) {
236 #ifdef COMPAT_OLDURI
237 if ( ! strncmp(val, "OpenBSD ", 8)) {
238 val[7] = '-';
239 if ('C' == val[8])
240 val[8] = 'c';
241 }
242 #endif
243 set_query_attr(&req->q.manpath, &val);
244 }
245
246 else if ( ! (strcmp(key, "sec")
247 #ifdef COMPAT_OLDURI
248 && strcmp(key, "sektion")
249 #endif
250 )) {
251 if ( ! strcmp(val, "0"))
252 *val = '\0';
253 set_query_attr(&req->q.sec, &val);
254 }
255
256 else if ( ! strcmp(key, "arch")) {
257 if ( ! strcmp(val, "default"))
258 *val = '\0';
259 set_query_attr(&req->q.arch, &val);
260 }
261
262 /*
263 * The key must be freed in any case.
264 * The val may have been handed over to the query
265 * structure, in which case it is now NULL.
266 */
267 next:
268 free(key);
269 key = NULL;
270 free(val);
271 val = NULL;
272
273 if (*qs != '\0')
274 qs++;
275 }
276 }
277
/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * A '+' turns into a space and "%XX" into the byte with the
 * hexadecimal value XX.
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if it encodes a NUL byte;
 * the string may be partially rewritten in that case.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	int	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hex digits.  The second test is
		 * only reached if the first character is not the
		 * terminating NUL, so this never reads past the end.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];

		/* An embedded NUL would truncate the decoded string. */

		if ((c = (int)strtol(hex, NULL, 16)) == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
313
/*
 * Emit the HTTP response header and flush it.
 * A "Status:" line is only written for codes other than 200;
 * msg is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);
	fflush(stdout);
}
328
/*
 * Copy the content of a file verbatim to standard output.
 * This is best effort: a file that cannot be opened is silently
 * skipped, and copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep stdio output ahead of the raw writes below. */
	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may be partial; loop until the chunk is out. */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw <= 0)
				goto done;
		}
	}
done:
	close(fd);
}
342
343 static void
344 resp_begin_html(int code, const char *msg)
345 {
346
347 resp_begin_http(code, msg);
348
349 printf("<!DOCTYPE html>\n"
350 "<html>\n"
351 "<head>\n"
352 "<meta charset=\"UTF-8\"/>\n"
353 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354 " type=\"text/css\" media=\"all\">\n"
355 "<title>%s</title>\n"
356 "</head>\n"
357 "<body>\n"
358 "<!-- Begin page content. //-->\n",
359 CSS_DIR, CUSTOMIZE_TITLE);
360
361 resp_copy(MAN_DIR "/header.html");
362 }
363
364 static void
365 resp_end_html(void)
366 {
367
368 resp_copy(MAN_DIR "/footer.html");
369
370 puts("</body>\n"
371 "</html>");
372 }
373
374 static void
375 resp_searchform(const struct req *req, enum focus focus)
376 {
377 int i;
378
379 puts("<!-- Begin search form. //-->");
380 printf("<div id=\"mancgi\">\n"
381 "<form action=\"/%s\" method=\"get\">\n"
382 "<fieldset>\n"
383 "<legend>Manual Page Search Parameters</legend>\n",
384 scriptname);
385
386 /* Write query input box. */
387
388 printf("<input type=\"text\" name=\"query\" value=\"");
389 if (req->q.query != NULL)
390 html_print(req->q.query);
391 printf( "\" size=\"40\"");
392 if (focus == FOCUS_QUERY)
393 printf(" autofocus");
394 puts(">");
395
396 /* Write submission buttons. */
397
398 printf( "<button type=\"submit\" name=\"apropos\" value=\"0\">"
399 "man</button>\n"
400 "<button type=\"submit\" name=\"apropos\" value=\"1\">"
401 "apropos</button>\n<br/>\n");
402
403 /* Write section selector. */
404
405 puts("<select name=\"sec\">");
406 for (i = 0; i < sec_MAX; i++) {
407 printf("<option value=\"%s\"", sec_numbers[i]);
408 if (NULL != req->q.sec &&
409 0 == strcmp(sec_numbers[i], req->q.sec))
410 printf(" selected=\"selected\"");
411 printf(">%s</option>\n", sec_names[i]);
412 }
413 puts("</select>");
414
415 /* Write architecture selector. */
416
417 printf( "<select name=\"arch\">\n"
418 "<option value=\"default\"");
419 if (NULL == req->q.arch)
420 printf(" selected=\"selected\"");
421 puts(">All Architectures</option>");
422 for (i = 0; i < arch_MAX; i++) {
423 printf("<option value=\"%s\"", arch_names[i]);
424 if (NULL != req->q.arch &&
425 0 == strcmp(arch_names[i], req->q.arch))
426 printf(" selected=\"selected\"");
427 printf(">%s</option>\n", arch_names[i]);
428 }
429 puts("</select>");
430
431 /* Write manpath selector. */
432
433 if (req->psz > 1) {
434 puts("<select name=\"manpath\">");
435 for (i = 0; i < (int)req->psz; i++) {
436 printf("<option ");
437 if (strcmp(req->q.manpath, req->p[i]) == 0)
438 printf("selected=\"selected\" ");
439 printf("value=\"");
440 html_print(req->p[i]);
441 printf("\">");
442 html_print(req->p[i]);
443 puts("</option>");
444 }
445 puts("</select>");
446 }
447
448 puts("</fieldset>\n"
449 "</form>\n"
450 "</div>");
451 puts("<!-- End search form. //-->");
452 }
453
/*
 * Check that a URI fragment consists of nothing but alphanumeric
 * characters, dashes, dots, slashes, and underscores.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if ( ! isalnum((unsigned char)*cp))
			return 0;
	}
	return 1;
}
467
468 static int
469 validate_manpath(const struct req *req, const char* manpath)
470 {
471 size_t i;
472
473 for (i = 0; i < req->psz; i++)
474 if ( ! strcmp(manpath, req->p[i]))
475 return 1;
476
477 return 0;
478 }
479
/*
 * Check a manual page file name relative to a manpath.
 * After an optional leading "./", the name has to start with
 * "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 for an acceptable name, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
490
491 static void
492 pg_index(const struct req *req)
493 {
494
495 resp_begin_html(200, NULL);
496 resp_searchform(req, FOCUS_QUERY);
497 printf("<p>\n"
498 "This web interface is documented in the\n"
499 "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500 "manual, and the\n"
501 "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502 "manual explains the query syntax.\n"
503 "</p>\n",
504 scriptname, *scriptname == '\0' ? "" : "/",
505 scriptname, *scriptname == '\0' ? "" : "/");
506 resp_end_html();
507 }
508
509 static void
510 pg_noresult(const struct req *req, const char *msg)
511 {
512 resp_begin_html(200, NULL);
513 resp_searchform(req, FOCUS_QUERY);
514 puts("<p>");
515 puts(msg);
516 puts("</p>");
517 resp_end_html();
518 }
519
520 static void
521 pg_error_badrequest(const char *msg)
522 {
523
524 resp_begin_html(400, "Bad Request");
525 puts("<h1>Bad Request</h1>\n"
526 "<p>\n");
527 puts(msg);
528 printf("Try again from the\n"
529 "<a href=\"/%s\">main page</a>.\n"
530 "</p>", scriptname);
531 resp_end_html();
532 }
533
/* Serve a minimal "500 Internal Server Error" page. */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541
542 static void
543 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544 {
545 char *arch, *archend;
546 const char *sec;
547 size_t i, iuse;
548 int archprio, archpriouse;
549 int prio, priouse;
550
551 for (i = 0; i < sz; i++) {
552 if (validate_filename(r[i].file))
553 continue;
554 warnx("invalid filename %s in %s database",
555 r[i].file, req->q.manpath);
556 pg_error_internal();
557 return;
558 }
559
560 if (req->isquery && sz == 1) {
561 /*
562 * If we have just one result, then jump there now
563 * without any delay.
564 */
565 printf("Status: 303 See Other\r\n");
566 printf("Location: http://%s/%s%s%s/%s",
567 HTTP_HOST, scriptname,
568 *scriptname == '\0' ? "" : "/",
569 req->q.manpath, r[0].file);
570 printf("\r\n"
571 "Content-Type: text/html; charset=utf-8\r\n"
572 "\r\n");
573 return;
574 }
575
576 resp_begin_html(200, NULL);
577 resp_searchform(req,
578 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579
580 if (sz > 1) {
581 puts("<div class=\"results\">");
582 puts("<table>");
583
584 for (i = 0; i < sz; i++) {
585 printf("<tr>\n"
586 "<td class=\"title\">\n"
587 "<a href=\"/%s%s%s/%s",
588 scriptname, *scriptname == '\0' ? "" : "/",
589 req->q.manpath, r[i].file);
590 printf("\">");
591 html_print(r[i].names);
592 printf("</a>\n"
593 "</td>\n"
594 "<td class=\"desc\">");
595 html_print(r[i].output);
596 puts("</td>\n"
597 "</tr>");
598 }
599
600 puts("</table>\n"
601 "</div>");
602 }
603
604 /*
605 * In man(1) mode, show one of the pages
606 * even if more than one is found.
607 */
608
609 if (req->q.equal || sz == 1) {
610 puts("<hr>");
611 iuse = 0;
612 priouse = 20;
613 archpriouse = 3;
614 for (i = 0; i < sz; i++) {
615 sec = r[i].file;
616 sec += strcspn(sec, "123456789");
617 if (sec[0] == '\0')
618 continue;
619 prio = sec_prios[sec[0] - '1'];
620 if (sec[1] != '/')
621 prio += 10;
622 if (req->q.arch == NULL) {
623 archprio =
624 ((arch = strchr(sec + 1, '/'))
625 == NULL) ? 3 :
626 ((archend = strchr(arch + 1, '/'))
627 == NULL) ? 0 :
628 strncmp(arch, "amd64/",
629 archend - arch) ? 2 : 1;
630 if (archprio < archpriouse) {
631 archpriouse = archprio;
632 priouse = prio;
633 iuse = i;
634 continue;
635 }
636 if (archprio > archpriouse)
637 continue;
638 }
639 if (prio >= priouse)
640 continue;
641 priouse = prio;
642 iuse = i;
643 }
644 resp_show(req, r[iuse].file);
645 }
646
647 resp_end_html();
648 }
649
/*
 * Print a preformatted manual page (catpage) as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, certain combinations
 * of two different characters become '*' or '+', and any other
 * "X\bY" becomes bold Y.  All regular text is HTML-escaped.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character struck over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * compared at offsets i and i + 2, because
			 * offset i + 1 always holds the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
786
787 static void
788 resp_format(const struct req *req, const char *file)
789 {
790 struct manoutput conf;
791 struct mparse *mp;
792 struct roff_man *man;
793 void *vp;
794 int fd;
795 int usepath;
796
797 if (-1 == (fd = open(file, O_RDONLY, 0))) {
798 puts("<p>You specified an invalid manual file.</p>");
799 return;
800 }
801
802 mchars_alloc();
803 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
804 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
805 mparse_readfd(mp, fd, file);
806 close(fd);
807
808 memset(&conf, 0, sizeof(conf));
809 conf.fragment = 1;
810 usepath = strcmp(req->q.manpath, req->p[0]);
811 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
812 usepath ? req->q.manpath : "", usepath ? "/" : "");
813
814 mparse_result(mp, &man, NULL);
815 if (man == NULL) {
816 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
817 pg_error_internal();
818 mparse_free(mp);
819 mchars_free();
820 return;
821 }
822
823 vp = html_alloc(&conf);
824
825 if (man->macroset == MACROSET_MDOC) {
826 mdoc_validate(man);
827 html_mdoc(vp, man);
828 } else {
829 man_validate(man);
830 html_man(vp, man);
831 }
832
833 html_free(vp);
834 mparse_free(mp);
835 mchars_free();
836 free(conf.man);
837 }
838
/*
 * Print one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are handled as preformatted pages
 * and all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
851
852 static void
853 pg_show(struct req *req, const char *fullpath)
854 {
855 char *manpath;
856 const char *file;
857
858 if ((file = strchr(fullpath, '/')) == NULL) {
859 pg_error_badrequest(
860 "You did not specify a page to show.");
861 return;
862 }
863 manpath = mandoc_strndup(fullpath, file - fullpath);
864 file++;
865
866 if ( ! validate_manpath(req, manpath)) {
867 pg_error_badrequest(
868 "You specified an invalid manpath.");
869 free(manpath);
870 return;
871 }
872
873 /*
874 * Begin by chdir()ing into the manpath.
875 * This way we can pick up the database files, which are
876 * relative to the manpath root.
877 */
878
879 if (chdir(manpath) == -1) {
880 warn("chdir %s", manpath);
881 pg_error_internal();
882 free(manpath);
883 return;
884 }
885 free(manpath);
886
887 if ( ! validate_filename(file)) {
888 pg_error_badrequest(
889 "You specified an invalid manual file.");
890 return;
891 }
892
893 resp_begin_html(200, NULL);
894 resp_searchform(req, FOCUS_NONE);
895 resp_show(req, file);
896 resp_end_html();
897 }
898
899 static void
900 pg_search(const struct req *req)
901 {
902 struct mansearch search;
903 struct manpaths paths;
904 struct manpage *res;
905 char **argv;
906 char *query, *rp, *wp;
907 size_t ressz;
908 int argc;
909
910 /*
911 * Begin by chdir()ing into the root of the manpath.
912 * This way we can pick up the database files, which are
913 * relative to the manpath root.
914 */
915
916 if (chdir(req->q.manpath) == -1) {
917 warn("chdir %s", req->q.manpath);
918 pg_error_internal();
919 return;
920 }
921
922 search.arch = req->q.arch;
923 search.sec = req->q.sec;
924 search.outkey = "Nd";
925 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
926 search.firstmatch = 1;
927
928 paths.sz = 1;
929 paths.paths = mandoc_malloc(sizeof(char *));
930 paths.paths[0] = mandoc_strdup(".");
931
932 /*
933 * Break apart at spaces with backslash-escaping.
934 */
935
936 argc = 0;
937 argv = NULL;
938 rp = query = mandoc_strdup(req->q.query);
939 for (;;) {
940 while (isspace((unsigned char)*rp))
941 rp++;
942 if (*rp == '\0')
943 break;
944 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
945 argv[argc++] = wp = rp;
946 for (;;) {
947 if (isspace((unsigned char)*rp)) {
948 *wp = '\0';
949 rp++;
950 break;
951 }
952 if (rp[0] == '\\' && rp[1] != '\0')
953 rp++;
954 if (wp != rp)
955 *wp = *rp;
956 if (*rp == '\0')
957 break;
958 wp++;
959 rp++;
960 }
961 }
962
963 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
964 pg_noresult(req, "You entered an invalid query.");
965 else if (0 == ressz)
966 pg_noresult(req, "No results found.");
967 else
968 pg_searchres(req, res, ressz);
969
970 free(query);
971 mansearch_free(res, ressz);
972 free(paths.paths[0]);
973 free(paths.paths);
974 }
975
976 int
977 main(void)
978 {
979 struct req req;
980 struct itimerval itimer;
981 const char *path;
982 const char *querystring;
983 int i;
984
985 /* Poor man's ReDoS mitigation. */
986
987 itimer.it_value.tv_sec = 2;
988 itimer.it_value.tv_usec = 0;
989 itimer.it_interval.tv_sec = 2;
990 itimer.it_interval.tv_usec = 0;
991 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
992 warn("setitimer");
993 pg_error_internal();
994 return EXIT_FAILURE;
995 }
996
997 /*
998 * First we change directory into the MAN_DIR so that
999 * subsequent scanning for manpath directories is rooted
1000 * relative to the same position.
1001 */
1002
1003 if (chdir(MAN_DIR) == -1) {
1004 warn("MAN_DIR: %s", MAN_DIR);
1005 pg_error_internal();
1006 return EXIT_FAILURE;
1007 }
1008
1009 memset(&req, 0, sizeof(struct req));
1010 req.q.equal = 1;
1011 parse_manpath_conf(&req);
1012
1013 /* Parse the path info and the query string. */
1014
1015 if ((path = getenv("PATH_INFO")) == NULL)
1016 path = "";
1017 else if (*path == '/')
1018 path++;
1019
1020 if (*path != '\0') {
1021 parse_path_info(&req, path);
1022 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1023 path = "";
1024 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1025 parse_query_string(&req, querystring);
1026
1027 /* Validate parsed data and add defaults. */
1028
1029 if (req.q.manpath == NULL)
1030 req.q.manpath = mandoc_strdup(req.p[0]);
1031 else if ( ! validate_manpath(&req, req.q.manpath)) {
1032 pg_error_badrequest(
1033 "You specified an invalid manpath.");
1034 return EXIT_FAILURE;
1035 }
1036
1037 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1038 pg_error_badrequest(
1039 "You specified an invalid architecture.");
1040 return EXIT_FAILURE;
1041 }
1042
1043 /* Dispatch to the three different pages. */
1044
1045 if ('\0' != *path)
1046 pg_show(&req, path);
1047 else if (NULL != req.q.query)
1048 pg_search(&req);
1049 else
1050 pg_index(&req);
1051
1052 free(req.q.manpath);
1053 free(req.q.arch);
1054 free(req.q.sec);
1055 free(req.q.query);
1056 for (i = 0; i < (int)req.psz; i++)
1057 free(req.p[i]);
1058 free(req.p);
1059 return EXIT_SUCCESS;
1060 }
1061
1062 /*
1063 * If PATH_INFO is not a file name, translate it to a query.
1064 */
1065 static void
1066 parse_path_info(struct req *req, const char *path)
1067 {
1068 char *dir[4];
1069 int i;
1070
1071 req->isquery = 0;
1072 req->q.equal = 1;
1073 req->q.manpath = mandoc_strdup(path);
1074 req->q.arch = NULL;
1075
1076 /* Mandatory manual page name. */
1077 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1078 req->q.query = req->q.manpath;
1079 req->q.manpath = NULL;
1080 } else
1081 *req->q.query++ = '\0';
1082
1083 /* Optional trailing section. */
1084 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1085 if(isdigit((unsigned char)req->q.sec[1])) {
1086 *req->q.sec++ = '\0';
1087 req->q.sec = mandoc_strdup(req->q.sec);
1088 } else
1089 req->q.sec = NULL;
1090 }
1091
1092 /* Handle the case of name[.section] only. */
1093 if (req->q.manpath == NULL)
1094 return;
1095 req->q.query = mandoc_strdup(req->q.query);
1096
1097 /* Split directory components. */
1098 dir[i = 0] = req->q.manpath;
1099 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1100 if (++i == 3) {
1101 pg_error_badrequest(
1102 "You specified too many directory components.");
1103 exit(EXIT_FAILURE);
1104 }
1105 *dir[i]++ = '\0';
1106 }
1107
1108 /* Optional manpath. */
1109 if ((i = validate_manpath(req, req->q.manpath)) == 0)
1110 req->q.manpath = NULL;
1111 else if (dir[1] == NULL)
1112 return;
1113
1114 /* Optional section. */
1115 if (strncmp(dir[i], "man", 3) == 0) {
1116 free(req->q.sec);
1117 req->q.sec = mandoc_strdup(dir[i++] + 3);
1118 }
1119 if (dir[i] == NULL) {
1120 if (req->q.manpath == NULL)
1121 free(dir[0]);
1122 return;
1123 }
1124 if (dir[i + 1] != NULL) {
1125 pg_error_badrequest(
1126 "You specified an invalid directory component.");
1127 exit(EXIT_FAILURE);
1128 }
1129
1130 /* Optional architecture. */
1131 if (i) {
1132 req->q.arch = mandoc_strdup(dir[i]);
1133 if (req->q.manpath == NULL)
1134 free(dir[0]);
1135 } else
1136 req->q.arch = dir[0];
1137 }
1138
1139 /*
1140 * Scan for indexable paths.
1141 */
1142 static void
1143 parse_manpath_conf(struct req *req)
1144 {
1145 FILE *fp;
1146 char *dp;
1147 size_t dpsz;
1148 ssize_t len;
1149
1150 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1151 warn("%s/manpath.conf", MAN_DIR);
1152 pg_error_internal();
1153 exit(EXIT_FAILURE);
1154 }
1155
1156 dp = NULL;
1157 dpsz = 0;
1158
1159 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1160 if (dp[len - 1] == '\n')
1161 dp[--len] = '\0';
1162 req->p = mandoc_realloc(req->p,
1163 (req->psz + 1) * sizeof(char *));
1164 if ( ! validate_urifrag(dp)) {
1165 warnx("%s/manpath.conf contains "
1166 "unsafe path \"%s\"", MAN_DIR, dp);
1167 pg_error_internal();
1168 exit(EXIT_FAILURE);
1169 }
1170 if (strchr(dp, '/') != NULL) {
1171 warnx("%s/manpath.conf contains "
1172 "path with slash \"%s\"", MAN_DIR, dp);
1173 pg_error_internal();
1174 exit(EXIT_FAILURE);
1175 }
1176 req->p[req->psz++] = dp;
1177 dp = NULL;
1178 dpsz = 0;
1179 }
1180 free(dp);
1181
1182 if (req->p == NULL) {
1183 warnx("%s/manpath.conf is empty", MAN_DIR);
1184 pg_error_internal();
1185 exit(EXIT_FAILURE);
1186 }
1187 }