]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
prefer warn[x](3) over fprintf(3) where appropriate
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.128 2016/04/15 16:42:52 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "main.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "cgi.h"
43
/*
 * Search parameters extracted from the request.
 * The string members are heap-allocated or NULL.
 */
struct	query {
	char		*manpath;	/* manual tree to look in */
	char		*arch;		/* machine architecture filter */
	char		*sec;		/* manual section filter */
	char		*query;		/* search expression, not yet split */
	int		 equal;		/* nonzero: match whole names only */
};
54
55 struct req {
56 struct query q;
57 char **p; /* array of available manpaths */
58 size_t psz; /* number of available manpaths */
59 int isquery; /* QUERY_STRING used, not PATH_INFO */
60 };
61
62 static void catman(const struct req *, const char *);
63 static void format(const struct req *, const char *);
64 static void html_print(const char *);
65 static void html_putchar(char);
66 static int http_decode(char *);
67 static void http_parse(struct req *, const char *);
68 static void pathgen(struct req *);
69 static void path_parse(struct req *req, const char *path);
70 static void pg_error_badrequest(const char *);
71 static void pg_error_internal(void);
72 static void pg_index(const struct req *);
73 static void pg_noresult(const struct req *, const char *);
74 static void pg_search(const struct req *);
75 static void pg_searchres(const struct req *,
76 struct manpage *, size_t);
77 static void pg_show(struct req *, const char *);
78 static void resp_begin_html(int, const char *);
79 static void resp_begin_http(int, const char *);
80 static void resp_copy(const char *);
81 static void resp_end_html(void);
82 static void resp_searchform(const struct req *);
83 static void resp_show(const struct req *, const char *);
84 static void set_query_attr(char **, char **);
85 static int validate_filename(const char *);
86 static int validate_manpath(const struct req *, const char *);
87 static int validate_urifrag(const char *);
88
89 static const char *scriptname = SCRIPT_NAME;
90
/*
 * Display priority per section digit: entry [d - 1] belongs to
 * section d, and a lower value wins in pg_searchres().
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Section selector: sec_numbers[i] is the form value and
 * sec_names[i] the label shown for it.  Both arrays are indexed
 * in parallel up to sec_MAX.
 */
static	const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
109
/* Architectures offered in the search form's architecture selector. */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armish",	"armv7",
	"hppa",		"hppa64",	"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc",
	"sparc64",	"zaurus",
	"amiga",	"arc",		"arm32",	"atari",
	"aviion",	"beagle",	"cats",		"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sun3",
	"vax",		"wgrisc",	"x68k"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
124
/*
 * Write one character to standard output, replacing the four
 * HTML-special characters ", &, > and < with their named entities.
 * Every other byte, including non-ASCII, is passed through unchanged.
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is "quot"; "quote" is not defined in HTML. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
151
/*
 * Write a string to standard output through html_putchar(),
 * one character at a time.  A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
165
/*
 * Replace the attribute *attr with the allocated string *val.
 * The previous attribute is freed.  A non-empty value is handed
 * over to *attr; an empty value is freed and *attr becomes NULL.
 * Either way, *val is NULL on return because the caller no
 * longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);
	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	*attr = NULL;
	free(incoming);
}
182
183 /*
184 * Parse the QUERY_STRING for key-value pairs
185 * and store the values into the query structure.
186 */
187 static void
188 http_parse(struct req *req, const char *qs)
189 {
190 char *key, *val;
191 size_t keysz, valsz;
192
193 req->isquery = 1;
194 req->q.manpath = NULL;
195 req->q.arch = NULL;
196 req->q.sec = NULL;
197 req->q.query = NULL;
198 req->q.equal = 1;
199
200 key = val = NULL;
201 while (*qs != '\0') {
202
203 /* Parse one key. */
204
205 keysz = strcspn(qs, "=;&");
206 key = mandoc_strndup(qs, keysz);
207 qs += keysz;
208 if (*qs != '=')
209 goto next;
210
211 /* Parse one value. */
212
213 valsz = strcspn(++qs, ";&");
214 val = mandoc_strndup(qs, valsz);
215 qs += valsz;
216
217 /* Decode and catch encoding errors. */
218
219 if ( ! (http_decode(key) && http_decode(val)))
220 goto next;
221
222 /* Handle key-value pairs. */
223
224 if ( ! strcmp(key, "query"))
225 set_query_attr(&req->q.query, &val);
226
227 else if ( ! strcmp(key, "apropos"))
228 req->q.equal = !strcmp(val, "0");
229
230 else if ( ! strcmp(key, "manpath")) {
231 #ifdef COMPAT_OLDURI
232 if ( ! strncmp(val, "OpenBSD ", 8)) {
233 val[7] = '-';
234 if ('C' == val[8])
235 val[8] = 'c';
236 }
237 #endif
238 set_query_attr(&req->q.manpath, &val);
239 }
240
241 else if ( ! (strcmp(key, "sec")
242 #ifdef COMPAT_OLDURI
243 && strcmp(key, "sektion")
244 #endif
245 )) {
246 if ( ! strcmp(val, "0"))
247 *val = '\0';
248 set_query_attr(&req->q.sec, &val);
249 }
250
251 else if ( ! strcmp(key, "arch")) {
252 if ( ! strcmp(val, "default"))
253 *val = '\0';
254 set_query_attr(&req->q.arch, &val);
255 }
256
257 /*
258 * The key must be freed in any case.
259 * The val may have been handed over to the query
260 * structure, in which case it is now NULL.
261 */
262 next:
263 free(key);
264 key = NULL;
265 free(val);
266 val = NULL;
267
268 if (*qs != '\0')
269 qs++;
270 }
271 }
272
273 /*
274 * HTTP-decode a string. The standard explanation is that this turns
275 * "%4e+foo" into "n foo" in the regular way. This is done in-place
276 * over the allocated string.
277 */
278 static int
279 http_decode(char *p)
280 {
281 char hex[3];
282 char *q;
283 int c;
284
285 hex[2] = '\0';
286
287 q = p;
288 for ( ; '\0' != *p; p++, q++) {
289 if ('%' == *p) {
290 if ('\0' == (hex[0] = *(p + 1)))
291 return 0;
292 if ('\0' == (hex[1] = *(p + 2)))
293 return 0;
294 if (1 != sscanf(hex, "%x", &c))
295 return 0;
296 if ('\0' == c)
297 return 0;
298
299 *q = (char)c;
300 p += 2;
301 } else
302 *q = '+' == *p ? ' ' : *p;
303 }
304
305 *q = '\0';
306 return 1;
307 }
308
/*
 * Emit the CGI response header block and flush it.
 * A Status line is written only for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
323
/*
 * Copy the contents of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops early if writing to standard output fails.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Drain stdio first so the raw writes below stay in order. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may accept fewer bytes than requested. */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw <= 0) {
				close(fd);
				return;
			}
		}
	}
	close(fd);
}
337
338 static void
339 resp_begin_html(int code, const char *msg)
340 {
341
342 resp_begin_http(code, msg);
343
344 printf("<!DOCTYPE html>\n"
345 "<html>\n"
346 "<head>\n"
347 "<meta charset=\"UTF-8\"/>\n"
348 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
349 " type=\"text/css\" media=\"all\">\n"
350 "<title>%s</title>\n"
351 "</head>\n"
352 "<body>\n"
353 "<!-- Begin page content. //-->\n",
354 CSS_DIR, CUSTOMIZE_TITLE);
355
356 resp_copy(MAN_DIR "/header.html");
357 }
358
359 static void
360 resp_end_html(void)
361 {
362
363 resp_copy(MAN_DIR "/footer.html");
364
365 puts("</body>\n"
366 "</html>");
367 }
368
369 static void
370 resp_searchform(const struct req *req)
371 {
372 int i;
373
374 puts("<!-- Begin search form. //-->");
375 printf("<div id=\"mancgi\">\n"
376 "<form action=\"/%s\" method=\"get\">\n"
377 "<fieldset>\n"
378 "<legend>Manual Page Search Parameters</legend>\n",
379 scriptname);
380
381 /* Write query input box. */
382
383 printf( "<table><tr><td>\n"
384 "<input type=\"text\" name=\"query\" value=\"");
385 if (NULL != req->q.query)
386 html_print(req->q.query);
387 puts("\" size=\"40\">");
388
389 /* Write submission and reset buttons. */
390
391 printf( "<input type=\"submit\" value=\"Submit\">\n"
392 "<input type=\"reset\" value=\"Reset\">\n");
393
394 /* Write show radio button */
395
396 printf( "</td><td>\n"
397 "<input type=\"radio\" ");
398 if (req->q.equal)
399 printf("checked=\"checked\" ");
400 printf( "name=\"apropos\" id=\"show\" value=\"0\">\n"
401 "<label for=\"show\">Show named manual page</label>\n");
402
403 /* Write section selector. */
404
405 puts( "</td></tr><tr><td>\n"
406 "<select name=\"sec\">");
407 for (i = 0; i < sec_MAX; i++) {
408 printf("<option value=\"%s\"", sec_numbers[i]);
409 if (NULL != req->q.sec &&
410 0 == strcmp(sec_numbers[i], req->q.sec))
411 printf(" selected=\"selected\"");
412 printf(">%s</option>\n", sec_names[i]);
413 }
414 puts("</select>");
415
416 /* Write architecture selector. */
417
418 printf( "<select name=\"arch\">\n"
419 "<option value=\"default\"");
420 if (NULL == req->q.arch)
421 printf(" selected=\"selected\"");
422 puts(">All Architectures</option>");
423 for (i = 0; i < arch_MAX; i++) {
424 printf("<option value=\"%s\"", arch_names[i]);
425 if (NULL != req->q.arch &&
426 0 == strcmp(arch_names[i], req->q.arch))
427 printf(" selected=\"selected\"");
428 printf(">%s</option>\n", arch_names[i]);
429 }
430 puts("</select>");
431
432 /* Write manpath selector. */
433
434 if (req->psz > 1) {
435 puts("<select name=\"manpath\">");
436 for (i = 0; i < (int)req->psz; i++) {
437 printf("<option ");
438 if (strcmp(req->q.manpath, req->p[i]) == 0)
439 printf("selected=\"selected\" ");
440 printf("value=\"");
441 html_print(req->p[i]);
442 printf("\">");
443 html_print(req->p[i]);
444 puts("</option>");
445 }
446 puts("</select>");
447 }
448
449 /* Write search radio button */
450
451 printf( "</td><td>\n"
452 "<input type=\"radio\" ");
453 if (0 == req->q.equal)
454 printf("checked=\"checked\" ");
455 printf( "name=\"apropos\" id=\"search\" value=\"1\">\n"
456 "<label for=\"search\">Search with apropos query</label>\n");
457
458 puts("</td></tr></table>\n"
459 "</fieldset>\n"
460 "</form>\n"
461 "</div>");
462 puts("<!-- End search form. //-->");
463 }
464
/*
 * Check that a string consists only of alphanumeric characters
 * and the punctuation '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
478
479 static int
480 validate_manpath(const struct req *req, const char* manpath)
481 {
482 size_t i;
483
484 if ( ! strcmp(manpath, "mandoc"))
485 return 1;
486
487 for (i = 0; i < req->psz; i++)
488 if ( ! strcmp(manpath, req->p[i]))
489 return 1;
490
491 return 0;
492 }
493
/*
 * Check a manual file name relative to a manpath.  After an
 * optional leading "./", the name must start with "man" or "cat"
 * and must not contain "../" or "/..".
 * Returns 1 if the name passes, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
504
505 static void
506 pg_index(const struct req *req)
507 {
508
509 resp_begin_html(200, NULL);
510 resp_searchform(req);
511 printf("<p>\n"
512 "This web interface is documented in the\n"
513 "<a href=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</a>\n"
514 "manual, and the\n"
515 "<a href=\"/%s%smandoc/man1/apropos.1\">apropos</a>\n"
516 "manual explains the query syntax.\n"
517 "</p>\n",
518 scriptname, *scriptname == '\0' ? "" : "/",
519 scriptname, *scriptname == '\0' ? "" : "/");
520 resp_end_html();
521 }
522
/*
 * Print a page consisting of the search form and a one-paragraph
 * message explaining why there is nothing to show.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<p>\n%s\n</p>\n", msg);
	resp_end_html();
}
533
534 static void
535 pg_error_badrequest(const char *msg)
536 {
537
538 resp_begin_html(400, "Bad Request");
539 puts("<h1>Bad Request</h1>\n"
540 "<p>\n");
541 puts(msg);
542 printf("Try again from the\n"
543 "<a href=\"/%s\">main page</a>.\n"
544 "</p>", scriptname);
545 resp_end_html();
546 }
547
/* Print a "500 Internal Server Error" page without further detail. */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
555
556 static void
557 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
558 {
559 char *arch, *archend;
560 const char *sec;
561 size_t i, iuse;
562 int archprio, archpriouse;
563 int prio, priouse;
564
565 for (i = 0; i < sz; i++) {
566 if (validate_filename(r[i].file))
567 continue;
568 warnx("invalid filename %s in %s database",
569 r[i].file, req->q.manpath);
570 pg_error_internal();
571 return;
572 }
573
574 if (req->isquery && sz == 1) {
575 /*
576 * If we have just one result, then jump there now
577 * without any delay.
578 */
579 printf("Status: 303 See Other\r\n");
580 printf("Location: http://%s/%s%s%s/%s",
581 HTTP_HOST, scriptname,
582 *scriptname == '\0' ? "" : "/",
583 req->q.manpath, r[0].file);
584 printf("\r\n"
585 "Content-Type: text/html; charset=utf-8\r\n"
586 "\r\n");
587 return;
588 }
589
590 resp_begin_html(200, NULL);
591 resp_searchform(req);
592
593 if (sz > 1) {
594 puts("<div class=\"results\">");
595 puts("<table>");
596
597 for (i = 0; i < sz; i++) {
598 printf("<tr>\n"
599 "<td class=\"title\">\n"
600 "<a href=\"/%s%s%s/%s",
601 scriptname, *scriptname == '\0' ? "" : "/",
602 req->q.manpath, r[i].file);
603 printf("\">");
604 html_print(r[i].names);
605 printf("</a>\n"
606 "</td>\n"
607 "<td class=\"desc\">");
608 html_print(r[i].output);
609 puts("</td>\n"
610 "</tr>");
611 }
612
613 puts("</table>\n"
614 "</div>");
615 }
616
617 /*
618 * In man(1) mode, show one of the pages
619 * even if more than one is found.
620 */
621
622 if (req->q.equal || sz == 1) {
623 puts("<hr>");
624 iuse = 0;
625 priouse = 20;
626 archpriouse = 3;
627 for (i = 0; i < sz; i++) {
628 sec = r[i].file;
629 sec += strcspn(sec, "123456789");
630 if (sec[0] == '\0')
631 continue;
632 prio = sec_prios[sec[0] - '1'];
633 if (sec[1] != '/')
634 prio += 10;
635 if (req->q.arch == NULL) {
636 archprio =
637 ((arch = strchr(sec + 1, '/'))
638 == NULL) ? 3 :
639 ((archend = strchr(arch + 1, '/'))
640 == NULL) ? 0 :
641 strncmp(arch, "amd64/",
642 archend - arch) ? 2 : 1;
643 if (archprio < archpriouse) {
644 archpriouse = archprio;
645 priouse = prio;
646 iuse = i;
647 continue;
648 }
649 if (archprio > archpriouse)
650 continue;
651 }
652 if (prio >= priouse)
653 continue;
654 priouse = prio;
655 iuse = i;
656 }
657 resp_show(req, r[iuse].file);
658 }
659
660 resp_end_html();
661 }
662
/*
 * Render a preformatted manual page as HTML inside a <pre> block.
 *
 * The file is read line by line and backspace overstrike sequences
 * are translated: "_\bX" prints X in italics, a small set of
 * two-character overstrikes prints '*' or '+', and any other
 * "Y\bX" prints X in bold.  All text goes through html_putchar().
 * If the file cannot be opened, a short notice is printed instead.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair sit on either
			 * side of the backspace, so the second one is
			 * always at p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
799
800 static void
801 format(const struct req *req, const char *file)
802 {
803 struct manoutput conf;
804 struct mparse *mp;
805 struct roff_man *man;
806 void *vp;
807 int fd;
808 int usepath;
809
810 if (-1 == (fd = open(file, O_RDONLY, 0))) {
811 puts("<p>You specified an invalid manual file.</p>");
812 return;
813 }
814
815 mchars_alloc();
816 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
817 mparse_readfd(mp, fd, file);
818 close(fd);
819
820 memset(&conf, 0, sizeof(conf));
821 conf.fragment = 1;
822 usepath = strcmp(req->q.manpath, req->p[0]);
823 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
824 usepath ? req->q.manpath : "", usepath ? "/" : "");
825
826 mparse_result(mp, &man, NULL);
827 if (man == NULL) {
828 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
829 pg_error_internal();
830 mparse_free(mp);
831 mchars_free();
832 return;
833 }
834
835 vp = html_alloc(&conf);
836
837 if (man->macroset == MACROSET_MDOC) {
838 mdoc_validate(man);
839 html_mdoc(vp, man);
840 } else {
841 man_validate(man);
842 html_man(vp, man);
843 }
844
845 html_free(vp);
846 mparse_free(mp);
847 mchars_free();
848 free(conf.man);
849 }
850
/*
 * Display one manual file.  After skipping an optional leading
 * "./", names starting with 'c' go to catman() as preformatted
 * pages; all others go to format() as manual sources.
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		format(req, file);
		return;
	}
	catman(req, file);
}
863
864 static void
865 pg_show(struct req *req, const char *fullpath)
866 {
867 char *manpath;
868 const char *file;
869
870 if ((file = strchr(fullpath, '/')) == NULL) {
871 pg_error_badrequest(
872 "You did not specify a page to show.");
873 return;
874 }
875 manpath = mandoc_strndup(fullpath, file - fullpath);
876 file++;
877
878 if ( ! validate_manpath(req, manpath)) {
879 pg_error_badrequest(
880 "You specified an invalid manpath.");
881 free(manpath);
882 return;
883 }
884
885 /*
886 * Begin by chdir()ing into the manpath.
887 * This way we can pick up the database files, which are
888 * relative to the manpath root.
889 */
890
891 if (chdir(manpath) == -1) {
892 warn("chdir %s", manpath);
893 pg_error_internal();
894 free(manpath);
895 return;
896 }
897
898 if (strcmp(manpath, "mandoc")) {
899 free(req->q.manpath);
900 req->q.manpath = manpath;
901 } else
902 free(manpath);
903
904 if ( ! validate_filename(file)) {
905 pg_error_badrequest(
906 "You specified an invalid manual file.");
907 return;
908 }
909
910 resp_begin_html(200, NULL);
911 resp_searchform(req);
912 resp_show(req, file);
913 resp_end_html();
914 }
915
916 static void
917 pg_search(const struct req *req)
918 {
919 struct mansearch search;
920 struct manpaths paths;
921 struct manpage *res;
922 char **argv;
923 char *query, *rp, *wp;
924 size_t ressz;
925 int argc;
926
927 /*
928 * Begin by chdir()ing into the root of the manpath.
929 * This way we can pick up the database files, which are
930 * relative to the manpath root.
931 */
932
933 if (chdir(req->q.manpath) == -1) {
934 warn("chdir %s", req->q.manpath);
935 pg_error_internal();
936 return;
937 }
938
939 search.arch = req->q.arch;
940 search.sec = req->q.sec;
941 search.outkey = "Nd";
942 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
943 search.firstmatch = 1;
944
945 paths.sz = 1;
946 paths.paths = mandoc_malloc(sizeof(char *));
947 paths.paths[0] = mandoc_strdup(".");
948
949 /*
950 * Break apart at spaces with backslash-escaping.
951 */
952
953 argc = 0;
954 argv = NULL;
955 rp = query = mandoc_strdup(req->q.query);
956 for (;;) {
957 while (isspace((unsigned char)*rp))
958 rp++;
959 if (*rp == '\0')
960 break;
961 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
962 argv[argc++] = wp = rp;
963 for (;;) {
964 if (isspace((unsigned char)*rp)) {
965 *wp = '\0';
966 rp++;
967 break;
968 }
969 if (rp[0] == '\\' && rp[1] != '\0')
970 rp++;
971 if (wp != rp)
972 *wp = *rp;
973 if (*rp == '\0')
974 break;
975 wp++;
976 rp++;
977 }
978 }
979
980 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
981 pg_noresult(req, "You entered an invalid query.");
982 else if (0 == ressz)
983 pg_noresult(req, "No results found.");
984 else
985 pg_searchres(req, res, ressz);
986
987 free(query);
988 mansearch_free(res, ressz);
989 free(paths.paths[0]);
990 free(paths.paths);
991 }
992
993 int
994 main(void)
995 {
996 struct req req;
997 struct itimerval itimer;
998 const char *path;
999 const char *querystring;
1000 int i;
1001
1002 /* Poor man's ReDoS mitigation. */
1003
1004 itimer.it_value.tv_sec = 2;
1005 itimer.it_value.tv_usec = 0;
1006 itimer.it_interval.tv_sec = 2;
1007 itimer.it_interval.tv_usec = 0;
1008 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1009 warn("setitimer");
1010 pg_error_internal();
1011 return EXIT_FAILURE;
1012 }
1013
1014 /*
1015 * First we change directory into the MAN_DIR so that
1016 * subsequent scanning for manpath directories is rooted
1017 * relative to the same position.
1018 */
1019
1020 if (chdir(MAN_DIR) == -1) {
1021 warn("MAN_DIR: %s", MAN_DIR);
1022 pg_error_internal();
1023 return EXIT_FAILURE;
1024 }
1025
1026 memset(&req, 0, sizeof(struct req));
1027 req.q.equal = 1;
1028 pathgen(&req);
1029
1030 /* Parse the path info and the query string. */
1031
1032 if ((path = getenv("PATH_INFO")) == NULL)
1033 path = "";
1034 else if (*path == '/')
1035 path++;
1036
1037 if (*path != '\0') {
1038 path_parse(&req, path);
1039 if (access(path, F_OK) == -1)
1040 path = "";
1041 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1042 http_parse(&req, querystring);
1043
1044 /* Validate parsed data and add defaults. */
1045
1046 if (req.q.manpath == NULL)
1047 req.q.manpath = mandoc_strdup(req.p[0]);
1048 else if ( ! validate_manpath(&req, req.q.manpath)) {
1049 pg_error_badrequest(
1050 "You specified an invalid manpath.");
1051 return EXIT_FAILURE;
1052 }
1053
1054 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1055 pg_error_badrequest(
1056 "You specified an invalid architecture.");
1057 return EXIT_FAILURE;
1058 }
1059
1060 /* Dispatch to the three different pages. */
1061
1062 if ('\0' != *path)
1063 pg_show(&req, path);
1064 else if (NULL != req.q.query)
1065 pg_search(&req);
1066 else
1067 pg_index(&req);
1068
1069 free(req.q.manpath);
1070 free(req.q.arch);
1071 free(req.q.sec);
1072 free(req.q.query);
1073 for (i = 0; i < (int)req.psz; i++)
1074 free(req.p[i]);
1075 free(req.p);
1076 return EXIT_SUCCESS;
1077 }
1078
1079 /*
1080 * If PATH_INFO is not a file name, translate it to a query.
1081 */
1082 static void
1083 path_parse(struct req *req, const char *path)
1084 {
1085 char *dir;
1086
1087 req->isquery = 0;
1088 req->q.equal = 1;
1089 req->q.manpath = mandoc_strdup(path);
1090
1091 /* Mandatory manual page name. */
1092 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1093 req->q.query = req->q.manpath;
1094 req->q.manpath = NULL;
1095 } else
1096 *req->q.query++ = '\0';
1097
1098 /* Optional trailing section. */
1099 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1100 if(isdigit((unsigned char)req->q.sec[1])) {
1101 *req->q.sec++ = '\0';
1102 req->q.sec = mandoc_strdup(req->q.sec);
1103 } else
1104 req->q.sec = NULL;
1105 }
1106
1107 /* Handle the case of name[.section] only. */
1108 if (req->q.manpath == NULL) {
1109 req->q.arch = NULL;
1110 return;
1111 }
1112 req->q.query = mandoc_strdup(req->q.query);
1113
1114 /* Optional architecture. */
1115 dir = strrchr(req->q.manpath, '/');
1116 if (dir != NULL && strncmp(dir + 1, "man", 3) != 0) {
1117 *dir++ = '\0';
1118 req->q.arch = mandoc_strdup(dir);
1119 dir = strrchr(req->q.manpath, '/');
1120 } else
1121 req->q.arch = NULL;
1122
1123 /* Optional directory name. */
1124 if (dir != NULL && strncmp(dir + 1, "man", 3) == 0) {
1125 *dir++ = '\0';
1126 free(req->q.sec);
1127 req->q.sec = mandoc_strdup(dir + 3);
1128 }
1129 }
1130
1131 /*
1132 * Scan for indexable paths.
1133 */
1134 static void
1135 pathgen(struct req *req)
1136 {
1137 FILE *fp;
1138 char *dp;
1139 size_t dpsz;
1140 ssize_t len;
1141
1142 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1143 warn("%s/manpath.conf", MAN_DIR);
1144 pg_error_internal();
1145 exit(EXIT_FAILURE);
1146 }
1147
1148 dp = NULL;
1149 dpsz = 0;
1150
1151 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1152 if (dp[len - 1] == '\n')
1153 dp[--len] = '\0';
1154 req->p = mandoc_realloc(req->p,
1155 (req->psz + 1) * sizeof(char *));
1156 if ( ! validate_urifrag(dp)) {
1157 warnx("%s/manpath.conf contains "
1158 "unsafe path \"%s\"", MAN_DIR, dp);
1159 pg_error_internal();
1160 exit(EXIT_FAILURE);
1161 }
1162 if (strchr(dp, '/') != NULL) {
1163 warnx("%s/manpath.conf contains "
1164 "path with slash \"%s\"", MAN_DIR, dp);
1165 pg_error_internal();
1166 exit(EXIT_FAILURE);
1167 }
1168 req->p[req->psz++] = dp;
1169 dp = NULL;
1170 dpsz = 0;
1171 }
1172 free(dp);
1173
1174 if (req->p == NULL) {
1175 warnx("%s/manpath.conf is empty", MAN_DIR);
1176 pg_error_internal();
1177 exit(EXIT_FAILURE);
1178 }
1179 }