]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Make HTML tags lower case for better stylistic agreement with what
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.126 2016/04/15 01:34:51 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc.h"
35 #include "roff.h"
36 #include "mdoc.h"
37 #include "man.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42
/*
 * Search parameters handed to the search function.
 * The string members are either NULL or heap-allocated.
 */
struct query {
	/* Manual directory to search in. */
	char	*manpath;
	/* Machine architecture, or NULL for all of them. */
	char	*arch;
	/* Manual section, or NULL for all of them. */
	char	*sec;
	/* Query expression exactly as the user supplied it. */
	char	*query;
	/* Non-zero: match whole names; zero: match substrings. */
	int	 equal;
};
53
54 struct req {
55 struct query q;
56 char **p; /* array of available manpaths */
57 size_t psz; /* number of available manpaths */
58 int isquery; /* QUERY_STRING used, not PATH_INFO */
59 };
60
/* Forward declarations of all file-local functions. */
static	void	 catman(const struct req *req, const char *file);
static	void	 format(const struct req *req, const char *file);
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_parse(struct req *req, const char *qs);
static	void	 pathgen(struct req *req);
static	void	 path_parse(struct req *req, const char *path);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, const char *msg);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	void	 resp_begin_html(int code, const char *msg);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_copy(const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_searchform(const struct req *req);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);
87
88 static const char *scriptname = SCRIPT_NAME;
89
/*
 * Preference values for the section digits '1' through '9',
 * in that order; when choosing a page to display, the smaller
 * value wins.
 */
static const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the options in the section selector of the form. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown for the options above, in the same order. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};

/* Number of options in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
108
/* Architectures offered by the architecture selector of the form. */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armish",	"armv7",
	"hppa",		"hppa64",	"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc",
	"sparc64",	"zaurus",
	"amiga",	"arc",		"arm32",	"atari",
	"aviion",	"beagle",	"cats",		"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sun3",
	"vax",		"wgrisc",	"x68k"
};

/* Number of architectures in the table above. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
123
/*
 * Write one character to standard output, replacing the characters
 * '"', '&', '<' and '>' with the corresponding HTML named character
 * references.
 * Every other byte, including non-ASCII ones, is passed straight
 * to the output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is called "quot"; "&quote;" does not exist. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
150
/*
 * Print a NUL-terminated string one character at a time
 * by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
164
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored; it is freed and the attribute becomes NULL instead.
 * In either case, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
181
182 /*
183 * Parse the QUERY_STRING for key-value pairs
184 * and store the values into the query structure.
185 */
186 static void
187 http_parse(struct req *req, const char *qs)
188 {
189 char *key, *val;
190 size_t keysz, valsz;
191
192 req->isquery = 1;
193 req->q.manpath = NULL;
194 req->q.arch = NULL;
195 req->q.sec = NULL;
196 req->q.query = NULL;
197 req->q.equal = 1;
198
199 key = val = NULL;
200 while (*qs != '\0') {
201
202 /* Parse one key. */
203
204 keysz = strcspn(qs, "=;&");
205 key = mandoc_strndup(qs, keysz);
206 qs += keysz;
207 if (*qs != '=')
208 goto next;
209
210 /* Parse one value. */
211
212 valsz = strcspn(++qs, ";&");
213 val = mandoc_strndup(qs, valsz);
214 qs += valsz;
215
216 /* Decode and catch encoding errors. */
217
218 if ( ! (http_decode(key) && http_decode(val)))
219 goto next;
220
221 /* Handle key-value pairs. */
222
223 if ( ! strcmp(key, "query"))
224 set_query_attr(&req->q.query, &val);
225
226 else if ( ! strcmp(key, "apropos"))
227 req->q.equal = !strcmp(val, "0");
228
229 else if ( ! strcmp(key, "manpath")) {
230 #ifdef COMPAT_OLDURI
231 if ( ! strncmp(val, "OpenBSD ", 8)) {
232 val[7] = '-';
233 if ('C' == val[8])
234 val[8] = 'c';
235 }
236 #endif
237 set_query_attr(&req->q.manpath, &val);
238 }
239
240 else if ( ! (strcmp(key, "sec")
241 #ifdef COMPAT_OLDURI
242 && strcmp(key, "sektion")
243 #endif
244 )) {
245 if ( ! strcmp(val, "0"))
246 *val = '\0';
247 set_query_attr(&req->q.sec, &val);
248 }
249
250 else if ( ! strcmp(key, "arch")) {
251 if ( ! strcmp(val, "default"))
252 *val = '\0';
253 set_query_attr(&req->q.arch, &val);
254 }
255
256 /*
257 * The key must be freed in any case.
258 * The val may have been handed over to the query
259 * structure, in which case it is now NULL.
260 */
261 next:
262 free(key);
263 key = NULL;
264 free(val);
265 val = NULL;
266
267 if (*qs != '\0')
268 qs++;
269 }
270 }
271
272 /*
273 * HTTP-decode a string. The standard explanation is that this turns
274 * "%4e+foo" into "n foo" in the regular way. This is done in-place
275 * over the allocated string.
276 */
277 static int
278 http_decode(char *p)
279 {
280 char hex[3];
281 char *q;
282 int c;
283
284 hex[2] = '\0';
285
286 q = p;
287 for ( ; '\0' != *p; p++, q++) {
288 if ('%' == *p) {
289 if ('\0' == (hex[0] = *(p + 1)))
290 return 0;
291 if ('\0' == (hex[1] = *(p + 2)))
292 return 0;
293 if (1 != sscanf(hex, "%x", &c))
294 return 0;
295 if ('\0' == c)
296 return 0;
297
298 *q = (char)c;
299 p += 2;
300 } else
301 *q = '+' == *p ? ' ' : *p;
302 }
303
304 *q = '\0';
305 return 1;
306 }
307
/*
 * Print the HTTP response header and flush standard output.
 * A Status: line is only printed if the code differs from 200;
 * the message is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 fields[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(fields, stdout);
	fflush(stdout);
}
322
/*
 * Copy the content of the given file verbatim to standard output.
 * If the file cannot be opened, nothing is printed; copying stops
 * silently on the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data is written directly to the file descriptor,
	 * so anything buffered by stdio has to go out first.
	 */

	fflush(stdout);

	while ((nread = read(fd, buf, sizeof(buf))) > 0) {
		/* One write(2) call may accept only part of the data. */
		off = 0;
		while (off < nread) {
			nwritten = write(STDOUT_FILENO, buf + off,
			    (size_t)(nread - off));
			if (nwritten == -1 && errno == EINTR)
				continue;
			if (nwritten <= 0)
				goto done;
			off += nwritten;
		}
	}

done:
	/* Do not leak the descriptor. */
	close(fd);
}
336
337 static void
338 resp_begin_html(int code, const char *msg)
339 {
340
341 resp_begin_http(code, msg);
342
343 printf("<!DOCTYPE html>\n"
344 "<html>\n"
345 "<head>\n"
346 "<meta charset=\"UTF-8\"/>\n"
347 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
348 " type=\"text/css\" media=\"all\">\n"
349 "<title>%s</title>\n"
350 "</head>\n"
351 "<body>\n"
352 "<!-- Begin page content. //-->\n",
353 CSS_DIR, CUSTOMIZE_TITLE);
354
355 resp_copy(MAN_DIR "/header.html");
356 }
357
358 static void
359 resp_end_html(void)
360 {
361
362 resp_copy(MAN_DIR "/footer.html");
363
364 puts("</body>\n"
365 "</html>");
366 }
367
368 static void
369 resp_searchform(const struct req *req)
370 {
371 int i;
372
373 puts("<!-- Begin search form. //-->");
374 printf("<div id=\"mancgi\">\n"
375 "<form action=\"/%s\" method=\"get\">\n"
376 "<fieldset>\n"
377 "<legend>Manual Page Search Parameters</legend>\n",
378 scriptname);
379
380 /* Write query input box. */
381
382 printf( "<table><tr><td>\n"
383 "<input type=\"text\" name=\"query\" value=\"");
384 if (NULL != req->q.query)
385 html_print(req->q.query);
386 puts("\" size=\"40\">");
387
388 /* Write submission and reset buttons. */
389
390 printf( "<input type=\"submit\" value=\"Submit\">\n"
391 "<input type=\"reset\" value=\"Reset\">\n");
392
393 /* Write show radio button */
394
395 printf( "</td><td>\n"
396 "<input type=\"radio\" ");
397 if (req->q.equal)
398 printf("checked=\"checked\" ");
399 printf( "name=\"apropos\" id=\"show\" value=\"0\">\n"
400 "<label for=\"show\">Show named manual page</label>\n");
401
402 /* Write section selector. */
403
404 puts( "</td></tr><tr><td>\n"
405 "<select name=\"sec\">");
406 for (i = 0; i < sec_MAX; i++) {
407 printf("<option value=\"%s\"", sec_numbers[i]);
408 if (NULL != req->q.sec &&
409 0 == strcmp(sec_numbers[i], req->q.sec))
410 printf(" selected=\"selected\"");
411 printf(">%s</option>\n", sec_names[i]);
412 }
413 puts("</select>");
414
415 /* Write architecture selector. */
416
417 printf( "<select name=\"arch\">\n"
418 "<option value=\"default\"");
419 if (NULL == req->q.arch)
420 printf(" selected=\"selected\"");
421 puts(">All Architectures</option>");
422 for (i = 0; i < arch_MAX; i++) {
423 printf("<option value=\"%s\"", arch_names[i]);
424 if (NULL != req->q.arch &&
425 0 == strcmp(arch_names[i], req->q.arch))
426 printf(" selected=\"selected\"");
427 printf(">%s</option>\n", arch_names[i]);
428 }
429 puts("</select>");
430
431 /* Write manpath selector. */
432
433 if (req->psz > 1) {
434 puts("<select name=\"manpath\">");
435 for (i = 0; i < (int)req->psz; i++) {
436 printf("<option ");
437 if (strcmp(req->q.manpath, req->p[i]) == 0)
438 printf("selected=\"selected\" ");
439 printf("value=\"");
440 html_print(req->p[i]);
441 printf("\">");
442 html_print(req->p[i]);
443 puts("</option>");
444 }
445 puts("</select>");
446 }
447
448 /* Write search radio button */
449
450 printf( "</td><td>\n"
451 "<input type=\"radio\" ");
452 if (0 == req->q.equal)
453 printf("checked=\"checked\" ");
454 printf( "name=\"apropos\" id=\"search\" value=\"1\">\n"
455 "<label for=\"search\">Search with apropos query</label>\n");
456
457 puts("</td></tr></table>\n"
458 "</fieldset>\n"
459 "</form>\n"
460 "</div>");
461 puts("<!-- End search form. //-->");
462 }
463
/*
 * Check that a string consists of nothing but ASCII letters,
 * digits, and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), or else 0.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
477
478 static int
479 validate_manpath(const struct req *req, const char* manpath)
480 {
481 size_t i;
482
483 if ( ! strcmp(manpath, "mandoc"))
484 return 1;
485
486 for (i = 0; i < req->psz; i++)
487 if ( ! strcmp(manpath, req->p[i]))
488 return 1;
489
490 return 0;
491 }
492
/*
 * Check a file name relative to the root of a manpath.
 * After skipping an optional leading "./", the name has to start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, or else 0.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
503
504 static void
505 pg_index(const struct req *req)
506 {
507
508 resp_begin_html(200, NULL);
509 resp_searchform(req);
510 printf("<p>\n"
511 "This web interface is documented in the\n"
512 "<a href=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</a>\n"
513 "manual, and the\n"
514 "<a href=\"/%s%smandoc/man1/apropos.1\">apropos</a>\n"
515 "manual explains the query syntax.\n"
516 "</p>\n",
517 scriptname, *scriptname == '\0' ? "" : "/",
518 scriptname, *scriptname == '\0' ? "" : "/");
519 resp_end_html();
520 }
521
/*
 * Print a page consisting of the search form and a paragraph
 * containing the given message.  The message is printed as it is,
 * without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<p>\n%s\n</p>\n", msg);
	resp_end_html();
}
532
533 static void
534 pg_error_badrequest(const char *msg)
535 {
536
537 resp_begin_html(400, "Bad Request");
538 puts("<h1>Bad Request</h1>\n"
539 "<p>\n");
540 puts(msg);
541 printf("Try again from the\n"
542 "<a href=\"/%s\">main page</a>.\n"
543 "</p>", scriptname);
544 resp_end_html();
545 }
546
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
554
555 static void
556 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
557 {
558 char *arch, *archend;
559 const char *sec;
560 size_t i, iuse;
561 int archprio, archpriouse;
562 int prio, priouse;
563
564 for (i = 0; i < sz; i++) {
565 if (validate_filename(r[i].file))
566 continue;
567 fprintf(stderr, "invalid filename %s in %s database\n",
568 r[i].file, req->q.manpath);
569 pg_error_internal();
570 return;
571 }
572
573 if (req->isquery && sz == 1) {
574 /*
575 * If we have just one result, then jump there now
576 * without any delay.
577 */
578 printf("Status: 303 See Other\r\n");
579 printf("Location: http://%s/%s%s%s/%s",
580 HTTP_HOST, scriptname,
581 *scriptname == '\0' ? "" : "/",
582 req->q.manpath, r[0].file);
583 printf("\r\n"
584 "Content-Type: text/html; charset=utf-8\r\n"
585 "\r\n");
586 return;
587 }
588
589 resp_begin_html(200, NULL);
590 resp_searchform(req);
591
592 if (sz > 1) {
593 puts("<div class=\"results\">");
594 puts("<table>");
595
596 for (i = 0; i < sz; i++) {
597 printf("<tr>\n"
598 "<td class=\"title\">\n"
599 "<a href=\"/%s%s%s/%s",
600 scriptname, *scriptname == '\0' ? "" : "/",
601 req->q.manpath, r[i].file);
602 printf("\">");
603 html_print(r[i].names);
604 printf("</a>\n"
605 "</td>\n"
606 "<td class=\"desc\">");
607 html_print(r[i].output);
608 puts("</td>\n"
609 "</tr>");
610 }
611
612 puts("</table>\n"
613 "</div>");
614 }
615
616 /*
617 * In man(1) mode, show one of the pages
618 * even if more than one is found.
619 */
620
621 if (req->q.equal || sz == 1) {
622 puts("<hr>");
623 iuse = 0;
624 priouse = 20;
625 archpriouse = 3;
626 for (i = 0; i < sz; i++) {
627 sec = r[i].file;
628 sec += strcspn(sec, "123456789");
629 if (sec[0] == '\0')
630 continue;
631 prio = sec_prios[sec[0] - '1'];
632 if (sec[1] != '/')
633 prio += 10;
634 if (req->q.arch == NULL) {
635 archprio =
636 ((arch = strchr(sec + 1, '/'))
637 == NULL) ? 3 :
638 ((archend = strchr(arch + 1, '/'))
639 == NULL) ? 0 :
640 strncmp(arch, "amd64/",
641 archend - arch) ? 2 : 1;
642 if (archprio < archpriouse) {
643 archpriouse = archprio;
644 priouse = prio;
645 iuse = i;
646 continue;
647 }
648 if (archprio > archpriouse)
649 continue;
650 }
651 if (prio >= priouse)
652 continue;
653 priouse = prio;
654 iuse = i;
655 }
656 resp_show(req, r[iuse].file);
657 }
658
659 resp_end_html();
660 }
661
/*
 * Print a preformatted manual page (cat page) as HTML.
 * Backspace overstrike sequences are converted: "_\bX" becomes
 * an italic X, certain pairs of overstruck characters become
 * '*' or '+', and any other "Y\bX" becomes a bold X.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top
			 * of p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair are always
			 * p[i] and p[i + 2]; comparing p[i + 1],
			 * the backspace, could never match.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
798
799 static void
800 format(const struct req *req, const char *file)
801 {
802 struct manoutput conf;
803 struct mparse *mp;
804 struct roff_man *man;
805 void *vp;
806 int fd;
807 int usepath;
808
809 if (-1 == (fd = open(file, O_RDONLY, 0))) {
810 puts("<p>You specified an invalid manual file.</p>");
811 return;
812 }
813
814 mchars_alloc();
815 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
816 mparse_readfd(mp, fd, file);
817 close(fd);
818
819 memset(&conf, 0, sizeof(conf));
820 conf.fragment = 1;
821 usepath = strcmp(req->q.manpath, req->p[0]);
822 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
823 usepath ? req->q.manpath : "", usepath ? "/" : "");
824
825 mparse_result(mp, &man, NULL);
826 if (man == NULL) {
827 fprintf(stderr, "fatal mandoc error: %s/%s\n",
828 req->q.manpath, file);
829 pg_error_internal();
830 mparse_free(mp);
831 mchars_free();
832 return;
833 }
834
835 vp = html_alloc(&conf);
836
837 if (man->macroset == MACROSET_MDOC) {
838 mdoc_validate(man);
839 html_mdoc(vp, man);
840 } else {
841 man_validate(man);
842 html_man(vp, man);
843 }
844
845 html_free(vp);
846 mparse_free(mp);
847 mchars_free();
848 free(conf.man);
849 }
850
/*
 * Print one manual page.  After skipping an optional leading "./",
 * a file name starting with 'c' is handled by catman(),
 * any other file name by format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		format(req, rel);
		return;
	}
	catman(req, rel);
}
863
864 static void
865 pg_show(struct req *req, const char *fullpath)
866 {
867 char *manpath;
868 const char *file;
869
870 if ((file = strchr(fullpath, '/')) == NULL) {
871 pg_error_badrequest(
872 "You did not specify a page to show.");
873 return;
874 }
875 manpath = mandoc_strndup(fullpath, file - fullpath);
876 file++;
877
878 if ( ! validate_manpath(req, manpath)) {
879 pg_error_badrequest(
880 "You specified an invalid manpath.");
881 free(manpath);
882 return;
883 }
884
885 /*
886 * Begin by chdir()ing into the manpath.
887 * This way we can pick up the database files, which are
888 * relative to the manpath root.
889 */
890
891 if (chdir(manpath) == -1) {
892 fprintf(stderr, "chdir %s: %s\n",
893 manpath, strerror(errno));
894 pg_error_internal();
895 free(manpath);
896 return;
897 }
898
899 if (strcmp(manpath, "mandoc")) {
900 free(req->q.manpath);
901 req->q.manpath = manpath;
902 } else
903 free(manpath);
904
905 if ( ! validate_filename(file)) {
906 pg_error_badrequest(
907 "You specified an invalid manual file.");
908 return;
909 }
910
911 resp_begin_html(200, NULL);
912 resp_searchform(req);
913 resp_show(req, file);
914 resp_end_html();
915 }
916
917 static void
918 pg_search(const struct req *req)
919 {
920 struct mansearch search;
921 struct manpaths paths;
922 struct manpage *res;
923 char **argv;
924 char *query, *rp, *wp;
925 size_t ressz;
926 int argc;
927
928 /*
929 * Begin by chdir()ing into the root of the manpath.
930 * This way we can pick up the database files, which are
931 * relative to the manpath root.
932 */
933
934 if (-1 == (chdir(req->q.manpath))) {
935 fprintf(stderr, "chdir %s: %s\n",
936 req->q.manpath, strerror(errno));
937 pg_error_internal();
938 return;
939 }
940
941 search.arch = req->q.arch;
942 search.sec = req->q.sec;
943 search.outkey = "Nd";
944 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
945 search.firstmatch = 1;
946
947 paths.sz = 1;
948 paths.paths = mandoc_malloc(sizeof(char *));
949 paths.paths[0] = mandoc_strdup(".");
950
951 /*
952 * Break apart at spaces with backslash-escaping.
953 */
954
955 argc = 0;
956 argv = NULL;
957 rp = query = mandoc_strdup(req->q.query);
958 for (;;) {
959 while (isspace((unsigned char)*rp))
960 rp++;
961 if (*rp == '\0')
962 break;
963 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
964 argv[argc++] = wp = rp;
965 for (;;) {
966 if (isspace((unsigned char)*rp)) {
967 *wp = '\0';
968 rp++;
969 break;
970 }
971 if (rp[0] == '\\' && rp[1] != '\0')
972 rp++;
973 if (wp != rp)
974 *wp = *rp;
975 if (*rp == '\0')
976 break;
977 wp++;
978 rp++;
979 }
980 }
981
982 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
983 pg_noresult(req, "You entered an invalid query.");
984 else if (0 == ressz)
985 pg_noresult(req, "No results found.");
986 else
987 pg_searchres(req, res, ressz);
988
989 free(query);
990 mansearch_free(res, ressz);
991 free(paths.paths[0]);
992 free(paths.paths);
993 }
994
995 int
996 main(void)
997 {
998 struct req req;
999 struct itimerval itimer;
1000 const char *path;
1001 const char *querystring;
1002 int i;
1003
1004 /* Poor man's ReDoS mitigation. */
1005
1006 itimer.it_value.tv_sec = 2;
1007 itimer.it_value.tv_usec = 0;
1008 itimer.it_interval.tv_sec = 2;
1009 itimer.it_interval.tv_usec = 0;
1010 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1011 fprintf(stderr, "setitimer: %s\n", strerror(errno));
1012 pg_error_internal();
1013 return EXIT_FAILURE;
1014 }
1015
1016 /*
1017 * First we change directory into the MAN_DIR so that
1018 * subsequent scanning for manpath directories is rooted
1019 * relative to the same position.
1020 */
1021
1022 if (-1 == chdir(MAN_DIR)) {
1023 fprintf(stderr, "MAN_DIR: %s: %s\n",
1024 MAN_DIR, strerror(errno));
1025 pg_error_internal();
1026 return EXIT_FAILURE;
1027 }
1028
1029 memset(&req, 0, sizeof(struct req));
1030 req.q.equal = 1;
1031 pathgen(&req);
1032
1033 /* Parse the path info and the query string. */
1034
1035 if ((path = getenv("PATH_INFO")) == NULL)
1036 path = "";
1037 else if (*path == '/')
1038 path++;
1039
1040 if (*path != '\0') {
1041 path_parse(&req, path);
1042 if (access(path, F_OK) == -1)
1043 path = "";
1044 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1045 http_parse(&req, querystring);
1046
1047 /* Validate parsed data and add defaults. */
1048
1049 if (req.q.manpath == NULL)
1050 req.q.manpath = mandoc_strdup(req.p[0]);
1051 else if ( ! validate_manpath(&req, req.q.manpath)) {
1052 pg_error_badrequest(
1053 "You specified an invalid manpath.");
1054 return EXIT_FAILURE;
1055 }
1056
1057 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1058 pg_error_badrequest(
1059 "You specified an invalid architecture.");
1060 return EXIT_FAILURE;
1061 }
1062
1063 /* Dispatch to the three different pages. */
1064
1065 if ('\0' != *path)
1066 pg_show(&req, path);
1067 else if (NULL != req.q.query)
1068 pg_search(&req);
1069 else
1070 pg_index(&req);
1071
1072 free(req.q.manpath);
1073 free(req.q.arch);
1074 free(req.q.sec);
1075 free(req.q.query);
1076 for (i = 0; i < (int)req.psz; i++)
1077 free(req.p[i]);
1078 free(req.p);
1079 return EXIT_SUCCESS;
1080 }
1081
1082 /*
1083 * If PATH_INFO is not a file name, translate it to a query.
1084 */
1085 static void
1086 path_parse(struct req *req, const char *path)
1087 {
1088 int dir_done;
1089
1090 req->isquery = 0;
1091 req->q.equal = 1;
1092 req->q.manpath = mandoc_strdup(path);
1093
1094 /* Mandatory manual page name. */
1095 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1096 req->q.query = req->q.manpath;
1097 req->q.manpath = NULL;
1098 } else
1099 *req->q.query++ = '\0';
1100
1101 /* Optional trailing section. */
1102 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1103 if(isdigit((unsigned char)req->q.sec[1])) {
1104 *req->q.sec++ = '\0';
1105 req->q.sec = mandoc_strdup(req->q.sec);
1106 } else
1107 req->q.sec = NULL;
1108 }
1109
1110 /* Handle the case of name[.section] only. */
1111 if (req->q.manpath == NULL) {
1112 req->q.arch = NULL;
1113 return;
1114 }
1115 req->q.query = mandoc_strdup(req->q.query);
1116
1117 /* Optional architecture. */
1118 dir_done = 0;
1119 for (;;) {
1120 if ((req->q.arch = strrchr(req->q.manpath, '/')) == NULL)
1121 break;
1122 *req->q.arch++ = '\0';
1123 if (dir_done || strncmp(req->q.arch, "man", 3)) {
1124 req->q.arch = mandoc_strdup(req->q.arch);
1125 break;
1126 }
1127
1128 /* Optional directory name. */
1129 req->q.arch += 3;
1130 if (*req->q.arch != '\0') {
1131 free(req->q.sec);
1132 req->q.sec = mandoc_strdup(req->q.arch);
1133 }
1134 dir_done = 1;
1135 }
1136 }
1137
1138 /*
1139 * Scan for indexable paths.
1140 */
1141 static void
1142 pathgen(struct req *req)
1143 {
1144 FILE *fp;
1145 char *dp;
1146 size_t dpsz;
1147 ssize_t len;
1148
1149 if (NULL == (fp = fopen("manpath.conf", "r"))) {
1150 fprintf(stderr, "%s/manpath.conf: %s\n",
1151 MAN_DIR, strerror(errno));
1152 pg_error_internal();
1153 exit(EXIT_FAILURE);
1154 }
1155
1156 dp = NULL;
1157 dpsz = 0;
1158
1159 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1160 if (dp[len - 1] == '\n')
1161 dp[--len] = '\0';
1162 req->p = mandoc_realloc(req->p,
1163 (req->psz + 1) * sizeof(char *));
1164 if ( ! validate_urifrag(dp)) {
1165 fprintf(stderr, "%s/manpath.conf contains "
1166 "unsafe path \"%s\"\n", MAN_DIR, dp);
1167 pg_error_internal();
1168 exit(EXIT_FAILURE);
1169 }
1170 if (NULL != strchr(dp, '/')) {
1171 fprintf(stderr, "%s/manpath.conf contains "
1172 "path with slash \"%s\"\n", MAN_DIR, dp);
1173 pg_error_internal();
1174 exit(EXIT_FAILURE);
1175 }
1176 req->p[req->psz++] = dp;
1177 dp = NULL;
1178 dpsz = 0;
1179 }
1180 free(dp);
1181
1182 if ( req->p == NULL ) {
1183 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1184 pg_error_internal();
1185 exit(EXIT_FAILURE);
1186 }
1187 }