]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
more info on man(7) .Xr hyperlinking
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.146 2017/01/25 03:49:38 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "main.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "cgi.h"
43
/*
 * Search parameters of one request,
 * in the form handed on to the search function.
 * The string members are NULL when the request did not supply them.
 */
struct query {
	/* desired manual directory */
	char	*manpath;
	/* architecture */
	char	*arch;
	/* manual section */
	char	*sec;
	/* unparsed query expression */
	char	*query;
	/* nonzero: match whole names, not substrings */
	int	 equal;
};
54
55 struct req {
56 struct query q;
57 char **p; /* array of available manpaths */
58 size_t psz; /* number of available manpaths */
59 int isquery; /* QUERY_STRING used, not PATH_INFO */
60 };
61
/*
 * Which element of the search form, if any,
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no element is focused */
	FOCUS_QUERY		/* focus the query input box */
};
66
/* HTML escaping and HTTP decoding. */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);

/* Request parsing. */
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 set_query_attr(char **, char **);

/* Complete pages. */
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);

/* Pieces of a response. */
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);

/* Input validation. */
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
93
94 static const char *scriptname = SCRIPT_NAME;
95
96 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
97 static const char *const sec_numbers[] = {
98 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
99 };
100 static const char *const sec_names[] = {
101 "All Sections",
102 "1 - General Commands",
103 "2 - System Calls",
104 "3 - Library Functions",
105 "3p - Perl Library",
106 "4 - Device Drivers",
107 "5 - File Formats",
108 "6 - Games",
109 "7 - Miscellaneous Information",
110 "8 - System Manager\'s Manual",
111 "9 - Kernel Developer\'s Manual"
112 };
113 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
114
/* Architectures offered by the architecture selector of the search form. */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",
	"loongson",	"luna88k",	"macppc",	"mips64",
	"octeon",	"sgi",		"socppc",	"sparc64",
	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
130
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by character entities.
 * Anything else, including non-ASCII bytes, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
157
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
171
/*
 * Move the allocated string *val into the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In both cases,
 * *val is NULL on return, so the caller must not free it again.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
188
189 /*
190 * Parse the QUERY_STRING for key-value pairs
191 * and store the values into the query structure.
192 */
193 static void
194 parse_query_string(struct req *req, const char *qs)
195 {
196 char *key, *val;
197 size_t keysz, valsz;
198
199 req->isquery = 1;
200 req->q.manpath = NULL;
201 req->q.arch = NULL;
202 req->q.sec = NULL;
203 req->q.query = NULL;
204 req->q.equal = 1;
205
206 key = val = NULL;
207 while (*qs != '\0') {
208
209 /* Parse one key. */
210
211 keysz = strcspn(qs, "=;&");
212 key = mandoc_strndup(qs, keysz);
213 qs += keysz;
214 if (*qs != '=')
215 goto next;
216
217 /* Parse one value. */
218
219 valsz = strcspn(++qs, ";&");
220 val = mandoc_strndup(qs, valsz);
221 qs += valsz;
222
223 /* Decode and catch encoding errors. */
224
225 if ( ! (http_decode(key) && http_decode(val)))
226 goto next;
227
228 /* Handle key-value pairs. */
229
230 if ( ! strcmp(key, "query"))
231 set_query_attr(&req->q.query, &val);
232
233 else if ( ! strcmp(key, "apropos"))
234 req->q.equal = !strcmp(val, "0");
235
236 else if ( ! strcmp(key, "manpath")) {
237 #ifdef COMPAT_OLDURI
238 if ( ! strncmp(val, "OpenBSD ", 8)) {
239 val[7] = '-';
240 if ('C' == val[8])
241 val[8] = 'c';
242 }
243 #endif
244 set_query_attr(&req->q.manpath, &val);
245 }
246
247 else if ( ! (strcmp(key, "sec")
248 #ifdef COMPAT_OLDURI
249 && strcmp(key, "sektion")
250 #endif
251 )) {
252 if ( ! strcmp(val, "0"))
253 *val = '\0';
254 set_query_attr(&req->q.sec, &val);
255 }
256
257 else if ( ! strcmp(key, "arch")) {
258 if ( ! strcmp(val, "default"))
259 *val = '\0';
260 set_query_attr(&req->q.arch, &val);
261 }
262
263 /*
264 * The key must be freed in any case.
265 * The val may have been handed over to the query
266 * structure, in which case it is now NULL.
267 */
268 next:
269 free(key);
270 key = NULL;
271 free(val);
272 val = NULL;
273
274 if (*qs != '\0')
275 qs++;
276 }
277 }
278
279 /*
280 * HTTP-decode a string. The standard explanation is that this turns
281 * "%4e+foo" into "n foo" in the regular way. This is done in-place
282 * over the allocated string.
283 */
284 static int
285 http_decode(char *p)
286 {
287 char hex[3];
288 char *q;
289 int c;
290
291 hex[2] = '\0';
292
293 q = p;
294 for ( ; '\0' != *p; p++, q++) {
295 if ('%' == *p) {
296 if ('\0' == (hex[0] = *(p + 1)))
297 return 0;
298 if ('\0' == (hex[1] = *(p + 2)))
299 return 0;
300 if (1 != sscanf(hex, "%x", &c))
301 return 0;
302 if ('\0' == c)
303 return 0;
304
305 *q = (char)c;
306 p += 2;
307 } else
308 *q = '+' == *p ? ' ' : *p;
309 }
310
311 *q = '\0';
312 return 1;
313 }
314
/*
 * Write the CGI response headers and flush them.
 * A "Status:" header built from code and msg is only written
 * for codes other than 200, so msg may be NULL for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
329
/*
 * Copy the content of a file verbatim to standard output.
 * This is best effort: a file that cannot be opened is silently
 * skipped, and copying stops quietly at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* The copy bypasses stdio, so send out pending output first. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may accept fewer bytes than requested,
		 * so keep going until the whole chunk is out.
		 */
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += nw;
		}
	}
	close(fd);
}
344
345 static void
346 resp_begin_html(int code, const char *msg)
347 {
348
349 resp_begin_http(code, msg);
350
351 printf("<!DOCTYPE html>\n"
352 "<html>\n"
353 "<head>\n"
354 " <meta charset=\"UTF-8\"/>\n"
355 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
356 " type=\"text/css\" media=\"all\">\n"
357 " <title>%s</title>\n"
358 "</head>\n"
359 "<body>\n",
360 CSS_DIR, CUSTOMIZE_TITLE);
361
362 resp_copy(MAN_DIR "/header.html");
363 }
364
365 static void
366 resp_end_html(void)
367 {
368
369 resp_copy(MAN_DIR "/footer.html");
370
371 puts("</body>\n"
372 "</html>");
373 }
374
375 static void
376 resp_searchform(const struct req *req, enum focus focus)
377 {
378 int i;
379
380 printf("<form action=\"/%s\" method=\"get\">\n"
381 " <fieldset>\n"
382 " <legend>Manual Page Search Parameters</legend>\n",
383 scriptname);
384
385 /* Write query input box. */
386
387 printf(" <input type=\"text\" name=\"query\" value=\"");
388 if (req->q.query != NULL)
389 html_print(req->q.query);
390 printf( "\" size=\"40\"");
391 if (focus == FOCUS_QUERY)
392 printf(" autofocus");
393 puts(">");
394
395 /* Write submission buttons. */
396
397 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
398 "man</button>\n"
399 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
400 "apropos</button>\n"
401 " <br/>\n");
402
403 /* Write section selector. */
404
405 puts(" <select name=\"sec\">");
406 for (i = 0; i < sec_MAX; i++) {
407 printf(" <option value=\"%s\"", sec_numbers[i]);
408 if (NULL != req->q.sec &&
409 0 == strcmp(sec_numbers[i], req->q.sec))
410 printf(" selected=\"selected\"");
411 printf(">%s</option>\n", sec_names[i]);
412 }
413 puts(" </select>");
414
415 /* Write architecture selector. */
416
417 printf( " <select name=\"arch\">\n"
418 " <option value=\"default\"");
419 if (NULL == req->q.arch)
420 printf(" selected=\"selected\"");
421 puts(">All Architectures</option>");
422 for (i = 0; i < arch_MAX; i++) {
423 printf(" <option value=\"%s\"", arch_names[i]);
424 if (NULL != req->q.arch &&
425 0 == strcmp(arch_names[i], req->q.arch))
426 printf(" selected=\"selected\"");
427 printf(">%s</option>\n", arch_names[i]);
428 }
429 puts(" </select>");
430
431 /* Write manpath selector. */
432
433 if (req->psz > 1) {
434 puts(" <select name=\"manpath\">");
435 for (i = 0; i < (int)req->psz; i++) {
436 printf(" <option ");
437 if (strcmp(req->q.manpath, req->p[i]) == 0)
438 printf("selected=\"selected\" ");
439 printf("value=\"");
440 html_print(req->p[i]);
441 printf("\">");
442 html_print(req->p[i]);
443 puts("</option>");
444 }
445 puts(" </select>");
446 }
447
448 puts(" </fieldset>\n"
449 "</form>");
450 }
451
/*
 * Check that a string consists only of characters that are safe
 * in a URI path: alphanumerics, '-', '.', '/' and '_'.
 * Returns 1 if so (the empty string qualifies), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
465
466 static int
467 validate_manpath(const struct req *req, const char* manpath)
468 {
469 size_t i;
470
471 for (i = 0; i < req->psz; i++)
472 if ( ! strcmp(manpath, req->p[i]))
473 return 1;
474
475 return 0;
476 }
477
/*
 * Check that a file name is acceptable for display: after an
 * optional leading "./", it must start with "man" or "cat" and
 * must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Never allow climbing out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
488
489 static void
490 pg_index(const struct req *req)
491 {
492
493 resp_begin_html(200, NULL);
494 resp_searchform(req, FOCUS_QUERY);
495 printf("<p>\n"
496 "This web interface is documented in the\n"
497 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
498 "manual, and the\n"
499 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
500 "manual explains the query syntax.\n"
501 "</p>\n",
502 scriptname, *scriptname == '\0' ? "" : "/",
503 scriptname, *scriptname == '\0' ? "" : "/");
504 resp_end_html();
505 }
506
507 static void
508 pg_noresult(const struct req *req, const char *msg)
509 {
510 resp_begin_html(200, NULL);
511 resp_searchform(req, FOCUS_QUERY);
512 puts("<p>");
513 puts(msg);
514 puts("</p>");
515 resp_end_html();
516 }
517
518 static void
519 pg_error_badrequest(const char *msg)
520 {
521
522 resp_begin_html(400, "Bad Request");
523 puts("<h1>Bad Request</h1>\n"
524 "<p>\n");
525 puts(msg);
526 printf("Try again from the\n"
527 "<a href=\"/%s\">main page</a>.\n"
528 "</p>", scriptname);
529 resp_end_html();
530 }
531
/*
 * The error page for a failure on the server side (HTTP status 500).
 * Details are not shown to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");

	puts("<p>Internal Server Error</p>");

	resp_end_html();
}
539
540 static void
541 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
542 {
543 char *arch, *archend;
544 const char *sec;
545 size_t i, iuse;
546 int archprio, archpriouse;
547 int prio, priouse;
548
549 for (i = 0; i < sz; i++) {
550 if (validate_filename(r[i].file))
551 continue;
552 warnx("invalid filename %s in %s database",
553 r[i].file, req->q.manpath);
554 pg_error_internal();
555 return;
556 }
557
558 if (req->isquery && sz == 1) {
559 /*
560 * If we have just one result, then jump there now
561 * without any delay.
562 */
563 printf("Status: 303 See Other\r\n");
564 printf("Location: http://%s/%s%s%s/%s",
565 HTTP_HOST, scriptname,
566 *scriptname == '\0' ? "" : "/",
567 req->q.manpath, r[0].file);
568 printf("\r\n"
569 "Content-Type: text/html; charset=utf-8\r\n"
570 "\r\n");
571 return;
572 }
573
574 resp_begin_html(200, NULL);
575 resp_searchform(req,
576 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
577
578 if (sz > 1) {
579 puts("<table class=\"results\">");
580 for (i = 0; i < sz; i++) {
581 printf(" <tr>\n"
582 " <td>"
583 "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
584 scriptname, *scriptname == '\0' ? "" : "/",
585 req->q.manpath, r[i].file);
586 html_print(r[i].names);
587 printf("</a></td>\n"
588 " <td><span class=\"Nd\">");
589 html_print(r[i].output);
590 puts("</span></td>\n"
591 " </tr>");
592 }
593 puts("</table>");
594 }
595
596 /*
597 * In man(1) mode, show one of the pages
598 * even if more than one is found.
599 */
600
601 if (req->q.equal || sz == 1) {
602 puts("<hr>");
603 iuse = 0;
604 priouse = 20;
605 archpriouse = 3;
606 for (i = 0; i < sz; i++) {
607 sec = r[i].file;
608 sec += strcspn(sec, "123456789");
609 if (sec[0] == '\0')
610 continue;
611 prio = sec_prios[sec[0] - '1'];
612 if (sec[1] != '/')
613 prio += 10;
614 if (req->q.arch == NULL) {
615 archprio =
616 ((arch = strchr(sec + 1, '/'))
617 == NULL) ? 3 :
618 ((archend = strchr(arch + 1, '/'))
619 == NULL) ? 0 :
620 strncmp(arch, "amd64/",
621 archend - arch) ? 2 : 1;
622 if (archprio < archpriouse) {
623 archpriouse = archprio;
624 priouse = prio;
625 iuse = i;
626 continue;
627 }
628 if (archprio > archpriouse)
629 continue;
630 }
631 if (prio >= priouse)
632 continue;
633 priouse = prio;
634 iuse = i;
635 }
636 resp_show(req, r[iuse].file);
637 }
638
639 resp_end_html();
640 }
641
/*
 * Write a preformatted manual page ("cat page") as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, a few known pairs of
 * different characters become '*' or '+', and any other "X\bY"
 * becomes bold Y.  All text is HTML-escaped.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 cur, over;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if (p[i + 1] != '\b') {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace:
			 * cur is overstruck with over.
			 */

			cur = p[i];
			over = p[i + 2];

			/* Italic mode. */

			if (cur == '_') {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * taken from around the backspace; comparing
			 * against p[i + 1] could never match.
			 */

			if ((cur == '+' && over == 'o') ||
			    (cur == 'o' && over == '+') ||
			    (cur == '|' && over == '=') ||
			    (cur == '=' && over == '|') ||
			    (cur == '*' && over == '=') ||
			    (cur == '=' && over == '*') ||
			    (cur == '*' && over == '|') ||
			    (cur == '|' && over == '*')) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if ((cur == '|' && over == '-') ||
			    (cur == '-' && over == '|') ||
			    (cur == '+' && over == '-') ||
			    (cur == '-' && over == '+') ||
			    (cur == '+' && over == '|') ||
			    (cur == '|' && over == '+')) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
778
779 static void
780 resp_format(const struct req *req, const char *file)
781 {
782 struct manoutput conf;
783 struct mparse *mp;
784 struct roff_man *man;
785 void *vp;
786 int fd;
787 int usepath;
788
789 if (-1 == (fd = open(file, O_RDONLY, 0))) {
790 puts("<p>You specified an invalid manual file.</p>");
791 return;
792 }
793
794 mchars_alloc();
795 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
796 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
797 mparse_readfd(mp, fd, file);
798 close(fd);
799
800 memset(&conf, 0, sizeof(conf));
801 conf.fragment = 1;
802 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
803 usepath = strcmp(req->q.manpath, req->p[0]);
804 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
805 usepath ? req->q.manpath : "", usepath ? "/" : "");
806
807 mparse_result(mp, &man, NULL);
808 if (man == NULL) {
809 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
810 pg_error_internal();
811 mparse_free(mp);
812 mchars_free();
813 return;
814 }
815
816 vp = html_alloc(&conf);
817
818 if (man->macroset == MACROSET_MDOC) {
819 mdoc_validate(man);
820 html_mdoc(vp, man);
821 } else {
822 man_validate(man);
823 html_man(vp, man);
824 }
825
826 html_free(vp);
827 mparse_free(mp);
828 mchars_free();
829 free(conf.man);
830 free(conf.style);
831 }
832
/*
 * Write one manual page: files whose name, after an optional
 * leading "./", starts with 'c' are treated as preformatted
 * cat pages, all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
845
846 static void
847 pg_show(struct req *req, const char *fullpath)
848 {
849 char *manpath;
850 const char *file;
851
852 if ((file = strchr(fullpath, '/')) == NULL) {
853 pg_error_badrequest(
854 "You did not specify a page to show.");
855 return;
856 }
857 manpath = mandoc_strndup(fullpath, file - fullpath);
858 file++;
859
860 if ( ! validate_manpath(req, manpath)) {
861 pg_error_badrequest(
862 "You specified an invalid manpath.");
863 free(manpath);
864 return;
865 }
866
867 /*
868 * Begin by chdir()ing into the manpath.
869 * This way we can pick up the database files, which are
870 * relative to the manpath root.
871 */
872
873 if (chdir(manpath) == -1) {
874 warn("chdir %s", manpath);
875 pg_error_internal();
876 free(manpath);
877 return;
878 }
879 free(manpath);
880
881 if ( ! validate_filename(file)) {
882 pg_error_badrequest(
883 "You specified an invalid manual file.");
884 return;
885 }
886
887 resp_begin_html(200, NULL);
888 resp_searchform(req, FOCUS_NONE);
889 resp_show(req, file);
890 resp_end_html();
891 }
892
893 static void
894 pg_search(const struct req *req)
895 {
896 struct mansearch search;
897 struct manpaths paths;
898 struct manpage *res;
899 char **argv;
900 char *query, *rp, *wp;
901 size_t ressz;
902 int argc;
903
904 /*
905 * Begin by chdir()ing into the root of the manpath.
906 * This way we can pick up the database files, which are
907 * relative to the manpath root.
908 */
909
910 if (chdir(req->q.manpath) == -1) {
911 warn("chdir %s", req->q.manpath);
912 pg_error_internal();
913 return;
914 }
915
916 search.arch = req->q.arch;
917 search.sec = req->q.sec;
918 search.outkey = "Nd";
919 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
920 search.firstmatch = 1;
921
922 paths.sz = 1;
923 paths.paths = mandoc_malloc(sizeof(char *));
924 paths.paths[0] = mandoc_strdup(".");
925
926 /*
927 * Break apart at spaces with backslash-escaping.
928 */
929
930 argc = 0;
931 argv = NULL;
932 rp = query = mandoc_strdup(req->q.query);
933 for (;;) {
934 while (isspace((unsigned char)*rp))
935 rp++;
936 if (*rp == '\0')
937 break;
938 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
939 argv[argc++] = wp = rp;
940 for (;;) {
941 if (isspace((unsigned char)*rp)) {
942 *wp = '\0';
943 rp++;
944 break;
945 }
946 if (rp[0] == '\\' && rp[1] != '\0')
947 rp++;
948 if (wp != rp)
949 *wp = *rp;
950 if (*rp == '\0')
951 break;
952 wp++;
953 rp++;
954 }
955 }
956
957 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
958 pg_noresult(req, "You entered an invalid query.");
959 else if (0 == ressz)
960 pg_noresult(req, "No results found.");
961 else
962 pg_searchres(req, res, ressz);
963
964 free(query);
965 mansearch_free(res, ressz);
966 free(paths.paths[0]);
967 free(paths.paths);
968 }
969
970 int
971 main(void)
972 {
973 struct req req;
974 struct itimerval itimer;
975 const char *path;
976 const char *querystring;
977 int i;
978
979 /* Poor man's ReDoS mitigation. */
980
981 itimer.it_value.tv_sec = 2;
982 itimer.it_value.tv_usec = 0;
983 itimer.it_interval.tv_sec = 2;
984 itimer.it_interval.tv_usec = 0;
985 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
986 warn("setitimer");
987 pg_error_internal();
988 return EXIT_FAILURE;
989 }
990
991 /*
992 * First we change directory into the MAN_DIR so that
993 * subsequent scanning for manpath directories is rooted
994 * relative to the same position.
995 */
996
997 if (chdir(MAN_DIR) == -1) {
998 warn("MAN_DIR: %s", MAN_DIR);
999 pg_error_internal();
1000 return EXIT_FAILURE;
1001 }
1002
1003 memset(&req, 0, sizeof(struct req));
1004 req.q.equal = 1;
1005 parse_manpath_conf(&req);
1006
1007 /* Parse the path info and the query string. */
1008
1009 if ((path = getenv("PATH_INFO")) == NULL)
1010 path = "";
1011 else if (*path == '/')
1012 path++;
1013
1014 if (*path != '\0') {
1015 parse_path_info(&req, path);
1016 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1017 path = "";
1018 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1019 parse_query_string(&req, querystring);
1020
1021 /* Validate parsed data and add defaults. */
1022
1023 if (req.q.manpath == NULL)
1024 req.q.manpath = mandoc_strdup(req.p[0]);
1025 else if ( ! validate_manpath(&req, req.q.manpath)) {
1026 pg_error_badrequest(
1027 "You specified an invalid manpath.");
1028 return EXIT_FAILURE;
1029 }
1030
1031 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1032 pg_error_badrequest(
1033 "You specified an invalid architecture.");
1034 return EXIT_FAILURE;
1035 }
1036
1037 /* Dispatch to the three different pages. */
1038
1039 if ('\0' != *path)
1040 pg_show(&req, path);
1041 else if (NULL != req.q.query)
1042 pg_search(&req);
1043 else
1044 pg_index(&req);
1045
1046 free(req.q.manpath);
1047 free(req.q.arch);
1048 free(req.q.sec);
1049 free(req.q.query);
1050 for (i = 0; i < (int)req.psz; i++)
1051 free(req.p[i]);
1052 free(req.p);
1053 return EXIT_SUCCESS;
1054 }
1055
1056 /*
1057 * If PATH_INFO is not a file name, translate it to a query.
1058 */
1059 static void
1060 parse_path_info(struct req *req, const char *path)
1061 {
1062 char *dir[4];
1063 int i;
1064
1065 req->isquery = 0;
1066 req->q.equal = 1;
1067 req->q.manpath = mandoc_strdup(path);
1068 req->q.arch = NULL;
1069
1070 /* Mandatory manual page name. */
1071 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1072 req->q.query = req->q.manpath;
1073 req->q.manpath = NULL;
1074 } else
1075 *req->q.query++ = '\0';
1076
1077 /* Optional trailing section. */
1078 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1079 if(isdigit((unsigned char)req->q.sec[1])) {
1080 *req->q.sec++ = '\0';
1081 req->q.sec = mandoc_strdup(req->q.sec);
1082 } else
1083 req->q.sec = NULL;
1084 }
1085
1086 /* Handle the case of name[.section] only. */
1087 if (req->q.manpath == NULL)
1088 return;
1089 req->q.query = mandoc_strdup(req->q.query);
1090
1091 /* Split directory components. */
1092 dir[i = 0] = req->q.manpath;
1093 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1094 if (++i == 3) {
1095 pg_error_badrequest(
1096 "You specified too many directory components.");
1097 exit(EXIT_FAILURE);
1098 }
1099 *dir[i]++ = '\0';
1100 }
1101
1102 /* Optional manpath. */
1103 if ((i = validate_manpath(req, req->q.manpath)) == 0)
1104 req->q.manpath = NULL;
1105 else if (dir[1] == NULL)
1106 return;
1107
1108 /* Optional section. */
1109 if (strncmp(dir[i], "man", 3) == 0) {
1110 free(req->q.sec);
1111 req->q.sec = mandoc_strdup(dir[i++] + 3);
1112 }
1113 if (dir[i] == NULL) {
1114 if (req->q.manpath == NULL)
1115 free(dir[0]);
1116 return;
1117 }
1118 if (dir[i + 1] != NULL) {
1119 pg_error_badrequest(
1120 "You specified an invalid directory component.");
1121 exit(EXIT_FAILURE);
1122 }
1123
1124 /* Optional architecture. */
1125 if (i) {
1126 req->q.arch = mandoc_strdup(dir[i]);
1127 if (req->q.manpath == NULL)
1128 free(dir[0]);
1129 } else
1130 req->q.arch = dir[0];
1131 }
1132
1133 /*
1134 * Scan for indexable paths.
1135 */
1136 static void
1137 parse_manpath_conf(struct req *req)
1138 {
1139 FILE *fp;
1140 char *dp;
1141 size_t dpsz;
1142 ssize_t len;
1143
1144 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1145 warn("%s/manpath.conf", MAN_DIR);
1146 pg_error_internal();
1147 exit(EXIT_FAILURE);
1148 }
1149
1150 dp = NULL;
1151 dpsz = 0;
1152
1153 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1154 if (dp[len - 1] == '\n')
1155 dp[--len] = '\0';
1156 req->p = mandoc_realloc(req->p,
1157 (req->psz + 1) * sizeof(char *));
1158 if ( ! validate_urifrag(dp)) {
1159 warnx("%s/manpath.conf contains "
1160 "unsafe path \"%s\"", MAN_DIR, dp);
1161 pg_error_internal();
1162 exit(EXIT_FAILURE);
1163 }
1164 if (strchr(dp, '/') != NULL) {
1165 warnx("%s/manpath.conf contains "
1166 "path with slash \"%s\"", MAN_DIR, dp);
1167 pg_error_internal();
1168 exit(EXIT_FAILURE);
1169 }
1170 req->p[req->psz++] = dp;
1171 dp = NULL;
1172 dpsz = 0;
1173 }
1174 free(dp);
1175
1176 if (req->p == NULL) {
1177 warnx("%s/manpath.conf is empty", MAN_DIR);
1178 pg_error_internal();
1179 exit(EXIT_FAILURE);
1180 }
1181 }