]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Port ctags-style, less(1) :t internal searching from terminal output
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.148 2017/02/22 16:20:01 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * A query as passed to the search function.
 * The string members are NULL when the corresponding parameter
 * was not given; main() releases all four with free() on exit.
 */
struct query {
	char *manpath; /* desired manual directory */
	char *arch; /* architecture */
	char *sec; /* manual section */
	char *query; /* unparsed query expression */
	int equal; /* match whole names, not substrings */
};
56
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths read from manpath.conf by parse_manpath_conf().
 * The first entry p[0] serves as the default manpath in main().
 */
struct req {
	struct query q;
	char **p; /* array of available manpaths */
	size_t psz; /* number of available manpaths */
	int isquery; /* QUERY_STRING used, not PATH_INFO */
};
63
/*
 * Which element of the search form gets the "autofocus" attribute:
 * FOCUS_QUERY marks the query input box, FOCUS_NONE marks nothing.
 */
enum focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
68
/*
 * Forward declarations of all file-local functions, in alphabetical
 * order: html_* escape output, parse_* read the request and the
 * configuration, pg_* emit complete pages, resp_* emit page parts,
 * and validate_* check untrusted strings.
 */
static void html_print(const char *);
static void html_putchar(char);
static int http_decode(char *);
static void parse_manpath_conf(struct req *);
static void parse_path_info(struct req *req, const char *path);
static void parse_query_string(struct req *, const char *);
static void pg_error_badrequest(const char *);
static void pg_error_internal(void);
static void pg_index(const struct req *);
static void pg_noresult(const struct req *, const char *);
static void pg_search(const struct req *);
static void pg_searchres(const struct req *,
	struct manpage *, size_t);
static void pg_show(struct req *, const char *);
static void resp_begin_html(int, const char *);
static void resp_begin_http(int, const char *);
static void resp_catman(const struct req *, const char *);
static void resp_copy(const char *);
static void resp_end_html(void);
static void resp_format(const struct req *, const char *);
static void resp_searchform(const struct req *, enum focus);
static void resp_show(const struct req *, const char *);
static void set_query_attr(char **, char **);
static int validate_filename(const char *);
static int validate_manpath(const struct req *, const char *);
static int validate_urifrag(const char *);
95
/* Script name used as the first URI component of generated links. */
static const char *scriptname = SCRIPT_NAME;

/*
 * Display priority of sections 1 to 9, indexed by the section digit
 * minus '1'; pg_searchres() prefers the page with the smallest value.
 */
static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Option values of the section selector in the search form.
 * Parallel to sec_names[]; "0" stands for "All Sections".
 */
static const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
/* Labels of the section selector, parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[] (and in sec_numbers[]). */
static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
116
/*
 * Architecture names offered by the architecture selector of the
 * search form, each used both as option value and as label.
 */
static const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64",
	"hppa", "i386", "landisk",
	"loongson", "luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc", "sparc64",
	"amiga", "arc", "armish", "arm32",
	"atari", "aviion", "beagle", "cats",
	"hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos",
	"pmax", "powerpc", "solbourne", "sparc",
	"sun3", "vax", "wgrisc", "x68k",
	"zaurus"
};
/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(char *);
132
/*
 * Write one character to standard output, replacing the four
 * HTML-special characters (double quote, ampersand, greater-than
 * and less-than) by their entities.
 * Everything else, non-ASCII bytes included, is passed through as is.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
159
/*
 * Print a whole string through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
173
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
190
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 *
 * Pairs are separated by ';' or '&'.  A key that is not followed
 * by '=' is skipped, and so is any pair whose key or value fails
 * http_decode().  All query fields are reset first; the previous
 * pointers are overwritten without being freed.
 * Recognized keys are "query", "apropos", "manpath", "sec" and
 * "arch"; with COMPAT_OLDURI, "sektion" is an alias for "sec".
 */
static void
parse_query_string(struct req *req, const char *qs)
{
	char *key, *val;
	size_t keysz, valsz;

	req->isquery = 1;
	req->q.manpath = NULL;
	req->q.arch = NULL;
	req->q.sec = NULL;
	req->q.query = NULL;
	req->q.equal = 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* Only the value "0" selects whole-name matching. */
		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/* Rewrite a leading "OpenBSD C" to "OpenBSD-c". */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/* "0" means all sections: store no section. */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/* "default" means all architectures. */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Step over the pair separator, if any. */
		if (*qs != '\0')
			qs++;
	}
}
280
/*
 * HTTP-decode a string in place: every '+' becomes a blank and
 * every "%XX" escape, where XX are exactly two hexadecimal digits,
 * becomes the byte with that value, such that "%4e+foo" turns
 * into "N foo".  The result is never longer than the input.
 *
 * Return 1 on success or 0 on an encoding error, that is, for a
 * truncated escape, for an escape containing anything other than
 * hexadecimal digits (including signs and blanks), and for "%00".
 * After an error, the content of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	long	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Check both digits explicitly.  The first test also
		 * catches the terminating NUL, so p[2] is only read
		 * when p[1] is a valid character of the string.
		 */
		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* An embedded NUL byte would truncate the string. */
		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
316
/*
 * Print the CGI response header and flush standard output.
 * A "Status:" line with the given code and message is only
 * printed for codes other than 200; msg is not used for 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
331
/*
 * Copy the content of the given file verbatim to standard output.
 * Nothing is printed if the file cannot be opened.
 * Pending stdio output is flushed first because the copy bypasses
 * stdio and writes to the file descriptor directly.
 * Short writes are continued until the whole buffer is out;
 * copying stops silently at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	for (;;) {
		nread = read(fd, buf, sizeof(buf));
		if (nread == -1 && errno == EINTR)
			continue;
		if (nread <= 0)
			break;

		/* write(2) may accept less than requested. */
		off = 0;
		while (off < nread) {
			nwritten = write(STDOUT_FILENO, buf + off,
			    (size_t)(nread - off));
			if (nwritten == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += nwritten;
		}
	}
	close(fd);
}
346
347 static void
348 resp_begin_html(int code, const char *msg)
349 {
350
351 resp_begin_http(code, msg);
352
353 printf("<!DOCTYPE html>\n"
354 "<html>\n"
355 "<head>\n"
356 " <meta charset=\"UTF-8\"/>\n"
357 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
358 " type=\"text/css\" media=\"all\">\n"
359 " <title>%s</title>\n"
360 "</head>\n"
361 "<body>\n",
362 CSS_DIR, CUSTOMIZE_TITLE);
363
364 resp_copy(MAN_DIR "/header.html");
365 }
366
367 static void
368 resp_end_html(void)
369 {
370
371 resp_copy(MAN_DIR "/footer.html");
372
373 puts("</body>\n"
374 "</html>");
375 }
376
377 static void
378 resp_searchform(const struct req *req, enum focus focus)
379 {
380 int i;
381
382 printf("<form action=\"/%s\" method=\"get\">\n"
383 " <fieldset>\n"
384 " <legend>Manual Page Search Parameters</legend>\n",
385 scriptname);
386
387 /* Write query input box. */
388
389 printf(" <input type=\"text\" name=\"query\" value=\"");
390 if (req->q.query != NULL)
391 html_print(req->q.query);
392 printf( "\" size=\"40\"");
393 if (focus == FOCUS_QUERY)
394 printf(" autofocus");
395 puts(">");
396
397 /* Write submission buttons. */
398
399 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
400 "man</button>\n"
401 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
402 "apropos</button>\n"
403 " <br/>\n");
404
405 /* Write section selector. */
406
407 puts(" <select name=\"sec\">");
408 for (i = 0; i < sec_MAX; i++) {
409 printf(" <option value=\"%s\"", sec_numbers[i]);
410 if (NULL != req->q.sec &&
411 0 == strcmp(sec_numbers[i], req->q.sec))
412 printf(" selected=\"selected\"");
413 printf(">%s</option>\n", sec_names[i]);
414 }
415 puts(" </select>");
416
417 /* Write architecture selector. */
418
419 printf( " <select name=\"arch\">\n"
420 " <option value=\"default\"");
421 if (NULL == req->q.arch)
422 printf(" selected=\"selected\"");
423 puts(">All Architectures</option>");
424 for (i = 0; i < arch_MAX; i++) {
425 printf(" <option value=\"%s\"", arch_names[i]);
426 if (NULL != req->q.arch &&
427 0 == strcmp(arch_names[i], req->q.arch))
428 printf(" selected=\"selected\"");
429 printf(">%s</option>\n", arch_names[i]);
430 }
431 puts(" </select>");
432
433 /* Write manpath selector. */
434
435 if (req->psz > 1) {
436 puts(" <select name=\"manpath\">");
437 for (i = 0; i < (int)req->psz; i++) {
438 printf(" <option ");
439 if (strcmp(req->q.manpath, req->p[i]) == 0)
440 printf("selected=\"selected\" ");
441 printf("value=\"");
442 html_print(req->p[i]);
443 printf("\">");
444 html_print(req->p[i]);
445 puts("</option>");
446 }
447 puts(" </select>");
448 }
449
450 puts(" </fieldset>\n"
451 "</form>");
452 }
453
/*
 * Check that a string consists exclusively of alphanumeric
 * characters, hyphens, dots, slashes and underscores.
 * Return 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
467
468 static int
469 validate_manpath(const struct req *req, const char* manpath)
470 {
471 size_t i;
472
473 for (i = 0; i < req->psz; i++)
474 if ( ! strcmp(manpath, req->p[i]))
475 return 1;
476
477 return 0;
478 }
479
/*
 * Check a file name relative to a manpath root.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Return 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
490
491 static void
492 pg_index(const struct req *req)
493 {
494
495 resp_begin_html(200, NULL);
496 resp_searchform(req, FOCUS_QUERY);
497 printf("<p>\n"
498 "This web interface is documented in the\n"
499 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500 "manual, and the\n"
501 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502 "manual explains the query syntax.\n"
503 "</p>\n",
504 scriptname, *scriptname == '\0' ? "" : "/",
505 scriptname, *scriptname == '\0' ? "" : "/");
506 resp_end_html();
507 }
508
509 static void
510 pg_noresult(const struct req *req, const char *msg)
511 {
512 resp_begin_html(200, NULL);
513 resp_searchform(req, FOCUS_QUERY);
514 puts("<p>");
515 puts(msg);
516 puts("</p>");
517 resp_end_html();
518 }
519
520 static void
521 pg_error_badrequest(const char *msg)
522 {
523
524 resp_begin_html(400, "Bad Request");
525 puts("<h1>Bad Request</h1>\n"
526 "<p>\n");
527 puts(msg);
528 printf("Try again from the\n"
529 "<a href=\"/%s\">main page</a>.\n"
530 "</p>", scriptname);
531 resp_end_html();
532 }
533
/*
 * Print a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541
542 static void
543 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544 {
545 char *arch, *archend;
546 const char *sec;
547 size_t i, iuse;
548 int archprio, archpriouse;
549 int prio, priouse;
550
551 for (i = 0; i < sz; i++) {
552 if (validate_filename(r[i].file))
553 continue;
554 warnx("invalid filename %s in %s database",
555 r[i].file, req->q.manpath);
556 pg_error_internal();
557 return;
558 }
559
560 if (req->isquery && sz == 1) {
561 /*
562 * If we have just one result, then jump there now
563 * without any delay.
564 */
565 printf("Status: 303 See Other\r\n");
566 printf("Location: http://%s/%s%s%s/%s",
567 HTTP_HOST, scriptname,
568 *scriptname == '\0' ? "" : "/",
569 req->q.manpath, r[0].file);
570 printf("\r\n"
571 "Content-Type: text/html; charset=utf-8\r\n"
572 "\r\n");
573 return;
574 }
575
576 resp_begin_html(200, NULL);
577 resp_searchform(req,
578 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579
580 if (sz > 1) {
581 puts("<table class=\"results\">");
582 for (i = 0; i < sz; i++) {
583 printf(" <tr>\n"
584 " <td>"
585 "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
586 scriptname, *scriptname == '\0' ? "" : "/",
587 req->q.manpath, r[i].file);
588 html_print(r[i].names);
589 printf("</a></td>\n"
590 " <td><span class=\"Nd\">");
591 html_print(r[i].output);
592 puts("</span></td>\n"
593 " </tr>");
594 }
595 puts("</table>");
596 }
597
598 /*
599 * In man(1) mode, show one of the pages
600 * even if more than one is found.
601 */
602
603 if (req->q.equal || sz == 1) {
604 puts("<hr>");
605 iuse = 0;
606 priouse = 20;
607 archpriouse = 3;
608 for (i = 0; i < sz; i++) {
609 sec = r[i].file;
610 sec += strcspn(sec, "123456789");
611 if (sec[0] == '\0')
612 continue;
613 prio = sec_prios[sec[0] - '1'];
614 if (sec[1] != '/')
615 prio += 10;
616 if (req->q.arch == NULL) {
617 archprio =
618 ((arch = strchr(sec + 1, '/'))
619 == NULL) ? 3 :
620 ((archend = strchr(arch + 1, '/'))
621 == NULL) ? 0 :
622 strncmp(arch, "amd64/",
623 archend - arch) ? 2 : 1;
624 if (archprio < archpriouse) {
625 archpriouse = archprio;
626 priouse = prio;
627 iuse = i;
628 continue;
629 }
630 if (archprio > archpriouse)
631 continue;
632 }
633 if (prio >= priouse)
634 continue;
635 priouse = prio;
636 iuse = i;
637 }
638 resp_show(req, r[iuse].file);
639 }
640
641 resp_end_html();
642 }
643
/*
 * Print a preformatted manual page (a "cat page") as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: underscore-backspace-character becomes italic,
 * some special pairs of overstruck characters become a plain '*'
 * or '+', and any other character-backspace-character sequence
 * becomes bold.  All text is passed through html_putchar().
 * If the file cannot be opened, an error paragraph is printed.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE	*f;
	char	*p;
	size_t	 sz;
	ssize_t	 len;
	int	 i;
	int	 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				/*
				 * The partner character is at p[i + 2];
				 * p[i + 1] is always the backspace here.
				 */
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
780
781 static void
782 resp_format(const struct req *req, const char *file)
783 {
784 struct manoutput conf;
785 struct mparse *mp;
786 struct roff_man *man;
787 void *vp;
788 int fd;
789 int usepath;
790
791 if (-1 == (fd = open(file, O_RDONLY, 0))) {
792 puts("<p>You specified an invalid manual file.</p>");
793 return;
794 }
795
796 mchars_alloc();
797 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
798 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
799 mparse_readfd(mp, fd, file);
800 close(fd);
801
802 memset(&conf, 0, sizeof(conf));
803 conf.fragment = 1;
804 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
805 usepath = strcmp(req->q.manpath, req->p[0]);
806 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
807 usepath ? req->q.manpath : "", usepath ? "/" : "");
808
809 mparse_result(mp, &man, NULL);
810 if (man == NULL) {
811 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
812 pg_error_internal();
813 mparse_free(mp);
814 mchars_free();
815 return;
816 }
817
818 vp = html_alloc(&conf);
819
820 if (man->macroset == MACROSET_MDOC) {
821 mdoc_validate(man);
822 html_mdoc(vp, man);
823 } else {
824 man_validate(man);
825 html_man(vp, man);
826 }
827
828 html_free(vp);
829 mparse_free(mp);
830 mchars_free();
831 free(conf.man);
832 free(conf.style);
833 }
834
/*
 * Print the content of one manual page file.
 * After skipping an optional leading "./", names starting with 'c'
 * are handled as preformatted pages by resp_catman(),
 * all others as manual page sources by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
847
848 static void
849 pg_show(struct req *req, const char *fullpath)
850 {
851 char *manpath;
852 const char *file;
853
854 if ((file = strchr(fullpath, '/')) == NULL) {
855 pg_error_badrequest(
856 "You did not specify a page to show.");
857 return;
858 }
859 manpath = mandoc_strndup(fullpath, file - fullpath);
860 file++;
861
862 if ( ! validate_manpath(req, manpath)) {
863 pg_error_badrequest(
864 "You specified an invalid manpath.");
865 free(manpath);
866 return;
867 }
868
869 /*
870 * Begin by chdir()ing into the manpath.
871 * This way we can pick up the database files, which are
872 * relative to the manpath root.
873 */
874
875 if (chdir(manpath) == -1) {
876 warn("chdir %s", manpath);
877 pg_error_internal();
878 free(manpath);
879 return;
880 }
881 free(manpath);
882
883 if ( ! validate_filename(file)) {
884 pg_error_badrequest(
885 "You specified an invalid manual file.");
886 return;
887 }
888
889 resp_begin_html(200, NULL);
890 resp_searchform(req, FOCUS_NONE);
891 resp_show(req, file);
892 resp_end_html();
893 }
894
895 static void
896 pg_search(const struct req *req)
897 {
898 struct mansearch search;
899 struct manpaths paths;
900 struct manpage *res;
901 char **argv;
902 char *query, *rp, *wp;
903 size_t ressz;
904 int argc;
905
906 /*
907 * Begin by chdir()ing into the root of the manpath.
908 * This way we can pick up the database files, which are
909 * relative to the manpath root.
910 */
911
912 if (chdir(req->q.manpath) == -1) {
913 warn("chdir %s", req->q.manpath);
914 pg_error_internal();
915 return;
916 }
917
918 search.arch = req->q.arch;
919 search.sec = req->q.sec;
920 search.outkey = "Nd";
921 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
922 search.firstmatch = 1;
923
924 paths.sz = 1;
925 paths.paths = mandoc_malloc(sizeof(char *));
926 paths.paths[0] = mandoc_strdup(".");
927
928 /*
929 * Break apart at spaces with backslash-escaping.
930 */
931
932 argc = 0;
933 argv = NULL;
934 rp = query = mandoc_strdup(req->q.query);
935 for (;;) {
936 while (isspace((unsigned char)*rp))
937 rp++;
938 if (*rp == '\0')
939 break;
940 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
941 argv[argc++] = wp = rp;
942 for (;;) {
943 if (isspace((unsigned char)*rp)) {
944 *wp = '\0';
945 rp++;
946 break;
947 }
948 if (rp[0] == '\\' && rp[1] != '\0')
949 rp++;
950 if (wp != rp)
951 *wp = *rp;
952 if (*rp == '\0')
953 break;
954 wp++;
955 rp++;
956 }
957 }
958
959 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
960 pg_noresult(req, "You entered an invalid query.");
961 else if (0 == ressz)
962 pg_noresult(req, "No results found.");
963 else
964 pg_searchres(req, res, ressz);
965
966 free(query);
967 mansearch_free(res, ressz);
968 free(paths.paths[0]);
969 free(paths.paths);
970 }
971
/*
 * CGI entry point.
 * Set up the sandbox and the CPU time limit, change into MAN_DIR,
 * read manpath.conf, parse PATH_INFO or else QUERY_STRING,
 * validate the result, and dispatch to pg_show(), pg_search()
 * or pg_index().
 * Return EXIT_FAILURE if setup fails or the request is invalid,
 * EXIT_SUCCESS once a page function has been called.
 */
int
main(void)
{
	struct req req;
	struct itimerval itimer;
	const char *path;
	const char *querystring;
	int i;

#if HAVE_PLEDGE
	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}
#endif

	/* Poor man's ReDoS mitigation. */

	/* Arm a virtual (CPU time) interval timer of two seconds. */
	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * It is only kept as a file to show if it contains a valid
	 * manpath and names an existing file; otherwise, the query
	 * derived from it by parse_path_info() is used.
	 */
	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the query strings and the list of manpaths. */
	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1073
/*
 * If PATH_INFO is not a file name, translate it to a query.
 *
 * The path is taken apart from the right: the last component
 * is the page name, optionally with a trailing ".section" that
 * starts with a digit.  Up to three directory components may
 * precede it: an optional manpath, an optional "man<section>"
 * directory, and an optional architecture.
 * Too many or unexpected components result in a "Bad Request"
 * page followed by exit(3).
 * Sets isquery to 0 and equal to 1.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char *dir[4];
	int i;

	req->isquery = 0;
	req->q.equal = 1;
	/* Private copy of the path; it is split in place below. */
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No slash: the whole copy is the page name. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		if(isdigit((unsigned char)req->q.sec[1])) {
			/* Cut the name at the dot, copy the suffix. */
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;
	/* The name still points into the copy: detach it. */
	req->q.query = mandoc_strdup(req->q.query);

	/* Split directory components. */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * From here on, i is 1 if the first component is a valid
	 * manpath and 0 otherwise, such that dir[i] is the first
	 * component not yet consumed.  dir[0] always remains the
	 * start of the copy of the path.
	 */

	/* Optional manpath. */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		/* The directory overrides a trailing ".section". */
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* The copy is not used as the manpath: release it. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/* Optional architecture. */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		/* Single component: the copy itself is the arch. */
		req->q.arch = dir[0];
}
1150
1151 /*
1152 * Scan for indexable paths.
1153 */
1154 static void
1155 parse_manpath_conf(struct req *req)
1156 {
1157 FILE *fp;
1158 char *dp;
1159 size_t dpsz;
1160 ssize_t len;
1161
1162 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1163 warn("%s/manpath.conf", MAN_DIR);
1164 pg_error_internal();
1165 exit(EXIT_FAILURE);
1166 }
1167
1168 dp = NULL;
1169 dpsz = 0;
1170
1171 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1172 if (dp[len - 1] == '\n')
1173 dp[--len] = '\0';
1174 req->p = mandoc_realloc(req->p,
1175 (req->psz + 1) * sizeof(char *));
1176 if ( ! validate_urifrag(dp)) {
1177 warnx("%s/manpath.conf contains "
1178 "unsafe path \"%s\"", MAN_DIR, dp);
1179 pg_error_internal();
1180 exit(EXIT_FAILURE);
1181 }
1182 if (strchr(dp, '/') != NULL) {
1183 warnx("%s/manpath.conf contains "
1184 "path with slash \"%s\"", MAN_DIR, dp);
1185 pg_error_internal();
1186 exit(EXIT_FAILURE);
1187 }
1188 req->p[req->psz++] = dp;
1189 dp = NULL;
1190 dpsz = 0;
1191 }
1192 free(dp);
1193
1194 if (req->p == NULL) {
1195 warnx("%s/manpath.conf is empty", MAN_DIR);
1196 pg_error_internal();
1197 exit(EXIT_FAILURE);
1198 }
1199 }