]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
For accessibility, label the last two widgets in the search form.
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.180 2022/07/06 17:21:04 schwarze Exp $ */
2 /*
3 * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Implementation of the man.cgi(8) program.
20 */
21 #include "config.h"
22
23 #include <sys/types.h>
24 #include <sys/time.h>
25
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <limits.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc_aux.h"
40 #include "mandoc.h"
41 #include "roff.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "mandoc_parse.h"
45 #include "main.h"
46 #include "manconf.h"
47 #include "mansearch.h"
48 #include "cgi.h"
49
/*
 * The parameters of one search,
 * in the form handed to the search function.
 */
struct query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
60
61 struct req {
62 struct query q;
63 char **p; /* array of available manpaths */
64 size_t psz; /* number of available manpaths */
65 int isquery; /* QUERY_STRING used, not PATH_INFO */
66 };
67
/*
 * Which widget of the search form, if any,
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
72
73 static void html_print(const char *);
74 static void html_putchar(char);
75 static int http_decode(char *);
76 static void http_encode(const char *);
77 static void parse_manpath_conf(struct req *);
78 static void parse_path_info(struct req *, const char *);
79 static void parse_query_string(struct req *, const char *);
80 static void pg_error_badrequest(const char *);
81 static void pg_error_internal(void);
82 static void pg_index(const struct req *);
83 static void pg_noresult(const struct req *, int, const char *,
84 const char *);
85 static void pg_redirect(const struct req *, const char *);
86 static void pg_search(const struct req *);
87 static void pg_searchres(const struct req *,
88 struct manpage *, size_t);
89 static void pg_show(struct req *, const char *);
90 static int resp_begin_html(int, const char *, const char *);
91 static void resp_begin_http(int, const char *);
92 static void resp_catman(const struct req *, const char *);
93 static int resp_copy(const char *, const char *);
94 static void resp_end_html(void);
95 static void resp_format(const struct req *, const char *);
96 static void resp_searchform(const struct req *, enum focus);
97 static void resp_show(const struct req *, const char *);
98 static void set_query_attr(char **, char **);
99 static int validate_arch(const char *);
100 static int validate_filename(const char *);
101 static int validate_manpath(const struct req *, const char *);
102 static int validate_urifrag(const char *);
103
104 static const char *scriptname = SCRIPT_NAME;
105
/*
 * Rank of the sections 1 to 9, indexed by section digit minus one.
 * When several pages match a name, the lowest rank is shown.
 */
static	const int	 sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Option values of the section selector, parallel to sec_names[]. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int	 sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
124
/*
 * The architecture names accepted in requests.
 * The architecture selector lists them in this order.
 */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",	"loongson",
	"luna88k",	"macppc",	"mips64",	"octeon",
	"powerpc64",	"riscv64",	"sparc64",

	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"sgi",		"socppc",
	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of known architecture names. */
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
142
/*
 * Write one character to standard output, replacing the four
 * characters  "  &  >  <  by the corresponding HTML entities.
 * Everything else, including non-ASCII bytes, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
169
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
183
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In either case, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
200
201 /*
202 * Parse the QUERY_STRING for key-value pairs
203 * and store the values into the query structure.
204 */
205 static void
206 parse_query_string(struct req *req, const char *qs)
207 {
208 char *key, *val;
209 size_t keysz, valsz;
210
211 req->isquery = 1;
212 req->q.manpath = NULL;
213 req->q.arch = NULL;
214 req->q.sec = NULL;
215 req->q.query = NULL;
216 req->q.equal = 1;
217
218 key = val = NULL;
219 while (*qs != '\0') {
220
221 /* Parse one key. */
222
223 keysz = strcspn(qs, "=;&");
224 key = mandoc_strndup(qs, keysz);
225 qs += keysz;
226 if (*qs != '=')
227 goto next;
228
229 /* Parse one value. */
230
231 valsz = strcspn(++qs, ";&");
232 val = mandoc_strndup(qs, valsz);
233 qs += valsz;
234
235 /* Decode and catch encoding errors. */
236
237 if ( ! (http_decode(key) && http_decode(val)))
238 goto next;
239
240 /* Handle key-value pairs. */
241
242 if ( ! strcmp(key, "query"))
243 set_query_attr(&req->q.query, &val);
244
245 else if ( ! strcmp(key, "apropos"))
246 req->q.equal = !strcmp(val, "0");
247
248 else if ( ! strcmp(key, "manpath")) {
249 #ifdef COMPAT_OLDURI
250 if ( ! strncmp(val, "OpenBSD ", 8)) {
251 val[7] = '-';
252 if ('C' == val[8])
253 val[8] = 'c';
254 }
255 #endif
256 set_query_attr(&req->q.manpath, &val);
257 }
258
259 else if ( ! (strcmp(key, "sec")
260 #ifdef COMPAT_OLDURI
261 && strcmp(key, "sektion")
262 #endif
263 )) {
264 if ( ! strcmp(val, "0"))
265 *val = '\0';
266 set_query_attr(&req->q.sec, &val);
267 }
268
269 else if ( ! strcmp(key, "arch")) {
270 if ( ! strcmp(val, "default"))
271 *val = '\0';
272 set_query_attr(&req->q.arch, &val);
273 }
274
275 /*
276 * The key must be freed in any case.
277 * The val may have been handed over to the query
278 * structure, in which case it is now NULL.
279 */
280 next:
281 free(key);
282 key = NULL;
283 free(val);
284 val = NULL;
285
286 if (*qs != '\0')
287 qs++;
288 }
289 }
290
291 /*
292 * HTTP-decode a string. The standard explanation is that this turns
293 * "%4e+foo" into "n foo" in the regular way. This is done in-place
294 * over the allocated string.
295 */
296 static int
297 http_decode(char *p)
298 {
299 char hex[3];
300 char *q;
301 int c;
302
303 hex[2] = '\0';
304
305 q = p;
306 for ( ; '\0' != *p; p++, q++) {
307 if ('%' == *p) {
308 if ('\0' == (hex[0] = *(p + 1)))
309 return 0;
310 if ('\0' == (hex[1] = *(p + 2)))
311 return 0;
312 if (1 != sscanf(hex, "%x", &c))
313 return 0;
314 if ('\0' == c)
315 return 0;
316
317 *q = (char)c;
318 p += 2;
319 } else
320 *q = '+' == *p ? ' ' : *p;
321 }
322
323 *q = '\0';
324 return 1;
325 }
326
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and the characters  -  .  _  ~  are
 * written as they are, every other byte as "%XX" in upper case.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
338
/*
 * Write the HTTP response headers and the empty line ending them,
 * then flush standard output.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
355
/*
 * Copy the content of a file to standard output.
 * If element is not NULL, an opening tag of that name is written
 * first; writing the closing tag is left to the caller.
 * Return 0 without any output if the file cannot be opened,
 * or 1 otherwise, even if copying ends early due to an I/O error.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* The data below bypasses stdio, so drain its buffer first. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may accept less than requested. */

		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;
			goto done;
		}
	}

done:
	close(fd);
	return 1;
}
374
375 static int
376 resp_begin_html(int code, const char *msg, const char *file)
377 {
378 const char *name, *sec, *cp;
379 int namesz, secsz;
380
381 resp_begin_http(code, msg);
382
383 printf("<!DOCTYPE html>\n"
384 "<html>\n"
385 "<head>\n"
386 " <meta charset=\"UTF-8\"/>\n"
387 " <meta name=\"viewport\""
388 " content=\"width=device-width, initial-scale=1.0\">\n"
389 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
390 " type=\"text/css\" media=\"all\">\n"
391 " <title>",
392 CSS_DIR);
393 if (file != NULL) {
394 cp = strrchr(file, '/');
395 name = cp == NULL ? file : cp + 1;
396 cp = strrchr(name, '.');
397 namesz = cp == NULL ? strlen(name) : cp - name;
398 sec = NULL;
399 if (cp != NULL && cp[1] != '0') {
400 sec = cp + 1;
401 secsz = strlen(sec);
402 } else if (name - file > 1) {
403 for (cp = name - 2; cp >= file; cp--) {
404 if (*cp < '1' || *cp > '9')
405 continue;
406 sec = cp;
407 secsz = name - cp - 1;
408 break;
409 }
410 }
411 printf("%.*s", namesz, name);
412 if (sec != NULL)
413 printf("(%.*s)", secsz, sec);
414 fputs(" - ", stdout);
415 }
416 printf("%s</title>\n"
417 "</head>\n"
418 "<body>\n",
419 CUSTOMIZE_TITLE);
420
421 return resp_copy("header", MAN_DIR "/header.html");
422 }
423
424 static void
425 resp_end_html(void)
426 {
427 if (resp_copy("footer", MAN_DIR "/footer.html"))
428 puts("</footer>");
429
430 puts("</body>\n"
431 "</html>");
432 }
433
434 static void
435 resp_searchform(const struct req *req, enum focus focus)
436 {
437 int i;
438
439 printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
440 "autocomplete=\"off\" autocapitalize=\"none\">\n"
441 " <fieldset>\n"
442 " <legend>Manual Page Search Parameters</legend>\n",
443 scriptname);
444
445 /* Write query input box. */
446
447 printf(" <label>Search query:\n"
448 " <input type=\"search\" name=\"query\" value=\"");
449 if (req->q.query != NULL)
450 html_print(req->q.query);
451 printf("\" size=\"40\"");
452 if (focus == FOCUS_QUERY)
453 printf(" autofocus");
454 puts(">\n </label>");
455
456 /* Write submission buttons. */
457
458 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
459 "man</button>\n"
460 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
461 "apropos</button>\n"
462 " <br/>\n");
463
464 /* Write section selector. */
465
466 puts(" <select name=\"sec\" aria-label=\"Manual section\">");
467 for (i = 0; i < sec_MAX; i++) {
468 printf(" <option value=\"%s\"", sec_numbers[i]);
469 if (NULL != req->q.sec &&
470 0 == strcmp(sec_numbers[i], req->q.sec))
471 printf(" selected=\"selected\"");
472 printf(">%s</option>\n", sec_names[i]);
473 }
474 puts(" </select>");
475
476 /* Write architecture selector. */
477
478 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n"
479 " <option value=\"default\"");
480 if (NULL == req->q.arch)
481 printf(" selected=\"selected\"");
482 puts(">All Architectures</option>");
483 for (i = 0; i < arch_MAX; i++) {
484 printf(" <option");
485 if (NULL != req->q.arch &&
486 0 == strcmp(arch_names[i], req->q.arch))
487 printf(" selected=\"selected\"");
488 printf(">%s</option>\n", arch_names[i]);
489 }
490 puts(" </select>");
491
492 /* Write manpath selector. */
493
494 if (req->psz > 1) {
495 puts(" <select name=\"manpath\""
496 " aria-label=\"Manual path\">");
497 for (i = 0; i < (int)req->psz; i++) {
498 printf(" <option");
499 if (strcmp(req->q.manpath, req->p[i]) == 0)
500 printf(" selected=\"selected\"");
501 printf(">");
502 html_print(req->p[i]);
503 puts("</option>");
504 }
505 puts(" </select>");
506 }
507
508 puts(" </fieldset>\n"
509 "</form>");
510 }
511
/*
 * Check that a string consists of nothing but alphanumeric
 * characters and the characters  -  .  /  _  .
 * Return 1 if so, which includes the empty string, or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if (!isalnum(*cp))
			return 0;
	}
	return 1;
}
525
526 static int
527 validate_manpath(const struct req *req, const char* manpath)
528 {
529 size_t i;
530
531 for (i = 0; i < req->psz; i++)
532 if ( ! strcmp(manpath, req->p[i]))
533 return 1;
534
535 return 0;
536 }
537
538 static int
539 validate_arch(const char *arch)
540 {
541 int i;
542
543 for (i = 0; i < arch_MAX; i++)
544 if (strcmp(arch, arch_names[i]) == 0)
545 return 1;
546
547 return 0;
548 }
549
/*
 * Check a manual page file name relative to a manpath.
 * After an optional leading "./", the name must start with "man"
 * or "cat" and must contain neither "../" nor "/..".
 * Return 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
560
561 static void
562 pg_index(const struct req *req)
563 {
564 if (resp_begin_html(200, NULL, NULL) == 0)
565 puts("<header>");
566 resp_searchform(req, FOCUS_QUERY);
567 printf("</header>\n"
568 "<main>\n"
569 "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
570 "This web interface is documented in the\n"
571 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
572 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
573 "manual, and the\n"
574 "<a class=\"Xr\" href=\"/%s%sapropos.1\""
575 " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
576 "manual explains the query syntax.\n"
577 "</p>\n"
578 "</main>\n",
579 scriptname, *scriptname == '\0' ? "" : "/",
580 scriptname, *scriptname == '\0' ? "" : "/");
581 resp_end_html();
582 }
583
584 static void
585 pg_noresult(const struct req *req, int code, const char *http_msg,
586 const char *user_msg)
587 {
588 if (resp_begin_html(code, http_msg, NULL) == 0)
589 puts("<header>");
590 resp_searchform(req, FOCUS_QUERY);
591 puts("</header>");
592 puts("<main>");
593 puts("<p role=\"doc-notice\" aria-label=\"No result\">");
594 puts(user_msg);
595 puts("</p>");
596 puts("</main>");
597 resp_end_html();
598 }
599
600 static void
601 pg_error_badrequest(const char *msg)
602 {
603 if (resp_begin_html(400, "Bad Request", NULL))
604 puts("</header>");
605 puts("<main>\n"
606 "<h1>Bad Request</h1>\n"
607 "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
608 puts(msg);
609 printf("Try again from the\n"
610 "<a href=\"/%s\">main page</a>.\n"
611 "</p>\n"
612 "</main>\n", scriptname);
613 resp_end_html();
614 }
615
/*
 * Write a "500 Internal Server Error" page.
 * No search form is shown, so a <header> element opened
 * by resp_begin_html() is closed right away.
 */
static void
pg_error_internal(void)
{
	int	 have_header;

	have_header = resp_begin_html(500, "Internal Server Error", NULL);
	if (have_header)
		puts("</header>");

	puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
	resp_end_html();
}
624
625 static void
626 pg_redirect(const struct req *req, const char *name)
627 {
628 printf("Status: 303 See Other\r\n"
629 "Location: /");
630 if (*scriptname != '\0')
631 printf("%s/", scriptname);
632 if (strcmp(req->q.manpath, req->p[0]))
633 printf("%s/", req->q.manpath);
634 if (req->q.arch != NULL)
635 printf("%s/", req->q.arch);
636 http_encode(name);
637 if (req->q.sec != NULL) {
638 putchar('.');
639 http_encode(req->q.sec);
640 }
641 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
642 }
643
644 static void
645 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
646 {
647 char *arch, *archend;
648 const char *sec;
649 size_t i, iuse;
650 int archprio, archpriouse;
651 int prio, priouse;
652 int have_header;
653
654 for (i = 0; i < sz; i++) {
655 if (validate_filename(r[i].file))
656 continue;
657 warnx("invalid filename %s in %s database",
658 r[i].file, req->q.manpath);
659 pg_error_internal();
660 return;
661 }
662
663 if (req->isquery && sz == 1) {
664 /*
665 * If we have just one result, then jump there now
666 * without any delay.
667 */
668 printf("Status: 303 See Other\r\n"
669 "Location: /");
670 if (*scriptname != '\0')
671 printf("%s/", scriptname);
672 if (strcmp(req->q.manpath, req->p[0]))
673 printf("%s/", req->q.manpath);
674 printf("%s\r\n"
675 "Content-Type: text/html; charset=utf-8\r\n\r\n",
676 r[0].file);
677 return;
678 }
679
680 /*
681 * In man(1) mode, show one of the pages
682 * even if more than one is found.
683 */
684
685 iuse = 0;
686 if (req->q.equal || sz == 1) {
687 priouse = 20;
688 archpriouse = 3;
689 for (i = 0; i < sz; i++) {
690 sec = r[i].file;
691 sec += strcspn(sec, "123456789");
692 if (sec[0] == '\0')
693 continue;
694 prio = sec_prios[sec[0] - '1'];
695 if (sec[1] != '/')
696 prio += 10;
697 if (req->q.arch == NULL) {
698 archprio =
699 ((arch = strchr(sec + 1, '/'))
700 == NULL) ? 3 :
701 ((archend = strchr(arch + 1, '/'))
702 == NULL) ? 0 :
703 strncmp(arch, "amd64/",
704 archend - arch) ? 2 : 1;
705 if (archprio < archpriouse) {
706 archpriouse = archprio;
707 priouse = prio;
708 iuse = i;
709 continue;
710 }
711 if (archprio > archpriouse)
712 continue;
713 }
714 if (prio >= priouse)
715 continue;
716 priouse = prio;
717 iuse = i;
718 }
719 have_header = resp_begin_html(200, NULL, r[iuse].file);
720 } else
721 have_header = resp_begin_html(200, NULL, NULL);
722
723 if (have_header == 0)
724 puts("<header>");
725 resp_searchform(req,
726 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
727 puts("</header>");
728
729 if (sz > 1) {
730 puts("<nav>");
731 puts("<table class=\"results\">");
732 for (i = 0; i < sz; i++) {
733 printf(" <tr>\n"
734 " <td>"
735 "<a class=\"Xr\" href=\"/");
736 if (*scriptname != '\0')
737 printf("%s/", scriptname);
738 if (strcmp(req->q.manpath, req->p[0]))
739 printf("%s/", req->q.manpath);
740 printf("%s\">", r[i].file);
741 html_print(r[i].names);
742 printf("</a></td>\n"
743 " <td><span class=\"Nd\">");
744 html_print(r[i].output);
745 puts("</span></td>\n"
746 " </tr>");
747 }
748 puts("</table>");
749 puts("</nav>");
750 }
751
752 if (req->q.equal || sz == 1) {
753 puts("<hr>");
754 resp_show(req, r[iuse].file);
755 }
756
757 resp_end_html();
758 }
759
/*
 * Write a preformatted manual page as HTML inside a <pre> element.
 * Backspace overstrike sequences of the form "X\bY" are translated:
 * an underscore followed by a character yields that character in
 * italics, some pairs of punctuation characters yield a plain '*'
 * or '+', and any other pair yields the second character in bold.
 * If the file cannot be opened, a notice is written instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 c1, c2;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		    " You specified an invalid manual file.\n"
		    "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace, so
			 * the overstruck pair is p[i] and p[i + 2].
			 */

			c1 = p[i];
			c2 = p[i + 2];

			/* Italic mode. */

			if ('_' == c1) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == c1 && 'o' == c2) ||
			    ('o' == c1 && '+' == c2) ||
			    ('|' == c1 && '=' == c2) ||
			    ('=' == c1 && '|' == c2) ||
			    ('*' == c1 && '=' == c2) ||
			    ('=' == c1 && '*' == c2) ||
			    ('*' == c1 && '|' == c2) ||
			    ('|' == c1 && '*' == c2)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == c1 && '-' == c2) ||
			    ('-' == c1 && '|' == c2) ||
			    ('+' == c1 && '-' == c2) ||
			    ('-' == c1 && '+' == c2) ||
			    ('+' == c1 && '|' == c2) ||
			    ('|' == c1 && '+' == c2)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
898
899 static void
900 resp_format(const struct req *req, const char *file)
901 {
902 struct manoutput conf;
903 struct mparse *mp;
904 struct roff_meta *meta;
905 void *vp;
906 int fd;
907 int usepath;
908
909 if (-1 == (fd = open(file, O_RDONLY))) {
910 puts("<p role=\"doc-notice\">\n"
911 " You specified an invalid manual file.\n"
912 "</p>");
913 return;
914 }
915
916 mchars_alloc();
917 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
918 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
919 mparse_readfd(mp, fd, file);
920 close(fd);
921 meta = mparse_result(mp);
922
923 memset(&conf, 0, sizeof(conf));
924 conf.fragment = 1;
925 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
926 usepath = strcmp(req->q.manpath, req->p[0]);
927 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
928 scriptname, *scriptname == '\0' ? "" : "/",
929 usepath ? req->q.manpath : "", usepath ? "/" : "");
930
931 vp = html_alloc(&conf);
932 if (meta->macroset == MACROSET_MDOC)
933 html_mdoc(vp, meta);
934 else
935 html_man(vp, meta);
936
937 html_free(vp);
938 mparse_free(mp);
939 mchars_free();
940 free(conf.man);
941 free(conf.style);
942 }
943
/*
 * Write one manual page.  After skipping an optional leading "./",
 * a file name starting with the letter 'c' is handled as a
 * preformatted page and everything else as manual page source.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
956
957 static void
958 pg_show(struct req *req, const char *fullpath)
959 {
960 char *manpath;
961 const char *file;
962
963 if ((file = strchr(fullpath, '/')) == NULL) {
964 pg_error_badrequest(
965 "You did not specify a page to show.");
966 return;
967 }
968 manpath = mandoc_strndup(fullpath, file - fullpath);
969 file++;
970
971 if ( ! validate_manpath(req, manpath)) {
972 pg_error_badrequest(
973 "You specified an invalid manpath.");
974 free(manpath);
975 return;
976 }
977
978 /*
979 * Begin by chdir()ing into the manpath.
980 * This way we can pick up the database files, which are
981 * relative to the manpath root.
982 */
983
984 if (chdir(manpath) == -1) {
985 warn("chdir %s", manpath);
986 pg_error_internal();
987 free(manpath);
988 return;
989 }
990 free(manpath);
991
992 if ( ! validate_filename(file)) {
993 pg_error_badrequest(
994 "You specified an invalid manual file.");
995 return;
996 }
997
998 if (resp_begin_html(200, NULL, file) == 0)
999 puts("<header>");
1000 resp_searchform(req, FOCUS_NONE);
1001 puts("</header>");
1002 resp_show(req, file);
1003 resp_end_html();
1004 }
1005
1006 static void
1007 pg_search(const struct req *req)
1008 {
1009 struct mansearch search;
1010 struct manpaths paths;
1011 struct manpage *res;
1012 char **argv;
1013 char *query, *rp, *wp;
1014 size_t ressz;
1015 int argc;
1016
1017 /*
1018 * Begin by chdir()ing into the root of the manpath.
1019 * This way we can pick up the database files, which are
1020 * relative to the manpath root.
1021 */
1022
1023 if (chdir(req->q.manpath) == -1) {
1024 warn("chdir %s", req->q.manpath);
1025 pg_error_internal();
1026 return;
1027 }
1028
1029 search.arch = req->q.arch;
1030 search.sec = req->q.sec;
1031 search.outkey = "Nd";
1032 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1033 search.firstmatch = 1;
1034
1035 paths.sz = 1;
1036 paths.paths = mandoc_malloc(sizeof(char *));
1037 paths.paths[0] = mandoc_strdup(".");
1038
1039 /*
1040 * Break apart at spaces with backslash-escaping.
1041 */
1042
1043 argc = 0;
1044 argv = NULL;
1045 rp = query = mandoc_strdup(req->q.query);
1046 for (;;) {
1047 while (isspace((unsigned char)*rp))
1048 rp++;
1049 if (*rp == '\0')
1050 break;
1051 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1052 argv[argc++] = wp = rp;
1053 for (;;) {
1054 if (isspace((unsigned char)*rp)) {
1055 *wp = '\0';
1056 rp++;
1057 break;
1058 }
1059 if (rp[0] == '\\' && rp[1] != '\0')
1060 rp++;
1061 if (wp != rp)
1062 *wp = *rp;
1063 if (*rp == '\0')
1064 break;
1065 wp++;
1066 rp++;
1067 }
1068 }
1069
1070 res = NULL;
1071 ressz = 0;
1072 if (req->isquery && req->q.equal && argc == 1)
1073 pg_redirect(req, argv[0]);
1074 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1075 pg_noresult(req, 400, "Bad Request",
1076 "You entered an invalid query.");
1077 else if (ressz == 0)
1078 pg_noresult(req, 404, "Not Found", "No results found.");
1079 else
1080 pg_searchres(req, res, ressz);
1081
1082 free(query);
1083 mansearch_free(res, ressz);
1084 free(paths.paths[0]);
1085 free(paths.paths);
1086 }
1087
1088 int
1089 main(void)
1090 {
1091 struct req req;
1092 struct itimerval itimer;
1093 const char *path;
1094 const char *querystring;
1095 int i;
1096
1097 #if HAVE_PLEDGE
1098 /*
1099 * The "rpath" pledge could be revoked after mparse_readfd()
1100 * if the file desciptor to "/footer.html" would be opened
1101 * up front, but it's probably not worth the complication
1102 * of the code it would cause: it would require scattering
1103 * pledge() calls in multiple low-level resp_*() functions.
1104 */
1105
1106 if (pledge("stdio rpath", NULL) == -1) {
1107 warn("pledge");
1108 pg_error_internal();
1109 return EXIT_FAILURE;
1110 }
1111 #endif
1112
1113 /* Poor man's ReDoS mitigation. */
1114
1115 itimer.it_value.tv_sec = 2;
1116 itimer.it_value.tv_usec = 0;
1117 itimer.it_interval.tv_sec = 2;
1118 itimer.it_interval.tv_usec = 0;
1119 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1120 warn("setitimer");
1121 pg_error_internal();
1122 return EXIT_FAILURE;
1123 }
1124
1125 /*
1126 * First we change directory into the MAN_DIR so that
1127 * subsequent scanning for manpath directories is rooted
1128 * relative to the same position.
1129 */
1130
1131 if (chdir(MAN_DIR) == -1) {
1132 warn("MAN_DIR: %s", MAN_DIR);
1133 pg_error_internal();
1134 return EXIT_FAILURE;
1135 }
1136
1137 memset(&req, 0, sizeof(struct req));
1138 req.q.equal = 1;
1139 parse_manpath_conf(&req);
1140
1141 /* Parse the path info and the query string. */
1142
1143 if ((path = getenv("PATH_INFO")) == NULL)
1144 path = "";
1145 else if (*path == '/')
1146 path++;
1147
1148 if (*path != '\0') {
1149 parse_path_info(&req, path);
1150 if (req.q.manpath == NULL || req.q.sec == NULL ||
1151 *req.q.query == '\0' || access(path, F_OK) == -1)
1152 path = "";
1153 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1154 parse_query_string(&req, querystring);
1155
1156 /* Validate parsed data and add defaults. */
1157
1158 if (req.q.manpath == NULL)
1159 req.q.manpath = mandoc_strdup(req.p[0]);
1160 else if ( ! validate_manpath(&req, req.q.manpath)) {
1161 pg_error_badrequest(
1162 "You specified an invalid manpath.");
1163 return EXIT_FAILURE;
1164 }
1165
1166 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1167 pg_error_badrequest(
1168 "You specified an invalid architecture.");
1169 return EXIT_FAILURE;
1170 }
1171
1172 /* Dispatch to the three different pages. */
1173
1174 if ('\0' != *path)
1175 pg_show(&req, path);
1176 else if (NULL != req.q.query)
1177 pg_search(&req);
1178 else
1179 pg_index(&req);
1180
1181 free(req.q.manpath);
1182 free(req.q.arch);
1183 free(req.q.sec);
1184 free(req.q.query);
1185 for (i = 0; i < (int)req.psz; i++)
1186 free(req.p[i]);
1187 free(req.p);
1188 return EXIT_SUCCESS;
1189 }
1190
1191 /*
1192 * Translate PATH_INFO to a query.
1193 */
1194 static void
1195 parse_path_info(struct req *req, const char *path)
1196 {
1197 const char *name, *sec, *end;
1198
1199 req->isquery = 0;
1200 req->q.equal = 1;
1201 req->q.manpath = NULL;
1202 req->q.arch = NULL;
1203
1204 /* Mandatory manual page name. */
1205 if ((name = strrchr(path, '/')) == NULL)
1206 name = path;
1207 else
1208 name++;
1209
1210 /* Optional trailing section. */
1211 sec = strrchr(name, '.');
1212 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1213 req->q.query = mandoc_strndup(name, sec - name - 1);
1214 req->q.sec = mandoc_strdup(sec);
1215 } else {
1216 req->q.query = mandoc_strdup(name);
1217 req->q.sec = NULL;
1218 }
1219
1220 /* Handle the case of name[.section] only. */
1221 if (name == path)
1222 return;
1223
1224 /* Optional manpath. */
1225 end = strchr(path, '/');
1226 req->q.manpath = mandoc_strndup(path, end - path);
1227 if (validate_manpath(req, req->q.manpath)) {
1228 path = end + 1;
1229 if (name == path)
1230 return;
1231 } else {
1232 free(req->q.manpath);
1233 req->q.manpath = NULL;
1234 }
1235
1236 /* Optional section. */
1237 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1238 path += 3;
1239 end = strchr(path, '/');
1240 free(req->q.sec);
1241 req->q.sec = mandoc_strndup(path, end - path);
1242 path = end + 1;
1243 if (name == path)
1244 return;
1245 }
1246
1247 /* Optional architecture. */
1248 end = strchr(path, '/');
1249 if (end + 1 != name) {
1250 pg_error_badrequest(
1251 "You specified too many directory components.");
1252 exit(EXIT_FAILURE);
1253 }
1254 req->q.arch = mandoc_strndup(path, end - path);
1255 if (validate_arch(req->q.arch) == 0) {
1256 pg_error_badrequest(
1257 "You specified an invalid directory component.");
1258 exit(EXIT_FAILURE);
1259 }
1260 }
1261
1262 /*
1263 * Scan for indexable paths.
1264 */
1265 static void
1266 parse_manpath_conf(struct req *req)
1267 {
1268 FILE *fp;
1269 char *dp;
1270 size_t dpsz;
1271 ssize_t len;
1272
1273 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1274 warn("%s/manpath.conf", MAN_DIR);
1275 pg_error_internal();
1276 exit(EXIT_FAILURE);
1277 }
1278
1279 dp = NULL;
1280 dpsz = 0;
1281
1282 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1283 if (dp[len - 1] == '\n')
1284 dp[--len] = '\0';
1285 req->p = mandoc_realloc(req->p,
1286 (req->psz + 1) * sizeof(char *));
1287 if ( ! validate_urifrag(dp)) {
1288 warnx("%s/manpath.conf contains "
1289 "unsafe path \"%s\"", MAN_DIR, dp);
1290 pg_error_internal();
1291 exit(EXIT_FAILURE);
1292 }
1293 if (strchr(dp, '/') != NULL) {
1294 warnx("%s/manpath.conf contains "
1295 "path with slash \"%s\"", MAN_DIR, dp);
1296 pg_error_internal();
1297 exit(EXIT_FAILURE);
1298 }
1299 req->p[req->psz++] = dp;
1300 dp = NULL;
1301 dpsz = 0;
1302 }
1303 free(dp);
1304
1305 if (req->p == NULL) {
1306 warnx("%s/manpath.conf is empty", MAN_DIR);
1307 pg_error_internal();
1308 exit(EXIT_FAILURE);
1309 }
1310 }