]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Improve accessibility of man.cgi(8) in various respects,
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.177 2022/07/04 16:20:42 schwarze Exp $ */
2 /*
3 * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Implementation of the man.cgi(8) program.
20 */
21 #include "config.h"
22
23 #include <sys/types.h>
24 #include <sys/time.h>
25
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <limits.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc_aux.h"
40 #include "mandoc.h"
41 #include "roff.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "mandoc_parse.h"
45 #include "main.h"
46 #include "manconf.h"
47 #include "mansearch.h"
48 #include "cgi.h"
49
/*
 * The search parameters of one request.
 * The string members are NULL when the corresponding parameter
 * was not given; main() releases them with free(3).
 */
struct query {
	char	*manpath;	/* manual tree to search in */
	char	*arch;		/* machine architecture */
	char	*sec;		/* section of the manual */
	char	*query;		/* search expression, still unparsed */
	int	 equal;		/* non-zero: whole names, zero: substrings */
};
60
61 struct req {
62 struct query q;
63 char **p; /* array of available manpaths */
64 size_t psz; /* number of available manpaths */
65 int isquery; /* QUERY_STRING used, not PATH_INFO */
66 };
67
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no autofocus */
	FOCUS_QUERY		/* autofocus on the query input box */
};
72
/* Forward declarations of all file-local functions. */
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_encode(const char *p);
static	void	 parse_manpath_conf(struct req *req);
static	void	 parse_path_info(struct req *req, const char *path);
static	void	 parse_query_string(struct req *req, const char *qs);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, int code,
			const char *http_msg, const char *user_msg);
static	void	 pg_redirect(const struct req *req, const char *name);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	void	 resp_begin_html(int code, const char *msg,
			const char *file);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_catman(const struct req *req, const char *file);
static	void	 resp_copy(const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_format(const struct req *req, const char *file);
static	void	 resp_searchform(const struct req *req, enum focus focus);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_arch(const char *arch);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);
103
104 static const char *scriptname = SCRIPT_NAME;
105
106 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
107 static const char *const sec_numbers[] = {
108 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
109 };
110 static const char *const sec_names[] = {
111 "All Sections",
112 "1 - General Commands",
113 "2 - System Calls",
114 "3 - Library Functions",
115 "3p - Perl Library",
116 "4 - Device Drivers",
117 "5 - File Formats",
118 "6 - Games",
119 "7 - Miscellaneous Information",
120 "8 - System Manager\'s Manual",
121 "9 - Kernel Developer\'s Manual"
122 };
123 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
124
/*
 * Architecture names accepted by validate_arch() and offered
 * by the architecture selector of the search form, in this order.
 */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64", "hppa",
	"i386", "landisk", "loongson", "luna88k", "macppc",
	"mips64", "octeon", "powerpc64", "riscv64", "sparc64",

	"amiga", "arc", "armish", "arm32", "atari",
	"aviion", "beagle", "cats", "hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k", "mvmeppc",
	"palm", "pc532", "pegasos", "pmax", "powerpc",
	"sgi", "socppc", "solbourne", "sparc", "sun3",
	"vax", "wgrisc", "x68k", "zaurus"
};
static	const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
142
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML character references.
 * All other bytes, including non-ASCII ones, are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
169
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time via html_putchar().
 * A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
183
/*
 * Hand the allocated string *val over to the query member *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	free(*attr);
	if (*incoming == '\0') {
		free(incoming);
		incoming = NULL;
	}
	*attr = incoming;
	*val = NULL;
}
200
201 /*
202 * Parse the QUERY_STRING for key-value pairs
203 * and store the values into the query structure.
204 */
205 static void
206 parse_query_string(struct req *req, const char *qs)
207 {
208 char *key, *val;
209 size_t keysz, valsz;
210
211 req->isquery = 1;
212 req->q.manpath = NULL;
213 req->q.arch = NULL;
214 req->q.sec = NULL;
215 req->q.query = NULL;
216 req->q.equal = 1;
217
218 key = val = NULL;
219 while (*qs != '\0') {
220
221 /* Parse one key. */
222
223 keysz = strcspn(qs, "=;&");
224 key = mandoc_strndup(qs, keysz);
225 qs += keysz;
226 if (*qs != '=')
227 goto next;
228
229 /* Parse one value. */
230
231 valsz = strcspn(++qs, ";&");
232 val = mandoc_strndup(qs, valsz);
233 qs += valsz;
234
235 /* Decode and catch encoding errors. */
236
237 if ( ! (http_decode(key) && http_decode(val)))
238 goto next;
239
240 /* Handle key-value pairs. */
241
242 if ( ! strcmp(key, "query"))
243 set_query_attr(&req->q.query, &val);
244
245 else if ( ! strcmp(key, "apropos"))
246 req->q.equal = !strcmp(val, "0");
247
248 else if ( ! strcmp(key, "manpath")) {
249 #ifdef COMPAT_OLDURI
250 if ( ! strncmp(val, "OpenBSD ", 8)) {
251 val[7] = '-';
252 if ('C' == val[8])
253 val[8] = 'c';
254 }
255 #endif
256 set_query_attr(&req->q.manpath, &val);
257 }
258
259 else if ( ! (strcmp(key, "sec")
260 #ifdef COMPAT_OLDURI
261 && strcmp(key, "sektion")
262 #endif
263 )) {
264 if ( ! strcmp(val, "0"))
265 *val = '\0';
266 set_query_attr(&req->q.sec, &val);
267 }
268
269 else if ( ! strcmp(key, "arch")) {
270 if ( ! strcmp(val, "default"))
271 *val = '\0';
272 set_query_attr(&req->q.arch, &val);
273 }
274
275 /*
276 * The key must be freed in any case.
277 * The val may have been handed over to the query
278 * structure, in which case it is now NULL.
279 */
280 next:
281 free(key);
282 key = NULL;
283 free(val);
284 val = NULL;
285
286 if (*qs != '\0')
287 qs++;
288 }
289 }
290
291 /*
292 * HTTP-decode a string. The standard explanation is that this turns
293 * "%4e+foo" into "n foo" in the regular way. This is done in-place
294 * over the allocated string.
295 */
296 static int
297 http_decode(char *p)
298 {
299 char hex[3];
300 char *q;
301 int c;
302
303 hex[2] = '\0';
304
305 q = p;
306 for ( ; '\0' != *p; p++, q++) {
307 if ('%' == *p) {
308 if ('\0' == (hex[0] = *(p + 1)))
309 return 0;
310 if ('\0' == (hex[1] = *(p + 2)))
311 return 0;
312 if (1 != sscanf(hex, "%x", &c))
313 return 0;
314 if ('\0' == c)
315 return 0;
316
317 *q = (char)c;
318 p += 2;
319 } else
320 *q = '+' == *p ? ' ' : *p;
321 }
322
323 *q = '\0';
324 return 1;
325 }
326
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and the four characters "-._~" are
 * written as they are; every other byte becomes "%XX" with two
 * uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
338
/*
 * Write the HTTP response header and flush standard output.
 * The "Status:" line is only written for codes other than 200,
 * using msg as the reason phrase.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
355
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 *
 * The stdio buffer is flushed first because the copy bypasses
 * stdio and writes to the file descriptor directly.
 * write(2) may transfer fewer bytes than requested, so each chunk
 * is written in a loop until it is complete; copying stops at
 * the first write error other than EINTR.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;

			/* Give up: the output cannot be written. */
			close(fd);
			return;
		}
	}
	close(fd);
}
370
371 static void
372 resp_begin_html(int code, const char *msg, const char *file)
373 {
374 const char *name, *sec, *cp;
375 int namesz, secsz;
376
377 resp_begin_http(code, msg);
378
379 printf("<!DOCTYPE html>\n"
380 "<html>\n"
381 "<head>\n"
382 " <meta charset=\"UTF-8\"/>\n"
383 " <meta name=\"viewport\""
384 " content=\"width=device-width, initial-scale=1.0\">\n"
385 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
386 " type=\"text/css\" media=\"all\">\n"
387 " <title>",
388 CSS_DIR);
389 if (file != NULL) {
390 cp = strrchr(file, '/');
391 name = cp == NULL ? file : cp + 1;
392 cp = strrchr(name, '.');
393 namesz = cp == NULL ? strlen(name) : cp - name;
394 sec = NULL;
395 if (cp != NULL && cp[1] != '0') {
396 sec = cp + 1;
397 secsz = strlen(sec);
398 } else if (name - file > 1) {
399 for (cp = name - 2; cp >= file; cp--) {
400 if (*cp < '1' || *cp > '9')
401 continue;
402 sec = cp;
403 secsz = name - cp - 1;
404 break;
405 }
406 }
407 printf("%.*s", namesz, name);
408 if (sec != NULL)
409 printf("(%.*s)", secsz, sec);
410 fputs(" - ", stdout);
411 }
412 printf("%s</title>\n"
413 "</head>\n"
414 "<body>\n",
415 CUSTOMIZE_TITLE);
416
417 resp_copy(MAN_DIR "/header.html");
418 }
419
420 static void
421 resp_end_html(void)
422 {
423
424 resp_copy(MAN_DIR "/footer.html");
425
426 puts("</body>\n"
427 "</html>");
428 }
429
430 static void
431 resp_searchform(const struct req *req, enum focus focus)
432 {
433 int i;
434
435 printf("<header>\n"
436 "<form role=\"search\" action=\"/%s\" method=\"get\" "
437 "autocomplete=\"off\" autocapitalize=\"none\">\n"
438 " <fieldset>\n"
439 " <legend>Manual Page Search Parameters</legend>\n",
440 scriptname);
441
442 /* Write query input box. */
443
444 printf(" <input type=\"search\" name=\"query\" value=\"");
445 if (req->q.query != NULL)
446 html_print(req->q.query);
447 printf( "\" size=\"40\"");
448 if (focus == FOCUS_QUERY)
449 printf(" autofocus");
450 puts(">");
451
452 /* Write submission buttons. */
453
454 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
455 "man</button>\n"
456 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
457 "apropos</button>\n"
458 " <br/>\n");
459
460 /* Write section selector. */
461
462 puts(" <select name=\"sec\" aria-label=\"manual section\">");
463 for (i = 0; i < sec_MAX; i++) {
464 printf(" <option value=\"%s\"", sec_numbers[i]);
465 if (NULL != req->q.sec &&
466 0 == strcmp(sec_numbers[i], req->q.sec))
467 printf(" selected=\"selected\"");
468 printf(">%s</option>\n", sec_names[i]);
469 }
470 puts(" </select>");
471
472 /* Write architecture selector. */
473
474 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n"
475 " <option value=\"default\"");
476 if (NULL == req->q.arch)
477 printf(" selected=\"selected\"");
478 puts(">All Architectures</option>");
479 for (i = 0; i < arch_MAX; i++) {
480 printf(" <option");
481 if (NULL != req->q.arch &&
482 0 == strcmp(arch_names[i], req->q.arch))
483 printf(" selected=\"selected\"");
484 printf(">%s</option>\n", arch_names[i]);
485 }
486 puts(" </select>");
487
488 /* Write manpath selector. */
489
490 if (req->psz > 1) {
491 puts(" <select name=\"manpath\">");
492 for (i = 0; i < (int)req->psz; i++) {
493 printf(" <option");
494 if (strcmp(req->q.manpath, req->p[i]) == 0)
495 printf(" selected=\"selected\"");
496 printf(">");
497 html_print(req->p[i]);
498 puts("</option>");
499 }
500 puts(" </select>");
501 }
502
503 puts(" </fieldset>\n"
504 "</form>\n"
505 "</header>");
506 }
507
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string) and 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
521
522 static int
523 validate_manpath(const struct req *req, const char* manpath)
524 {
525 size_t i;
526
527 for (i = 0; i < req->psz; i++)
528 if ( ! strcmp(manpath, req->p[i]))
529 return 1;
530
531 return 0;
532 }
533
534 static int
535 validate_arch(const char *arch)
536 {
537 int i;
538
539 for (i = 0; i < arch_MAX; i++)
540 if (strcmp(arch, arch_names[i]) == 0)
541 return 1;
542
543 return 0;
544 }
545
/*
 * Check a file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable and 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
556
557 static void
558 pg_index(const struct req *req)
559 {
560
561 resp_begin_html(200, NULL, NULL);
562 resp_searchform(req, FOCUS_QUERY);
563 printf("<main>\n"
564 "<p role=\"doc-notice\" aria-label=\"usage\">\n"
565 "This web interface is documented in the\n"
566 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
567 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
568 "manual, and the\n"
569 "<a class=\"Xr\" href=\"/%s%sapropos.1\""
570 " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
571 "manual explains the query syntax.\n"
572 "</p>\n"
573 "</main>\n",
574 scriptname, *scriptname == '\0' ? "" : "/",
575 scriptname, *scriptname == '\0' ? "" : "/");
576 resp_end_html();
577 }
578
579 static void
580 pg_noresult(const struct req *req, int code, const char *http_msg,
581 const char *user_msg)
582 {
583 resp_begin_html(code, http_msg, NULL);
584 resp_searchform(req, FOCUS_QUERY);
585 puts("<main>");
586 puts("<p role=\"doc-notice\" aria-label=\"no result\">");
587 puts(user_msg);
588 puts("</p>");
589 puts("</main>");
590 resp_end_html();
591 }
592
593 static void
594 pg_error_badrequest(const char *msg)
595 {
596
597 resp_begin_html(400, "Bad Request", NULL);
598 puts("<main>\n"
599 "<h1>Bad Request</h1>\n"
600 "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
601 puts(msg);
602 printf("Try again from the\n"
603 "<a href=\"/%s\">main page</a>.\n"
604 "</p>\n"
605 "</main>", scriptname);
606 resp_end_html();
607 }
608
/*
 * Write a minimal "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<main><p role=\"doc-notice\">"
	    "Internal Server Error</p></main>\n", stdout);
	resp_end_html();
}
616
617 static void
618 pg_redirect(const struct req *req, const char *name)
619 {
620 printf("Status: 303 See Other\r\n"
621 "Location: /");
622 if (*scriptname != '\0')
623 printf("%s/", scriptname);
624 if (strcmp(req->q.manpath, req->p[0]))
625 printf("%s/", req->q.manpath);
626 if (req->q.arch != NULL)
627 printf("%s/", req->q.arch);
628 http_encode(name);
629 if (req->q.sec != NULL) {
630 putchar('.');
631 http_encode(req->q.sec);
632 }
633 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
634 }
635
636 static void
637 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
638 {
639 char *arch, *archend;
640 const char *sec;
641 size_t i, iuse;
642 int archprio, archpriouse;
643 int prio, priouse;
644
645 for (i = 0; i < sz; i++) {
646 if (validate_filename(r[i].file))
647 continue;
648 warnx("invalid filename %s in %s database",
649 r[i].file, req->q.manpath);
650 pg_error_internal();
651 return;
652 }
653
654 if (req->isquery && sz == 1) {
655 /*
656 * If we have just one result, then jump there now
657 * without any delay.
658 */
659 printf("Status: 303 See Other\r\n"
660 "Location: /");
661 if (*scriptname != '\0')
662 printf("%s/", scriptname);
663 if (strcmp(req->q.manpath, req->p[0]))
664 printf("%s/", req->q.manpath);
665 printf("%s\r\n"
666 "Content-Type: text/html; charset=utf-8\r\n\r\n",
667 r[0].file);
668 return;
669 }
670
671 /*
672 * In man(1) mode, show one of the pages
673 * even if more than one is found.
674 */
675
676 iuse = 0;
677 if (req->q.equal || sz == 1) {
678 priouse = 20;
679 archpriouse = 3;
680 for (i = 0; i < sz; i++) {
681 sec = r[i].file;
682 sec += strcspn(sec, "123456789");
683 if (sec[0] == '\0')
684 continue;
685 prio = sec_prios[sec[0] - '1'];
686 if (sec[1] != '/')
687 prio += 10;
688 if (req->q.arch == NULL) {
689 archprio =
690 ((arch = strchr(sec + 1, '/'))
691 == NULL) ? 3 :
692 ((archend = strchr(arch + 1, '/'))
693 == NULL) ? 0 :
694 strncmp(arch, "amd64/",
695 archend - arch) ? 2 : 1;
696 if (archprio < archpriouse) {
697 archpriouse = archprio;
698 priouse = prio;
699 iuse = i;
700 continue;
701 }
702 if (archprio > archpriouse)
703 continue;
704 }
705 if (prio >= priouse)
706 continue;
707 priouse = prio;
708 iuse = i;
709 }
710 resp_begin_html(200, NULL, r[iuse].file);
711 } else
712 resp_begin_html(200, NULL, NULL);
713
714 resp_searchform(req,
715 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
716
717 if (sz > 1) {
718 puts("<nav>");
719 puts("<table class=\"results\">");
720 for (i = 0; i < sz; i++) {
721 printf(" <tr>\n"
722 " <td>"
723 "<a class=\"Xr\" href=\"/");
724 if (*scriptname != '\0')
725 printf("%s/", scriptname);
726 if (strcmp(req->q.manpath, req->p[0]))
727 printf("%s/", req->q.manpath);
728 printf("%s\">", r[i].file);
729 html_print(r[i].names);
730 printf("</a></td>\n"
731 " <td><span class=\"Nd\">");
732 html_print(r[i].output);
733 puts("</span></td>\n"
734 " </tr>");
735 }
736 puts("</table>");
737 puts("</nav>");
738 }
739
740 if (req->q.equal || sz == 1) {
741 puts("<hr>");
742 resp_show(req, r[iuse].file);
743 }
744
745 resp_end_html();
746 }
747
/*
 * Return non-zero if the two characters a and b
 * are x and y, in either order.
 */
static int
catman_pair(char a, char b, char x, char y)
{
	return (a == x && b == y) || (a == y && b == x);
}

/*
 * Write a preformatted manual page ("cat page") as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * of the form "x\by" are translated: an underscore struck over
 * by a character becomes that character in italics, a few
 * character pairs used by troff to draw bullets and box corners
 * become '*' or '+', and any other pair becomes the second
 * character in bold.  Everything else is written with HTML
 * escaping.  The req argument is not used.
 *
 * Both characters of an overstrike pair are p[i] and p[i + 2],
 * since p[i + 1] is the backspace.  All pair tests therefore look
 * at p[i + 2]; testing p[i + 1] against a printable character
 * could never succeed.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 under, over;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		    " You specified an invalid manual file.\n"
		    "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * A backspace or newline at this position means
			 * that the cat page is malformed.
			 * Ignore the character and keep going.
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * A regular character not followed by a
			 * backspace: close any open font scopes
			 * and print it.
			 */

			if (p[i + 1] != '\b') {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			}

			/* Nothing follows the backspace: drop it. */

			if (i + 2 >= len)
				continue;

			under = p[i];
			over = p[i + 2];

			/* Italic mode. */

			if (under == '_') {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Character pairs that troff strikes over
			 * each other to draw a bullet or a corner.
			 */

			if (catman_pair(under, over, '+', 'o') ||
			    catman_pair(under, over, '|', '=') ||
			    catman_pair(under, over, '*', '=') ||
			    catman_pair(under, over, '*', '|')) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			}
			if (catman_pair(under, over, '|', '-') ||
			    catman_pair(under, over, '+', '-') ||
			    catman_pair(under, over, '+', '|')) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Close the font scopes at the end of the line and
		 * print the last character unless it is the newline.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
886
887 static void
888 resp_format(const struct req *req, const char *file)
889 {
890 struct manoutput conf;
891 struct mparse *mp;
892 struct roff_meta *meta;
893 void *vp;
894 int fd;
895 int usepath;
896
897 if (-1 == (fd = open(file, O_RDONLY))) {
898 puts("<p role=\"doc-notice\">\n"
899 " You specified an invalid manual file.\n"
900 "</p>");
901 return;
902 }
903
904 mchars_alloc();
905 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
906 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
907 mparse_readfd(mp, fd, file);
908 close(fd);
909 meta = mparse_result(mp);
910
911 memset(&conf, 0, sizeof(conf));
912 conf.fragment = 1;
913 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
914 usepath = strcmp(req->q.manpath, req->p[0]);
915 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
916 scriptname, *scriptname == '\0' ? "" : "/",
917 usepath ? req->q.manpath : "", usepath ? "/" : "");
918
919 vp = html_alloc(&conf);
920 if (meta->macroset == MACROSET_MDOC)
921 html_mdoc(vp, meta);
922 else
923 html_man(vp, meta);
924
925 html_free(vp);
926 mparse_free(mp);
927 mchars_free();
928 free(conf.man);
929 free(conf.style);
930 }
931
/*
 * Write one manual page.  After skipping an optional leading
 * "./", file names starting with 'c' are treated as preformatted
 * pages, all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
944
945 static void
946 pg_show(struct req *req, const char *fullpath)
947 {
948 char *manpath;
949 const char *file;
950
951 if ((file = strchr(fullpath, '/')) == NULL) {
952 pg_error_badrequest(
953 "You did not specify a page to show.");
954 return;
955 }
956 manpath = mandoc_strndup(fullpath, file - fullpath);
957 file++;
958
959 if ( ! validate_manpath(req, manpath)) {
960 pg_error_badrequest(
961 "You specified an invalid manpath.");
962 free(manpath);
963 return;
964 }
965
966 /*
967 * Begin by chdir()ing into the manpath.
968 * This way we can pick up the database files, which are
969 * relative to the manpath root.
970 */
971
972 if (chdir(manpath) == -1) {
973 warn("chdir %s", manpath);
974 pg_error_internal();
975 free(manpath);
976 return;
977 }
978 free(manpath);
979
980 if ( ! validate_filename(file)) {
981 pg_error_badrequest(
982 "You specified an invalid manual file.");
983 return;
984 }
985
986 resp_begin_html(200, NULL, file);
987 resp_searchform(req, FOCUS_NONE);
988 resp_show(req, file);
989 resp_end_html();
990 }
991
992 static void
993 pg_search(const struct req *req)
994 {
995 struct mansearch search;
996 struct manpaths paths;
997 struct manpage *res;
998 char **argv;
999 char *query, *rp, *wp;
1000 size_t ressz;
1001 int argc;
1002
1003 /*
1004 * Begin by chdir()ing into the root of the manpath.
1005 * This way we can pick up the database files, which are
1006 * relative to the manpath root.
1007 */
1008
1009 if (chdir(req->q.manpath) == -1) {
1010 warn("chdir %s", req->q.manpath);
1011 pg_error_internal();
1012 return;
1013 }
1014
1015 search.arch = req->q.arch;
1016 search.sec = req->q.sec;
1017 search.outkey = "Nd";
1018 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1019 search.firstmatch = 1;
1020
1021 paths.sz = 1;
1022 paths.paths = mandoc_malloc(sizeof(char *));
1023 paths.paths[0] = mandoc_strdup(".");
1024
1025 /*
1026 * Break apart at spaces with backslash-escaping.
1027 */
1028
1029 argc = 0;
1030 argv = NULL;
1031 rp = query = mandoc_strdup(req->q.query);
1032 for (;;) {
1033 while (isspace((unsigned char)*rp))
1034 rp++;
1035 if (*rp == '\0')
1036 break;
1037 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1038 argv[argc++] = wp = rp;
1039 for (;;) {
1040 if (isspace((unsigned char)*rp)) {
1041 *wp = '\0';
1042 rp++;
1043 break;
1044 }
1045 if (rp[0] == '\\' && rp[1] != '\0')
1046 rp++;
1047 if (wp != rp)
1048 *wp = *rp;
1049 if (*rp == '\0')
1050 break;
1051 wp++;
1052 rp++;
1053 }
1054 }
1055
1056 res = NULL;
1057 ressz = 0;
1058 if (req->isquery && req->q.equal && argc == 1)
1059 pg_redirect(req, argv[0]);
1060 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1061 pg_noresult(req, 400, "Bad Request",
1062 "You entered an invalid query.");
1063 else if (ressz == 0)
1064 pg_noresult(req, 404, "Not Found", "No results found.");
1065 else
1066 pg_searchres(req, res, ressz);
1067
1068 free(query);
1069 mansearch_free(res, ressz);
1070 free(paths.paths[0]);
1071 free(paths.paths);
1072 }
1073
1074 int
1075 main(void)
1076 {
1077 struct req req;
1078 struct itimerval itimer;
1079 const char *path;
1080 const char *querystring;
1081 int i;
1082
1083 #if HAVE_PLEDGE
1084 /*
1085 * The "rpath" pledge could be revoked after mparse_readfd()
1086 * if the file desciptor to "/footer.html" would be opened
1087 * up front, but it's probably not worth the complication
1088 * of the code it would cause: it would require scattering
1089 * pledge() calls in multiple low-level resp_*() functions.
1090 */
1091
1092 if (pledge("stdio rpath", NULL) == -1) {
1093 warn("pledge");
1094 pg_error_internal();
1095 return EXIT_FAILURE;
1096 }
1097 #endif
1098
1099 /* Poor man's ReDoS mitigation. */
1100
1101 itimer.it_value.tv_sec = 2;
1102 itimer.it_value.tv_usec = 0;
1103 itimer.it_interval.tv_sec = 2;
1104 itimer.it_interval.tv_usec = 0;
1105 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1106 warn("setitimer");
1107 pg_error_internal();
1108 return EXIT_FAILURE;
1109 }
1110
1111 /*
1112 * First we change directory into the MAN_DIR so that
1113 * subsequent scanning for manpath directories is rooted
1114 * relative to the same position.
1115 */
1116
1117 if (chdir(MAN_DIR) == -1) {
1118 warn("MAN_DIR: %s", MAN_DIR);
1119 pg_error_internal();
1120 return EXIT_FAILURE;
1121 }
1122
1123 memset(&req, 0, sizeof(struct req));
1124 req.q.equal = 1;
1125 parse_manpath_conf(&req);
1126
1127 /* Parse the path info and the query string. */
1128
1129 if ((path = getenv("PATH_INFO")) == NULL)
1130 path = "";
1131 else if (*path == '/')
1132 path++;
1133
1134 if (*path != '\0') {
1135 parse_path_info(&req, path);
1136 if (req.q.manpath == NULL || req.q.sec == NULL ||
1137 *req.q.query == '\0' || access(path, F_OK) == -1)
1138 path = "";
1139 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1140 parse_query_string(&req, querystring);
1141
1142 /* Validate parsed data and add defaults. */
1143
1144 if (req.q.manpath == NULL)
1145 req.q.manpath = mandoc_strdup(req.p[0]);
1146 else if ( ! validate_manpath(&req, req.q.manpath)) {
1147 pg_error_badrequest(
1148 "You specified an invalid manpath.");
1149 return EXIT_FAILURE;
1150 }
1151
1152 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1153 pg_error_badrequest(
1154 "You specified an invalid architecture.");
1155 return EXIT_FAILURE;
1156 }
1157
1158 /* Dispatch to the three different pages. */
1159
1160 if ('\0' != *path)
1161 pg_show(&req, path);
1162 else if (NULL != req.q.query)
1163 pg_search(&req);
1164 else
1165 pg_index(&req);
1166
1167 free(req.q.manpath);
1168 free(req.q.arch);
1169 free(req.q.sec);
1170 free(req.q.query);
1171 for (i = 0; i < (int)req.psz; i++)
1172 free(req.p[i]);
1173 free(req.p);
1174 return EXIT_SUCCESS;
1175 }
1176
1177 /*
1178 * Translate PATH_INFO to a query.
1179 */
1180 static void
1181 parse_path_info(struct req *req, const char *path)
1182 {
1183 const char *name, *sec, *end;
1184
1185 req->isquery = 0;
1186 req->q.equal = 1;
1187 req->q.manpath = NULL;
1188 req->q.arch = NULL;
1189
1190 /* Mandatory manual page name. */
1191 if ((name = strrchr(path, '/')) == NULL)
1192 name = path;
1193 else
1194 name++;
1195
1196 /* Optional trailing section. */
1197 sec = strrchr(name, '.');
1198 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1199 req->q.query = mandoc_strndup(name, sec - name - 1);
1200 req->q.sec = mandoc_strdup(sec);
1201 } else {
1202 req->q.query = mandoc_strdup(name);
1203 req->q.sec = NULL;
1204 }
1205
1206 /* Handle the case of name[.section] only. */
1207 if (name == path)
1208 return;
1209
1210 /* Optional manpath. */
1211 end = strchr(path, '/');
1212 req->q.manpath = mandoc_strndup(path, end - path);
1213 if (validate_manpath(req, req->q.manpath)) {
1214 path = end + 1;
1215 if (name == path)
1216 return;
1217 } else {
1218 free(req->q.manpath);
1219 req->q.manpath = NULL;
1220 }
1221
1222 /* Optional section. */
1223 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1224 path += 3;
1225 end = strchr(path, '/');
1226 free(req->q.sec);
1227 req->q.sec = mandoc_strndup(path, end - path);
1228 path = end + 1;
1229 if (name == path)
1230 return;
1231 }
1232
1233 /* Optional architecture. */
1234 end = strchr(path, '/');
1235 if (end + 1 != name) {
1236 pg_error_badrequest(
1237 "You specified too many directory components.");
1238 exit(EXIT_FAILURE);
1239 }
1240 req->q.arch = mandoc_strndup(path, end - path);
1241 if (validate_arch(req->q.arch) == 0) {
1242 pg_error_badrequest(
1243 "You specified an invalid directory component.");
1244 exit(EXIT_FAILURE);
1245 }
1246 }
1247
1248 /*
1249 * Scan for indexable paths.
1250 */
1251 static void
1252 parse_manpath_conf(struct req *req)
1253 {
1254 FILE *fp;
1255 char *dp;
1256 size_t dpsz;
1257 ssize_t len;
1258
1259 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1260 warn("%s/manpath.conf", MAN_DIR);
1261 pg_error_internal();
1262 exit(EXIT_FAILURE);
1263 }
1264
1265 dp = NULL;
1266 dpsz = 0;
1267
1268 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1269 if (dp[len - 1] == '\n')
1270 dp[--len] = '\0';
1271 req->p = mandoc_realloc(req->p,
1272 (req->psz + 1) * sizeof(char *));
1273 if ( ! validate_urifrag(dp)) {
1274 warnx("%s/manpath.conf contains "
1275 "unsafe path \"%s\"", MAN_DIR, dp);
1276 pg_error_internal();
1277 exit(EXIT_FAILURE);
1278 }
1279 if (strchr(dp, '/') != NULL) {
1280 warnx("%s/manpath.conf contains "
1281 "path with slash \"%s\"", MAN_DIR, dp);
1282 pg_error_internal();
1283 exit(EXIT_FAILURE);
1284 }
1285 req->p[req->psz++] = dp;
1286 dp = NULL;
1287 dpsz = 0;
1288 }
1289 free(dp);
1290
1291 if (req->p == NULL) {
1292 warnx("%s/manpath.conf is empty", MAN_DIR);
1293 pg_error_internal();
1294 exit(EXIT_FAILURE);
1295 }
1296 }