]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Rename manup(8) to catman(8), which Linux already uses for a similar tool.
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.7 2011/11/24 12:27:18 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22 #include <sys/wait.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "apropos_db.h"
38 #include "mandoc.h"
39 #include "manpath.h"
40
41 #ifdef __linux__
42 # include <db_185.h>
43 #else
44 # include <db.h>
45 #endif
46
/*
 * The pages man.cgi knows how to route.
 * Enumerators double as indices into the pages[] name table.
 */
enum	page {
	PAGE_INDEX = 0,	/* landing page carrying the search form */
	PAGE_SEARCH,	/* search results */
	PAGE_SHOW,	/* display of a single manual */
	PAGE__MAX	/* number of real pages; not itself a page */
};
53
/* One decoded key-value pair taken from a request string. */
struct	kval {
	char	*key;	/* heap copy of the name; released by kval_free() */
	char	*val;	/* heap copy of the value; released by kval_free() */
};
58
59 struct req {
60 struct kval *fields;
61 size_t fieldsz;
62 enum page page;
63 };
64
/* Forward declarations for the file-local helpers, in alphabetical order. */
static	int		 atou(const char *buf, unsigned *v);
static	void		 format_insecure(const char *file);
static	void		 format_secure(const char *file);
static	void		 html_print(const char *p);
static	int		 kval_decode(char *p);
static	void		 kval_parse(struct kval **kv, size_t *kvsz, char *p);
static	void		 kval_free(struct kval *p, size_t sz);
static	void		 pg_index(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_search(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_show(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 resp_bad(void);
static	void		 resp_baddb(void);
static	void		 resp_badexpr(const struct req *req);
static	void		 resp_badmanual(void);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_index(const struct req *req);
static	void		 resp_search(struct res *r, size_t sz, void *arg);
static	void		 resp_searchform(const struct req *req);
88
/* Process-wide settings; main() fills these in from the CGI environment. */
static	int		 insecure = 1;	/* cleared when INSECURE is unset */
static	const char	*progname;	/* SCRIPT_NAME: prefix of emitted links */
static	const char	*cache;		/* CACHE_DIR: chdir(2) target */
static	const char	*host;		/* HTTP_HOST: used in redirects */
93
94 static const char * const pages[PAGE__MAX] = {
95 "index", /* PAGE_INDEX */
96 "search", /* PAGE_SEARCH */
97 "show", /* PAGE_SHOW */
98 };
99
/*
 * Convert a decimal string into an unsigned int, following the
 * validation recipe from OpenBSD's strtol(3) manual; strtonum(3) is
 * avoided for portability.
 * Returns 1 and stores the result in *v on success.
 * Returns 0, leaving *v untouched, when the string is empty, has
 * trailing garbage, is negative or does not fit.
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Reject empty input and anything left over after the digits. */
	if ('\0' == buf[0])
		return(0);
	if ('\0' != *end)
		return(0);

	/* Reject values strtol(3) had to clamp. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Reject values outside the range of the result type. */
	if (num > UINT_MAX || num < 0)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
122
/*
 * Print a word to standard output, escaping the characters that are
 * special in HTML text and in double-quoted attribute values.
 * A NULL pointer prints nothing.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_print(const char *p)
{
	char	 c;

	if (NULL == p)
		return;

	while ('\0' != (c = *p++))
		switch (c) {
		case ('"'):
			/* The entity is "quot"; "&quote;" is not defined. */
			fputs("&quot;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('<'):
			fputs("&lt;", stdout);
			break;
		default:
			putchar((unsigned char)c);
			break;
		}
}
154
155 static void
156 kval_free(struct kval *p, size_t sz)
157 {
158 int i;
159
160 for (i = 0; i < (int)sz; i++) {
161 free(p[i].key);
162 free(p[i].val);
163 }
164 free(p);
165 }
166
167 /*
168 * Parse out key-value pairs from an HTTP request variable.
169 * This can be either a cookie or a POST/GET string, although man.cgi
170 * uses only GET for simplicity.
171 */
172 static void
173 kval_parse(struct kval **kv, size_t *kvsz, char *p)
174 {
175 char *key, *val;
176 size_t sz, cur;
177
178 cur = 0;
179
180 while (p && '\0' != *p) {
181 while (' ' == *p)
182 p++;
183
184 key = p;
185 val = NULL;
186
187 if (NULL != (p = strchr(p, '='))) {
188 *p++ = '\0';
189 val = p;
190
191 sz = strcspn(p, ";&");
192 /* LINTED */
193 p += sz;
194
195 if ('\0' != *p)
196 *p++ = '\0';
197 } else {
198 p = key;
199 sz = strcspn(p, ";&");
200 /* LINTED */
201 p += sz;
202
203 if ('\0' != *p)
204 p++;
205 continue;
206 }
207
208 if ('\0' == *key || '\0' == *val)
209 continue;
210
211 /* Just abort handling. */
212
213 if ( ! kval_decode(key))
214 return;
215 if ( ! kval_decode(val))
216 return;
217
218 if (*kvsz + 1 >= cur) {
219 cur++;
220 *kv = mandoc_realloc
221 (*kv, cur * sizeof(struct kval));
222 }
223
224 (*kv)[(int)*kvsz].key = mandoc_strdup(key);
225 (*kv)[(int)*kvsz].val = mandoc_strdup(val);
226 (*kvsz)++;
227 }
228 }
229
230 /*
231 * HTTP-decode a string. The standard explanation is that this turns
232 * "%4e+foo" into "n foo" in the regular way. This is done in-place
233 * over the allocated string.
234 */
235 static int
236 kval_decode(char *p)
237 {
238 char hex[3];
239 int c;
240
241 hex[2] = '\0';
242
243 for ( ; '\0' != *p; p++) {
244 if ('%' == *p) {
245 if ('\0' == (hex[0] = *(p + 1)))
246 return(0);
247 if ('\0' == (hex[1] = *(p + 2)))
248 return(0);
249 if (1 != sscanf(hex, "%x", &c))
250 return(0);
251 if ('\0' == c)
252 return(0);
253
254 *p = (char)c;
255 memmove(p + 1, p + 3, strlen(p + 3) + 1);
256 } else
257 *p = '+' == *p ? ' ' : *p;
258 }
259
260 *p = '\0';
261 return(1);
262 }
263
/*
 * Emit the CGI response header block and flush it.
 * A "Status:" line is written only for codes other than 200; msg is
 * its reason phrase and is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (200 != code)
		printf("Status: %d %s\n", code, msg);

	puts("Content-Type: text/html; charset=utf-8");
	puts("Cache-Control: no-cache");
	puts("Pragma: no-cache");

	/* The empty line separates the headers from the body. */
	puts("");

	fflush(stdout);
}
278
/*
 * Start an HTML response: the HTTP headers for the given status,
 * then the document prologue up to and including the opening BODY tag.
 */
static void
resp_begin_html(int code, const char *msg)
{

	resp_begin_http(code, msg);

	puts("<!DOCTYPE HTML PUBLIC \n"
	     " \"-//W3C//DTD HTML 4.01//EN\"\n"
	     " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
	     "<HTML>\n"
	     " <HEAD>\n"
	     " <TITLE>System Manpage Reference</TITLE>\n"
	     " </HEAD>\n"
	     " <BODY>\n"
	     "<!-- Begin page content. //-->");
}
295
/* Close the BODY and HTML elements opened by resp_begin_html(). */
static void
resp_end_html(void)
{

	fputs(" </BODY>\n", stdout);
	fputs("</HTML>\n", stdout);
}
302
303 static void
304 resp_searchform(const struct req *req)
305 {
306 int i;
307 const char *expr, *sec, *arch;
308
309 expr = sec = arch = "";
310
311 for (i = 0; i < (int)req->fieldsz; i++)
312 if (0 == strcmp(req->fields[i].key, "expr"))
313 expr = req->fields[i].val;
314 else if (0 == strcmp(req->fields[i].key, "sec"))
315 sec = req->fields[i].val;
316 else if (0 == strcmp(req->fields[i].key, "arch"))
317 arch = req->fields[i].val;
318
319 puts("<!-- Begin search form. //-->");
320 printf("<FORM ACTION=\"");
321 html_print(progname);
322 printf("/search\" METHOD=\"get\">\n");
323 puts(" <FIELDSET>" "\n"
324 " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
325 printf(" Terms: <INPUT TYPE=\"text\" "
326 "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
327 html_print(expr);
328 puts("\">");
329 printf(" Section: <INPUT TYPE=\"text\" "
330 "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
331 html_print(sec);
332 puts("\">");
333 printf(" Arch: <INPUT TYPE=\"text\" "
334 "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
335 html_print(arch);
336 puts("\">");
337 puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
338 }
339
/* Send the index page: a complete document holding only the search form. */
static void
resp_index(const struct req *req)
{

	resp_begin_html(200, NULL);

	resp_searchform(req);

	resp_end_html();
}
348
/* Send a 404 page saying that the requested manual does not exist. */
static void
resp_badmanual(void)
{

	resp_begin_html(404, "Not Found");

	fputs("<P>Requested manual not found.</P>\n", stdout);

	resp_end_html();
}
357
/*
 * Send a 200 page that repeats the search form, followed by a note
 * that the search could not be carried out.
 */
static void
resp_badexpr(const struct req *req)
{

	resp_begin_html(200, NULL);

	resp_searchform(req);
	fputs("<P>Your search didn't work.</P>\n", stdout);

	resp_end_html();
}
367
/* Send a 500 page for failures that have no more specific response. */
static void
resp_bad(void)
{

	resp_begin_html(500, "Internal Server Error");

	fputs("<P>Generic badness happened.</P>\n", stdout);

	resp_end_html();
}
375
/* Send a 500 page reporting that the database could not be used. */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");

	fputs("<P>Your database is broken.</P>\n", stdout);

	resp_end_html();
}
384
385 static void
386 resp_search(struct res *r, size_t sz, void *arg)
387 {
388 int i;
389
390 if (1 == sz) {
391 /*
392 * If we have just one result, then jump there now
393 * without any delay.
394 */
395 puts("Status: 303 See Other");
396 printf("Location: http://%s%s/show/%u/%u.html\n",
397 host, progname,
398 r[0].volume, r[0].rec);
399 puts("Content-Type: text/html; charset=utf-8\n");
400 return;
401 }
402
403 resp_begin_html(200, NULL);
404 resp_searchform((const struct req *)arg);
405
406 if (0 == sz)
407 puts("<P>No results found.</P>");
408
409 for (i = 0; i < (int)sz; i++) {
410 printf("<P><A HREF=\"");
411 html_print(progname);
412 printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
413 html_print(r[i].title);
414 putchar('(');
415 html_print(r[i].cat);
416 if (r[i].arch && '\0' != *r[i].arch) {
417 putchar('/');
418 html_print(r[i].arch);
419 }
420 printf(")</A> ");
421 html_print(r[i].desc);
422 puts("</P>");
423 }
424
425 resp_end_html();
426 }
427
/*
 * Handler for the index page.
 * Neither the manual paths nor the sub-path are needed here.
 */
/* ARGSUSED */
static void
pg_index(const struct manpaths *ps, const struct req *req, char *path)
{

	(void)ps;
	(void)path;

	resp_index(req);
}
435
436 static void
437 format_insecure(const char *file)
438 {
439 pid_t pid;
440 char cmd[MAXPATHLEN];
441
442 strlcpy(cmd, "man=", MAXPATHLEN);
443 strlcat(cmd, progname, MAXPATHLEN);
444 strlcat(cmd, "/search?expr=%N&sec=%S", MAXPATHLEN);
445
446 /* Get ready to call the child mandoc(1) process. */
447
448 if (-1 == (pid = fork()))
449 exit(EXIT_FAILURE);
450
451 if (pid > 0) {
452 waitpid(pid, NULL, 0);
453 return;
454 }
455
456 dup2(STDOUT_FILENO, STDERR_FILENO);
457
458 puts("Content-Type: text/html; charset=utf-8\n");
459
460 fflush(stdout);
461
462 execlp("mandoc", "mandoc", "-T",
463 "html", "-O", cmd, file, (char *)NULL);
464 }
465
/*
 * Serve a pre-formatted file: send the HTTP headers, then copy the
 * file's bytes verbatim to standard output.
 * If the file cannot be opened, a "broken database" page is sent
 * instead.  A read or write error after the headers simply ends the
 * copy, since the response status has already gone out.
 */
static void
format_secure(const char *file)
{
	char	 buf[BUFSIZ];
	int	 fd;
	ssize_t	 ssz, wsz, off;

	if (-1 == (fd = open(file, O_RDONLY, 0))) {
		resp_baddb();
		return;
	}

	/* This flushes stdio, so write(2) below cannot overtake it. */

	resp_begin_http(200, NULL);

	for (;;) {
		ssz = read(fd, buf, sizeof(buf));
		if (-1 == ssz && EINTR == errno)
			continue;
		if (ssz <= 0)
			break;

		/* write(2) may accept less than asked: loop until done. */

		off = 0;
		while (off < ssz) {
			wsz = write(STDOUT_FILENO,
			    buf + off, (size_t)(ssz - off));
			if (-1 == wsz) {
				if (EINTR == errno)
					continue;
				close(fd);
				return;
			}
			off += wsz;
		}
	}

	close(fd);
}
488
489 static void
490 pg_show(const struct manpaths *ps, const struct req *req, char *path)
491 {
492 char *sub;
493 char file[MAXPATHLEN];
494 int rc;
495 unsigned int vol, rec;
496 DB *db;
497 DBT key, val;
498
499 if (NULL == path) {
500 resp_badmanual();
501 return;
502 } else if (NULL == (sub = strrchr(path, '/'))) {
503 resp_badmanual();
504 return;
505 } else
506 *sub++ = '\0';
507
508 if ( ! (atou(path, &vol) && atou(sub, &rec))) {
509 resp_badmanual();
510 return;
511 } else if (vol >= (unsigned int)ps->sz) {
512 resp_badmanual();
513 return;
514 }
515
516 strlcpy(file, ps->paths[vol], MAXPATHLEN);
517 strlcat(file, "/mandoc.index", MAXPATHLEN);
518
519 /* Open the index recno(3) database. */
520
521 db = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
522 if (NULL == db) {
523 resp_baddb();
524 return;
525 }
526
527 key.data = &rec;
528 key.size = 4;
529
530 if (0 != (rc = (*db->get)(db, &key, &val, 0))) {
531 rc < 0 ? resp_baddb() : resp_badmanual();
532 (*db->close)(db);
533 return;
534 }
535
536 /* Extra filename: the first nil-terminated entry. */
537
538 (*db->close)(db);
539
540 strlcpy(file, ps->paths[vol], MAXPATHLEN);
541 strlcat(file, "/", MAXPATHLEN);
542 strlcat(file, (char *)val.data, MAXPATHLEN);
543
544 if ( ! insecure) {
545 strlcat(file, ".html", MAXPATHLEN);
546 format_secure(file);
547 } else
548 format_insecure(file);
549 }
550
551 static void
552 pg_search(const struct manpaths *ps, const struct req *req, char *path)
553 {
554 size_t tt;
555 int i, sz, rc;
556 const char *ep, *start;
557 char **cp;
558 struct opts opt;
559 struct expr *expr;
560
561 expr = NULL;
562 cp = NULL;
563 ep = NULL;
564 sz = 0;
565
566 memset(&opt, 0, sizeof(struct opts));
567
568 for (sz = i = 0; i < (int)req->fieldsz; i++)
569 if (0 == strcmp(req->fields[i].key, "expr"))
570 ep = req->fields[i].val;
571 else if (0 == strcmp(req->fields[i].key, "sec"))
572 opt.cat = req->fields[i].val;
573 else if (0 == strcmp(req->fields[i].key, "arch"))
574 opt.arch = req->fields[i].val;
575
576 /*
577 * Poor man's tokenisation.
578 * Just break apart by spaces.
579 * Yes, this is half-ass. But it works for now.
580 */
581
582 while (ep && isspace((unsigned char)*ep))
583 ep++;
584
585 while (ep && '\0' != *ep) {
586 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
587 start = ep;
588 while ('\0' != *ep && ! isspace((unsigned char)*ep))
589 ep++;
590 cp[sz] = mandoc_malloc((ep - start) + 1);
591 memcpy(cp[sz], start, ep - start);
592 cp[sz++][ep - start] = '\0';
593 while (isspace((unsigned char)*ep))
594 ep++;
595 }
596
597 rc = -1;
598
599 /*
600 * Pump down into apropos backend.
601 * The resp_search() function is called with the results.
602 */
603
604 if (NULL != (expr = exprcomp(sz, cp, &tt)))
605 rc = apropos_search
606 (ps->sz, ps->paths, &opt,
607 expr, tt, (void *)req, resp_search);
608
609 /* ...unless errors occured. */
610
611 if (0 == rc)
612 resp_baddb();
613 else if (-1 == rc)
614 resp_badexpr(req);
615
616 for (i = 0; i < sz; i++)
617 free(cp[i]);
618
619 free(cp);
620 exprfree(expr);
621 }
622
623 int
624 main(void)
625 {
626 int i;
627 struct req req;
628 char *p, *path, *subpath;
629 struct manpaths paths;
630
631 /* HTTP init: read and parse the query string. */
632
633 progname = getenv("SCRIPT_NAME");
634 if (NULL == progname)
635 progname = "";
636
637 cache = getenv("CACHE_DIR");
638 if (NULL == cache)
639 cache = "/cache/man.cgi";
640
641 if (NULL == getenv("INSECURE")) {
642 insecure = 0;
643 if (-1 == chdir(cache)) {
644 resp_bad();
645 return(EXIT_FAILURE);
646 }
647 }
648
649 host = getenv("HTTP_HOST");
650 if (NULL == host)
651 host = "localhost";
652
653 memset(&req, 0, sizeof(struct req));
654
655 if (NULL != (p = getenv("QUERY_STRING")))
656 kval_parse(&req.fields, &req.fieldsz, p);
657
658 /* Resolve leading subpath component. */
659
660 subpath = path = NULL;
661 req.page = PAGE__MAX;
662
663 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
664 req.page = PAGE_INDEX;
665
666 if (NULL != path && '/' == *path && '\0' == *++path)
667 req.page = PAGE_INDEX;
668
669 /* Strip file suffix. */
670
671 if (NULL != path && NULL != (p = strrchr(path, '.')))
672 if (NULL != p && NULL == strchr(p, '/'))
673 *p++ = '\0';
674
675 /* Resolve subpath component. */
676
677 if (NULL != path && NULL != (subpath = strchr(path, '/')))
678 *subpath++ = '\0';
679
680 /* Map path into one we recognise. */
681
682 if (NULL != path && '\0' != *path)
683 for (i = 0; i < (int)PAGE__MAX; i++)
684 if (0 == strcmp(pages[i], path)) {
685 req.page = (enum page)i;
686 break;
687 }
688
689 /* Initialise MANPATH. */
690
691 memset(&paths, 0, sizeof(struct manpaths));
692 if ( ! insecure)
693 manpath_manconf("etc/man.conf", &paths);
694 else
695 manpath_parse(&paths, NULL, NULL);
696
697 /* Route pages. */
698
699 switch (req.page) {
700 case (PAGE_INDEX):
701 pg_index(&paths, &req, subpath);
702 break;
703 case (PAGE_SEARCH):
704 pg_search(&paths, &req, subpath);
705 break;
706 case (PAGE_SHOW):
707 pg_show(&paths, &req, subpath);
708 break;
709 default:
710 break;
711 }
712
713 manpath_free(&paths);
714 kval_free(req.fields, req.fieldsz);
715
716 return(EXIT_SUCCESS);
717 }