]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Implement mdoc(7)-like output style variant for man(7) documents:
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.9 2011/12/04 22:52:50 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22 #include <sys/wait.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "apropos_db.h"
38 #include "mandoc.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manpath.h"
43
44 #ifdef __linux__
45 # include <db_185.h>
46 #else
47 # include <db.h>
48 #endif
49
/*
 * The pages man.cgi can serve.
 * The enumerators double as indices into the pages[] name table.
 */
enum	page {
	PAGE_INDEX = 0,	/* front page: the search form only */
	PAGE_SEARCH,	/* search results */
	PAGE_SHOW,	/* one manual, looked up by volume and record */
	PAGE__MAX	/* number of pages; also used for "unknown page" */
};
56
/*
 * One decoded key-value pair of a request.
 * Both strings are heap copies released by kval_free().
 */
struct	kval {
	char		*key; /* field name */
	char		*val; /* field value */
};
61
62 struct req {
63 struct kval *fields;
64 size_t fieldsz;
65 enum page page;
66 };
67
/* Forward declarations of the file-local functions. */
static	int		 atou(const char *buf, unsigned *v);
static	void		 catman(const char *file);
static	void		 format(const char *file);
static	void		 html_print(const char *p);
static	int		 kval_decode(char *p);
static	void		 kval_parse(struct kval **kv, size_t *kvsz, char *p);
static	void		 kval_free(struct kval *p, size_t sz);
static	void		 pg_index(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_search(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_show(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 resp_bad(void);
static	void		 resp_baddb(void);
static	void		 resp_badexpr(const struct req *req);
static	void		 resp_badmanual(void);
static	void		 resp_badpage(void);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_index(const struct req *req);
static	void		 resp_search(struct res *r, size_t sz, void *arg);
static	void		 resp_searchform(const struct req *req);
92
/* Request-wide strings; main() fills these in from the environment. */
static	const char	*progname = NULL; /* SCRIPT_NAME, or "" */
static	const char	*cache = NULL; /* CACHE_DIR, or "/cache/man.cgi" */
static	const char	*host = NULL; /* HTTP_HOST, or "localhost" */
96
97 static const char * const pages[PAGE__MAX] = {
98 "index", /* PAGE_INDEX */
99 "search", /* PAGE_SEARCH */
100 "show", /* PAGE_SHOW */
101 };
102
/*
 * Convert a decimal string into an unsigned integer using the checking
 * recipe suggested by OpenBSD's strtol(3) manual; strtonum(3) is
 * avoided for portability's sake.
 * Returns 1 and stores the number in "v" on success.
 * Returns 0, leaving "v" alone, if the string is empty, is followed by
 * trailing characters, or does not fit into [0, UINT_MAX].
 */
static int
atou(const char *buf, unsigned *v)
{
	char		*end;
	long		 num;
	int		 overflow;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Nothing to convert, or something left over after the number. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	overflow = ERANGE == errno &&
		(LONG_MAX == num || LONG_MIN == num);

	if (overflow || num < 0 || num > UINT_MAX)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
125
/*
 * Print a string to standard output, replacing the four characters
 * that are special in HTML text and attribute values with character
 * entity references: '"' as &quot;, '&' as &amp;, '>' as &gt; and
 * '<' as &lt;.
 * A NULL string prints nothing.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_print(const char *p)
{
	char		 c;

	if (NULL == p)
		return;

	while ('\0' != *p)
		switch ((c = *p++)) {
		case ('"'):
			/*
			 * The entity is named "quot".  "&quote;" is not
			 * defined by HTML, so browsers show it literally.
			 */
			printf("&quot;");
			break;
		case ('&'):
			printf("&amp;");
			break;
		case ('>'):
			printf("&gt;");
			break;
		case ('<'):
			printf("&lt;");
			break;
		default:
			putchar((unsigned char)c);
			break;
		}
}
157
158 static void
159 kval_free(struct kval *p, size_t sz)
160 {
161 int i;
162
163 for (i = 0; i < (int)sz; i++) {
164 free(p[i].key);
165 free(p[i].val);
166 }
167 free(p);
168 }
169
170 /*
171 * Parse out key-value pairs from an HTTP request variable.
172 * This can be either a cookie or a POST/GET string, although man.cgi
173 * uses only GET for simplicity.
174 */
175 static void
176 kval_parse(struct kval **kv, size_t *kvsz, char *p)
177 {
178 char *key, *val;
179 size_t sz, cur;
180
181 cur = 0;
182
183 while (p && '\0' != *p) {
184 while (' ' == *p)
185 p++;
186
187 key = p;
188 val = NULL;
189
190 if (NULL != (p = strchr(p, '='))) {
191 *p++ = '\0';
192 val = p;
193
194 sz = strcspn(p, ";&");
195 /* LINTED */
196 p += sz;
197
198 if ('\0' != *p)
199 *p++ = '\0';
200 } else {
201 p = key;
202 sz = strcspn(p, ";&");
203 /* LINTED */
204 p += sz;
205
206 if ('\0' != *p)
207 p++;
208 continue;
209 }
210
211 if ('\0' == *key || '\0' == *val)
212 continue;
213
214 /* Just abort handling. */
215
216 if ( ! kval_decode(key))
217 return;
218 if ( ! kval_decode(val))
219 return;
220
221 if (*kvsz + 1 >= cur) {
222 cur++;
223 *kv = mandoc_realloc
224 (*kv, cur * sizeof(struct kval));
225 }
226
227 (*kv)[(int)*kvsz].key = mandoc_strdup(key);
228 (*kv)[(int)*kvsz].val = mandoc_strdup(val);
229 (*kvsz)++;
230 }
231 }
232
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way: every "%XX" escape
 * becomes the byte with hexadecimal value XX and every '+' becomes a
 * blank.  This is done in-place over the allocated string.
 * Returns 1 on success.
 * Returns 0 if an escape is truncated, is not made of exactly two
 * hexadecimal digits, or encodes a NUL byte; the string may then be
 * left partially decoded.
 */
static int
kval_decode(char *p)
{
	char		 hex[3];
	unsigned int	 c;

	hex[2] = '\0';

	for ( ; '\0' != *p; p++) {
		if ('%' != *p) {
			if ('+' == *p)
				*p = ' ';
			continue;
		}

		/*
		 * Insist on two real hexadecimal digits.  sscanf(3)
		 * alone would also accept blanks, signs, and a single
		 * digit followed by junk.  The terminating NUL is not
		 * a digit either, so truncated escapes end up here
		 * without reading past the end of the string.
		 */

		hex[0] = *(p + 1);
		if ( ! isxdigit((unsigned char)hex[0]))
			return(0);
		hex[1] = *(p + 2);
		if ( ! isxdigit((unsigned char)hex[1]))
			return(0);

		if (1 != sscanf(hex, "%x", &c))
			return(0);
		if (0 == c)
			return(0);

		/*
		 * Replace the '%' and close the gap left by the two
		 * digits.  The loop then steps past the decoded byte,
		 * so it is never decoded a second time.
		 */

		*p = (char)c;
		memmove(p + 1, p + 3, strlen(p + 3) + 1);
	}

	return(1);
}
266
/*
 * Write the CGI response headers to standard output and flush them.
 * A "Status:" line built from "code" and "msg" is sent first for any
 * code other than 200; "msg" is not used for 200.
 * The fixed headers end with the empty line that closes the header
 * block.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char hdrs[] =
		"Content-Type: text/html; charset=utf-8" "\n"
		"Cache-Control: no-cache" "\n"
		"Pragma: no-cache" "\n"
		"";

	if (code != 200)
		printf("Status: %d %s\n", code, msg);

	/* puts(3) appends the newline that makes up the empty line. */
	puts(hdrs);
	fflush(stdout);
}
281
/*
 * Start an HTML response: send the HTTP headers for "code" and "msg",
 * then the document type, the HEAD element and the opening BODY tag.
 * Callers print the page content and finish with resp_end_html().
 */
static void
resp_begin_html(int code, const char *msg)
{
	static const char preamble[] =
		"<!DOCTYPE HTML PUBLIC " "\n"
		" \"-//W3C//DTD HTML 4.01//EN\"" "\n"
		" \"http://www.w3.org/TR/html4/strict.dtd\">" "\n"
		"<HTML>" "\n"
		" <HEAD>" "\n"
		" <TITLE>System Manpage Reference</TITLE>" "\n"
		" </HEAD>" "\n"
		" <BODY>" "\n"
		"<!-- Begin page content. //-->";

	resp_begin_http(code, msg);
	puts(preamble);
}
298
/*
 * Finish an HTML response by closing the BODY and HTML elements
 * opened by resp_begin_html().
 */
static void
resp_end_html(void)
{
	static const char trailer[] = " </BODY>\n</HTML>";

	puts(trailer);
}
305
306 static void
307 resp_searchform(const struct req *req)
308 {
309 int i;
310 const char *expr, *sec, *arch;
311
312 expr = sec = arch = "";
313
314 for (i = 0; i < (int)req->fieldsz; i++)
315 if (0 == strcmp(req->fields[i].key, "expr"))
316 expr = req->fields[i].val;
317 else if (0 == strcmp(req->fields[i].key, "sec"))
318 sec = req->fields[i].val;
319 else if (0 == strcmp(req->fields[i].key, "arch"))
320 arch = req->fields[i].val;
321
322 puts("<!-- Begin search form. //-->");
323 printf("<FORM ACTION=\"");
324 html_print(progname);
325 printf("/search.html\" METHOD=\"get\">\n");
326 puts(" <FIELDSET>" "\n"
327 " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
328 printf(" Terms: <INPUT TYPE=\"text\" "
329 "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
330 html_print(expr);
331 puts("\">");
332 printf(" Section: <INPUT TYPE=\"text\" "
333 "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
334 html_print(sec);
335 puts("\">");
336 printf(" Arch: <INPUT TYPE=\"text\" "
337 "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
338 html_print(arch);
339 puts("\">");
340 puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
341 }
342
/*
 * The front page: a 200 response whose only content is the search
 * form, pre-filled from the fields of "req".
 */
static void
resp_index(const struct req *req)
{
	const int	 code = 200;

	resp_begin_html(code, NULL);
	resp_searchform(req);
	resp_end_html();
}
351
/*
 * Answer a request for a page that does not exist with a 404.
 */
static void
resp_badpage(void)
{
	static const char reason[] = "Not Found";

	resp_begin_html(404, reason);
	puts("<P>Page not found.</P>");
	resp_end_html();
}
360
/*
 * Answer a request for a manual that cannot be found with a 404.
 */
static void
resp_badmanual(void)
{
	static const char reason[] = "Not Found";

	resp_begin_html(404, reason);
	puts("<P>Requested manual not found.</P>");
	resp_end_html();
}
369
/*
 * Report a search that could not be carried out.
 * This is a regular 200 page: the search form, still filled in from
 * "req" so that the query can be corrected, followed by a notice.
 */
static void
resp_badexpr(const struct req *req)
{
	const int	 code = 200;

	resp_begin_html(code, NULL);
	resp_searchform(req);
	puts("<P>Your search didn't work.</P>");
	resp_end_html();
}
379
/*
 * Catch-all failure page, sent with status 500.
 */
static void
resp_bad(void)
{
	static const char reason[] = "Internal Server Error";

	resp_begin_html(500, reason);
	puts("<P>Generic badness happened.</P>");
	resp_end_html();
}
387
/*
 * Failure page for database and manual file problems, sent with
 * status 500.
 */
static void
resp_baddb(void)
{
	static const char reason[] = "Internal Server Error";

	resp_begin_html(500, reason);
	puts("<P>Your database is broken.</P>");
	resp_end_html();
}
396
397 static void
398 resp_search(struct res *r, size_t sz, void *arg)
399 {
400 int i;
401
402 if (1 == sz) {
403 /*
404 * If we have just one result, then jump there now
405 * without any delay.
406 */
407 puts("Status: 303 See Other");
408 printf("Location: http://%s%s/show/%u/%u.html\n",
409 host, progname,
410 r[0].volume, r[0].rec);
411 puts("Content-Type: text/html; charset=utf-8\n");
412 return;
413 }
414
415 resp_begin_html(200, NULL);
416 resp_searchform((const struct req *)arg);
417
418 if (0 == sz)
419 puts("<P>No results found.</P>");
420
421 for (i = 0; i < (int)sz; i++) {
422 printf("<P><A HREF=\"");
423 html_print(progname);
424 printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
425 html_print(r[i].title);
426 putchar('(');
427 html_print(r[i].cat);
428 if (r[i].arch && '\0' != *r[i].arch) {
429 putchar('/');
430 html_print(r[i].arch);
431 }
432 printf(")</A> ");
433 html_print(r[i].desc);
434 puts("</P>");
435 }
436
437 resp_end_html();
438 }
439
/*
 * Handler for PAGE_INDEX: print the front page.
 * The manpaths and the subpath are accepted only so that the handler
 * has the same signature as the other page handlers.
 */
/* ARGSUSED */
static void
pg_index(const struct manpaths *ps, const struct req *req, char *path)
{
	(void)ps;
	(void)path;

	resp_index(req);
}
447
/*
 * Send a file to the client verbatim, after the HTTP headers.
 * pg_show() uses this for index records of type "cat".
 * If the file cannot be opened, the database error page is sent
 * instead.  Read and write errors after the headers have gone out can
 * only be reported on standard error; the transfer stops there.
 */
static void
catman(const char *file)
{
	int		 fd;
	char		 buf[BUFSIZ];
	ssize_t		 ssz, wsz, off;

	if (-1 == (fd = open(file, O_RDONLY, 0))) {
		resp_baddb();
		return;
	}

	/* This flushes stdio, so using write(2) directly is safe. */

	resp_begin_http(200, NULL);

	while ((ssz = read(fd, buf, sizeof(buf))) > 0)
		for (off = 0; off < ssz; off += wsz) {
			/*
			 * write(2) may accept fewer bytes than asked
			 * for: keep going until the chunk is out.
			 */
			wsz = write(STDOUT_FILENO,
				buf + off, (size_t)(ssz - off));
			if (wsz > 0)
				continue;
			if (wsz < 0)
				perror("write");
			goto out;
		}

	if (ssz < 0)
		perror(file);
out:
	close(fd);
}
470
471 static void
472 format(const char *file)
473 {
474 struct mparse *mp;
475 int fd;
476 struct mdoc *mdoc;
477 struct man *man;
478 void *vp;
479 enum mandoclevel rc;
480
481 if (-1 == (fd = open(file, O_RDONLY, 0))) {
482 resp_baddb();
483 return;
484 }
485
486 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
487 rc = mparse_readfd(mp, fd, file);
488 close(fd);
489
490 if (rc >= MANDOCLEVEL_FATAL) {
491 resp_baddb();
492 return;
493 }
494
495 mparse_result(mp, &mdoc, &man);
496 vp = html_alloc(NULL);
497
498 if (NULL != mdoc) {
499 resp_begin_http(200, NULL);
500 html_mdoc(vp, mdoc);
501 } else if (NULL != man) {
502 resp_begin_http(200, NULL);
503 html_man(vp, man);
504 } else
505 resp_baddb();
506
507 html_free(vp);
508 mparse_free(mp);
509 }
510
511 static void
512 pg_show(const struct manpaths *ps, const struct req *req, char *path)
513 {
514 char *sub;
515 char file[MAXPATHLEN];
516 const char *fn, *cp;
517 int rc;
518 unsigned int vol, rec;
519 DB *idx;
520 DBT key, val;
521
522 if (NULL == path) {
523 resp_badmanual();
524 return;
525 } else if (NULL == (sub = strrchr(path, '/'))) {
526 resp_badmanual();
527 return;
528 } else
529 *sub++ = '\0';
530
531 if ( ! (atou(path, &vol) && atou(sub, &rec))) {
532 resp_badmanual();
533 return;
534 } else if (vol >= (unsigned int)ps->sz) {
535 resp_badmanual();
536 return;
537 }
538
539 strlcpy(file, ps->paths[vol], MAXPATHLEN);
540 strlcat(file, "/mandoc.index", MAXPATHLEN);
541
542 /* Open the index recno(3) database. */
543
544 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
545 if (NULL == idx) {
546 resp_baddb();
547 return;
548 }
549
550 key.data = &rec;
551 key.size = 4;
552
553 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
554 rc < 0 ? resp_baddb() : resp_badmanual();
555 goto out;
556 }
557
558 cp = (char *)val.data;
559
560 if (NULL == (fn = memchr(cp, '\0', val.size)))
561 resp_baddb();
562 else if (++fn - cp >= (int)val.size)
563 resp_baddb();
564 else if (NULL == memchr(fn, '\0', val.size - (fn - cp)))
565 resp_baddb();
566 else {
567 strlcpy(file, ps->paths[vol], MAXPATHLEN);
568 strlcat(file, "/", MAXPATHLEN);
569 strlcat(file, fn, MAXPATHLEN);
570 if (0 == strcmp(cp, "cat"))
571 catman(file);
572 else
573 format(file);
574 }
575 out:
576 (*idx->close)(idx);
577 }
578
579 static void
580 pg_search(const struct manpaths *ps, const struct req *req, char *path)
581 {
582 size_t tt;
583 int i, sz, rc;
584 const char *ep, *start;
585 char **cp;
586 struct opts opt;
587 struct expr *expr;
588
589 expr = NULL;
590 cp = NULL;
591 ep = NULL;
592 sz = 0;
593
594 memset(&opt, 0, sizeof(struct opts));
595
596 for (sz = i = 0; i < (int)req->fieldsz; i++)
597 if (0 == strcmp(req->fields[i].key, "expr"))
598 ep = req->fields[i].val;
599 else if (0 == strcmp(req->fields[i].key, "sec"))
600 opt.cat = req->fields[i].val;
601 else if (0 == strcmp(req->fields[i].key, "arch"))
602 opt.arch = req->fields[i].val;
603
604 /*
605 * Poor man's tokenisation.
606 * Just break apart by spaces.
607 * Yes, this is half-ass. But it works for now.
608 */
609
610 while (ep && isspace((unsigned char)*ep))
611 ep++;
612
613 while (ep && '\0' != *ep) {
614 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
615 start = ep;
616 while ('\0' != *ep && ! isspace((unsigned char)*ep))
617 ep++;
618 cp[sz] = mandoc_malloc((ep - start) + 1);
619 memcpy(cp[sz], start, ep - start);
620 cp[sz++][ep - start] = '\0';
621 while (isspace((unsigned char)*ep))
622 ep++;
623 }
624
625 rc = -1;
626
627 /*
628 * Pump down into apropos backend.
629 * The resp_search() function is called with the results.
630 */
631
632 if (NULL != (expr = exprcomp(sz, cp, &tt)))
633 rc = apropos_search
634 (ps->sz, ps->paths, &opt,
635 expr, tt, (void *)req, resp_search);
636
637 /* ...unless errors occured. */
638
639 if (0 == rc)
640 resp_baddb();
641 else if (-1 == rc)
642 resp_badexpr(req);
643
644 for (i = 0; i < sz; i++)
645 free(cp[i]);
646
647 free(cp);
648 exprfree(expr);
649 }
650
651 int
652 main(void)
653 {
654 int i;
655 struct req req;
656 char *p, *path, *subpath;
657 struct manpaths paths;
658
659 /* HTTP init: read and parse the query string. */
660
661 progname = getenv("SCRIPT_NAME");
662 if (NULL == progname)
663 progname = "";
664
665 cache = getenv("CACHE_DIR");
666 if (NULL == cache)
667 cache = "/cache/man.cgi";
668
669 if (-1 == chdir(cache)) {
670 resp_bad();
671 return(EXIT_FAILURE);
672 }
673
674 host = getenv("HTTP_HOST");
675 if (NULL == host)
676 host = "localhost";
677
678 memset(&req, 0, sizeof(struct req));
679
680 if (NULL != (p = getenv("QUERY_STRING")))
681 kval_parse(&req.fields, &req.fieldsz, p);
682
683 /* Resolve leading subpath component. */
684
685 subpath = path = NULL;
686 req.page = PAGE__MAX;
687
688 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
689 req.page = PAGE_INDEX;
690
691 if (NULL != path && '/' == *path && '\0' == *++path)
692 req.page = PAGE_INDEX;
693
694 /* Strip file suffix. */
695
696 if (NULL != path && NULL != (p = strrchr(path, '.')))
697 if (NULL != p && NULL == strchr(p, '/'))
698 *p++ = '\0';
699
700 /* Resolve subpath component. */
701
702 if (NULL != path && NULL != (subpath = strchr(path, '/')))
703 *subpath++ = '\0';
704
705 /* Map path into one we recognise. */
706
707 if (NULL != path && '\0' != *path)
708 for (i = 0; i < (int)PAGE__MAX; i++)
709 if (0 == strcmp(pages[i], path)) {
710 req.page = (enum page)i;
711 break;
712 }
713
714 /* Initialise MANPATH. */
715
716 memset(&paths, 0, sizeof(struct manpaths));
717 manpath_manconf("etc/catman.conf", &paths);
718
719 /* Route pages. */
720
721 switch (req.page) {
722 case (PAGE_INDEX):
723 pg_index(&paths, &req, subpath);
724 break;
725 case (PAGE_SEARCH):
726 pg_search(&paths, &req, subpath);
727 break;
728 case (PAGE_SHOW):
729 pg_show(&paths, &req, subpath);
730 break;
731 default:
732 resp_badpage();
733 break;
734 }
735
736 manpath_free(&paths);
737 kval_free(req.fields, req.fieldsz);
738
739 return(EXIT_SUCCESS);
740 }