]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Back out lorder, which doesn't seem necessary (?). I think this means all
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.8 2011/11/27 11:46:44 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22 #include <sys/wait.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "apropos_db.h"
38 #include "mandoc.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manpath.h"
43
44 #ifdef __linux__
45 # include <db_185.h>
46 #else
47 # include <db.h>
48 #endif
49
/*
 * Pages this CGI can serve; main() picks one from the leading component
 * of PATH_INFO.  PAGE__MAX is not a page: it sizes the pages[] table and
 * is the value main() starts from before a page has been recognised.
 */
enum	page {
	PAGE_INDEX = 0,
	PAGE_SEARCH,
	PAGE_SHOW,
	PAGE__MAX
};
56
/*
 * A single key-value pair taken from the request.
 * Both strings are heap copies made by kval_parse() and released by
 * kval_free().
 */
struct	kval {
	char		*key;
	char		*val;
};
61
62 struct req {
63 struct kval *fields;
64 size_t fieldsz;
65 enum page page;
66 };
67
/* Forward declarations for the file-local helpers, in alphabetical groups. */
static	int		  atou(const char *buf, unsigned *v);
static	void		  format(const char *file);
static	void		  html_print(const char *p);
static	int		  kval_decode(char *p);
static	void		  kval_parse(struct kval **kv,
				size_t *kvsz, char *p);
static	void		  kval_free(struct kval *p, size_t sz);
static	void		  pg_index(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		  pg_search(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		  pg_show(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		  resp_bad(void);
static	void		  resp_baddb(void);
static	void		  resp_badexpr(const struct req *req);
static	void		  resp_badmanual(void);
static	void		  resp_begin_html(int code, const char *msg);
static	void		  resp_begin_http(int code, const char *msg);
static	void		  resp_end_html(void);
static	void		  resp_index(const struct req *req);
static	void		  resp_search(struct res *r, size_t sz, void *arg);
static	void		  resp_searchform(const struct req *req);
90
/* Process-wide settings; main() fills each one in from the environment. */
static	const char	 *progname = NULL; /* SCRIPT_NAME, or "" */
static	const char	 *cache = NULL; /* CACHE_DIR, or a built-in default */
static	const char	 *host = NULL; /* HTTP_HOST, or "localhost" */
94
95 static const char * const pages[PAGE__MAX] = {
96 "index", /* PAGE_INDEX */
97 "search", /* PAGE_SEARCH */
98 "show", /* PAGE_SHOW */
99 };
100
/*
 * Convert a decimal string to an unsigned int.
 * This follows the checking recipe suggested in OpenBSD's strtol(3)
 * manual; strtonum(3) is not used for portability's sake.
 * Returns 1 and stores the result in *v on success.  Returns 0, leaving
 * *v untouched, if the string is empty, has trailing non-numeric
 * characters, or does not fit in [0, UINT_MAX].
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Nothing to convert, or junk after the number. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* The value did not even fit into a long. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Fits into a long, but not into an unsigned int. */
	if (num < 0 || num > UINT_MAX)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
123
/*
 * Print a string to standard output, escaping HTML along the way: the
 * characters '"', '&', '<' and '>' are written as entity references so
 * the text is safe both as element content and inside a double-quoted
 * attribute value.  A NULL pointer prints nothing.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_print(const char *p)
{
	char	 c;

	if (NULL == p)
		return;

	while ('\0' != (c = *p++))
		switch (c) {
		case ('"'):
			/* The entity is "quot"; "quote" is not defined. */
			fputs("&quot;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('<'):
			fputs("&lt;", stdout);
			break;
		default:
			putchar((unsigned char)c);
			break;
		}
}
155
156 static void
157 kval_free(struct kval *p, size_t sz)
158 {
159 int i;
160
161 for (i = 0; i < (int)sz; i++) {
162 free(p[i].key);
163 free(p[i].val);
164 }
165 free(p);
166 }
167
168 /*
169 * Parse out key-value pairs from an HTTP request variable.
170 * This can be either a cookie or a POST/GET string, although man.cgi
171 * uses only GET for simplicity.
172 */
173 static void
174 kval_parse(struct kval **kv, size_t *kvsz, char *p)
175 {
176 char *key, *val;
177 size_t sz, cur;
178
179 cur = 0;
180
181 while (p && '\0' != *p) {
182 while (' ' == *p)
183 p++;
184
185 key = p;
186 val = NULL;
187
188 if (NULL != (p = strchr(p, '='))) {
189 *p++ = '\0';
190 val = p;
191
192 sz = strcspn(p, ";&");
193 /* LINTED */
194 p += sz;
195
196 if ('\0' != *p)
197 *p++ = '\0';
198 } else {
199 p = key;
200 sz = strcspn(p, ";&");
201 /* LINTED */
202 p += sz;
203
204 if ('\0' != *p)
205 p++;
206 continue;
207 }
208
209 if ('\0' == *key || '\0' == *val)
210 continue;
211
212 /* Just abort handling. */
213
214 if ( ! kval_decode(key))
215 return;
216 if ( ! kval_decode(val))
217 return;
218
219 if (*kvsz + 1 >= cur) {
220 cur++;
221 *kv = mandoc_realloc
222 (*kv, cur * sizeof(struct kval));
223 }
224
225 (*kv)[(int)*kvsz].key = mandoc_strdup(key);
226 (*kv)[(int)*kvsz].val = mandoc_strdup(val);
227 (*kvsz)++;
228 }
229 }
230
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way: '+' becomes a space and
 * "%XX" becomes the byte with hexadecimal value XX.  This is done
 * in-place over the string, which can only get shorter.
 * Returns 1 on success.  Returns 0 if an escape is truncated, is not
 * made of exactly two hexadecimal digits, or would decode to a nil
 * byte; the string is then left partially decoded.
 */
static int
kval_decode(char *p)
{
	char	 hex[3];
	long	 c;

	hex[2] = '\0';

	for ( ; '\0' != *p; p++) {
		if ('%' != *p) {
			if ('+' == *p)
				*p = ' ';
			continue;
		}

		if ('\0' == (hex[0] = *(p + 1)))
			return(0);
		if ('\0' == (hex[1] = *(p + 2)))
			return(0);

		/* Insist on two real hex digits: no sign, no blanks. */

		if ( ! isxdigit((unsigned char)hex[0]) ||
		     ! isxdigit((unsigned char)hex[1]))
			return(0);

		c = strtol(hex, NULL, 16);
		if (0 == c)
			return(0);

		/*
		 * Put the byte where the '%' was and close the gap.
		 * The loop increment then steps over the decoded byte,
		 * so it is never interpreted a second time.
		 */

		*p = (char)c;
		memmove(p + 1, p + 3, strlen(p + 3) + 1);
	}

	return(1);
}
264
/*
 * Write the CGI response header block to standard output and flush it.
 * A "Status:" line carrying code and msg is written only when code is
 * not 200; msg is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (200 != code)
		printf("Status: %d %s\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\n", stdout);
	fputs("Cache-Control: no-cache\n", stdout);
	fputs("Pragma: no-cache\n", stdout);

	/* An empty line ends the header block. */
	putchar('\n');

	fflush(stdout);
}
279
/*
 * Begin an HTML response: write the HTTP header block for code and msg,
 * then the document prologue up to and including the opening of BODY.
 * resp_end_html() writes the matching epilogue.
 */
static void
resp_begin_html(int code, const char *msg)
{
	static const char prologue[] =
		"<!DOCTYPE HTML PUBLIC \n"
		" \"-//W3C//DTD HTML 4.01//EN\"\n"
		" \"http://www.w3.org/TR/html4/strict.dtd\">\n"
		"<HTML>\n"
		" <HEAD>\n"
		" <TITLE>System Manpage Reference</TITLE>\n"
		" </HEAD>\n"
		" <BODY>\n"
		"<!-- Begin page content. //-->";

	resp_begin_http(code, msg);
	puts(prologue);
}
296
/*
 * Finish an HTML response by closing the BODY and HTML elements.
 */
static void
resp_end_html(void)
{

	fputs(" </BODY>\n</HTML>\n", stdout);
}
303
304 static void
305 resp_searchform(const struct req *req)
306 {
307 int i;
308 const char *expr, *sec, *arch;
309
310 expr = sec = arch = "";
311
312 for (i = 0; i < (int)req->fieldsz; i++)
313 if (0 == strcmp(req->fields[i].key, "expr"))
314 expr = req->fields[i].val;
315 else if (0 == strcmp(req->fields[i].key, "sec"))
316 sec = req->fields[i].val;
317 else if (0 == strcmp(req->fields[i].key, "arch"))
318 arch = req->fields[i].val;
319
320 puts("<!-- Begin search form. //-->");
321 printf("<FORM ACTION=\"");
322 html_print(progname);
323 printf("/search\" METHOD=\"get\">\n");
324 puts(" <FIELDSET>" "\n"
325 " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
326 printf(" Terms: <INPUT TYPE=\"text\" "
327 "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
328 html_print(expr);
329 puts("\">");
330 printf(" Section: <INPUT TYPE=\"text\" "
331 "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
332 html_print(sec);
333 puts("\">");
334 printf(" Arch: <INPUT TYPE=\"text\" "
335 "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
336 html_print(arch);
337 puts("\">");
338 puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
339 }
340
/*
 * Respond with the front page: a complete HTML document holding nothing
 * but the search form.
 */
static void
resp_index(const struct req *req)
{

	resp_begin_html(200, NULL);

	resp_searchform(req);

	resp_end_html();
}
349
/*
 * Respond with a 404 page saying the requested manual does not exist.
 */
static void
resp_badmanual(void)
{

	resp_begin_html(404, "Not Found");
	fputs("<P>Requested manual not found.</P>\n", stdout);
	resp_end_html();
}
358
/*
 * Respond to a search that could not be run: show the search form
 * again, pre-filled from the request, followed by a short notice.
 * The response status is 200.
 */
static void
resp_badexpr(const struct req *req)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	fputs("<P>Your search didn't work.</P>\n", stdout);
	resp_end_html();
}
368
/*
 * Respond with a generic 500 page.
 */
static void
resp_bad(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Generic badness happened.</P>\n", stdout);
	resp_end_html();
}
376
/*
 * Respond with a 500 page blaming the manual database.
 */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Your database is broken.</P>\n", stdout);
	resp_end_html();
}
385
386 static void
387 resp_search(struct res *r, size_t sz, void *arg)
388 {
389 int i;
390
391 if (1 == sz) {
392 /*
393 * If we have just one result, then jump there now
394 * without any delay.
395 */
396 puts("Status: 303 See Other");
397 printf("Location: http://%s%s/show/%u/%u.html\n",
398 host, progname,
399 r[0].volume, r[0].rec);
400 puts("Content-Type: text/html; charset=utf-8\n");
401 return;
402 }
403
404 resp_begin_html(200, NULL);
405 resp_searchform((const struct req *)arg);
406
407 if (0 == sz)
408 puts("<P>No results found.</P>");
409
410 for (i = 0; i < (int)sz; i++) {
411 printf("<P><A HREF=\"");
412 html_print(progname);
413 printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
414 html_print(r[i].title);
415 putchar('(');
416 html_print(r[i].cat);
417 if (r[i].arch && '\0' != *r[i].arch) {
418 putchar('/');
419 html_print(r[i].arch);
420 }
421 printf(")</A> ");
422 html_print(r[i].desc);
423 puts("</P>");
424 }
425
426 resp_end_html();
427 }
428
/*
 * Page handler for PAGE_INDEX: show the front page.
 * The manpath set and the subpath are taken only so that the signature
 * matches the other page handlers; neither is used.
 */
/* ARGSUSED */
static void
pg_index(const struct manpaths *ps, const struct req *req, char *path)
{

	(void)ps;
	(void)path;

	resp_index(req);
}
436
437 static void
438 format(const char *file)
439 {
440 struct mparse *mp;
441 int fd;
442 struct mdoc *mdoc;
443 struct man *man;
444 void *vp;
445 enum mandoclevel rc;
446
447 if (-1 == (fd = open(file, O_RDONLY, 0))) {
448 resp_baddb();
449 return;
450 }
451
452 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
453 rc = mparse_readfd(mp, fd, file);
454 close(fd);
455
456 if (rc >= MANDOCLEVEL_FATAL) {
457 resp_baddb();
458 return;
459 }
460
461 mparse_result(mp, &mdoc, &man);
462 vp = html_alloc(NULL);
463
464 if (NULL != mdoc) {
465 resp_begin_http(200, NULL);
466 html_mdoc(vp, mdoc);
467 } else if (NULL != man) {
468 resp_begin_http(200, NULL);
469 html_man(vp, man);
470 } else
471 resp_baddb();
472
473 html_free(vp);
474 mparse_free(mp);
475 }
476
477 static void
478 pg_show(const struct manpaths *ps, const struct req *req, char *path)
479 {
480 char *sub;
481 char file[MAXPATHLEN];
482 int rc;
483 unsigned int vol, rec;
484 DB *db;
485 DBT key, val;
486
487 if (NULL == path) {
488 resp_badmanual();
489 return;
490 } else if (NULL == (sub = strrchr(path, '/'))) {
491 resp_badmanual();
492 return;
493 } else
494 *sub++ = '\0';
495
496 if ( ! (atou(path, &vol) && atou(sub, &rec))) {
497 resp_badmanual();
498 return;
499 } else if (vol >= (unsigned int)ps->sz) {
500 resp_badmanual();
501 return;
502 }
503
504 strlcpy(file, ps->paths[vol], MAXPATHLEN);
505 strlcat(file, "/mandoc.index", MAXPATHLEN);
506
507 /* Open the index recno(3) database. */
508
509 db = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
510 if (NULL == db) {
511 resp_baddb();
512 return;
513 }
514
515 key.data = &rec;
516 key.size = 4;
517
518 if (0 != (rc = (*db->get)(db, &key, &val, 0))) {
519 rc < 0 ? resp_baddb() : resp_badmanual();
520 (*db->close)(db);
521 return;
522 }
523
524 /* Extra filename: the first nil-terminated entry. */
525
526 strlcpy(file, ps->paths[vol], MAXPATHLEN);
527 strlcat(file, "/", MAXPATHLEN);
528 strlcat(file, (char *)val.data, MAXPATHLEN);
529
530 (*db->close)(db);
531
532 format(file);
533 }
534
535 static void
536 pg_search(const struct manpaths *ps, const struct req *req, char *path)
537 {
538 size_t tt;
539 int i, sz, rc;
540 const char *ep, *start;
541 char **cp;
542 struct opts opt;
543 struct expr *expr;
544
545 expr = NULL;
546 cp = NULL;
547 ep = NULL;
548 sz = 0;
549
550 memset(&opt, 0, sizeof(struct opts));
551
552 for (sz = i = 0; i < (int)req->fieldsz; i++)
553 if (0 == strcmp(req->fields[i].key, "expr"))
554 ep = req->fields[i].val;
555 else if (0 == strcmp(req->fields[i].key, "sec"))
556 opt.cat = req->fields[i].val;
557 else if (0 == strcmp(req->fields[i].key, "arch"))
558 opt.arch = req->fields[i].val;
559
560 /*
561 * Poor man's tokenisation.
562 * Just break apart by spaces.
563 * Yes, this is half-ass. But it works for now.
564 */
565
566 while (ep && isspace((unsigned char)*ep))
567 ep++;
568
569 while (ep && '\0' != *ep) {
570 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
571 start = ep;
572 while ('\0' != *ep && ! isspace((unsigned char)*ep))
573 ep++;
574 cp[sz] = mandoc_malloc((ep - start) + 1);
575 memcpy(cp[sz], start, ep - start);
576 cp[sz++][ep - start] = '\0';
577 while (isspace((unsigned char)*ep))
578 ep++;
579 }
580
581 rc = -1;
582
583 /*
584 * Pump down into apropos backend.
585 * The resp_search() function is called with the results.
586 */
587
588 if (NULL != (expr = exprcomp(sz, cp, &tt)))
589 rc = apropos_search
590 (ps->sz, ps->paths, &opt,
591 expr, tt, (void *)req, resp_search);
592
593 /* ...unless errors occured. */
594
595 if (0 == rc)
596 resp_baddb();
597 else if (-1 == rc)
598 resp_badexpr(req);
599
600 for (i = 0; i < sz; i++)
601 free(cp[i]);
602
603 free(cp);
604 exprfree(expr);
605 }
606
607 int
608 main(void)
609 {
610 int i;
611 struct req req;
612 char *p, *path, *subpath;
613 struct manpaths paths;
614
615 /* HTTP init: read and parse the query string. */
616
617 progname = getenv("SCRIPT_NAME");
618 if (NULL == progname)
619 progname = "";
620
621 cache = getenv("CACHE_DIR");
622 if (NULL == cache)
623 cache = "/cache/man.cgi";
624
625 if (-1 == chdir(cache)) {
626 resp_bad();
627 return(EXIT_FAILURE);
628 }
629
630 host = getenv("HTTP_HOST");
631 if (NULL == host)
632 host = "localhost";
633
634 memset(&req, 0, sizeof(struct req));
635
636 if (NULL != (p = getenv("QUERY_STRING")))
637 kval_parse(&req.fields, &req.fieldsz, p);
638
639 /* Resolve leading subpath component. */
640
641 subpath = path = NULL;
642 req.page = PAGE__MAX;
643
644 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
645 req.page = PAGE_INDEX;
646
647 if (NULL != path && '/' == *path && '\0' == *++path)
648 req.page = PAGE_INDEX;
649
650 /* Strip file suffix. */
651
652 if (NULL != path && NULL != (p = strrchr(path, '.')))
653 if (NULL != p && NULL == strchr(p, '/'))
654 *p++ = '\0';
655
656 /* Resolve subpath component. */
657
658 if (NULL != path && NULL != (subpath = strchr(path, '/')))
659 *subpath++ = '\0';
660
661 /* Map path into one we recognise. */
662
663 if (NULL != path && '\0' != *path)
664 for (i = 0; i < (int)PAGE__MAX; i++)
665 if (0 == strcmp(pages[i], path)) {
666 req.page = (enum page)i;
667 break;
668 }
669
670 /* Initialise MANPATH. */
671
672 memset(&paths, 0, sizeof(struct manpaths));
673 manpath_manconf("etc/man.conf", &paths);
674
675 /* Route pages. */
676
677 switch (req.page) {
678 case (PAGE_INDEX):
679 pg_index(&paths, &req, subpath);
680 break;
681 case (PAGE_SEARCH):
682 pg_search(&paths, &req, subpath);
683 break;
684 case (PAGE_SHOW):
685 pg_show(&paths, &req, subpath);
686 break;
687 default:
688 break;
689 }
690
691 manpath_free(&paths);
692 kval_free(req.fields, req.fieldsz);
693
694 return(EXIT_SUCCESS);
695 }