]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Two lint fixes.
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.38 2011/12/16 20:06:58 kristaps Exp $ */
2 /*
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/param.h>
22 #include <sys/wait.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <dirent.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <regex.h>
31 #include <stdio.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "apropos_db.h"
39 #include "mandoc.h"
40 #include "mdoc.h"
41 #include "man.h"
42 #include "main.h"
43 #include "manpath.h"
44
45 #ifdef __linux__
46 # include <db_185.h>
47 #else
48 # include <db.h>
49 #endif
50
/*
 * Pages the CGI can serve.
 * The values index the pages[] name table; PAGE__MAX is the table
 * size and also marks a path that matched no known page.
 */
enum	page {
	PAGE_INDEX = 0,
	PAGE_SEARCH,
	PAGE_SHOW,
	PAGE__MAX
};
57
/*
 * One manpath root discovered by pathgen().
 */
struct	paths {
	char		*name; /* path with slashes replaced by spaces */
	char		*path; /* directory holding etc/catman.conf */
};
62
/*
 * Search parameters as handed to the search function.
 * Filled in by http_parse() from the request's query string.
 */
struct	query {
	const char	*arch;    /* architecture */
	const char	*sec;     /* manual section */
	const char	*expr;    /* unparsed expression string */
	int		 manroot; /* manroot index (or -1) */
	int		 whatis;  /* whether whatis mode */
	int		 legacy;  /* whether legacy mode */
};
74
75 struct req {
76 struct query q;
77 struct paths *p;
78 size_t psz;
79 enum page page;
80 };
81
/* Forward declarations, with parameter names for readability. */
static	int	 atou(const char *buf, unsigned *v);
static	void	 catman(const struct req *req, const char *file);
static	int	 cmp(const void *p1, const void *p2);
static	void	 format(const struct req *req, const char *file);
static	void	 html_print(const char *p);
static	void	 html_printquery(const struct req *req);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_parse(struct req *req, char *p);
static	void	 http_print(const char *p);
static	void	 http_putchar(char c);
static	void	 http_printquery(const struct req *req);
static	int	 pathstop(DIR *dir);
static	void	 pathgen(DIR *dir, char *path, struct req *req);
static	void	 pg_index(const struct req *req, char *path);
static	void	 pg_search(const struct req *req, char *path);
static	void	 pg_show(const struct req *req, char *path);
static	void	 resp_bad(void);
static	void	 resp_baddb(void);
static	void	 resp_error400(void);
static	void	 resp_error404(const char *page);
static	void	 resp_begin_html(int code, const char *msg);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_end_html(void);
static	void	 resp_index(const struct req *req);
static	void	 resp_search(struct res *r, size_t sz, void *arg);
static	void	 resp_searchform(const struct req *req);
109
/* Run-time settings; main() fills these in from the environment. */
static	const char	*progname; /* cgi script name */
static	const char	*cache;    /* cache directory */
static	const char	*css;      /* css directory */
static	const char	*host;     /* hostname */
114
115 static const char * const pages[PAGE__MAX] = {
116 "index", /* PAGE_INDEX */
117 "search", /* PAGE_SEARCH */
118 "show", /* PAGE_SHOW */
119 };
120
/*
 * Convert a decimal string into an unsigned integer.
 * Follows the checking sequence suggested by OpenBSD's strtol(3);
 * strtonum(3) is avoided for portability's sake.
 * Returns 1 and stores the value in "v" on success; returns 0 and
 * leaves "v" untouched if the string is empty, has trailing garbage,
 * is negative or exceeds INT_MAX.
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Nothing to parse, or something left over after the digits. */
	if ('\0' == *buf || '\0' != *end)
		return(0);

	/* Overflowed the long itself. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Fits a long but not the range we accept. */
	if (num < 0 || num > INT_MAX)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
143
/*
 * Print a character, escaping HTML along the way.
 * The double quote, ampersand and angle brackets are written as
 * their character entity references; everything else is written
 * unchanged.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is "quot": "&quote;" is not defined in HTML. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
170 static void
171 http_printquery(const struct req *req)
172 {
173
174 printf("&expr=");
175 http_print(req->q.expr ? req->q.expr : "");
176 printf("&sec=");
177 http_print(req->q.sec ? req->q.sec : "");
178 printf("&arch=");
179 http_print(req->q.arch ? req->q.arch : "");
180 }
181
182
183 static void
184 html_printquery(const struct req *req)
185 {
186
187 printf("&amp;expr=");
188 html_print(req->q.expr ? req->q.expr : "");
189 printf("&amp;sec=");
190 html_print(req->q.sec ? req->q.sec : "");
191 printf("&amp;arch=");
192 html_print(req->q.arch ? req->q.arch : "");
193 }
194
/*
 * Call through to http_putchar() for every byte of the string.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
http_print(const char *p)
{

	if (NULL != p)
		for ( ; '\0' != *p; p++)
			http_putchar(*p);
}
204
/*
 * Write a string with HTML escaping, one html_putchar() per byte.
 * A NULL string is accepted and prints nothing.
 */
static void
html_print(const char *p)
{

	if (NULL != p)
		for ( ; '\0' != *p; p++)
			html_putchar(*p);
}
218
219 /*
220 * Parse out key-value pairs from an HTTP request variable.
221 * This can be either a cookie or a POST/GET string, although man.cgi
222 * uses only GET for simplicity.
223 */
224 static void
225 http_parse(struct req *req, char *p)
226 {
227 char *key, *val, *manroot;
228 int i, legacy;
229
230 memset(&req->q, 0, sizeof(struct query));
231
232 req->q.whatis = 1;
233 legacy = -1;
234 manroot = NULL;
235
236 while ('\0' != *p) {
237 key = p;
238 val = NULL;
239
240 p += (int)strcspn(p, ";&");
241 if ('\0' != *p)
242 *p++ = '\0';
243 if (NULL != (val = strchr(key, '=')))
244 *val++ = '\0';
245
246 if ('\0' == *key || NULL == val || '\0' == *val)
247 continue;
248
249 /* Just abort handling. */
250
251 if ( ! http_decode(key))
252 break;
253 if (NULL != val && ! http_decode(val))
254 break;
255
256 if (0 == strcmp(key, "expr"))
257 req->q.expr = val;
258 else if (0 == strcmp(key, "query"))
259 req->q.expr = val;
260 else if (0 == strcmp(key, "sec"))
261 req->q.sec = val;
262 else if (0 == strcmp(key, "sektion"))
263 req->q.sec = val;
264 else if (0 == strcmp(key, "arch"))
265 req->q.arch = val;
266 else if (0 == strcmp(key, "manpath"))
267 manroot = val;
268 else if (0 == strcmp(key, "apropos"))
269 legacy = 0 == strcmp(val, "0");
270 else if (0 == strcmp(key, "op"))
271 req->q.whatis = 0 == strcasecmp(val, "whatis");
272 }
273
274 /* Test for old man.cgi compatibility mode. */
275
276 if (legacy == 0) {
277 req->q.whatis = 0;
278 req->q.legacy = 1;
279 } else if (legacy > 0) {
280 req->q.legacy = 1;
281 req->q.whatis = 1;
282 }
283
284 /*
285 * Section "0" means no section when in legacy mode.
286 * For some man.cgi scripts, "default" arch is none.
287 */
288
289 if (req->q.legacy && NULL != req->q.sec)
290 if (0 == strcmp(req->q.sec, "0"))
291 req->q.sec = NULL;
292 if (req->q.legacy && NULL != req->q.arch)
293 if (0 == strcmp(req->q.arch, "default"))
294 req->q.arch = NULL;
295
296 /* Default to first manroot. */
297
298 if (NULL != manroot) {
299 for (i = 0; i < (int)req->psz; i++)
300 if (0 == strcmp(req->p[i].name, manroot))
301 break;
302 req->q.manroot = i < (int)req->psz ? i : -1;
303 }
304 }
305
/*
 * Print a character URL-encoded: alphanumerics are passed through,
 * a space becomes '+' and anything else becomes "%xx" with two
 * lower-case hex digits.
 * The byte is converted to unsigned char first: on platforms where
 * plain char is signed, passing it straight to "%x" would
 * sign-extend bytes >= 0x80 and print eight hex digits.
 */
static void
http_putchar(char c)
{
	unsigned char	 uc;

	uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}
	printf("%%%.2x", (unsigned int)uc);
}
319
320 /*
321 * HTTP-decode a string. The standard explanation is that this turns
322 * "%4e+foo" into "n foo" in the regular way. This is done in-place
323 * over the allocated string.
324 */
325 static int
326 http_decode(char *p)
327 {
328 char hex[3];
329 int c;
330
331 hex[2] = '\0';
332
333 for ( ; '\0' != *p; p++) {
334 if ('%' == *p) {
335 if ('\0' == (hex[0] = *(p + 1)))
336 return(0);
337 if ('\0' == (hex[1] = *(p + 2)))
338 return(0);
339 if (1 != sscanf(hex, "%x", &c))
340 return(0);
341 if ('\0' == c)
342 return(0);
343
344 *p = (char)c;
345 memmove(p + 1, p + 3, strlen(p + 3) + 1);
346 } else
347 *p = '+' == *p ? ' ' : *p;
348 }
349
350 *p = '\0';
351 return(1);
352 }
353
/*
 * Write the HTTP response headers and the blank line ending them,
 * then flush standard output.
 * A "Status:" header is only written for codes other than 200;
 * "msg" is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (200 != code)
		printf("Status: %d %s\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\n"
	      "Cache-Control: no-cache\n"
	      "Pragma: no-cache\n"
	      "\n", stdout);

	fflush(stdout);
}
368
369 static void
370 resp_begin_html(int code, const char *msg)
371 {
372
373 resp_begin_http(code, msg);
374
375 printf("<!DOCTYPE HTML PUBLIC "
376 " \"-//W3C//DTD HTML 4.01//EN\""
377 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
378 "<HTML>\n"
379 "<HEAD>\n"
380 "<META HTTP-EQUIV=\"Content-Type\""
381 " CONTENT=\"text/html; charset=utf-8\">\n"
382 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
383 " TYPE=\"text/css\" media=\"all\">\n"
384 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
385 " TYPE=\"text/css\" media=\"all\">\n"
386 "<TITLE>System Manpage Reference</TITLE>\n"
387 "</HEAD>\n"
388 "<BODY>\n"
389 "<!-- Begin page content. //-->\n", css, css);
390 }
391
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n"
	      "</HTML>\n", stdout);
}
399
400 static void
401 resp_searchform(const struct req *req)
402 {
403 int i;
404
405 puts("<!-- Begin search form. //-->");
406 printf("<DIV ID=\"mancgi\">\n"
407 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
408 "<FIELDSET>\n"
409 "<LEGEND>Search Parameters</LEGEND>\n"
410 "<INPUT TYPE=\"submit\" NAME=\"op\""
411 " VALUE=\"Whatis\"> or \n"
412 "<INPUT TYPE=\"submit\" NAME=\"op\""
413 " VALUE=\"apropos\"> for manuals satisfying \n"
414 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
415 progname);
416 html_print(req->q.expr ? req->q.expr : "");
417 printf("\">, section "
418 "<INPUT TYPE=\"text\""
419 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
420 html_print(req->q.sec ? req->q.sec : "");
421 printf("\">, arch "
422 "<INPUT TYPE=\"text\""
423 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
424 html_print(req->q.arch ? req->q.arch : "");
425 printf("\">");
426 if (req->psz > 1) {
427 puts(", <SELECT NAME=\"manpath\">");
428 for (i = 0; i < (int)req->psz; i++) {
429 printf("<OPTION %s VALUE=\"",
430 (i == req->q.manroot) ||
431 (0 == i && -1 == req->q.manroot) ?
432 "SELECTED=\"selected\"" : "");
433 html_print(req->p[i].name);
434 printf("\">");
435 html_print(req->p[i].name);
436 puts("</OPTION>");
437 }
438 puts("</SELECT>");
439 }
440 puts(".\n"
441 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
442 "</FIELDSET>\n"
443 "</FORM>\n"
444 "</DIV>");
445 puts("<!-- End search form. //-->");
446 }
447
/*
 * The index page: nothing but the search form inside an
 * otherwise empty HTML document, served with status 200.
 */
static void
resp_index(const struct req *req)
{
	resp_begin_html(200, NULL);
	resp_searchform(req);
	resp_end_html();
}
456
457 static void
458 resp_error400(void)
459 {
460
461 resp_begin_html(400, "Query Malformed");
462 printf("<H1>Malformed Query</H1>\n"
463 "<P>\n"
464 "The query your entered was malformed.\n"
465 "Try again from the\n"
466 "<A HREF=\"%s/index.html\">main page</A>.\n"
467 "</P>", progname);
468 resp_end_html();
469 }
470
471 static void
472 resp_error404(const char *page)
473 {
474
475 resp_begin_html(404, "Not Found");
476 puts("<H1>Page Not Found</H1>\n"
477 "<P>\n"
478 "The page you're looking for, ");
479 printf("<B>");
480 html_print(page);
481 printf("</B>,\n"
482 "could not be found.\n"
483 "Try searching from the\n"
484 "<A HREF=\"%s/index.html\">main page</A>.\n"
485 "</P>", progname);
486 resp_end_html();
487 }
488
/*
 * Respond with a complete "500 Internal Server Error" page for
 * failures that have no more specific response.
 */
static void
resp_bad(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Generic badness happened.</P>\n", stdout);
	resp_end_html();
}
496
/*
 * Respond with a complete "500 Internal Server Error" page blaming
 * the manual database.
 */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Your database is broken.</P>\n", stdout);
	resp_end_html();
}
505
506 static void
507 resp_search(struct res *r, size_t sz, void *arg)
508 {
509 int i;
510 const struct req *req;
511
512 req = (const struct req *)arg;
513
514 if (sz > 0)
515 assert(req->q.manroot >= 0);
516
517 if (1 == sz) {
518 /*
519 * If we have just one result, then jump there now
520 * without any delay.
521 */
522 puts("Status: 303 See Other");
523 printf("Location: http://%s%s/show/%d/%u/%u.html?",
524 host, progname, req->q.manroot,
525 r[0].volume, r[0].rec);
526 http_printquery(req);
527 puts("\n"
528 "Content-Type: text/html; charset=utf-8\n");
529 return;
530 }
531
532 qsort(r, sz, sizeof(struct res), cmp);
533
534 resp_begin_html(200, NULL);
535 resp_searchform(req);
536
537 puts("<DIV CLASS=\"results\">");
538
539 if (0 == sz) {
540 printf("<P>\n"
541 "No %s results found.\n",
542 req->q.whatis ? "whatis" : "apropos");
543 if (req->q.whatis) {
544 printf("(Try "
545 "<A HREF=\"%s/search.html?op=apropos",
546 progname);
547 html_printquery(req);
548 puts("\">apropos</A>?)");
549 }
550 puts("</P>");
551 puts("</DIV>");
552 resp_end_html();
553 return;
554 }
555
556 puts("<TABLE>");
557
558 for (i = 0; i < (int)sz; i++) {
559 printf("<TR>\n"
560 "<TD CLASS=\"title\">\n"
561 "<A HREF=\"%s/show/%d/%u/%u.html?",
562 progname, req->q.manroot,
563 r[i].volume, r[i].rec);
564 html_printquery(req);
565 printf("\">");
566 html_print(r[i].title);
567 putchar('(');
568 html_print(r[i].cat);
569 if (r[i].arch && '\0' != *r[i].arch) {
570 putchar('/');
571 html_print(r[i].arch);
572 }
573 printf(")</A>\n"
574 "</TD>\n"
575 "<TD CLASS=\"desc\">");
576 html_print(r[i].desc);
577 puts("</TD>\n"
578 "</TR>");
579 }
580
581 puts("</TABLE>\n"
582 "</DIV>");
583 resp_end_html();
584 }
585
/*
 * Page handler for the index page.
 * The sub-path is accepted for symmetry with the other page
 * handlers and is not looked at.
 */
/* ARGSUSED */
static void
pg_index(const struct req *req, char *path)
{

	(void)path;
	resp_index(req);
}
593
/*
 * Print a pre-formatted catpage as HTML.
 * The file is read line by line; backspace overstrike sequences
 * ("x\bx", "_\bx") are turned into <B> and <I> scopes, and a few
 * overstruck character pairs are collapsed into '*' or '+'.
 * Responds with the "broken database" page if the file cannot be
 * opened.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		resp_baddb();
		return;
	}

	resp_begin_html(200, NULL);
	resp_searchform(req);
	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair sit on either side
			 * of the backspace, so the second one is always
			 * p[i + 2]: comparing p[i + 1], which is known
			 * to be '\b', could never match.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");
	resp_end_html();

	fclose(f);
}
729
730 static void
731 format(const struct req *req, const char *file)
732 {
733 struct mparse *mp;
734 int fd;
735 struct mdoc *mdoc;
736 struct man *man;
737 void *vp;
738 enum mandoclevel rc;
739 char opts[MAXPATHLEN + 128];
740
741 if (-1 == (fd = open(file, O_RDONLY, 0))) {
742 resp_baddb();
743 return;
744 }
745
746 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
747 rc = mparse_readfd(mp, fd, file);
748 close(fd);
749
750 if (rc >= MANDOCLEVEL_FATAL) {
751 resp_baddb();
752 return;
753 }
754
755 snprintf(opts, sizeof(opts), "fragment,"
756 "man=%s/search.html?sec=%%S&expr=%%N,"
757 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
758 progname);
759
760 mparse_result(mp, &mdoc, &man);
761 if (NULL == man && NULL == mdoc) {
762 resp_baddb();
763 mparse_free(mp);
764 return;
765 }
766
767 resp_begin_html(200, NULL);
768 resp_searchform(req);
769
770 vp = html_alloc(opts);
771
772 if (NULL != mdoc)
773 html_mdoc(vp, mdoc);
774 else
775 html_man(vp, man);
776
777 puts("</BODY>\n"
778 "</HTML>");
779
780 html_free(vp);
781 mparse_free(mp);
782 }
783
784 static void
785 pg_show(const struct req *req, char *path)
786 {
787 struct manpaths ps;
788 size_t sz;
789 char *sub;
790 char file[MAXPATHLEN];
791 const char *cp;
792 int rc, catm;
793 unsigned int vol, rec, mr;
794 DB *idx;
795 DBT key, val;
796
797 idx = NULL;
798
799 /* Parse out mroot, volume, and record from the path. */
800
801 if (NULL == path || NULL == (sub = strchr(path, '/'))) {
802 resp_error400();
803 return;
804 }
805 *sub++ = '\0';
806 if ( ! atou(path, &mr)) {
807 resp_error400();
808 return;
809 }
810 path = sub;
811 if (NULL == (sub = strchr(path, '/'))) {
812 resp_error400();
813 return;
814 }
815 *sub++ = '\0';
816 if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
817 resp_error400();
818 return;
819 } else if (mr >= (unsigned int)req->psz) {
820 resp_error400();
821 return;
822 }
823
824 /*
825 * Begin by chdir()ing into the manroot.
826 * This way we can pick up the database files, which are
827 * relative to the manpath root.
828 */
829
830 if (-1 == chdir(req->p[(int)mr].path)) {
831 perror(req->p[(int)mr].path);
832 resp_baddb();
833 return;
834 }
835
836 memset(&ps, 0, sizeof(struct manpaths));
837 manpath_manconf(&ps, "etc/catman.conf");
838
839 if (vol >= (unsigned int)ps.sz) {
840 resp_error400();
841 goto out;
842 }
843
844 sz = strlcpy(file, ps.paths[vol], MAXPATHLEN);
845 assert(sz < MAXPATHLEN);
846 strlcat(file, "/mandoc.index", MAXPATHLEN);
847
848 /* Open the index recno(3) database. */
849
850 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
851 if (NULL == idx) {
852 perror(file);
853 resp_baddb();
854 goto out;
855 }
856
857 key.data = &rec;
858 key.size = 4;
859
860 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
861 rc < 0 ? resp_baddb() : resp_error400();
862 goto out;
863 } else if (0 == val.size) {
864 resp_baddb();
865 goto out;
866 }
867
868 cp = (char *)val.data;
869 catm = 'c' == *cp++;
870
871 if (NULL == memchr(cp, '\0', val.size - 1))
872 resp_baddb();
873 else {
874 file[(int)sz] = '\0';
875 strlcat(file, "/", MAXPATHLEN);
876 strlcat(file, cp, MAXPATHLEN);
877 if (catm)
878 catman(req, file);
879 else
880 format(req, file);
881 }
882 out:
883 if (idx)
884 (*idx->close)(idx);
885 manpath_free(&ps);
886 }
887
888 static void
889 pg_search(const struct req *req, char *path)
890 {
891 size_t tt;
892 struct manpaths ps;
893 int i, sz, rc;
894 const char *ep, *start;
895 char **cp;
896 struct opts opt;
897 struct expr *expr;
898
899 if (req->q.manroot < 0 || 0 == req->psz) {
900 resp_search(NULL, 0, (void *)req);
901 return;
902 }
903
904 memset(&opt, 0, sizeof(struct opts));
905
906 ep = req->q.expr;
907 opt.arch = req->q.arch;
908 opt.cat = req->q.sec;
909 rc = -1;
910 sz = 0;
911 cp = NULL;
912
913 /*
914 * Begin by chdir()ing into the root of the manpath.
915 * This way we can pick up the database files, which are
916 * relative to the manpath root.
917 */
918
919 assert(req->q.manroot < (int)req->psz);
920 if (-1 == (chdir(req->p[req->q.manroot].path))) {
921 perror(req->p[req->q.manroot].path);
922 resp_search(NULL, 0, (void *)req);
923 return;
924 }
925
926 memset(&ps, 0, sizeof(struct manpaths));
927 manpath_manconf(&ps, "etc/catman.conf");
928
929 /*
930 * Poor man's tokenisation: just break apart by spaces.
931 * Yes, this is half-ass. But it works for now.
932 */
933
934 while (ep && isspace((unsigned char)*ep))
935 ep++;
936
937 while (ep && '\0' != *ep) {
938 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
939 start = ep;
940 while ('\0' != *ep && ! isspace((unsigned char)*ep))
941 ep++;
942 cp[sz] = mandoc_malloc((ep - start) + 1);
943 memcpy(cp[sz], start, ep - start);
944 cp[sz++][ep - start] = '\0';
945 while (isspace((unsigned char)*ep))
946 ep++;
947 }
948
949 /*
950 * Pump down into apropos backend.
951 * The resp_search() function is called with the results.
952 */
953
954 expr = req->q.whatis ?
955 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
956
957 if (NULL != expr)
958 rc = apropos_search
959 (ps.sz, ps.paths, &opt,
960 expr, tt, (void *)req, resp_search);
961
962 /* ...unless errors occured. */
963
964 if (0 == rc)
965 resp_baddb();
966 else if (-1 == rc)
967 resp_search(NULL, 0, (void *)req);
968
969 for (i = 0; i < sz; i++)
970 free(cp[i]);
971
972 free(cp);
973 exprfree(expr);
974 manpath_free(&ps);
975 }
976
977 int
978 main(void)
979 {
980 int i;
981 char buf[MAXPATHLEN];
982 DIR *cwd;
983 struct req req;
984 char *p, *path, *subpath;
985
986 /* Scan our run-time environment. */
987
988 if (NULL == (cache = getenv("CACHE_DIR")))
989 cache = "/cache/man.cgi";
990
991 if (NULL == (progname = getenv("SCRIPT_NAME")))
992 progname = "";
993
994 if (NULL == (css = getenv("CSS_DIR")))
995 css = "";
996
997 if (NULL == (host = getenv("HTTP_HOST")))
998 host = "localhost";
999
1000 /*
1001 * First we change directory into the cache directory so that
1002 * subsequent scanning for manpath directories is rooted
1003 * relative to the same position.
1004 */
1005
1006 if (-1 == chdir(cache)) {
1007 perror(cache);
1008 resp_bad();
1009 return(EXIT_FAILURE);
1010 } else if (NULL == (cwd = opendir(cache))) {
1011 perror(cache);
1012 resp_bad();
1013 return(EXIT_FAILURE);
1014 }
1015
1016 memset(&req, 0, sizeof(struct req));
1017
1018 strlcpy(buf, ".", MAXPATHLEN);
1019 pathgen(cwd, buf, &req);
1020 closedir(cwd);
1021
1022 /* Next parse out the query string. */
1023
1024 if (NULL != (p = getenv("QUERY_STRING")))
1025 http_parse(&req, p);
1026
1027 /*
1028 * Now juggle paths to extract information.
1029 * We want to extract our filetype (the file suffix), the
1030 * initial path component, then the trailing component(s).
1031 * Start with leading subpath component.
1032 */
1033
1034 subpath = path = NULL;
1035 req.page = PAGE__MAX;
1036
1037 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1038 req.page = PAGE_INDEX;
1039
1040 if (NULL != path && '/' == *path && '\0' == *++path)
1041 req.page = PAGE_INDEX;
1042
1043 /* Strip file suffix. */
1044
1045 if (NULL != path && NULL != (p = strrchr(path, '.')))
1046 if (NULL != p && NULL == strchr(p, '/'))
1047 *p++ = '\0';
1048
1049 /* Resolve subpath component. */
1050
1051 if (NULL != path && NULL != (subpath = strchr(path, '/')))
1052 *subpath++ = '\0';
1053
1054 /* Map path into one we recognise. */
1055
1056 if (NULL != path && '\0' != *path)
1057 for (i = 0; i < (int)PAGE__MAX; i++)
1058 if (0 == strcmp(pages[i], path)) {
1059 req.page = (enum page)i;
1060 break;
1061 }
1062
1063 /* Route pages. */
1064
1065 switch (req.page) {
1066 case (PAGE_INDEX):
1067 pg_index(&req, subpath);
1068 break;
1069 case (PAGE_SEARCH):
1070 pg_search(&req, subpath);
1071 break;
1072 case (PAGE_SHOW):
1073 pg_show(&req, subpath);
1074 break;
1075 default:
1076 resp_error404(path);
1077 break;
1078 }
1079
1080 for (i = 0; i < (int)req.psz; i++) {
1081 free(req.p[i].path);
1082 free(req.p[i].name);
1083 }
1084
1085 free(req.p);
1086 return(EXIT_SUCCESS);
1087 }
1088
1089 static int
1090 cmp(const void *p1, const void *p2)
1091 {
1092
1093 return(strcasecmp(((const struct res *)p1)->title,
1094 ((const struct res *)p2)->title));
1095 }
1096
1097 /*
1098 * Check to see if an "etc" path consists of a catman.conf file. If it
1099 * does, that means that the path contains a tree created by catman(8)
1100 * and should be used for indexing.
1101 */
1102 static int
1103 pathstop(DIR *dir)
1104 {
1105 struct dirent *d;
1106
1107 while (NULL != (d = readdir(dir)))
1108 if (DT_REG == d->d_type)
1109 if (0 == strcmp(d->d_name, "catman.conf"))
1110 return(1);
1111
1112 return(0);
1113 }
1114
1115 /*
1116 * Scan for indexable paths.
1117 * This adds all paths with "etc/catman.conf" to the buffer.
1118 */
1119 static void
1120 pathgen(DIR *dir, char *path, struct req *req)
1121 {
1122 struct dirent *d;
1123 char *cp;
1124 DIR *cd;
1125 int rc;
1126 size_t sz, ssz;
1127
1128 sz = strlcat(path, "/", MAXPATHLEN);
1129 if (sz >= MAXPATHLEN) {
1130 fprintf(stderr, "%s: Path too long", path);
1131 return;
1132 }
1133
1134 /*
1135 * First, scan for the "etc" directory.
1136 * If it's found, then see if it should cause us to stop. This
1137 * happens when a catman.conf is found in the directory.
1138 */
1139
1140 rc = 0;
1141 while (0 == rc && NULL != (d = readdir(dir))) {
1142 if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
1143 continue;
1144
1145 path[(int)sz] = '\0';
1146 ssz = strlcat(path, d->d_name, MAXPATHLEN);
1147
1148 if (ssz >= MAXPATHLEN) {
1149 fprintf(stderr, "%s: Path too long", path);
1150 return;
1151 } else if (NULL == (cd = opendir(path))) {
1152 perror(path);
1153 return;
1154 }
1155
1156 rc = pathstop(cd);
1157 closedir(cd);
1158 }
1159
1160 if (rc > 0) {
1161 /* This also strips the trailing slash. */
1162 path[(int)--sz] = '\0';
1163 req->p = mandoc_realloc
1164 (req->p,
1165 (req->psz + 1) * sizeof(struct paths));
1166 /*
1167 * Strip out the leading "./" unless we're just a ".",
1168 * in which case use an empty string as our name.
1169 */
1170 req->p[(int)req->psz].path = mandoc_strdup(path);
1171 req->p[(int)req->psz].name =
1172 cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1173 req->psz++;
1174 /*
1175 * The name is just the path with all the slashes taken
1176 * out of it. Simple but effective.
1177 */
1178 for ( ; '\0' != *cp; cp++)
1179 if ('/' == *cp)
1180 *cp = ' ';
1181 return;
1182 }
1183
1184 /*
1185 * If no etc/catman.conf was found, recursively enter child
1186 * directory and continue scanning.
1187 */
1188
1189 rewinddir(dir);
1190 while (NULL != (d = readdir(dir))) {
1191 if (DT_DIR != d->d_type || '.' == d->d_name[0])
1192 continue;
1193
1194 path[(int)sz] = '\0';
1195 ssz = strlcat(path, d->d_name, MAXPATHLEN);
1196
1197 if (ssz >= MAXPATHLEN) {
1198 fprintf(stderr, "%s: Path too long", path);
1199 return;
1200 } else if (NULL == (cd = opendir(path))) {
1201 perror(path);
1202 return;
1203 }
1204
1205 pathgen(cd, path, req);
1206 closedir(cd);
1207 }
1208 }