]> git.cameronkatri.com Git - mandoc.git/blob - cgi.c
Rudimentary implementation of the .it request (input line trap).
[mandoc.git] / cgi.c
1 /* $Id: cgi.c,v 1.45 2013/06/05 02:00:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/wait.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "apropos_db.h"
38 #include "mandoc.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manpath.h"
43 #include "mandocdb.h"
44
45 #ifdef __linux__
46 # include <db_185.h>
47 #else
48 # include <db.h>
49 #endif
50
/*
 * Pages that man.cgi knows how to serve.
 * The values double as indices into the pages[] name table, and
 * PAGE__MAX is used both as the table size and as the "no such page"
 * marker while the request path is being resolved.
 */
enum	page {
	PAGE_INDEX,	/* front page: just the search form */
	PAGE_SEARCH,	/* search results */
	PAGE_SHOW,	/* one formatted manual */
	PAGE__MAX	/* number of pages; not a page itself */
};
57
/*
 * One manpath root found while scanning below the cache directory.
 */
struct	paths {
	char	*name;	/* display name: the path with '/' turned into ' ' */
	char	*path;	/* directory of the root, as scanned from "." */
};
62
/*
 * The search parameters of one request, as decoded from the query
 * string.  The strings point into the buffer that was parsed; a
 * parameter that was not given is NULL.
 */
struct	query {
	const char	*arch;		/* architecture to restrict to */
	const char	*sec;		/* manual section to restrict to */
	const char	*expr;		/* search expression, still unparsed */
	int		 manroot;	/* index of the manroot, or -1 */
	int		 legacy;	/* non-zero for old man.cgi semantics */
};
73
74 struct req {
75 struct query q;
76 struct paths *p;
77 size_t psz;
78 enum page page;
79 };
80
/*
 * Forward declarations of the file-local functions.
 */
static	int		 atou(const char *buf, unsigned *v);
static	void		 catman(const struct req *req, const char *file);
static	int		 cmp(const void *p1, const void *p2);
static	void		 format(const struct req *req, const char *file);
static	void		 html_print(const char *p);
static	void		 html_printquery(const struct req *req);
static	void		 html_putchar(char c);
static	int		 http_decode(char *p);
static	void		 http_parse(struct req *req, char *p);
static	void		 http_print(const char *p);
static	void		 http_putchar(char c);
static	void		 http_printquery(const struct req *req);
static	int		 pathstop(DIR *dir);
static	void		 pathgen(DIR *dir, char *path, struct req *req);
static	void		 pg_index(const struct req *req, char *path);
static	void		 pg_search(const struct req *req, char *path);
static	void		 pg_show(const struct req *req, char *path);
static	void		 resp_bad(void);
static	void		 resp_baddb(void);
static	void		 resp_error400(void);
static	void		 resp_error404(const char *page);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_index(const struct req *req);
static	void		 resp_search(struct res *r, size_t sz, void *arg);
static	void		 resp_searchform(const struct req *req);
108
/*
 * Run-time environment, read once at the start of main().
 */
static	const char	*progname;	/* URI of this script (SCRIPT_NAME) */
static	const char	*cache;		/* cache directory (CACHE_DIR) */
static	const char	*css;		/* stylesheet URI prefix (CSS_DIR) */
static	const char	*host;		/* host name for redirects (HTTP_HOST) */
113
114 static const char * const pages[PAGE__MAX] = {
115 "index", /* PAGE_INDEX */
116 "search", /* PAGE_SEARCH */
117 "show", /* PAGE_SHOW */
118 };
119
/*
 * Convert a decimal string into an unsigned number.
 * This follows the idiom suggested in OpenBSD's strtol(3) and is used
 * in place of strtonum(3) to stay portable.
 * Returns 1 and stores the value in *v on success.  Returns 0, leaving
 * *v alone, if the string is empty, has trailing garbage, is negative,
 * or does not fit into an int.
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*end;
	long	 num;
	int	 overflowed;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Nothing to convert, or something left over after the digits. */
	if (*buf == '\0' || *end != '\0')
		return 0;

	overflowed = errno == ERANGE &&
	    (num == LONG_MAX || num == LONG_MIN);
	if (overflowed || num < 0 || num > INT_MAX)
		return 0;

	*v = (unsigned int)num;
	return 1;
}
142
/*
 * Print a character, escaping the four characters that are special in
 * HTML text and in double-quoted attribute values.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is "&quot;"; HTML defines no "&quote;". */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
169 static void
170 http_printquery(const struct req *req)
171 {
172
173 printf("&expr=");
174 http_print(req->q.expr ? req->q.expr : "");
175 printf("&sec=");
176 http_print(req->q.sec ? req->q.sec : "");
177 printf("&arch=");
178 http_print(req->q.arch ? req->q.arch : "");
179 }
180
181
182 static void
183 html_printquery(const struct req *req)
184 {
185
186 printf("&amp;expr=");
187 html_print(req->q.expr ? req->q.expr : "");
188 printf("&amp;sec=");
189 html_print(req->q.sec ? req->q.sec : "");
190 printf("&amp;arch=");
191 html_print(req->q.arch ? req->q.arch : "");
192 }
193
/*
 * Print a string one character at a time through http_putchar().
 * A NULL string prints nothing.
 */
static void
http_print(const char *p)
{
	const char	*cur;

	if (p == NULL)
		return;
	for (cur = p; *cur != '\0'; cur++)
		http_putchar(*cur);
}
203
/*
 * Print a string one character at a time through html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cur;

	if (p == NULL)
		return;
	for (cur = p; *cur != '\0'; cur++)
		html_putchar(*cur);
}
217
218 /*
219 * Parse out key-value pairs from an HTTP request variable.
220 * This can be either a cookie or a POST/GET string, although man.cgi
221 * uses only GET for simplicity.
222 */
223 static void
224 http_parse(struct req *req, char *p)
225 {
226 char *key, *val, *manroot;
227 int i, legacy;
228
229 memset(&req->q, 0, sizeof(struct query));
230
231 legacy = -1;
232 manroot = NULL;
233
234 while ('\0' != *p) {
235 key = p;
236 val = NULL;
237
238 p += (int)strcspn(p, ";&");
239 if ('\0' != *p)
240 *p++ = '\0';
241 if (NULL != (val = strchr(key, '=')))
242 *val++ = '\0';
243
244 if ('\0' == *key || NULL == val || '\0' == *val)
245 continue;
246
247 /* Just abort handling. */
248
249 if ( ! http_decode(key))
250 break;
251 if (NULL != val && ! http_decode(val))
252 break;
253
254 if (0 == strcmp(key, "expr"))
255 req->q.expr = val;
256 else if (0 == strcmp(key, "query"))
257 req->q.expr = val;
258 else if (0 == strcmp(key, "sec"))
259 req->q.sec = val;
260 else if (0 == strcmp(key, "sektion"))
261 req->q.sec = val;
262 else if (0 == strcmp(key, "arch"))
263 req->q.arch = val;
264 else if (0 == strcmp(key, "manpath"))
265 manroot = val;
266 else if (0 == strcmp(key, "apropos"))
267 legacy = 0 == strcmp(val, "0");
268 }
269
270 /* Test for old man.cgi compatibility mode. */
271
272 req->q.legacy = legacy > 0;
273
274 /*
275 * Section "0" means no section when in legacy mode.
276 * For some man.cgi scripts, "default" arch is none.
277 */
278
279 if (req->q.legacy && NULL != req->q.sec)
280 if (0 == strcmp(req->q.sec, "0"))
281 req->q.sec = NULL;
282 if (req->q.legacy && NULL != req->q.arch)
283 if (0 == strcmp(req->q.arch, "default"))
284 req->q.arch = NULL;
285
286 /* Default to first manroot. */
287
288 if (NULL != manroot) {
289 for (i = 0; i < (int)req->psz; i++)
290 if (0 == strcmp(req->p[i].name, manroot))
291 break;
292 req->q.manroot = i < (int)req->psz ? i : -1;
293 }
294 }
295
/*
 * Print a character URL-encoded: alphanumerics are printed as they
 * are, a space becomes '+', and everything else becomes "%xx".
 */
static void
http_putchar(char c)
{
	unsigned char	 uc;

	uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}

	/*
	 * Format the byte value, not the char: where char is signed, a
	 * byte >= 0x80 would be sign-extended and come out as "%ffffffxx".
	 */
	printf("%%%.2x", (unsigned int)uc);
}
309
/*
 * HTTP-decode a string in place: this turns "%6e+foo" into "n foo".
 * Every '%' must be followed by exactly two hexadecimal digits, and an
 * encoded NUL ("%00") is not allowed.
 * Returns 1 on success and 0 on a malformed escape, in which case the
 * string is left partly decoded and should not be used.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	int		 c;

	hex[2] = '\0';

	/* The result is never longer than the input, so write behind p. */

	for (q = p; '\0' != *p; p++, q++) {
		if ('%' != *p) {
			*q = '+' == *p ? ' ' : *p;
			continue;
		}

		/*
		 * The terminating NUL is not a hex digit, so this also
		 * catches a truncated escape, and p[2] is only looked
		 * at when p[1] is not the end of the string.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return(0);

		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);
		if (0 == c)
			return(0);

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return(1);
}
343
/*
 * Print the CGI response headers and the blank line that ends them,
 * then flush.  A "Status:" header is only printed when the code is
 * not 200; msg is only used in that case.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\n"
	    "Cache-Control: no-cache\n"
	    "Pragma: no-cache\n"
	    "\n", stdout);

	fflush(stdout);
}
358
359 static void
360 resp_begin_html(int code, const char *msg)
361 {
362
363 resp_begin_http(code, msg);
364
365 printf("<!DOCTYPE HTML PUBLIC "
366 " \"-//W3C//DTD HTML 4.01//EN\""
367 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
368 "<HTML>\n"
369 "<HEAD>\n"
370 "<META HTTP-EQUIV=\"Content-Type\""
371 " CONTENT=\"text/html; charset=utf-8\">\n"
372 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
373 " TYPE=\"text/css\" media=\"all\">\n"
374 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
375 " TYPE=\"text/css\" media=\"all\">\n"
376 "<TITLE>System Manpage Reference</TITLE>\n"
377 "</HEAD>\n"
378 "<BODY>\n"
379 "<!-- Begin page content. //-->\n", css, css);
380 }
381
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{
	fputs("</BODY>\n"
	    "</HTML>\n", stdout);
}
389
390 static void
391 resp_searchform(const struct req *req)
392 {
393 int i;
394
395 puts("<!-- Begin search form. //-->");
396 printf("<DIV ID=\"mancgi\">\n"
397 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
398 "<FIELDSET>\n"
399 "<LEGEND>Search Parameters</LEGEND>\n"
400 "<INPUT TYPE=\"submit\" "
401 " VALUE=\"Search\"> for manuals satisfying \n"
402 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
403 progname);
404 html_print(req->q.expr ? req->q.expr : "");
405 printf("\">, section "
406 "<INPUT TYPE=\"text\""
407 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
408 html_print(req->q.sec ? req->q.sec : "");
409 printf("\">, arch "
410 "<INPUT TYPE=\"text\""
411 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
412 html_print(req->q.arch ? req->q.arch : "");
413 printf("\">");
414 if (req->psz > 1) {
415 puts(", <SELECT NAME=\"manpath\">");
416 for (i = 0; i < (int)req->psz; i++) {
417 printf("<OPTION %s VALUE=\"",
418 (i == req->q.manroot) ||
419 (0 == i && -1 == req->q.manroot) ?
420 "SELECTED=\"selected\"" : "");
421 html_print(req->p[i].name);
422 printf("\">");
423 html_print(req->p[i].name);
424 puts("</OPTION>");
425 }
426 puts("</SELECT>");
427 }
428 puts(".\n"
429 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
430 "</FIELDSET>\n"
431 "</FORM>\n"
432 "</DIV>");
433 puts("<!-- End search form. //-->");
434 }
435
/*
 * Respond with the front page, which consists of the search form only.
 */
static void
resp_index(const struct req *req)
{
	resp_begin_html(200, NULL);
	resp_searchform(req);
	resp_end_html();
}
444
445 static void
446 resp_error400(void)
447 {
448
449 resp_begin_html(400, "Query Malformed");
450 printf("<H1>Malformed Query</H1>\n"
451 "<P>\n"
452 "The query your entered was malformed.\n"
453 "Try again from the\n"
454 "<A HREF=\"%s/index.html\">main page</A>.\n"
455 "</P>", progname);
456 resp_end_html();
457 }
458
459 static void
460 resp_error404(const char *page)
461 {
462
463 resp_begin_html(404, "Not Found");
464 puts("<H1>Page Not Found</H1>\n"
465 "<P>\n"
466 "The page you're looking for, ");
467 printf("<B>");
468 html_print(page);
469 printf("</B>,\n"
470 "could not be found.\n"
471 "Try searching from the\n"
472 "<A HREF=\"%s/index.html\">main page</A>.\n"
473 "</P>", progname);
474 resp_end_html();
475 }
476
/*
 * Respond with a 500 page for failures not tied to a database.
 */
static void
resp_bad(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Generic badness happened.</P>\n", stdout);
	resp_end_html();
}
484
/*
 * Respond with a 500 page for a database or manual that cannot be
 * opened or read.
 */
static void
resp_baddb(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Your database is broken.</P>\n", stdout);
	resp_end_html();
}
493
494 static void
495 resp_search(struct res *r, size_t sz, void *arg)
496 {
497 size_t i, matched;
498 const struct req *req;
499
500 req = (const struct req *)arg;
501
502 if (sz > 0)
503 assert(req->q.manroot >= 0);
504
505 for (matched = i = 0; i < sz; i++)
506 if (r[i].matched)
507 matched++;
508
509 if (1 == matched) {
510 for (i = 0; i < sz; i++)
511 if (r[i].matched)
512 break;
513 /*
514 * If we have just one result, then jump there now
515 * without any delay.
516 */
517 puts("Status: 303 See Other");
518 printf("Location: http://%s%s/show/%d/%u/%u.html?",
519 host, progname, req->q.manroot,
520 r[i].volume, r[i].rec);
521 http_printquery(req);
522 puts("\n"
523 "Content-Type: text/html; charset=utf-8\n");
524 return;
525 }
526
527 resp_begin_html(200, NULL);
528 resp_searchform(req);
529
530 puts("<DIV CLASS=\"results\">");
531
532 if (0 == matched) {
533 puts("<P>\n"
534 "No results found.\n"
535 "</P>\n"
536 "</DIV>");
537 resp_end_html();
538 return;
539 }
540
541 qsort(r, sz, sizeof(struct res), cmp);
542
543 puts("<TABLE>");
544
545 for (i = 0; i < sz; i++) {
546 if ( ! r[i].matched)
547 continue;
548 printf("<TR>\n"
549 "<TD CLASS=\"title\">\n"
550 "<A HREF=\"%s/show/%d/%u/%u.html?",
551 progname, req->q.manroot,
552 r[i].volume, r[i].rec);
553 html_printquery(req);
554 printf("\">");
555 html_print(r[i].title);
556 putchar('(');
557 html_print(r[i].cat);
558 if (r[i].arch && '\0' != *r[i].arch) {
559 putchar('/');
560 html_print(r[i].arch);
561 }
562 printf(")</A>\n"
563 "</TD>\n"
564 "<TD CLASS=\"desc\">");
565 html_print(r[i].desc);
566 puts("</TD>\n"
567 "</TR>");
568 }
569
570 puts("</TABLE>\n"
571 "</DIV>");
572 resp_end_html();
573 }
574
/*
 * Serve the index page.  The trailing path component is not used.
 */
/* ARGSUSED */
static void
pg_index(const struct req *req, char *path)
{
	(void)path;
	resp_index(req);
}
582
/*
 * Serve a preformatted ("cat") manual as HTML inside a PRE element.
 * The file is read line by line and backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, a few troff bullet and
 * box-drawing combinations become '*' or '+', and any other "X\bY"
 * becomes bold Y.
 * Responds with a database error if the file cannot be opened.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		resp_baddb();
		return;
	}

	resp_begin_html(200, NULL);
	resp_searchform(req);
	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * compared across the backspace, that is,
			 * p[i] against p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");
	resp_end_html();

	fclose(f);
}
718
719 static void
720 format(const struct req *req, const char *file)
721 {
722 struct mparse *mp;
723 int fd;
724 struct mdoc *mdoc;
725 struct man *man;
726 void *vp;
727 enum mandoclevel rc;
728 char opts[PATH_MAX + 128];
729
730 if (-1 == (fd = open(file, O_RDONLY, 0))) {
731 resp_baddb();
732 return;
733 }
734
735 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
736 rc = mparse_readfd(mp, fd, file);
737 close(fd);
738
739 if (rc >= MANDOCLEVEL_FATAL) {
740 resp_baddb();
741 return;
742 }
743
744 snprintf(opts, sizeof(opts), "fragment,"
745 "man=%s/search.html?sec=%%S&expr=Nm~^%%N$,"
746 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
747 progname);
748
749 mparse_result(mp, &mdoc, &man);
750 if (NULL == man && NULL == mdoc) {
751 resp_baddb();
752 mparse_free(mp);
753 return;
754 }
755
756 resp_begin_html(200, NULL);
757 resp_searchform(req);
758
759 vp = html_alloc(opts);
760
761 if (NULL != mdoc)
762 html_mdoc(vp, mdoc);
763 else
764 html_man(vp, man);
765
766 puts("</BODY>\n"
767 "</HTML>");
768
769 html_free(vp);
770 mparse_free(mp);
771 }
772
773 static void
774 pg_show(const struct req *req, char *path)
775 {
776 struct manpaths ps;
777 size_t sz;
778 char *sub;
779 char file[PATH_MAX];
780 const char *cp;
781 int rc, catm;
782 unsigned int vol, rec, mr;
783 DB *idx;
784 DBT key, val;
785
786 idx = NULL;
787
788 /* Parse out mroot, volume, and record from the path. */
789
790 if (NULL == path || NULL == (sub = strchr(path, '/'))) {
791 resp_error400();
792 return;
793 }
794 *sub++ = '\0';
795 if ( ! atou(path, &mr)) {
796 resp_error400();
797 return;
798 }
799 path = sub;
800 if (NULL == (sub = strchr(path, '/'))) {
801 resp_error400();
802 return;
803 }
804 *sub++ = '\0';
805 if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
806 resp_error400();
807 return;
808 } else if (mr >= (unsigned int)req->psz) {
809 resp_error400();
810 return;
811 }
812
813 /*
814 * Begin by chdir()ing into the manroot.
815 * This way we can pick up the database files, which are
816 * relative to the manpath root.
817 */
818
819 if (-1 == chdir(req->p[(int)mr].path)) {
820 perror(req->p[(int)mr].path);
821 resp_baddb();
822 return;
823 }
824
825 memset(&ps, 0, sizeof(struct manpaths));
826 manpath_manconf(&ps, "etc/catman.conf");
827
828 if (vol >= (unsigned int)ps.sz) {
829 resp_error400();
830 goto out;
831 }
832
833 sz = strlcpy(file, ps.paths[vol], PATH_MAX);
834 assert(sz < PATH_MAX);
835 strlcat(file, "/", PATH_MAX);
836 strlcat(file, MANDOC_IDX, PATH_MAX);
837
838 /* Open the index recno(3) database. */
839
840 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
841 if (NULL == idx) {
842 perror(file);
843 resp_baddb();
844 goto out;
845 }
846
847 key.data = &rec;
848 key.size = 4;
849
850 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
851 rc < 0 ? resp_baddb() : resp_error400();
852 goto out;
853 } else if (0 == val.size) {
854 resp_baddb();
855 goto out;
856 }
857
858 cp = (char *)val.data;
859 catm = 'c' == *cp++;
860
861 if (NULL == memchr(cp, '\0', val.size - 1))
862 resp_baddb();
863 else {
864 file[(int)sz] = '\0';
865 strlcat(file, "/", PATH_MAX);
866 strlcat(file, cp, PATH_MAX);
867 if (catm)
868 catman(req, file);
869 else
870 format(req, file);
871 }
872 out:
873 if (idx)
874 (*idx->close)(idx);
875 manpath_free(&ps);
876 }
877
878 static void
879 pg_search(const struct req *req, char *path)
880 {
881 size_t tt, ressz;
882 struct manpaths ps;
883 int i, sz, rc;
884 const char *ep, *start;
885 struct res *res;
886 char **cp;
887 struct opts opt;
888 struct expr *expr;
889
890 if (req->q.manroot < 0 || 0 == req->psz) {
891 resp_search(NULL, 0, (void *)req);
892 return;
893 }
894
895 memset(&opt, 0, sizeof(struct opts));
896
897 ep = req->q.expr;
898 opt.arch = req->q.arch;
899 opt.cat = req->q.sec;
900 rc = -1;
901 sz = 0;
902 cp = NULL;
903 ressz = 0;
904 res = NULL;
905
906 /*
907 * Begin by chdir()ing into the root of the manpath.
908 * This way we can pick up the database files, which are
909 * relative to the manpath root.
910 */
911
912 assert(req->q.manroot < (int)req->psz);
913 if (-1 == (chdir(req->p[req->q.manroot].path))) {
914 perror(req->p[req->q.manroot].path);
915 resp_search(NULL, 0, (void *)req);
916 return;
917 }
918
919 memset(&ps, 0, sizeof(struct manpaths));
920 manpath_manconf(&ps, "etc/catman.conf");
921
922 /*
923 * Poor man's tokenisation: just break apart by spaces.
924 * Yes, this is half-ass. But it works for now.
925 */
926
927 while (ep && isspace((unsigned char)*ep))
928 ep++;
929
930 while (ep && '\0' != *ep) {
931 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
932 start = ep;
933 while ('\0' != *ep && ! isspace((unsigned char)*ep))
934 ep++;
935 cp[sz] = mandoc_malloc((ep - start) + 1);
936 memcpy(cp[sz], start, ep - start);
937 cp[sz++][ep - start] = '\0';
938 while (isspace((unsigned char)*ep))
939 ep++;
940 }
941
942 /*
943 * Pump down into apropos backend.
944 * The resp_search() function is called with the results.
945 */
946
947 expr = req->q.legacy ?
948 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
949
950 if (NULL != expr)
951 rc = apropos_search
952 (ps.sz, ps.paths, &opt, expr, tt,
953 (void *)req, &ressz, &res, resp_search);
954
955 /* ...unless errors occured. */
956
957 if (0 == rc)
958 resp_baddb();
959 else if (-1 == rc)
960 resp_search(NULL, 0, NULL);
961
962 for (i = 0; i < sz; i++)
963 free(cp[i]);
964
965 free(cp);
966 resfree(res, ressz);
967 exprfree(expr);
968 manpath_free(&ps);
969 }
970
971 int
972 main(void)
973 {
974 int i;
975 char buf[PATH_MAX];
976 DIR *cwd;
977 struct req req;
978 char *p, *path, *subpath;
979
980 /* Scan our run-time environment. */
981
982 if (NULL == (cache = getenv("CACHE_DIR")))
983 cache = "/cache/man.cgi";
984
985 if (NULL == (progname = getenv("SCRIPT_NAME")))
986 progname = "";
987
988 if (NULL == (css = getenv("CSS_DIR")))
989 css = "";
990
991 if (NULL == (host = getenv("HTTP_HOST")))
992 host = "localhost";
993
994 /*
995 * First we change directory into the cache directory so that
996 * subsequent scanning for manpath directories is rooted
997 * relative to the same position.
998 */
999
1000 if (-1 == chdir(cache)) {
1001 perror(cache);
1002 resp_bad();
1003 return(EXIT_FAILURE);
1004 } else if (NULL == (cwd = opendir(cache))) {
1005 perror(cache);
1006 resp_bad();
1007 return(EXIT_FAILURE);
1008 }
1009
1010 memset(&req, 0, sizeof(struct req));
1011
1012 strlcpy(buf, ".", PATH_MAX);
1013 pathgen(cwd, buf, &req);
1014 closedir(cwd);
1015
1016 /* Next parse out the query string. */
1017
1018 if (NULL != (p = getenv("QUERY_STRING")))
1019 http_parse(&req, p);
1020
1021 /*
1022 * Now juggle paths to extract information.
1023 * We want to extract our filetype (the file suffix), the
1024 * initial path component, then the trailing component(s).
1025 * Start with leading subpath component.
1026 */
1027
1028 subpath = path = NULL;
1029 req.page = PAGE__MAX;
1030
1031 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1032 req.page = PAGE_INDEX;
1033
1034 if (NULL != path && '/' == *path && '\0' == *++path)
1035 req.page = PAGE_INDEX;
1036
1037 /* Strip file suffix. */
1038
1039 if (NULL != path && NULL != (p = strrchr(path, '.')))
1040 if (NULL != p && NULL == strchr(p, '/'))
1041 *p++ = '\0';
1042
1043 /* Resolve subpath component. */
1044
1045 if (NULL != path && NULL != (subpath = strchr(path, '/')))
1046 *subpath++ = '\0';
1047
1048 /* Map path into one we recognise. */
1049
1050 if (NULL != path && '\0' != *path)
1051 for (i = 0; i < (int)PAGE__MAX; i++)
1052 if (0 == strcmp(pages[i], path)) {
1053 req.page = (enum page)i;
1054 break;
1055 }
1056
1057 /* Route pages. */
1058
1059 switch (req.page) {
1060 case (PAGE_INDEX):
1061 pg_index(&req, subpath);
1062 break;
1063 case (PAGE_SEARCH):
1064 pg_search(&req, subpath);
1065 break;
1066 case (PAGE_SHOW):
1067 pg_show(&req, subpath);
1068 break;
1069 default:
1070 resp_error404(path);
1071 break;
1072 }
1073
1074 for (i = 0; i < (int)req.psz; i++) {
1075 free(req.p[i].path);
1076 free(req.p[i].name);
1077 }
1078
1079 free(req.p);
1080 return(EXIT_SUCCESS);
1081 }
1082
1083 static int
1084 cmp(const void *p1, const void *p2)
1085 {
1086
1087 return(strcasecmp(((const struct res *)p1)->title,
1088 ((const struct res *)p2)->title));
1089 }
1090
1091 /*
1092 * Check to see if an "etc" path consists of a catman.conf file. If it
1093 * does, that means that the path contains a tree created by catman(8)
1094 * and should be used for indexing.
1095 */
1096 static int
1097 pathstop(DIR *dir)
1098 {
1099 struct dirent *d;
1100
1101 while (NULL != (d = readdir(dir)))
1102 if (DT_REG == d->d_type)
1103 if (0 == strcmp(d->d_name, "catman.conf"))
1104 return(1);
1105
1106 return(0);
1107 }
1108
1109 /*
1110 * Scan for indexable paths.
1111 * This adds all paths with "etc/catman.conf" to the buffer.
1112 */
1113 static void
1114 pathgen(DIR *dir, char *path, struct req *req)
1115 {
1116 struct dirent *d;
1117 char *cp;
1118 DIR *cd;
1119 int rc;
1120 size_t sz, ssz;
1121
1122 sz = strlcat(path, "/", PATH_MAX);
1123 if (sz >= PATH_MAX) {
1124 fprintf(stderr, "%s: Path too long", path);
1125 return;
1126 }
1127
1128 /*
1129 * First, scan for the "etc" directory.
1130 * If it's found, then see if it should cause us to stop. This
1131 * happens when a catman.conf is found in the directory.
1132 */
1133
1134 rc = 0;
1135 while (0 == rc && NULL != (d = readdir(dir))) {
1136 if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
1137 continue;
1138
1139 path[(int)sz] = '\0';
1140 ssz = strlcat(path, d->d_name, PATH_MAX);
1141
1142 if (ssz >= PATH_MAX) {
1143 fprintf(stderr, "%s: Path too long", path);
1144 return;
1145 } else if (NULL == (cd = opendir(path))) {
1146 perror(path);
1147 return;
1148 }
1149
1150 rc = pathstop(cd);
1151 closedir(cd);
1152 }
1153
1154 if (rc > 0) {
1155 /* This also strips the trailing slash. */
1156 path[(int)--sz] = '\0';
1157 req->p = mandoc_realloc
1158 (req->p,
1159 (req->psz + 1) * sizeof(struct paths));
1160 /*
1161 * Strip out the leading "./" unless we're just a ".",
1162 * in which case use an empty string as our name.
1163 */
1164 req->p[(int)req->psz].path = mandoc_strdup(path);
1165 req->p[(int)req->psz].name =
1166 cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1167 req->psz++;
1168 /*
1169 * The name is just the path with all the slashes taken
1170 * out of it. Simple but effective.
1171 */
1172 for ( ; '\0' != *cp; cp++)
1173 if ('/' == *cp)
1174 *cp = ' ';
1175 return;
1176 }
1177
1178 /*
1179 * If no etc/catman.conf was found, recursively enter child
1180 * directory and continue scanning.
1181 */
1182
1183 rewinddir(dir);
1184 while (NULL != (d = readdir(dir))) {
1185 if (DT_DIR != d->d_type || '.' == d->d_name[0])
1186 continue;
1187
1188 path[(int)sz] = '\0';
1189 ssz = strlcat(path, d->d_name, PATH_MAX);
1190
1191 if (ssz >= PATH_MAX) {
1192 fprintf(stderr, "%s: Path too long", path);
1193 return;
1194 } else if (NULL == (cd = opendir(path))) {
1195 perror(path);
1196 return;
1197 }
1198
1199 pathgen(cd, path, req);
1200 closedir(cd);
1201 }
1202 }