]>
git.cameronkatri.com Git - mandoc.git/blob - cgi.c
1 /* $Id: cgi.c,v 1.38 2011/12/16 20:06:58 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
38 #include "apropos_db.h"
64 * A query as passed to the search function.
67 const char *arch
; /* architecture */
68 const char *sec
; /* manual section */
69 const char *expr
; /* unparsed expression string */
70 int manroot
; /* manroot index (or -1)*/
71 int whatis
; /* whether whatis mode */
72 int legacy
; /* whether legacy mode */
82 static int atou(const char *, unsigned *);
83 static void catman(const struct req
*, const char *);
84 static int cmp(const void *, const void *);
85 static void format(const struct req
*, const char *);
86 static void html_print(const char *);
87 static void html_printquery(const struct req
*);
88 static void html_putchar(char);
89 static int http_decode(char *);
90 static void http_parse(struct req
*, char *);
91 static void http_print(const char *);
92 static void http_putchar(char);
93 static void http_printquery(const struct req
*);
94 static int pathstop(DIR *);
95 static void pathgen(DIR *, char *, struct req
*);
96 static void pg_index(const struct req
*, char *);
97 static void pg_search(const struct req
*, char *);
98 static void pg_show(const struct req
*, char *);
99 static void resp_bad(void);
100 static void resp_baddb(void);
101 static void resp_error400(void);
102 static void resp_error404(const char *);
103 static void resp_begin_html(int, const char *);
104 static void resp_begin_http(int, const char *);
105 static void resp_end_html(void);
106 static void resp_index(const struct req
*);
107 static void resp_search(struct res
*, size_t, void *);
108 static void resp_searchform(const struct req
*);
110 static const char *progname
; /* cgi script name */
111 static const char *cache
; /* cache directory */
112 static const char *css
; /* css directory */
113 static const char *host
; /* hostname */
115 static const char * const pages
[PAGE__MAX
] = {
116 "index", /* PAGE_INDEX */
117 "search", /* PAGE_SEARCH */
118 "show", /* PAGE_SHOW */
122 * This is just OpenBSD's strtol(3) suggestion.
123 * I use it instead of strtonum(3) for portability's sake.
126 atou(const char *buf
, unsigned *v
)
132 lval
= strtol(buf
, &ep
, 10);
133 if (buf
[0] == '\0' || *ep
!= '\0')
135 if ((errno
== ERANGE
&& (lval
== LONG_MAX
||
136 lval
== LONG_MIN
)) ||
137 (lval
> INT_MAX
|| lval
< 0))
140 *v
= (unsigned int)lval
;
145 * Print a character, escaping HTML along the way.
146 * This will pass non-ASCII straight to output: be warned!
166 putchar((unsigned char)c
);
171 http_printquery(const struct req
*req
)
175 http_print(req
->q
.expr
? req
->q
.expr
: "");
177 http_print(req
->q
.sec
? req
->q
.sec
: "");
179 http_print(req
->q
.arch
? req
->q
.arch
: "");
184 html_printquery(const struct req
*req
)
187 printf("&expr=");
188 html_print(req
->q
.expr
? req
->q
.expr
: "");
190 html_print(req
->q
.sec
? req
->q
.sec
: "");
191 printf("&arch=");
192 html_print(req
->q
.arch
? req
->q
.arch
: "");
196 http_print(const char *p
)
206 * Call through to html_putchar().
207 * Accepts NULL strings.
210 html_print(const char *p
)
220 * Parse out key-value pairs from an HTTP request variable.
221 * This can be either a cookie or a POST/GET string, although man.cgi
222 * uses only GET for simplicity.
225 http_parse(struct req
*req
, char *p
)
227 char *key
, *val
, *manroot
;
230 memset(&req
->q
, 0, sizeof(struct query
));
240 p
+= (int)strcspn(p
, ";&");
243 if (NULL
!= (val
= strchr(key
, '=')))
246 if ('\0' == *key
|| NULL
== val
|| '\0' == *val
)
249 /* Just abort handling. */
251 if ( ! http_decode(key
))
253 if (NULL
!= val
&& ! http_decode(val
))
256 if (0 == strcmp(key
, "expr"))
258 else if (0 == strcmp(key
, "query"))
260 else if (0 == strcmp(key
, "sec"))
262 else if (0 == strcmp(key
, "sektion"))
264 else if (0 == strcmp(key
, "arch"))
266 else if (0 == strcmp(key
, "manpath"))
268 else if (0 == strcmp(key
, "apropos"))
269 legacy
= 0 == strcmp(val
, "0");
270 else if (0 == strcmp(key
, "op"))
271 req
->q
.whatis
= 0 == strcasecmp(val
, "whatis");
274 /* Test for old man.cgi compatibility mode. */
279 } else if (legacy
> 0) {
285 * Section "0" means no section when in legacy mode.
286 * For some man.cgi scripts, "default" arch is none.
289 if (req
->q
.legacy
&& NULL
!= req
->q
.sec
)
290 if (0 == strcmp(req
->q
.sec
, "0"))
292 if (req
->q
.legacy
&& NULL
!= req
->q
.arch
)
293 if (0 == strcmp(req
->q
.arch
, "default"))
296 /* Default to first manroot. */
298 if (NULL
!= manroot
) {
299 for (i
= 0; i
< (int)req
->psz
; i
++)
300 if (0 == strcmp(req
->p
[i
].name
, manroot
))
302 req
->q
.manroot
= i
< (int)req
->psz
? i
: -1;
310 if (isalnum((unsigned char)c
)) {
311 putchar((unsigned char)c
);
313 } else if (' ' == c
) {
321 * HTTP-decode a string. The standard explanation is that this turns
322 * "%4e+foo" into "N foo" in the regular way. This is done in-place
323 * over the allocated string.
333 for ( ; '\0' != *p
; p
++) {
335 if ('\0' == (hex
[0] = *(p
+ 1)))
337 if ('\0' == (hex
[1] = *(p
+ 2)))
339 if (1 != sscanf(hex
, "%x", &c
))
345 memmove(p
+ 1, p
+ 3, strlen(p
+ 3) + 1);
347 *p
= '+' == *p
? ' ' : *p
;
355 resp_begin_http(int code
, const char *msg
)
359 printf("Status: %d %s\n", code
, msg
);
361 puts("Content-Type: text/html; charset=utf-8\n"
362 "Cache-Control: no-cache\n"
370 resp_begin_html(int code
, const char *msg
)
373 resp_begin_http(code
, msg
);
375 printf("<!DOCTYPE HTML PUBLIC "
376 " \"-//W3C//DTD HTML 4.01//EN\""
377 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
380 "<META HTTP-EQUIV=\"Content-Type\""
381 " CONTENT=\"text/html; charset=utf-8\">\n"
382 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
383 " TYPE=\"text/css\" media=\"all\">\n"
384 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
385 " TYPE=\"text/css\" media=\"all\">\n"
386 "<TITLE>System Manpage Reference</TITLE>\n"
389 "<!-- Begin page content. //-->\n", css
, css
);
401 resp_searchform(const struct req
*req
)
405 puts("<!-- Begin search form. //-->");
406 printf("<DIV ID=\"mancgi\">\n"
407 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
409 "<LEGEND>Search Parameters</LEGEND>\n"
410 "<INPUT TYPE=\"submit\" NAME=\"op\""
411 " VALUE=\"Whatis\"> or \n"
412 "<INPUT TYPE=\"submit\" NAME=\"op\""
413 " VALUE=\"apropos\"> for manuals satisfying \n"
414 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
416 html_print(req
->q
.expr
? req
->q
.expr
: "");
417 printf("\">, section "
418 "<INPUT TYPE=\"text\""
419 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
420 html_print(req
->q
.sec
? req
->q
.sec
: "");
422 "<INPUT TYPE=\"text\""
423 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
424 html_print(req
->q
.arch
? req
->q
.arch
: "");
427 puts(", <SELECT NAME=\"manpath\">");
428 for (i
= 0; i
< (int)req
->psz
; i
++) {
429 printf("<OPTION %s VALUE=\"",
430 (i
== req
->q
.manroot
) ||
431 (0 == i
&& -1 == req
->q
.manroot
) ?
432 "SELECTED=\"selected\"" : "");
433 html_print(req
->p
[i
].name
);
435 html_print(req
->p
[i
].name
);
441 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
445 puts("<!-- End search form. //-->");
449 resp_index(const struct req
*req
)
452 resp_begin_html(200, NULL
);
453 resp_searchform(req
);
461 resp_begin_html(400, "Query Malformed");
462 printf("<H1>Malformed Query</H1>\n"
464 "The query your entered was malformed.\n"
465 "Try again from the\n"
466 "<A HREF=\"%s/index.html\">main page</A>.\n"
472 resp_error404(const char *page
)
475 resp_begin_html(404, "Not Found");
476 puts("<H1>Page Not Found</H1>\n"
478 "The page you're looking for, ");
482 "could not be found.\n"
483 "Try searching from the\n"
484 "<A HREF=\"%s/index.html\">main page</A>.\n"
492 resp_begin_html(500, "Internal Server Error");
493 puts("<P>Generic badness happened.</P>");
501 resp_begin_html(500, "Internal Server Error");
502 puts("<P>Your database is broken.</P>");
507 resp_search(struct res
*r
, size_t sz
, void *arg
)
510 const struct req
*req
;
512 req
= (const struct req
*)arg
;
515 assert(req
->q
.manroot
>= 0);
519 * If we have just one result, then jump there now
522 puts("Status: 303 See Other");
523 printf("Location: http://%s%s/show/%d/%u/%u.html?",
524 host
, progname
, req
->q
.manroot
,
525 r
[0].volume
, r
[0].rec
);
526 http_printquery(req
);
528 "Content-Type: text/html; charset=utf-8\n");
532 qsort(r
, sz
, sizeof(struct res
), cmp
);
534 resp_begin_html(200, NULL
);
535 resp_searchform(req
);
537 puts("<DIV CLASS=\"results\">");
541 "No %s results found.\n",
542 req
->q
.whatis
? "whatis" : "apropos");
545 "<A HREF=\"%s/search.html?op=apropos",
547 html_printquery(req
);
548 puts("\">apropos</A>?)");
558 for (i
= 0; i
< (int)sz
; i
++) {
560 "<TD CLASS=\"title\">\n"
561 "<A HREF=\"%s/show/%d/%u/%u.html?",
562 progname
, req
->q
.manroot
,
563 r
[i
].volume
, r
[i
].rec
);
564 html_printquery(req
);
566 html_print(r
[i
].title
);
568 html_print(r
[i
].cat
);
569 if (r
[i
].arch
&& '\0' != *r
[i
].arch
) {
571 html_print(r
[i
].arch
);
575 "<TD CLASS=\"desc\">");
576 html_print(r
[i
].desc
);
588 pg_index(const struct req
*req
, char *path
)
595 catman(const struct req
*req
, const char *file
)
603 if (NULL
== (f
= fopen(file
, "r"))) {
608 resp_begin_html(200, NULL
);
609 resp_searchform(req
);
610 puts("<DIV CLASS=\"catman\">\n"
613 while (NULL
!= (p
= fgetln(f
, &len
))) {
615 for (i
= 0; i
< (int)len
- 1; i
++) {
617 * This means that the catpage is out of state.
618 * Ignore it and keep going (although the
622 if ('\b' == p
[i
] || '\n' == p
[i
])
626 * Print a regular character.
627 * Close out any bold/italic scopes.
628 * If we're in back-space mode, make sure we'll
629 * have something to enter when we backspace.
632 if ('\b' != p
[i
+ 1]) {
640 } else if (i
+ 2 >= (int)len
)
658 * Handle funny behaviour troff-isms.
659 * These were grok'd from the original man2html.c.
662 if (('+' == p
[i
] && 'o' == p
[i
+ 2]) ||
663 ('o' == p
[i
] && '+' == p
[i
+ 2]) ||
664 ('|' == p
[i
] && '=' == p
[i
+ 2]) ||
665 ('=' == p
[i
] && '|' == p
[i
+ 2]) ||
666 ('*' == p
[i
] && '=' == p
[i
+ 2]) ||
667 ('=' == p
[i
] && '*' == p
[i
+ 2]) ||
668 ('*' == p
[i
] && '|' == p
[i
+ 2]) ||
669 ('|' == p
[i
] && '*' == p
[i
+ 2])) {
678 } else if (('|' == p
[i
] && '-' == p
[i
+ 2]) ||
679 ('-' == p
[i
] && '|' == p
[i
+ 1]) ||
680 ('+' == p
[i
] && '-' == p
[i
+ 1]) ||
681 ('-' == p
[i
] && '+' == p
[i
+ 1]) ||
682 ('+' == p
[i
] && '|' == p
[i
+ 1]) ||
683 ('|' == p
[i
] && '+' == p
[i
+ 1])) {
707 * Clean up the last character.
708 * We can get to a newline; don't print that.
716 if (i
== (int)len
- 1 && '\n' != p
[i
])
731 format(const struct req
*req
, const char *file
)
739 char opts
[MAXPATHLEN
+ 128];
741 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
746 mp
= mparse_alloc(MPARSE_AUTO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
747 rc
= mparse_readfd(mp
, fd
, file
);
750 if (rc
>= MANDOCLEVEL_FATAL
) {
755 snprintf(opts
, sizeof(opts
), "fragment,"
756 "man=%s/search.html?sec=%%S&expr=%%N,"
757 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
760 mparse_result(mp
, &mdoc
, &man
);
761 if (NULL
== man
&& NULL
== mdoc
) {
767 resp_begin_html(200, NULL
);
768 resp_searchform(req
);
770 vp
= html_alloc(opts
);
785 pg_show(const struct req
*req
, char *path
)
790 char file
[MAXPATHLEN
];
793 unsigned int vol
, rec
, mr
;
799 /* Parse out mroot, volume, and record from the path. */
801 if (NULL
== path
|| NULL
== (sub
= strchr(path
, '/'))) {
806 if ( ! atou(path
, &mr
)) {
811 if (NULL
== (sub
= strchr(path
, '/'))) {
816 if ( ! atou(path
, &vol
) || ! atou(sub
, &rec
)) {
819 } else if (mr
>= (unsigned int)req
->psz
) {
825 * Begin by chdir()ing into the manroot.
826 * This way we can pick up the database files, which are
827 * relative to the manpath root.
830 if (-1 == chdir(req
->p
[(int)mr
].path
)) {
831 perror(req
->p
[(int)mr
].path
);
836 memset(&ps
, 0, sizeof(struct manpaths
));
837 manpath_manconf(&ps
, "etc/catman.conf");
839 if (vol
>= (unsigned int)ps
.sz
) {
844 sz
= strlcpy(file
, ps
.paths
[vol
], MAXPATHLEN
);
845 assert(sz
< MAXPATHLEN
);
846 strlcat(file
, "/mandoc.index", MAXPATHLEN
);
848 /* Open the index recno(3) database. */
850 idx
= dbopen(file
, O_RDONLY
, 0, DB_RECNO
, NULL
);
860 if (0 != (rc
= (*idx
->get
)(idx
, &key
, &val
, 0))) {
861 rc
< 0 ? resp_baddb() : resp_error400();
863 } else if (0 == val
.size
) {
868 cp
= (char *)val
.data
;
871 if (NULL
== memchr(cp
, '\0', val
.size
- 1))
874 file
[(int)sz
] = '\0';
875 strlcat(file
, "/", MAXPATHLEN
);
876 strlcat(file
, cp
, MAXPATHLEN
);
889 pg_search(const struct req
*req
, char *path
)
894 const char *ep
, *start
;
899 if (req
->q
.manroot
< 0 || 0 == req
->psz
) {
900 resp_search(NULL
, 0, (void *)req
);
904 memset(&opt
, 0, sizeof(struct opts
));
907 opt
.arch
= req
->q
.arch
;
908 opt
.cat
= req
->q
.sec
;
914 * Begin by chdir()ing into the root of the manpath.
915 * This way we can pick up the database files, which are
916 * relative to the manpath root.
919 assert(req
->q
.manroot
< (int)req
->psz
);
920 if (-1 == (chdir(req
->p
[req
->q
.manroot
].path
))) {
921 perror(req
->p
[req
->q
.manroot
].path
);
922 resp_search(NULL
, 0, (void *)req
);
926 memset(&ps
, 0, sizeof(struct manpaths
));
927 manpath_manconf(&ps
, "etc/catman.conf");
930 * Poor man's tokenisation: just break apart by spaces.
931 * Yes, this is half-ass. But it works for now.
934 while (ep
&& isspace((unsigned char)*ep
))
937 while (ep
&& '\0' != *ep
) {
938 cp
= mandoc_realloc(cp
, (sz
+ 1) * sizeof(char *));
940 while ('\0' != *ep
&& ! isspace((unsigned char)*ep
))
942 cp
[sz
] = mandoc_malloc((ep
- start
) + 1);
943 memcpy(cp
[sz
], start
, ep
- start
);
944 cp
[sz
++][ep
- start
] = '\0';
945 while (isspace((unsigned char)*ep
))
950 * Pump down into apropos backend.
951 * The resp_search() function is called with the results.
954 expr
= req
->q
.whatis
?
955 termcomp(sz
, cp
, &tt
) : exprcomp(sz
, cp
, &tt
);
959 (ps
.sz
, ps
.paths
, &opt
,
960 expr
, tt
, (void *)req
, resp_search
);
962 /* ...unless errors occurred. */
967 resp_search(NULL
, 0, (void *)req
);
969 for (i
= 0; i
< sz
; i
++)
981 char buf
[MAXPATHLEN
];
984 char *p
, *path
, *subpath
;
986 /* Scan our run-time environment. */
988 if (NULL
== (cache
= getenv("CACHE_DIR")))
989 cache
= "/cache/man.cgi";
991 if (NULL
== (progname
= getenv("SCRIPT_NAME")))
994 if (NULL
== (css
= getenv("CSS_DIR")))
997 if (NULL
== (host
= getenv("HTTP_HOST")))
1001 * First we change directory into the cache directory so that
1002 * subsequent scanning for manpath directories is rooted
1003 * relative to the same position.
1006 if (-1 == chdir(cache
)) {
1009 return(EXIT_FAILURE
);
1010 } else if (NULL
== (cwd
= opendir(cache
))) {
1013 return(EXIT_FAILURE
);
1016 memset(&req
, 0, sizeof(struct req
));
1018 strlcpy(buf
, ".", MAXPATHLEN
);
1019 pathgen(cwd
, buf
, &req
);
1022 /* Next parse out the query string. */
1024 if (NULL
!= (p
= getenv("QUERY_STRING")))
1025 http_parse(&req
, p
);
1028 * Now juggle paths to extract information.
1029 * We want to extract our filetype (the file suffix), the
1030 * initial path component, then the trailing component(s).
1031 * Start with leading subpath component.
1034 subpath
= path
= NULL
;
1035 req
.page
= PAGE__MAX
;
1037 if (NULL
== (path
= getenv("PATH_INFO")) || '\0' == *path
)
1038 req
.page
= PAGE_INDEX
;
1040 if (NULL
!= path
&& '/' == *path
&& '\0' == *++path
)
1041 req
.page
= PAGE_INDEX
;
1043 /* Strip file suffix. */
1045 if (NULL
!= path
&& NULL
!= (p
= strrchr(path
, '.')))
1046 if (NULL
!= p
&& NULL
== strchr(p
, '/'))
1049 /* Resolve subpath component. */
1051 if (NULL
!= path
&& NULL
!= (subpath
= strchr(path
, '/')))
1054 /* Map path into one we recognise. */
1056 if (NULL
!= path
&& '\0' != *path
)
1057 for (i
= 0; i
< (int)PAGE__MAX
; i
++)
1058 if (0 == strcmp(pages
[i
], path
)) {
1059 req
.page
= (enum page
)i
;
1067 pg_index(&req
, subpath
);
1070 pg_search(&req
, subpath
);
1073 pg_show(&req
, subpath
);
1076 resp_error404(path
);
1080 for (i
= 0; i
< (int)req
.psz
; i
++) {
1081 free(req
.p
[i
].path
);
1082 free(req
.p
[i
].name
);
1086 return(EXIT_SUCCESS
);
1090 cmp(const void *p1
, const void *p2
)
1093 return(strcasecmp(((const struct res
*)p1
)->title
,
1094 ((const struct res
*)p2
)->title
));
1098 * Check to see if an "etc" path contains a catman.conf file. If it
1099 * does, that means that the path contains a tree created by catman(8)
1100 * and should be used for indexing.
1107 while (NULL
!= (d
= readdir(dir
)))
1108 if (DT_REG
== d
->d_type
)
1109 if (0 == strcmp(d
->d_name
, "catman.conf"))
1116 * Scan for indexable paths.
1117 * This adds all paths with "etc/catman.conf" to the buffer.
1120 pathgen(DIR *dir
, char *path
, struct req
*req
)
1128 sz
= strlcat(path
, "/", MAXPATHLEN
);
1129 if (sz
>= MAXPATHLEN
) {
1130 fprintf(stderr
, "%s: Path too long", path
);
1135 * First, scan for the "etc" directory.
1136 * If it's found, then see if it should cause us to stop. This
1137 * happens when a catman.conf is found in the directory.
1141 while (0 == rc
&& NULL
!= (d
= readdir(dir
))) {
1142 if (DT_DIR
!= d
->d_type
|| strcmp(d
->d_name
, "etc"))
1145 path
[(int)sz
] = '\0';
1146 ssz
= strlcat(path
, d
->d_name
, MAXPATHLEN
);
1148 if (ssz
>= MAXPATHLEN
) {
1149 fprintf(stderr
, "%s: Path too long", path
);
1151 } else if (NULL
== (cd
= opendir(path
))) {
1161 /* This also strips the trailing slash. */
1162 path
[(int)--sz
] = '\0';
1163 req
->p
= mandoc_realloc
1165 (req
->psz
+ 1) * sizeof(struct paths
));
1167 * Strip out the leading "./" unless we're just a ".",
1168 * in which case use an empty string as our name.
1170 req
->p
[(int)req
->psz
].path
= mandoc_strdup(path
);
1171 req
->p
[(int)req
->psz
].name
=
1172 cp
= mandoc_strdup(path
+ (1 == sz
? 1 : 2));
1175 * The name is just the path with all the slashes taken
1176 * out of it. Simple but effective.
1178 for ( ; '\0' != *cp
; cp
++)
1185 * If no etc/catman.conf was found, recursively enter child
1186 * directory and continue scanning.
1190 while (NULL
!= (d
= readdir(dir
))) {
1191 if (DT_DIR
!= d
->d_type
|| '.' == d
->d_name
[0])
1194 path
[(int)sz
] = '\0';
1195 ssz
= strlcat(path
, d
->d_name
, MAXPATHLEN
);
1197 if (ssz
>= MAXPATHLEN
) {
1198 fprintf(stderr
, "%s: Path too long", path
);
1200 } else if (NULL
== (cd
= opendir(path
))) {
1205 pathgen(cd
, path
, req
);