]>
git.cameronkatri.com Git - mandoc.git/blob - cgi.c
1 /* $Id: cgi.c,v 1.53 2014/07/09 08:53:28 schwarze Exp $ */
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
34 #include <sys/types.h>
39 #include "mandoc_aux.h"
42 #include "mansearch.h"
52 * A query as passed to the search function.
55 const char *manroot
; /* manual root directory */
56 const char *arch
; /* architecture */
57 const char *sec
; /* manual section */
58 const char *expr
; /* unparsed expression string */
59 int legacy
; /* whether legacy mode */
64 char **p
; /* array of available manroots */
69 static void catman(const struct req
*, const char *);
70 static int cmp(const void *, const void *);
71 static void format(const struct req
*, const char *);
72 static void html_print(const char *);
73 static void html_printquery(const struct req
*);
74 static void html_putchar(char);
75 static int http_decode(char *);
76 static void http_parse(struct req
*, char *);
77 static void http_print(const char *);
78 static void http_putchar(char);
79 static void http_printquery(const struct req
*);
80 static void pathgen(DIR *, struct req
*);
81 static void pg_index(const struct req
*, char *);
82 static void pg_search(const struct req
*, char *);
83 static void pg_show(const struct req
*, char *);
84 static void resp_bad(void);
85 static void resp_baddb(void);
86 static void resp_error400(void);
87 static void resp_error404(const char *);
88 static void resp_begin_html(int, const char *);
89 static void resp_begin_http(int, const char *);
90 static void resp_end_html(void);
91 static void resp_index(const struct req
*);
92 static void resp_search(const struct req
*,
93 struct manpage
*, size_t);
94 static void resp_searchform(const struct req
*);
96 static const char *progname
; /* cgi script name */
97 static const char *cache
; /* cache directory */
98 static const char *css
; /* css directory */
99 static const char *host
; /* hostname */
101 static const char * const pages
[PAGE__MAX
] = {
102 "index", /* PAGE_INDEX */
103 "search", /* PAGE_SEARCH */
104 "show", /* PAGE_SHOW */
108 * Print a character, escaping HTML along the way.
109 * This will pass non-ASCII straight to output: be warned!
129 putchar((unsigned char)c
);
134 http_printquery(const struct req
*req
)
137 if (NULL
!= req
->q
.manroot
) {
139 http_print(req
->q
.manroot
);
141 if (NULL
!= req
->q
.sec
) {
143 http_print(req
->q
.sec
);
145 if (NULL
!= req
->q
.arch
) {
147 http_print(req
->q
.arch
);
149 if (NULL
!= req
->q
.expr
) {
151 http_print(req
->q
.expr
? req
->q
.expr
: "");
157 html_printquery(const struct req
*req
)
160 if (NULL
!= req
->q
.manroot
) {
161 printf("&manpath=");
162 html_print(req
->q
.manroot
);
164 if (NULL
!= req
->q
.sec
) {
166 html_print(req
->q
.sec
);
168 if (NULL
!= req
->q
.arch
) {
169 printf("&arch=");
170 html_print(req
->q
.arch
);
172 if (NULL
!= req
->q
.expr
) {
173 printf("&expr=");
174 html_print(req
->q
.expr
? req
->q
.expr
: "");
179 http_print(const char *p
)
189 * Call through to html_putchar().
190 * Accepts NULL strings.
193 html_print(const char *p
)
203 * Parse out key-value pairs from an HTTP request variable.
204 * This can be either a cookie or a POST/GET string, although man.cgi
205 * uses only GET for simplicity.
208 http_parse(struct req
*req
, char *p
)
213 memset(&req
->q
, 0, sizeof(struct query
));
214 req
->q
.manroot
= req
->p
[0];
221 p
+= (int)strcspn(p
, ";&");
224 if (NULL
!= (val
= strchr(key
, '=')))
227 if ('\0' == *key
|| NULL
== val
|| '\0' == *val
)
230 /* Just abort handling. */
232 if ( ! http_decode(key
))
234 if (NULL
!= val
&& ! http_decode(val
))
237 if (0 == strcmp(key
, "expr"))
239 else if (0 == strcmp(key
, "query"))
241 else if (0 == strcmp(key
, "sec"))
243 else if (0 == strcmp(key
, "sektion"))
245 else if (0 == strcmp(key
, "arch"))
247 else if (0 == strcmp(key
, "manpath"))
248 req
->q
.manroot
= val
;
249 else if (0 == strcmp(key
, "apropos"))
250 legacy
= 0 == strcmp(val
, "0");
253 /* Test for old man.cgi compatibility mode. */
255 req
->q
.legacy
= legacy
> 0;
258 * Section "0" means no section when in legacy mode.
259 * For some man.cgi scripts, "default" arch is none.
262 if (req
->q
.legacy
&& NULL
!= req
->q
.sec
)
263 if (0 == strcmp(req
->q
.sec
, "0"))
265 if (req
->q
.legacy
&& NULL
!= req
->q
.arch
)
266 if (0 == strcmp(req
->q
.arch
, "default"))
274 if (isalnum((unsigned char)c
)) {
275 putchar((unsigned char)c
);
277 } else if (' ' == c
) {
285 * HTTP-decode a string. The standard explanation is that this turns
286 * "%4e+foo" into "n foo" in the regular way. This is done in-place
287 * over the allocated string.
297 for ( ; '\0' != *p
; p
++) {
299 if ('\0' == (hex
[0] = *(p
+ 1)))
301 if ('\0' == (hex
[1] = *(p
+ 2)))
303 if (1 != sscanf(hex
, "%x", &c
))
309 memmove(p
+ 1, p
+ 3, strlen(p
+ 3) + 1);
311 *p
= '+' == *p
? ' ' : *p
;
319 resp_begin_http(int code
, const char *msg
)
323 printf("Status: %d %s\n", code
, msg
);
325 puts("Content-Type: text/html; charset=utf-8\n"
326 "Cache-Control: no-cache\n"
334 resp_begin_html(int code
, const char *msg
)
337 resp_begin_http(code
, msg
);
339 printf("<!DOCTYPE HTML PUBLIC "
340 " \"-//W3C//DTD HTML 4.01//EN\""
341 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
344 "<META HTTP-EQUIV=\"Content-Type\""
345 " CONTENT=\"text/html; charset=utf-8\">\n"
346 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
347 " TYPE=\"text/css\" media=\"all\">\n"
348 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
349 " TYPE=\"text/css\" media=\"all\">\n"
350 "<TITLE>System Manpage Reference</TITLE>\n"
353 "<!-- Begin page content. //-->\n", css
, css
);
365 resp_searchform(const struct req
*req
)
369 puts("<!-- Begin search form. //-->");
370 printf("<DIV ID=\"mancgi\">\n"
371 "<FORM ACTION=\"%s/search\" METHOD=\"get\">\n"
373 "<LEGEND>Search Parameters</LEGEND>\n"
374 "<INPUT TYPE=\"submit\" "
375 " VALUE=\"Search\"> for manuals satisfying \n"
376 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
378 html_print(req
->q
.expr
? req
->q
.expr
: "");
379 printf("\">, section "
380 "<INPUT TYPE=\"text\""
381 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
382 html_print(req
->q
.sec
? req
->q
.sec
: "");
384 "<INPUT TYPE=\"text\""
385 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
386 html_print(req
->q
.arch
? req
->q
.arch
: "");
389 puts(", <SELECT NAME=\"manpath\">");
390 for (i
= 0; i
< (int)req
->psz
; i
++) {
392 if (NULL
== req
->q
.manroot
? 0 == i
:
393 0 == strcmp(req
->q
.manroot
, req
->p
[i
]))
394 printf("SELECTED=\"selected\" ");
396 html_print(req
->p
[i
]);
398 html_print(req
->p
[i
]);
404 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
408 puts("<!-- End search form. //-->");
412 resp_index(const struct req
*req
)
415 resp_begin_html(200, NULL
);
416 resp_searchform(req
);
424 resp_begin_html(400, "Query Malformed");
425 printf("<H1>Malformed Query</H1>\n"
427 "The query your entered was malformed.\n"
428 "Try again from the\n"
429 "<A HREF=\"%s/index.html\">main page</A>.\n"
435 resp_error404(const char *page
)
438 resp_begin_html(404, "Not Found");
439 puts("<H1>Page Not Found</H1>\n"
441 "The page you're looking for, ");
445 "could not be found.\n"
446 "Try searching from the\n"
447 "<A HREF=\"%s/index.html\">main page</A>.\n"
455 resp_begin_html(500, "Internal Server Error");
456 puts("<P>Generic badness happened.</P>");
464 resp_begin_html(500, "Internal Server Error");
465 puts("<P>Your database is broken.</P>");
470 resp_search(const struct req
*req
, struct manpage
*r
, size_t sz
)
476 * If we have just one result, then jump there now
479 puts("Status: 303 See Other");
480 printf("Location: http://%s%s/show/%s/%s?",
481 host
, progname
, req
->q
.manroot
, r
[0].file
);
482 http_printquery(req
);
484 "Content-Type: text/html; charset=utf-8\n");
488 resp_begin_html(200, NULL
);
489 resp_searchform(req
);
491 puts("<DIV CLASS=\"results\">");
495 "No results found.\n"
502 qsort(r
, sz
, sizeof(struct manpage
), cmp
);
506 for (i
= 0; i
< sz
; i
++) {
508 "<TD CLASS=\"title\">\n"
509 "<A HREF=\"%s/show/%s/%s?",
510 progname
, req
->q
.manroot
, r
[i
].file
);
511 html_printquery(req
);
513 html_print(r
[i
].names
);
516 "<TD CLASS=\"desc\">");
517 html_print(r
[i
].output
);
529 pg_index(const struct req
*req
, char *path
)
536 catman(const struct req
*req
, const char *file
)
544 if (NULL
== (f
= fopen(file
, "r"))) {
549 resp_begin_html(200, NULL
);
550 resp_searchform(req
);
551 puts("<DIV CLASS=\"catman\">\n"
554 while (NULL
!= (p
= fgetln(f
, &len
))) {
556 for (i
= 0; i
< (int)len
- 1; i
++) {
558 * This means that the catpage is out of state.
559 * Ignore it and keep going (although the
563 if ('\b' == p
[i
] || '\n' == p
[i
])
567 * Print a regular character.
568 * Close out any bold/italic scopes.
569 * If we're in back-space mode, make sure we'll
570 * have something to enter when we backspace.
573 if ('\b' != p
[i
+ 1]) {
581 } else if (i
+ 2 >= (int)len
)
599 * Handle funny behaviour troff-isms.
600 * These grok'd from the original man2html.c.
603 if (('+' == p
[i
] && 'o' == p
[i
+ 2]) ||
604 ('o' == p
[i
] && '+' == p
[i
+ 2]) ||
605 ('|' == p
[i
] && '=' == p
[i
+ 2]) ||
606 ('=' == p
[i
] && '|' == p
[i
+ 2]) ||
607 ('*' == p
[i
] && '=' == p
[i
+ 2]) ||
608 ('=' == p
[i
] && '*' == p
[i
+ 2]) ||
609 ('*' == p
[i
] && '|' == p
[i
+ 2]) ||
610 ('|' == p
[i
] && '*' == p
[i
+ 2])) {
619 } else if (('|' == p
[i
] && '-' == p
[i
+ 2]) ||
620 ('-' == p
[i
] && '|' == p
[i
+ 1]) ||
621 ('+' == p
[i
] && '-' == p
[i
+ 1]) ||
622 ('-' == p
[i
] && '+' == p
[i
+ 1]) ||
623 ('+' == p
[i
] && '|' == p
[i
+ 1]) ||
624 ('|' == p
[i
] && '+' == p
[i
+ 1])) {
648 * Clean up the last character.
649 * We can get to a newline; don't print that.
657 if (i
== (int)len
- 1 && '\n' != p
[i
])
672 format(const struct req
*req
, const char *file
)
680 char opts
[PATH_MAX
+ 128];
682 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
687 mp
= mparse_alloc(MPARSE_SO
, MANDOCLEVEL_FATAL
, NULL
, NULL
);
688 rc
= mparse_readfd(mp
, fd
, file
);
691 if (rc
>= MANDOCLEVEL_FATAL
) {
696 snprintf(opts
, sizeof(opts
),
697 "fragment,man=%s/search?sec=%%S&expr=Nm~^%%N$",
700 mparse_result(mp
, &mdoc
, &man
, NULL
);
701 if (NULL
== man
&& NULL
== mdoc
) {
707 resp_begin_html(200, NULL
);
708 resp_searchform(req
);
710 vp
= html_alloc(opts
);
725 pg_show(const struct req
*req
, char *path
)
729 if (NULL
== path
|| NULL
== (sub
= strchr(path
, '/'))) {
736 * Begin by chdir()ing into the manroot.
737 * This way we can pick up the database files, which are
738 * relative to the manpath root.
741 if (-1 == chdir(path
)) {
754 pg_search(const struct req
*req
, char *path
)
756 struct mansearch search
;
757 struct manpaths paths
;
760 const char *ep
, *start
;
765 * Begin by chdir()ing into the root of the manpath.
766 * This way we can pick up the database files, which are
767 * relative to the manpath root.
770 if (-1 == (chdir(req
->q
.manroot
))) {
771 perror(req
->q
.manroot
);
772 resp_search(req
, NULL
, 0);
776 search
.arch
= req
->q
.arch
;
777 search
.sec
= req
->q
.sec
;
778 search
.deftype
= TYPE_Nm
| TYPE_Nd
;
782 paths
.paths
= mandoc_malloc(sizeof(char *));
783 paths
.paths
[0] = mandoc_strdup(".");
786 * Poor man's tokenisation: just break apart by spaces.
787 * Yes, this is half-ass. But it works for now.
791 while (ep
&& isspace((unsigned char)*ep
))
796 while (ep
&& '\0' != *ep
) {
797 cp
= mandoc_reallocarray(cp
, sz
+ 1, sizeof(char *));
799 while ('\0' != *ep
&& ! isspace((unsigned char)*ep
))
801 cp
[sz
] = mandoc_malloc((ep
- start
) + 1);
802 memcpy(cp
[sz
], start
, ep
- start
);
803 cp
[sz
++][ep
- start
] = '\0';
804 while (isspace((unsigned char)*ep
))
808 if (mansearch(&search
, &paths
, sz
, cp
, "Nd", &res
, &ressz
))
809 resp_search(req
, res
, ressz
);
813 for (i
= 0; i
< sz
; i
++)
817 for (i
= 0; i
< (int)ressz
; i
++) {
824 free(paths
.paths
[0]);
834 char *p
, *path
, *subpath
;
836 /* Scan our run-time environment. */
838 if (NULL
== (cache
= getenv("CACHE_DIR")))
839 cache
= "/cache/man.cgi";
841 if (NULL
== (progname
= getenv("SCRIPT_NAME")))
844 if (NULL
== (css
= getenv("CSS_DIR")))
847 if (NULL
== (host
= getenv("HTTP_HOST")))
851 * First we change directory into the cache directory so that
852 * subsequent scanning for manpath directories is rooted
853 * relative to the same position.
856 if (-1 == chdir(cache
)) {
859 return(EXIT_FAILURE
);
860 } else if (NULL
== (cwd
= opendir(cache
))) {
863 return(EXIT_FAILURE
);
866 memset(&req
, 0, sizeof(struct req
));
871 /* Next parse out the query string. */
873 if (NULL
!= (p
= getenv("QUERY_STRING")))
877 * Now juggle paths to extract information.
878 * We want to extract our filetype (the file suffix), the
879 * initial path component, then the trailing component(s).
880 * Start with leading subpath component.
883 subpath
= path
= NULL
;
884 req
.page
= PAGE__MAX
;
886 if (NULL
== (path
= getenv("PATH_INFO")) || '\0' == *path
)
887 req
.page
= PAGE_INDEX
;
889 if (NULL
!= path
&& '/' == *path
&& '\0' == *++path
)
890 req
.page
= PAGE_INDEX
;
892 /* Resolve subpath component. */
894 if (NULL
!= path
&& NULL
!= (subpath
= strchr(path
, '/')))
897 /* Map path into one we recognise. */
899 if (NULL
!= path
&& '\0' != *path
)
900 for (i
= 0; i
< (int)PAGE__MAX
; i
++)
901 if (0 == strcmp(pages
[i
], path
)) {
902 req
.page
= (enum page
)i
;
910 pg_index(&req
, subpath
);
913 pg_search(&req
, subpath
);
916 pg_show(&req
, subpath
);
923 for (i
= 0; i
< (int)req
.psz
; i
++)
926 return(EXIT_SUCCESS
);
930 cmp(const void *p1
, const void *p2
)
933 return(strcasecmp(((const struct manpage
*)p1
)->names
,
934 ((const struct manpage
*)p2
)->names
));
938 * Scan for indexable paths.
941 pathgen(DIR *dir
, struct req
*req
)
948 while (NULL
!= (d
= readdir(dir
))) {
950 stat(d
->d_name
, &sb
);
951 if (!(S_IFDIR
& sb
.st_mode
)
953 if (DT_DIR
!= d
->d_type
955 || '.' != d
->d_name
[0]) {
956 req
->p
= mandoc_realloc(req
->p
,
957 (req
->psz
+ 1) * sizeof(char *));
958 req
->p
[req
->psz
++] = mandoc_strdup(d
->d_name
);