]>
git.cameronkatri.com Git - mandoc.git/blob - cgi.c
1 /* $Id: cgi.c,v 1.61 2014/07/10 00:52:50 schwarze Exp $ */
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
32 #include "mandoc_aux.h"
35 #include "mansearch.h"
45 * A query as passed to the search function.
48 const char *manpath
; /* desired manual directory */
49 const char *arch
; /* architecture */
50 const char *sec
; /* manual section */
51 const char *expr
; /* unparsed expression string */
52 int legacy
; /* whether legacy mode */
57 char **p
; /* array of available manpaths */
58 size_t psz
; /* number of available manpaths */
62 static void catman(const struct req
*, const char *);
63 static int cmp(const void *, const void *);
64 static void format(const struct req
*, const char *);
65 static void html_print(const char *);
66 static void html_printquery(const struct req
*);
67 static void html_putchar(char);
68 static int http_decode(char *);
69 static void http_parse(struct req
*, char *);
70 static void http_print(const char *);
71 static void http_putchar(char);
72 static void http_printquery(const struct req
*);
73 static void pathgen(struct req
*);
74 static void pg_index(const struct req
*, char *);
75 static void pg_search(const struct req
*, char *);
76 static void pg_show(const struct req
*, char *);
77 static void resp_begin_html(int, const char *);
78 static void resp_begin_http(int, const char *);
79 static void resp_end_html(void);
80 static void resp_error_badrequest(const char *);
81 static void resp_error_internal(void);
82 static void resp_error_notfound(const char *);
83 static void resp_index(const struct req
*);
84 static void resp_noresult(const struct req
*,
86 static void resp_search(const struct req
*,
87 struct manpage
*, size_t);
88 static void resp_searchform(const struct req
*);
90 static const char *scriptname
; /* CGI script name */
91 static const char *mandir
; /* contains all manpath directories */
92 static const char *cssdir
; /* css directory */
93 static const char *httphost
; /* hostname used in the URIs */
95 static const char * const pages
[PAGE__MAX
] = {
96 "index", /* PAGE_INDEX */
97 "search", /* PAGE_SEARCH */
98 "show", /* PAGE_SHOW */
102 * Print a character, escaping HTML along the way.
103 * This will pass non-ASCII straight to output: be warned!
123 putchar((unsigned char)c
);
129 http_printquery(const struct req
*req
)
132 if (NULL
!= req
->q
.manpath
) {
134 http_print(req
->q
.manpath
);
136 if (NULL
!= req
->q
.sec
) {
138 http_print(req
->q
.sec
);
140 if (NULL
!= req
->q
.arch
) {
142 http_print(req
->q
.arch
);
144 if (NULL
!= req
->q
.expr
) {
146 http_print(req
->q
.expr
? req
->q
.expr
: "");
151 html_printquery(const struct req
*req
)
154 if (NULL
!= req
->q
.manpath
) {
155 printf("&manpath=");
156 html_print(req
->q
.manpath
);
158 if (NULL
!= req
->q
.sec
) {
160 html_print(req
->q
.sec
);
162 if (NULL
!= req
->q
.arch
) {
163 printf("&arch=");
164 html_print(req
->q
.arch
);
166 if (NULL
!= req
->q
.expr
) {
167 printf("&expr=");
168 html_print(req
->q
.expr
? req
->q
.expr
: "");
173 http_print(const char *p
)
183 * Call through to html_putchar().
184 * Accepts NULL strings.
187 html_print(const char *p
)
197 * Parse out key-value pairs from an HTTP request variable.
198 * This can be either a cookie or a POST/GET string, although man.cgi
199 * uses only GET for simplicity.
202 http_parse(struct req
*req
, char *p
)
207 memset(&req
->q
, 0, sizeof(struct query
));
208 req
->q
.manpath
= req
->p
[0];
215 p
+= (int)strcspn(p
, ";&");
218 if (NULL
!= (val
= strchr(key
, '=')))
221 if ('\0' == *key
|| NULL
== val
|| '\0' == *val
)
224 /* Just abort handling. */
226 if ( ! http_decode(key
))
228 if (NULL
!= val
&& ! http_decode(val
))
231 if (0 == strcmp(key
, "expr"))
233 else if (0 == strcmp(key
, "query"))
235 else if (0 == strcmp(key
, "sec"))
237 else if (0 == strcmp(key
, "sektion"))
239 else if (0 == strcmp(key
, "arch"))
241 else if (0 == strcmp(key
, "manpath"))
242 req
->q
.manpath
= val
;
243 else if (0 == strcmp(key
, "apropos"))
244 legacy
= 0 == strcmp(val
, "0");
247 /* Test for old man.cgi compatibility mode. */
249 req
->q
.legacy
= legacy
> 0;
252 * Section "0" means no section when in legacy mode.
253 * For some man.cgi scripts, "default" arch is none.
256 if (req
->q
.legacy
&& NULL
!= req
->q
.sec
)
257 if (0 == strcmp(req
->q
.sec
, "0"))
259 if (req
->q
.legacy
&& NULL
!= req
->q
.arch
)
260 if (0 == strcmp(req
->q
.arch
, "default"))
268 if (isalnum((unsigned char)c
)) {
269 putchar((unsigned char)c
);
271 } else if (' ' == c
) {
279 * HTTP-decode a string. The standard explanation is that this turns
280 * "%4e+foo" into "n foo" in the regular way. This is done in-place
281 * over the allocated string.
291 for ( ; '\0' != *p
; p
++) {
293 if ('\0' == (hex
[0] = *(p
+ 1)))
295 if ('\0' == (hex
[1] = *(p
+ 2)))
297 if (1 != sscanf(hex
, "%x", &c
))
303 memmove(p
+ 1, p
+ 3, strlen(p
+ 3) + 1);
305 *p
= '+' == *p
? ' ' : *p
;
313 resp_begin_http(int code
, const char *msg
)
317 printf("Status: %d %s\n", code
, msg
);
319 puts("Content-Type: text/html; charset=utf-8\n"
320 "Cache-Control: no-cache\n"
328 resp_begin_html(int code
, const char *msg
)
331 resp_begin_http(code
, msg
);
333 printf("<!DOCTYPE HTML PUBLIC "
334 " \"-//W3C//DTD HTML 4.01//EN\""
335 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
338 "<META HTTP-EQUIV=\"Content-Type\""
339 " CONTENT=\"text/html; charset=utf-8\">\n"
340 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
341 " TYPE=\"text/css\" media=\"all\">\n"
342 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
343 " TYPE=\"text/css\" media=\"all\">\n"
344 "<TITLE>System Manpage Reference</TITLE>\n"
347 "<!-- Begin page content. //-->\n",
360 resp_searchform(const struct req
*req
)
364 puts("<!-- Begin search form. //-->");
365 printf("<DIV ID=\"mancgi\">\n"
366 "<FORM ACTION=\"%s/search\" METHOD=\"get\">\n"
368 "<LEGEND>Search Parameters</LEGEND>\n"
369 "<INPUT TYPE=\"submit\" "
370 " VALUE=\"Search\"> for manuals matching \n"
371 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
373 html_print(req
->q
.expr
? req
->q
.expr
: "");
374 printf("\">, section "
375 "<INPUT TYPE=\"text\""
376 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
377 html_print(req
->q
.sec
? req
->q
.sec
: "");
379 "<INPUT TYPE=\"text\""
380 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
381 html_print(req
->q
.arch
? req
->q
.arch
: "");
384 puts(", in <SELECT NAME=\"manpath\">");
385 for (i
= 0; i
< (int)req
->psz
; i
++) {
387 if (NULL
== req
->q
.manpath
? 0 == i
:
388 0 == strcmp(req
->q
.manpath
, req
->p
[i
]))
389 printf("SELECTED=\"selected\" ");
391 html_print(req
->p
[i
]);
393 html_print(req
->p
[i
]);
399 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
403 puts("<!-- End search form. //-->");
407 resp_index(const struct req
*req
)
410 resp_begin_html(200, NULL
);
412 "Online manuals with "
413 "<A HREF=\"http://mdocml.bsd.lv/\">mandoc</A>\n"
415 resp_searchform(req
);
417 "This web interface is documented in the "
418 "<A HREF=\"search?expr=Nm~^man\\.cgi$&sec=8\">"
419 "man.cgi</A> manual, and the "
420 "<A HREF=\"search?expr=Nm~^apropos$&sec=1\">"
421 "apropos</A> manual explains the query syntax.\n"
427 resp_noresult(const struct req
*req
, const char *msg
)
429 resp_begin_html(200, NULL
);
430 resp_searchform(req
);
438 resp_error_badrequest(const char *msg
)
441 resp_begin_html(400, "Bad Request");
442 puts("<H1>Bad Request</H1>\n"
445 printf("Try again from the\n"
446 "<A HREF=\"%s\">main page</A>.\n"
452 resp_error_notfound(const char *page
)
455 resp_begin_html(404, "Not Found");
456 puts("<H1>Page Not Found</H1>\n"
458 "The page you're looking for, ");
462 "could not be found.\n"
463 "Try searching from the\n"
464 "<A HREF=\"%s\">main page</A>.\n"
470 resp_error_internal(void)
472 resp_begin_html(500, "Internal Server Error");
473 puts("<P>Internal Server Error</P>");
478 resp_search(const struct req
*req
, struct manpage
*r
, size_t sz
)
484 * If we have just one result, then jump there now
487 puts("Status: 303 See Other");
488 printf("Location: http://%s%s/show/%s/%s?",
489 httphost
, scriptname
, req
->q
.manpath
, r
[0].file
);
490 http_printquery(req
);
492 "Content-Type: text/html; charset=utf-8\n");
496 qsort(r
, sz
, sizeof(struct manpage
), cmp
);
498 resp_begin_html(200, NULL
);
499 resp_searchform(req
);
500 puts("<DIV CLASS=\"results\">");
503 for (i
= 0; i
< sz
; i
++) {
505 "<TD CLASS=\"title\">\n"
506 "<A HREF=\"%s/show/%s/%s?",
507 scriptname
, req
->q
.manpath
, r
[i
].file
);
508 html_printquery(req
);
510 html_print(r
[i
].names
);
513 "<TD CLASS=\"desc\">");
514 html_print(r
[i
].output
);
526 pg_index(const struct req
*req
, char *path
)
533 catman(const struct req
*req
, const char *file
)
541 if (NULL
== (f
= fopen(file
, "r"))) {
542 resp_error_badrequest(
543 "You specified an invalid manual file.");
547 resp_begin_html(200, NULL
);
548 resp_searchform(req
);
549 puts("<DIV CLASS=\"catman\">\n"
552 while (NULL
!= (p
= fgetln(f
, &len
))) {
554 for (i
= 0; i
< (int)len
- 1; i
++) {
556 * This means that the catpage is out of state.
557 * Ignore it and keep going (although the
561 if ('\b' == p
[i
] || '\n' == p
[i
])
565 * Print a regular character.
566 * Close out any bold/italic scopes.
567 * If we're in back-space mode, make sure we'll
568 * have something to enter when we backspace.
571 if ('\b' != p
[i
+ 1]) {
579 } else if (i
+ 2 >= (int)len
)
597 * Handle funny behaviour troff-isms.
598 * These grok'd from the original man2html.c.
601 if (('+' == p
[i
] && 'o' == p
[i
+ 2]) ||
602 ('o' == p
[i
] && '+' == p
[i
+ 2]) ||
603 ('|' == p
[i
] && '=' == p
[i
+ 2]) ||
604 ('=' == p
[i
] && '|' == p
[i
+ 2]) ||
605 ('*' == p
[i
] && '=' == p
[i
+ 2]) ||
606 ('=' == p
[i
] && '*' == p
[i
+ 2]) ||
607 ('*' == p
[i
] && '|' == p
[i
+ 2]) ||
608 ('|' == p
[i
] && '*' == p
[i
+ 2])) {
617 } else if (('|' == p
[i
] && '-' == p
[i
+ 2]) ||
618 ('-' == p
[i
] && '|' == p
[i
+ 1]) ||
619 ('+' == p
[i
] && '-' == p
[i
+ 1]) ||
620 ('-' == p
[i
] && '+' == p
[i
+ 1]) ||
621 ('+' == p
[i
] && '|' == p
[i
+ 1]) ||
622 ('|' == p
[i
] && '+' == p
[i
+ 1])) {
646 * Clean up the last character.
647 * We can get to a newline; don't print that.
655 if (i
== (int)len
- 1 && '\n' != p
[i
])
670 format(const struct req
*req
, const char *file
)
678 char opts
[PATH_MAX
+ 128];
680 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
681 resp_error_badrequest(
682 "You specified an invalid manual file.");
686 mp
= mparse_alloc(MPARSE_SO
, MANDOCLEVEL_FATAL
, NULL
,
688 rc
= mparse_readfd(mp
, fd
, file
);
691 if (rc
>= MANDOCLEVEL_FATAL
) {
692 fprintf(stderr
, "fatal mandoc error: %s/%s\n",
693 req
->q
.manpath
, file
);
694 resp_error_internal();
698 snprintf(opts
, sizeof(opts
),
699 "fragment,man=%s/search?sec=%%S&expr=Nm~^%%N$",
702 mparse_result(mp
, &mdoc
, &man
, NULL
);
703 if (NULL
== man
&& NULL
== mdoc
) {
704 fprintf(stderr
, "fatal mandoc error: %s/%s\n",
705 req
->q
.manpath
, file
);
706 resp_error_internal();
711 resp_begin_html(200, NULL
);
712 resp_searchform(req
);
714 vp
= html_alloc(opts
);
729 pg_show(const struct req
*req
, char *path
)
733 if (NULL
== path
|| NULL
== (sub
= strchr(path
, '/'))) {
734 resp_error_badrequest(
735 "You did not specify a page to show.");
741 * Begin by chdir()ing into the manpath.
742 * This way we can pick up the database files, which are
743 * relative to the manpath root.
746 if (-1 == chdir(path
)) {
747 resp_error_badrequest(
748 "You specified an invalid manpath.");
759 pg_search(const struct req
*req
, char *path
)
761 struct mansearch search
;
762 struct manpaths paths
;
765 const char *ep
, *start
;
770 * Begin by chdir()ing into the root of the manpath.
771 * This way we can pick up the database files, which are
772 * relative to the manpath root.
775 if (-1 == (chdir(req
->q
.manpath
))) {
776 resp_error_badrequest(
777 "You specified an invalid manpath.");
781 search
.arch
= req
->q
.arch
;
782 search
.sec
= req
->q
.sec
;
783 search
.deftype
= TYPE_Nm
| TYPE_Nd
;
787 paths
.paths
= mandoc_malloc(sizeof(char *));
788 paths
.paths
[0] = mandoc_strdup(".");
791 * Poor man's tokenisation: just break apart by spaces.
792 * Yes, this is half-ass. But it works for now.
796 while (ep
&& isspace((unsigned char)*ep
))
801 while (ep
&& '\0' != *ep
) {
802 cp
= mandoc_reallocarray(cp
, sz
+ 1, sizeof(char *));
804 while ('\0' != *ep
&& ! isspace((unsigned char)*ep
))
806 cp
[sz
] = mandoc_malloc((ep
- start
) + 1);
807 memcpy(cp
[sz
], start
, ep
- start
);
808 cp
[sz
++][ep
- start
] = '\0';
809 while (isspace((unsigned char)*ep
))
813 if (0 == mansearch(&search
, &paths
, sz
, cp
, "Nd", &res
, &ressz
))
814 resp_noresult(req
, "You entered an invalid query.");
816 resp_noresult(req
, "No results found.");
818 resp_search(req
, res
, ressz
);
820 for (i
= 0; i
< sz
; i
++)
824 for (i
= 0; i
< (int)ressz
; i
++) {
831 free(paths
.paths
[0]);
840 char *querystring
, *path
, *subpath
;
842 /* Scan our run-time environment. */
844 if (NULL
== (mandir
= getenv("MAN_DIR")))
847 if (NULL
== (scriptname
= getenv("SCRIPT_NAME")))
850 if (NULL
== (cssdir
= getenv("CSS_DIR")))
853 if (NULL
== (httphost
= getenv("HTTP_HOST")))
854 httphost
= "localhost";
857 * First we change directory into the mandir so that
858 * subsequent scanning for manpath directories is rooted
859 * relative to the same position.
862 if (-1 == chdir(mandir
)) {
863 fprintf(stderr
, "MAN_DIR: %s: %s\n",
864 mandir
, strerror(errno
));
865 resp_error_internal();
866 return(EXIT_FAILURE
);
869 memset(&req
, 0, sizeof(struct req
));
872 /* Next parse out the query string. */
874 if (NULL
!= (querystring
= getenv("QUERY_STRING")))
875 http_parse(&req
, querystring
);
878 * Now juggle paths to extract information.
879 * We want to extract our filetype (the file suffix), the
880 * initial path component, then the trailing component(s).
881 * Start with leading subpath component.
884 subpath
= path
= NULL
;
885 req
.page
= PAGE__MAX
;
887 if (NULL
== (path
= getenv("PATH_INFO")) || '\0' == *path
)
888 req
.page
= PAGE_INDEX
;
890 if (NULL
!= path
&& '/' == *path
&& '\0' == *++path
)
891 req
.page
= PAGE_INDEX
;
893 /* Resolve subpath component. */
895 if (NULL
!= path
&& NULL
!= (subpath
= strchr(path
, '/')))
898 /* Map path into one we recognise. */
900 if (NULL
!= path
&& '\0' != *path
)
901 for (i
= 0; i
< (int)PAGE__MAX
; i
++)
902 if (0 == strcmp(pages
[i
], path
)) {
903 req
.page
= (enum page
)i
;
911 pg_index(&req
, subpath
);
914 pg_search(&req
, subpath
);
917 pg_show(&req
, subpath
);
920 resp_error_notfound(path
);
924 for (i
= 0; i
< (int)req
.psz
; i
++)
927 return(EXIT_SUCCESS
);
931 cmp(const void *p1
, const void *p2
)
934 return(strcasecmp(((const struct manpage
*)p1
)->names
,
935 ((const struct manpage
*)p2
)->names
));
939 * Scan for indexable paths.
942 pathgen(struct req
*req
)
948 if (NULL
== (fp
= fopen("manpath.conf", "r")))
951 while (NULL
!= (dp
= fgetln(fp
, &dpsz
))) {
952 if ('\n' == dp
[dpsz
- 1])
954 req
->p
= mandoc_realloc(req
->p
,
955 (req
->psz
+ 1) * sizeof(char *));
956 req
->p
[req
->psz
++] = mandoc_strndup(dp
, dpsz
);