]>
git.cameronkatri.com Git - mandoc.git/blob - html.c
4bb3ca56bd737546d9ae3b1cc9cd047e9f214866
1 /* $Id: html.c,v 1.213 2017/06/08 12:54:58 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
31 #include "mandoc_aux.h"
42 #define HTML_NOSTACK (1 << 0)
43 #define HTML_AUTOCLOSE (1 << 1)
44 #define HTML_NLBEFORE (1 << 2)
45 #define HTML_NLBEGIN (1 << 3)
46 #define HTML_NLEND (1 << 4)
47 #define HTML_NLAFTER (1 << 5)
48 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER)
49 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND)
50 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE)
51 #define HTML_INDENT (1 << 6)
52 #define HTML_NOINDENT (1 << 7)
55 static const struct htmldata htmltags
[TAG_MAX
] = {
57 {"head", HTML_NLALL
| HTML_INDENT
},
59 {"meta", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
60 {"title", HTML_NLAROUND
},
61 {"div", HTML_NLAROUND
},
62 {"h1", HTML_NLAROUND
},
63 {"h2", HTML_NLAROUND
},
65 {"link", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
66 {"br", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
68 {"table", HTML_NLALL
| HTML_INDENT
},
69 {"colgroup", HTML_NLALL
| HTML_INDENT
},
70 {"col", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
71 {"tr", HTML_NLALL
| HTML_INDENT
},
72 {"td", HTML_NLAROUND
},
73 {"li", HTML_NLAROUND
| HTML_INDENT
},
74 {"ul", HTML_NLALL
| HTML_INDENT
},
75 {"ol", HTML_NLALL
| HTML_INDENT
},
76 {"dl", HTML_NLALL
| HTML_INDENT
},
77 {"dt", HTML_NLAROUND
},
78 {"dd", HTML_NLAROUND
| HTML_INDENT
},
79 {"pre", HTML_NLALL
| HTML_NOINDENT
},
86 {"style", HTML_NLALL
| HTML_INDENT
},
87 {"math", HTML_NLALL
| HTML_INDENT
},
105 static const char *const roffscales
[SCALE_MAX
] = {
118 static void a2width(const char *, struct roffsu
*);
119 static void print_byte(struct html
*, char);
120 static void print_endword(struct html
*);
121 static void print_indent(struct html
*);
122 static void print_word(struct html
*, const char *);
124 static void print_ctag(struct html
*, struct tag
*);
125 static int print_escape(struct html
*, char);
126 static int print_encode(struct html
*, const char *, const char *, int);
127 static void print_href(struct html
*, const char *, const char *, int);
128 static void print_metaf(struct html
*, enum mandoc_esc
);
132 html_alloc(const struct manoutput
*outopts
)
136 h
= mandoc_calloc(1, sizeof(struct html
));
139 h
->style
= outopts
->style
;
140 h
->base_man
= outopts
->man
;
141 h
->base_includes
= outopts
->includes
;
142 if (outopts
->fragment
)
143 h
->oflags
|= HTML_FRAGMENT
;
154 h
= (struct html
*)p
;
156 while ((tag
= h
->tag
) != NULL
) {
165 print_gen_head(struct html
*h
)
169 print_otag(h
, TAG_META
, "?", "charset", "utf-8");
172 * Print a default style-sheet.
175 t
= print_otag(h
, TAG_STYLE
, "");
176 print_text(h
, "table.head, table.foot { width: 100%; }");
178 print_text(h
, "td.head-rtitle, td.foot-os { text-align: right; }");
180 print_text(h
, "td.head-vol { text-align: center; }");
182 print_text(h
, "div.Pp { margin: 1ex 0ex; }");
186 print_otag(h
, TAG_LINK
, "?h??", "rel", "stylesheet",
187 h
->style
, "type", "text/css", "media", "all");
191 print_metaf(struct html
*h
, enum mandoc_esc deco
)
196 case ESCAPE_FONTPREV
:
199 case ESCAPE_FONTITALIC
:
200 font
= HTMLFONT_ITALIC
;
202 case ESCAPE_FONTBOLD
:
203 font
= HTMLFONT_BOLD
;
209 case ESCAPE_FONTROMAN
:
210 font
= HTMLFONT_NONE
;
217 print_tagq(h
, h
->metaf
);
225 case HTMLFONT_ITALIC
:
226 h
->metaf
= print_otag(h
, TAG_I
, "");
229 h
->metaf
= print_otag(h
, TAG_B
, "");
232 h
->metaf
= print_otag(h
, TAG_B
, "");
233 print_otag(h
, TAG_I
, "");
241 html_make_id(const struct roff_node
*n
)
243 const struct roff_node
*nch
;
246 for (nch
= n
->child
; nch
!= NULL
; nch
= nch
->next
)
247 if (nch
->type
!= ROFFT_TEXT
)
253 /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */
255 for (cp
= buf
; *cp
!= '\0'; cp
++)
263 html_strlen(const char *cp
)
269 * Account for escape sequences when calculating string lengths.
270 * This follows the logic in term_strlen(), because we
271 * must calculate the width of the strings that are produced.
272 * Assume that every character has a width of 1. This is
273 * hacky, but it is good enough for approximating widths.
279 rsz
= strcspn(cp
, "\\");
291 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
295 case ESCAPE_NUMBERED
:
297 case ESCAPE_OVERSTRIKE
:
303 case ESCAPE_SKIPCHAR
:
314 print_escape(struct html
*h
, char c
)
319 print_word(h
, "<");
322 print_word(h
, ">");
325 print_word(h
, "&");
328 print_word(h
, """);
331 print_word(h
, " ");
345 print_encode(struct html
*h
, const char *p
, const char *pend
, int norecurse
)
352 static const char rejs
[9] = { '\\', '<', '>', '&', '"',
353 ASCII_NBRSP
, ASCII_HYPH
, ASCII_BREAK
, '\0' };
356 pend
= strchr(p
, '\0');
361 if (HTML_SKIPCHAR
& h
->flags
&& '\\' != *p
) {
362 h
->flags
&= ~HTML_SKIPCHAR
;
367 for (sz
= strcspn(p
, rejs
); sz
-- && p
< pend
; p
++)
376 if (print_escape(h
, *p
++))
379 esc
= mandoc_escape(&p
, &seq
, &len
);
380 if (ESCAPE_ERROR
== esc
)
385 case ESCAPE_FONTPREV
:
386 case ESCAPE_FONTBOLD
:
387 case ESCAPE_FONTITALIC
:
389 case ESCAPE_FONTROMAN
:
393 case ESCAPE_SKIPCHAR
:
394 h
->flags
|= HTML_SKIPCHAR
;
400 if (h
->flags
& HTML_SKIPCHAR
) {
401 h
->flags
&= ~HTML_SKIPCHAR
;
407 /* Skip past "u" header. */
408 c
= mchars_num2uc(seq
+ 1, len
- 1);
410 case ESCAPE_NUMBERED
:
411 c
= mchars_num2char(seq
, len
);
416 c
= mchars_spec2cp(seq
, len
);
424 case ESCAPE_OVERSTRIKE
:
432 if ((c
< 0x20 && c
!= 0x09) ||
433 (c
> 0x7E && c
< 0xA0))
436 (void)snprintf(numbuf
, sizeof(numbuf
), "&#%d;", c
);
437 print_word(h
, numbuf
);
438 } else if (print_escape(h
, c
) == 0)
446 print_href(struct html
*h
, const char *name
, const char *sec
, int man
)
450 pp
= man
? h
->base_man
: h
->base_includes
;
451 while ((p
= strchr(pp
, '%')) != NULL
) {
452 print_encode(h
, pp
, p
, 1);
453 if (man
&& p
[1] == 'S') {
457 print_encode(h
, sec
, NULL
, 1);
458 } else if ((man
&& p
[1] == 'N') ||
459 (man
== 0 && p
[1] == 'I'))
460 print_encode(h
, name
, NULL
, 1);
462 print_encode(h
, p
, p
+ 2, 1);
466 print_encode(h
, pp
, NULL
, 1);
470 print_otag(struct html
*h
, enum htmltag tag
, const char *fmt
, ...)
473 struct roffsu mysu
, *su
;
479 int i
, have_style
, tflags
;
481 tflags
= htmltags
[tag
].flags
;
483 /* Push this tag onto the stack of open scopes. */
485 if ((tflags
& HTML_NOSTACK
) == 0) {
486 t
= mandoc_malloc(sizeof(struct tag
));
493 if (tflags
& HTML_NLBEFORE
)
497 else if ((h
->flags
& HTML_NOSPACE
) == 0) {
498 if (h
->flags
& HTML_KEEP
)
499 print_word(h
, " ");
501 if (h
->flags
& HTML_PREKEEP
)
502 h
->flags
|= HTML_KEEP
;
507 if ( ! (h
->flags
& HTML_NONOSPACE
))
508 h
->flags
&= ~HTML_NOSPACE
;
510 h
->flags
|= HTML_NOSPACE
;
512 /* Print out the tag name and attributes. */
515 print_word(h
, htmltags
[tag
].name
);
520 while (*fmt
!= '\0') {
527 /* Parse a non-style attribute and its arguments. */
529 arg1
= va_arg(ap
, char *);
542 arg1
= va_arg(ap
, char *);
549 arg2
= va_arg(ap
, char *);
553 /* Print the non-style attributes. */
561 print_href(h
, arg1
, NULL
, 0);
565 print_href(h
, arg1
, arg2
, 1);
570 print_encode(h
, arg1
, NULL
, 1);
574 print_encode(h
, arg1
, NULL
, 1);
575 print_word(h
, "\" title=\"");
576 print_encode(h
, arg1
, NULL
, 1);
580 print_encode(h
, arg1
, NULL
, 1);
586 /* Print out styles. */
588 while (*fmt
!= '\0') {
592 /* First letter: input argument type. */
598 SCALE_HS_INIT(su
, i
);
601 arg1
= va_arg(ap
, char *);
604 su
= va_arg(ap
, struct roffsu
*);
609 SCALE_VS_INIT(su
, i
);
612 if ((arg2
= va_arg(ap
, char *)) == NULL
)
617 /* Increase to make even bold text fit. */
632 /* Second letter: style name. */
636 attr
= "margin-bottom";
642 attr
= "text-indent";
645 attr
= "margin-left";
658 arg1
= va_arg(ap
, char *);
663 if (su
== NULL
&& arg1
== NULL
)
667 print_word(h
, " style=\"");
675 if (su
->unit
== SCALE_MM
&& (v
/= 100.0) == 0.0)
677 else if (su
->unit
== SCALE_BU
)
679 (void)snprintf(numbuf
, sizeof(numbuf
), "%.2f", v
);
680 print_word(h
, numbuf
);
681 print_word(h
, roffscales
[su
->unit
]);
692 /* Accommodate "well-formed" singleton escaping. */
694 if (HTML_AUTOCLOSE
& htmltags
[tag
].flags
)
699 if (tflags
& HTML_NLBEGIN
)
702 h
->flags
|= HTML_NOSPACE
;
704 if (tflags
& HTML_INDENT
)
706 if (tflags
& HTML_NOINDENT
)
713 print_ctag(struct html
*h
, struct tag
*tag
)
718 * Remember to close out and nullify the current
719 * meta-font and table, if applicable.
726 tflags
= htmltags
[tag
->tag
].flags
;
728 if (tflags
& HTML_INDENT
)
730 if (tflags
& HTML_NOINDENT
)
732 if (tflags
& HTML_NLEND
)
737 print_word(h
, htmltags
[tag
->tag
].name
);
739 if (tflags
& HTML_NLAFTER
)
747 print_gen_decls(struct html
*h
)
749 print_word(h
, "<!DOCTYPE html>");
754 print_text(struct html
*h
, const char *word
)
756 if (h
->col
&& (h
->flags
& HTML_NOSPACE
) == 0) {
757 if ( ! (HTML_KEEP
& h
->flags
)) {
758 if (HTML_PREKEEP
& h
->flags
)
759 h
->flags
|= HTML_KEEP
;
762 print_word(h
, " ");
765 assert(NULL
== h
->metaf
);
767 case HTMLFONT_ITALIC
:
768 h
->metaf
= print_otag(h
, TAG_I
, "");
771 h
->metaf
= print_otag(h
, TAG_B
, "");
774 h
->metaf
= print_otag(h
, TAG_B
, "");
775 print_otag(h
, TAG_I
, "");
783 if ( ! print_encode(h
, word
, NULL
, 0)) {
784 if ( ! (h
->flags
& HTML_NONOSPACE
))
785 h
->flags
&= ~HTML_NOSPACE
;
786 h
->flags
&= ~HTML_NONEWLINE
;
788 h
->flags
|= HTML_NOSPACE
| HTML_NONEWLINE
;
791 print_tagq(h
, h
->metaf
);
795 h
->flags
&= ~HTML_IGNDELIM
;
799 print_tagq(struct html
*h
, const struct tag
*until
)
803 while ((tag
= h
->tag
) != NULL
) {
805 if (until
&& tag
== until
)
811 print_stagq(struct html
*h
, const struct tag
*suntil
)
815 while ((tag
= h
->tag
) != NULL
) {
816 if (suntil
&& tag
== suntil
)
823 print_paragraph(struct html
*h
)
827 t
= print_otag(h
, TAG_DIV
, "c", "Pp");
832 /***********************************************************************
833 * Low level output functions.
834 * They implement line breaking using a short static buffer.
835 ***********************************************************************/
838 * Buffer one HTML output byte.
839 * If the buffer is full, flush and deactivate it and start a new line.
840 * If the buffer is inactive, print directly.
843 print_byte(struct html
*h
, char c
)
845 if ((h
->flags
& HTML_BUFFER
) == 0) {
851 if (h
->col
+ h
->bufcol
< sizeof(h
->buf
)) {
852 h
->buf
[h
->bufcol
++] = c
;
861 fwrite(h
->buf
, h
->bufcol
, 1, stdout
);
863 h
->col
= (h
->indent
+ 1) * 2 + h
->bufcol
+ 1;
865 h
->flags
&= ~HTML_BUFFER
;
869 * If something was printed on the current output line, end it.
870 * Not to be called right after print_indent().
873 print_endline(struct html
*h
)
880 fwrite(h
->buf
, h
->bufcol
, 1, stdout
);
885 h
->flags
|= HTML_NOSPACE
;
886 h
->flags
&= ~HTML_BUFFER
;
890 * Flush the HTML output buffer.
891 * If it is inactive, activate it.
894 print_endword(struct html
*h
)
901 if ((h
->flags
& HTML_BUFFER
) == 0) {
903 h
->flags
|= HTML_BUFFER
;
904 } else if (h
->bufcol
) {
906 fwrite(h
->buf
, h
->bufcol
, 1, stdout
);
907 h
->col
+= h
->bufcol
+ 1;
913 * If at the beginning of a new output line,
914 * perform indentation and mark the line as containing output.
915 * Make sure to really produce some output right afterwards,
916 * but do not use print_otag() for producing it.
919 print_indent(struct html
*h
)
926 if (h
->noindent
== 0) {
927 h
->col
= h
->indent
* 2;
928 for (i
= 0; i
< h
->col
; i
++)
931 h
->flags
&= ~HTML_NOSPACE
;
935 * Print or buffer some characters
936 * depending on the current HTML output buffer state.
939 print_word(struct html
*h
, const char *cp
)
942 print_byte(h
, *cp
++);
946 * Calculate the scaling unit passed in a `-width' argument. This uses
947 * either a native scaling unit (e.g., 1i, 2m) or the string length of
951 a2width(const char *p
, struct roffsu
*su
)
955 end
= a2roffsu(p
, su
, SCALE_MAX
);
956 if (end
== NULL
|| *end
!= '\0') {
958 su
->scale
= html_strlen(p
);
959 } else if (su
->scale
< 0.0)