]>
git.cameronkatri.com Git - mandoc.git/blob - html.c
1 /* $Id: html.c,v 1.202 2017/01/26 18:28:18 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
32 #include "mandoc_aux.h"
/*
 * Per-element flag bits.  They are ORed together in the flags field of
 * the htmltags[] entries and read back as tflags in print_otag() and
 * print_ctag().
 *
 * HTML_NOSTACK: print_otag() pushes a tag onto the stack of open
 *   scopes only when this bit is clear.
 * HTML_AUTOCLOSE: tested in print_otag() for "well-formed" singleton
 *   escaping.
 * HTML_NLBEFORE, HTML_NLBEGIN: tested in print_otag().
 * HTML_NLEND, HTML_NLAFTER: tested in print_ctag().
 *   NOTE(review): presumably these four select where line breaks go
 *   relative to the opening and closing tags -- confirm against the
 *   full function bodies.
 * HTML_NLAROUND, HTML_NLINSIDE, HTML_NLALL: convenience combinations
 *   of the four bits above.
 * HTML_INDENT, HTML_NOINDENT: tested in both print_otag() and
 *   print_ctag().
 *   NOTE(review): presumably they adjust the indentation state that
 *   print_indent() consults -- confirm.
 */
41 #define HTML_NOSTACK (1 << 0)
42 #define HTML_AUTOCLOSE (1 << 1)
43 #define HTML_NLBEFORE (1 << 2)
44 #define HTML_NLBEGIN (1 << 3)
45 #define HTML_NLEND (1 << 4)
46 #define HTML_NLAFTER (1 << 5)
47 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER)
48 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND)
49 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE)
50 #define HTML_INDENT (1 << 6)
51 #define HTML_NOINDENT (1 << 7)
54 static const struct htmldata htmltags
[TAG_MAX
] = {
56 {"head", HTML_NLALL
| HTML_INDENT
},
58 {"meta", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
59 {"title", HTML_NLAROUND
},
60 {"div", HTML_NLAROUND
},
61 {"h1", HTML_NLAROUND
},
62 {"h2", HTML_NLAROUND
},
64 {"link", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
65 {"br", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
67 {"table", HTML_NLALL
| HTML_INDENT
},
68 {"tbody", HTML_NLALL
| HTML_INDENT
},
69 {"col", HTML_NOSTACK
| HTML_AUTOCLOSE
| HTML_NLALL
},
70 {"tr", HTML_NLALL
| HTML_INDENT
},
71 {"td", HTML_NLAROUND
},
72 {"li", HTML_NLAROUND
| HTML_INDENT
},
73 {"ul", HTML_NLALL
| HTML_INDENT
},
74 {"ol", HTML_NLALL
| HTML_INDENT
},
75 {"dl", HTML_NLALL
| HTML_INDENT
},
76 {"dt", HTML_NLAROUND
},
77 {"dd", HTML_NLAROUND
| HTML_INDENT
},
78 {"pre", HTML_NLALL
| HTML_NOINDENT
},
83 {"style", HTML_NLALL
| HTML_INDENT
},
84 {"math", HTML_NLALL
| HTML_INDENT
},
102 static const char *const roffscales
[SCALE_MAX
] = {
/*
 * Forward declarations of file-local (static) helpers.
 * Their definitions appear further down in this file.
 */
115 static void a2width(const char *, struct roffsu
*);
116 static void print_byte(struct html
*, char);
117 static void print_endword(struct html
*);
118 static void print_indent(struct html
*);
119 static void print_word(struct html
*, const char *);
121 static void print_ctag(struct html
*, struct tag
*);
122 static int print_escape(struct html
*, char);
123 static int print_encode(struct html
*, const char *, const char *, int);
124 static void print_href(struct html
*, const char *, const char *, int);
125 static void print_metaf(struct html
*, enum mandoc_esc
);
129 html_alloc(const struct manoutput
*outopts
)
133 h
= mandoc_calloc(1, sizeof(struct html
));
136 h
->style
= outopts
->style
;
137 h
->base_man
= outopts
->man
;
138 h
->base_includes
= outopts
->includes
;
139 if (outopts
->fragment
)
140 h
->oflags
|= HTML_FRAGMENT
;
151 h
= (struct html
*)p
;
153 while ((tag
= h
->tags
.head
) != NULL
) {
154 h
->tags
.head
= tag
->next
;
162 print_gen_head(struct html
*h
)
166 print_otag(h
, TAG_META
, "?", "charset", "utf-8");
169 * Print a default style-sheet.
172 t
= print_otag(h
, TAG_STYLE
, "");
173 print_text(h
, "table.head, table.foot { width: 100%; }");
175 print_text(h
, "td.head-rtitle, td.foot-os { text-align: right; }");
177 print_text(h
, "td.head-vol { text-align: center; }");
179 print_text(h
, "div.Pp { margin: 1ex 0ex; }");
183 print_otag(h
, TAG_LINK
, "?h??", "rel", "stylesheet",
184 h
->style
, "type", "text/css", "media", "all");
188 print_metaf(struct html
*h
, enum mandoc_esc deco
)
193 case ESCAPE_FONTPREV
:
196 case ESCAPE_FONTITALIC
:
197 font
= HTMLFONT_ITALIC
;
199 case ESCAPE_FONTBOLD
:
200 font
= HTMLFONT_BOLD
;
206 case ESCAPE_FONTROMAN
:
207 font
= HTMLFONT_NONE
;
214 print_tagq(h
, h
->metaf
);
222 case HTMLFONT_ITALIC
:
223 h
->metaf
= print_otag(h
, TAG_I
, "");
226 h
->metaf
= print_otag(h
, TAG_B
, "");
229 h
->metaf
= print_otag(h
, TAG_B
, "");
230 print_otag(h
, TAG_I
, "");
238 html_strlen(const char *cp
)
244 * Account for escaped sequences within string length
245 * calculations. This follows the logic in term_strlen() as we
246 * must calculate the width of produced strings.
247 * Assume that every character has a width of "1". This is
248 * hacky, but it is good enough for approximating widths.
254 rsz
= strcspn(cp
, "\\");
266 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
270 case ESCAPE_NUMBERED
:
272 case ESCAPE_OVERSTRIKE
:
278 case ESCAPE_SKIPCHAR
:
289 print_escape(struct html
*h
, char c
)
294 print_word(h
, "<");
297 print_word(h
, ">");
300 print_word(h
, "&");
303 print_word(h
, """);
306 print_word(h
, " ");
320 print_encode(struct html
*h
, const char *p
, const char *pend
, int norecurse
)
327 static const char rejs
[9] = { '\\', '<', '>', '&', '"',
328 ASCII_NBRSP
, ASCII_HYPH
, ASCII_BREAK
, '\0' };
331 pend
= strchr(p
, '\0');
336 if (HTML_SKIPCHAR
& h
->flags
&& '\\' != *p
) {
337 h
->flags
&= ~HTML_SKIPCHAR
;
342 for (sz
= strcspn(p
, rejs
); sz
-- && p
< pend
; p
++)
351 if (print_escape(h
, *p
++))
354 esc
= mandoc_escape(&p
, &seq
, &len
);
355 if (ESCAPE_ERROR
== esc
)
360 case ESCAPE_FONTPREV
:
361 case ESCAPE_FONTBOLD
:
362 case ESCAPE_FONTITALIC
:
364 case ESCAPE_FONTROMAN
:
368 case ESCAPE_SKIPCHAR
:
369 h
->flags
|= HTML_SKIPCHAR
;
375 if (h
->flags
& HTML_SKIPCHAR
) {
376 h
->flags
&= ~HTML_SKIPCHAR
;
382 /* Skip past "u" header. */
383 c
= mchars_num2uc(seq
+ 1, len
- 1);
385 case ESCAPE_NUMBERED
:
386 c
= mchars_num2char(seq
, len
);
391 c
= mchars_spec2cp(seq
, len
);
399 case ESCAPE_OVERSTRIKE
:
407 if ((c
< 0x20 && c
!= 0x09) ||
408 (c
> 0x7E && c
< 0xA0))
411 (void)snprintf(numbuf
, sizeof(numbuf
), "&#%d;", c
);
412 print_word(h
, numbuf
);
413 } else if (print_escape(h
, c
) == 0)
421 print_href(struct html
*h
, const char *name
, const char *sec
, int man
)
425 pp
= man
? h
->base_man
: h
->base_includes
;
426 while ((p
= strchr(pp
, '%')) != NULL
) {
427 print_encode(h
, pp
, p
, 1);
428 if (man
&& p
[1] == 'S') {
432 print_encode(h
, sec
, NULL
, 1);
433 } else if ((man
&& p
[1] == 'N') ||
434 (man
== 0 && p
[1] == 'I'))
435 print_encode(h
, name
, NULL
, 1);
437 print_encode(h
, p
, p
+ 2, 1);
441 print_encode(h
, pp
, NULL
, 1);
445 print_otag(struct html
*h
, enum htmltag tag
, const char *fmt
, ...)
448 struct roffsu mysu
, *su
;
454 int i
, have_style
, tflags
;
456 tflags
= htmltags
[tag
].flags
;
458 /* Push this tag onto the stack of open scopes. */
460 if ((tflags
& HTML_NOSTACK
) == 0) {
461 t
= mandoc_malloc(sizeof(struct tag
));
463 t
->next
= h
->tags
.head
;
468 if (tflags
& HTML_NLBEFORE
)
472 else if ((h
->flags
& HTML_NOSPACE
) == 0) {
473 if (h
->flags
& HTML_KEEP
)
474 print_word(h
, " ");
476 if (h
->flags
& HTML_PREKEEP
)
477 h
->flags
|= HTML_KEEP
;
482 if ( ! (h
->flags
& HTML_NONOSPACE
))
483 h
->flags
&= ~HTML_NOSPACE
;
485 h
->flags
|= HTML_NOSPACE
;
487 /* Print out the tag name and attributes. */
490 print_word(h
, htmltags
[tag
].name
);
495 while (*fmt
!= '\0') {
497 print_word(h
, " style=\"");
502 s
= va_arg(ap
, char *);
515 s
= va_arg(ap
, char *);
526 print_href(h
, s
, va_arg(ap
, char *), 1);
530 print_href(h
, s
, NULL
, 0);
538 print_encode(h
, s
, NULL
, 1);
544 /* Print out styles. */
548 while (*fmt
!= '\0') {
550 /* First letter: input argument type. */
555 SCALE_HS_INIT(su
, i
);
558 s
= va_arg(ap
, char *);
561 su
= va_arg(ap
, struct roffsu
*);
565 SCALE_VS_INIT(su
, i
);
569 s
= va_arg(ap
, char *);
578 /* Second letter: style name. */
582 attr
= "margin-bottom";
588 attr
= "text-indent";
591 attr
= "margin-left";
606 print_word(h
, va_arg(ap
, char *));
615 if (su
->unit
== SCALE_MM
&& (v
/= 100.0) == 0.0)
617 else if (su
->unit
== SCALE_BU
)
622 (void)snprintf(numbuf
, sizeof(numbuf
), "%.2f", v
);
623 print_word(h
, numbuf
);
624 print_word(h
, roffscales
[su
->unit
]);
634 /* Accommodate for "well-formed" singleton escaping. */
636 if (HTML_AUTOCLOSE
& htmltags
[tag
].flags
)
641 if (tflags
& HTML_NLBEGIN
)
644 h
->flags
|= HTML_NOSPACE
;
646 if (tflags
& HTML_INDENT
)
648 if (tflags
& HTML_NOINDENT
)
655 print_ctag(struct html
*h
, struct tag
*tag
)
660 * Remember to close out and nullify the current
661 * meta-font and table, if applicable.
668 tflags
= htmltags
[tag
->tag
].flags
;
670 if (tflags
& HTML_INDENT
)
672 if (tflags
& HTML_NOINDENT
)
674 if (tflags
& HTML_NLEND
)
679 print_word(h
, htmltags
[tag
->tag
].name
);
681 if (tflags
& HTML_NLAFTER
)
684 h
->tags
.head
= tag
->next
;
689 print_gen_decls(struct html
*h
)
691 print_word(h
, "<!DOCTYPE html>");
696 print_text(struct html
*h
, const char *word
)
698 if (h
->col
&& (h
->flags
& HTML_NOSPACE
) == 0) {
699 if ( ! (HTML_KEEP
& h
->flags
)) {
700 if (HTML_PREKEEP
& h
->flags
)
701 h
->flags
|= HTML_KEEP
;
704 print_word(h
, " ");
707 assert(NULL
== h
->metaf
);
709 case HTMLFONT_ITALIC
:
710 h
->metaf
= print_otag(h
, TAG_I
, "");
713 h
->metaf
= print_otag(h
, TAG_B
, "");
716 h
->metaf
= print_otag(h
, TAG_B
, "");
717 print_otag(h
, TAG_I
, "");
725 if ( ! print_encode(h
, word
, NULL
, 0)) {
726 if ( ! (h
->flags
& HTML_NONOSPACE
))
727 h
->flags
&= ~HTML_NOSPACE
;
728 h
->flags
&= ~HTML_NONEWLINE
;
730 h
->flags
|= HTML_NOSPACE
| HTML_NONEWLINE
;
733 print_tagq(h
, h
->metaf
);
737 h
->flags
&= ~HTML_IGNDELIM
;
741 print_tagq(struct html
*h
, const struct tag
*until
)
745 while ((tag
= h
->tags
.head
) != NULL
) {
747 if (until
&& tag
== until
)
753 print_stagq(struct html
*h
, const struct tag
*suntil
)
757 while ((tag
= h
->tags
.head
) != NULL
) {
758 if (suntil
&& tag
== suntil
)
765 print_paragraph(struct html
*h
)
769 t
= print_otag(h
, TAG_DIV
, "c", "Pp");
774 /***********************************************************************
775 * Low level output functions.
776 * They implement line breaking using a short static buffer.
777 ***********************************************************************/
780 * Buffer one HTML output byte.
781 * If the buffer is full, flush and deactivate it and start a new line.
782 * If the buffer is inactive, print directly.
785 print_byte(struct html
*h
, char c
)
787 if ((h
->flags
& HTML_BUFFER
) == 0) {
793 if (h
->col
+ h
->bufcol
< sizeof(h
->buf
)) {
794 h
->buf
[h
->bufcol
++] = c
;
803 fwrite(h
->buf
, h
->bufcol
, 1, stdout
);
805 h
->col
= (h
->indent
+ 1) * 2 + h
->bufcol
+ 1;
807 h
->flags
&= ~HTML_BUFFER
;
811 * If something was printed on the current output line, end it.
812 * Not to be called right after print_indent().
815 print_endline(struct html
*h
)
822 fwrite(h
->buf
, h
->bufcol
, 1, stdout
);
827 h
->flags
|= HTML_NOSPACE
;
828 h
->flags
&= ~HTML_BUFFER
;
832 * Flush the HTML output buffer.
833 * If it is inactive, activate it.
836 print_endword(struct html
*h
)
843 if ((h
->flags
& HTML_BUFFER
) == 0) {
845 h
->flags
|= HTML_BUFFER
;
846 } else if (h
->bufcol
) {
848 fwrite(h
->buf
, h
->bufcol
, 1, stdout
);
849 h
->col
+= h
->bufcol
+ 1;
855 * If at the beginning of a new output line,
856 * perform indentation and mark the line as containing output.
857 * Make sure to really produce some output right afterwards,
858 * but do not use print_otag() for producing it.
861 print_indent(struct html
*h
)
868 if (h
->noindent
== 0) {
869 h
->col
= h
->indent
* 2;
870 for (i
= 0; i
< h
->col
; i
++)
873 h
->flags
&= ~HTML_NOSPACE
;
877 * Print or buffer some characters
878 * depending on the current HTML output buffer state.
881 print_word(struct html
*h
, const char *cp
)
884 print_byte(h
, *cp
++);
888 * Calculate the scaling unit passed in a `-width' argument. This uses
889 * either a native scaling unit (e.g., 1i, 2m) or the string length of
893 a2width(const char *p
, struct roffsu
*su
)
895 if (a2roffsu(p
, su
, SCALE_MAX
) < 2) {
897 su
->scale
= html_strlen(p
);
898 } else if (su
->scale
< 0.0)