git.cameronkatri.com Git - mandoc.git/blob - html.c
Add support for some MathML elements and attributes in our HTML5.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.173 2014/09/28 11:33:15 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "libmandoc.h"
34 #include "out.h"
35 #include "html.h"
36 #include "main.h"
37
/*
 * Static description of one output element: the name written between
 * the angle brackets and a bit mask of the HTML_* flags defined here.
 */
struct	htmldata {
	const char	 *name;		/* element name as printed */
	int		  flags;	/* bitwise OR of the flags below */
#define	HTML_CLRLINE	 0x01	/* a newline is printed after the tag */
#define	HTML_NOSTACK	 0x02	/* not pushed onto the open-tag stack */
#define	HTML_AUTOCLOSE	 0x04	/* Tag has auto-closure. */
};
45
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"pre", HTML_CLRLINE }, /* TAG_PRE */
72 {"b", 0 }, /* TAG_B */
73 {"i", 0 }, /* TAG_I */
74 {"code", 0 }, /* TAG_CODE */
75 {"small", 0 }, /* TAG_SMALL */
76 {"style", HTML_CLRLINE}, /* TAG_STYLE */
77 {"math", HTML_CLRLINE}, /* TAG_MATH */
78 {"mrow", 0}, /* TAG_MROW */
79 {"mi", 0}, /* TAG_MI */
80 {"mo", 0}, /* TAG_MO */
81 {"msup", 0}, /* TAG_MSUP */
82 {"msub", 0}, /* TAG_MSUB */
83 {"msubsup", 0}, /* TAG_MSUBSUP */
84 {"mfrac", 0}, /* TAG_MFRAC */
85 {"msqrt", 0}, /* TAG_MSQRT */
86 {"mfenced", 0}, /* TAG_MFENCED */
87 {"mtable", 0}, /* TAG_MTABLE */
88 {"mtr", 0}, /* TAG_MTR */
89 {"mtd", 0}, /* TAG_MTD */
90 };
91
92 static const char *const htmlattrs[ATTR_MAX] = {
93 "name", /* ATTR_NAME */
94 "rel", /* ATTR_REL */
95 "href", /* ATTR_HREF */
96 "type", /* ATTR_TYPE */
97 "media", /* ATTR_MEDIA */
98 "class", /* ATTR_CLASS */
99 "style", /* ATTR_STYLE */
100 "id", /* ATTR_ID */
101 "colspan", /* ATTR_COLSPAN */
102 "charset", /* ATTR_CHARSET */
103 "open", /* ATTR_OPEN */
104 "close", /* ATTR_CLOSE */
105 };
106
107 static const char *const roffscales[SCALE_MAX] = {
108 "cm", /* SCALE_CM */
109 "in", /* SCALE_IN */
110 "pc", /* SCALE_PC */
111 "pt", /* SCALE_PT */
112 "em", /* SCALE_EM */
113 "em", /* SCALE_MM */
114 "ex", /* SCALE_EN */
115 "ex", /* SCALE_BU */
116 "em", /* SCALE_VS */
117 "ex", /* SCALE_FS */
118 };
119
/* Forward declarations of the file-local helpers defined below. */
static	void	 bufncat(struct html *h, const char *p, size_t sz);
static	void	 print_ctag(struct html *h, enum htmltag tag);
static	int	 print_escape(char c);
static	int	 print_encode(struct html *h, const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
static	void	 print_attr(struct html *h, const char *key, const char *val);
static	void	*ml_alloc(char *outopts);
127
128
129 static void *
130 ml_alloc(char *outopts)
131 {
132 struct html *h;
133 const char *toks[5];
134 char *v;
135
136 toks[0] = "style";
137 toks[1] = "man";
138 toks[2] = "includes";
139 toks[3] = "fragment";
140 toks[4] = NULL;
141
142 h = mandoc_calloc(1, sizeof(struct html));
143
144 h->tags.head = NULL;
145 h->symtab = mchars_alloc();
146
147 while (outopts && *outopts)
148 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
149 case 0:
150 h->style = v;
151 break;
152 case 1:
153 h->base_man = v;
154 break;
155 case 2:
156 h->base_includes = v;
157 break;
158 case 3:
159 h->oflags |= HTML_FRAGMENT;
160 break;
161 default:
162 break;
163 }
164
165 return(h);
166 }
167
/*
 * Public allocator for the HTML formatter; all the work is done
 * by ml_alloc().
 */
void *
html_alloc(char *outopts)
{
	void	*state;

	state = ml_alloc(outopts);
	return(state);
}
174
/*
 * Public allocator under the XHTML name; it produces exactly the
 * same state as html_alloc() by calling ml_alloc().
 */
void *
xhtml_alloc(char *outopts)
{
	void	*state;

	state = ml_alloc(outopts);
	return(state);
}
181
182 void
183 html_free(void *p)
184 {
185 struct tag *tag;
186 struct html *h;
187
188 h = (struct html *)p;
189
190 while ((tag = h->tags.head) != NULL) {
191 h->tags.head = tag->next;
192 free(tag);
193 }
194
195 if (h->symtab)
196 mchars_free(h->symtab);
197
198 free(h);
199 }
200
201 void
202 print_gen_head(struct html *h)
203 {
204 struct htmlpair tag[4];
205 struct tag *t;
206
207 tag[0].key = ATTR_CHARSET;
208 tag[0].val = "utf-8";
209 print_otag(h, TAG_META, 1, tag);
210
211 /*
212 * Print a default style-sheet.
213 */
214 t = print_otag(h, TAG_STYLE, 0, NULL);
215 print_text(h, "table.head, table.foot { width: 100%; }\n"
216 "td.head-rtitle, td.foot-os { text-align: right; }\n"
217 "td.head-vol { text-align: center; }\n"
218 "table.foot td { width: 50%; }\n"
219 "table.head td { width: 33%; }\n"
220 "div.spacer { margin: 1em 0; }\n");
221 print_tagq(h, t);
222
223 if (h->style) {
224 tag[0].key = ATTR_REL;
225 tag[0].val = "stylesheet";
226 tag[1].key = ATTR_HREF;
227 tag[1].val = h->style;
228 tag[2].key = ATTR_TYPE;
229 tag[2].val = "text/css";
230 tag[3].key = ATTR_MEDIA;
231 tag[3].val = "all";
232 print_otag(h, TAG_LINK, 4, tag);
233 }
234 }
235
236 static void
237 print_metaf(struct html *h, enum mandoc_esc deco)
238 {
239 enum htmlfont font;
240
241 switch (deco) {
242 case ESCAPE_FONTPREV:
243 font = h->metal;
244 break;
245 case ESCAPE_FONTITALIC:
246 font = HTMLFONT_ITALIC;
247 break;
248 case ESCAPE_FONTBOLD:
249 font = HTMLFONT_BOLD;
250 break;
251 case ESCAPE_FONTBI:
252 font = HTMLFONT_BI;
253 break;
254 case ESCAPE_FONT:
255 /* FALLTHROUGH */
256 case ESCAPE_FONTROMAN:
257 font = HTMLFONT_NONE;
258 break;
259 default:
260 abort();
261 /* NOTREACHED */
262 }
263
264 if (h->metaf) {
265 print_tagq(h, h->metaf);
266 h->metaf = NULL;
267 }
268
269 h->metal = h->metac;
270 h->metac = font;
271
272 switch (font) {
273 case HTMLFONT_ITALIC:
274 h->metaf = print_otag(h, TAG_I, 0, NULL);
275 break;
276 case HTMLFONT_BOLD:
277 h->metaf = print_otag(h, TAG_B, 0, NULL);
278 break;
279 case HTMLFONT_BI:
280 h->metaf = print_otag(h, TAG_B, 0, NULL);
281 print_otag(h, TAG_I, 0, NULL);
282 break;
283 default:
284 break;
285 }
286 }
287
288 int
289 html_strlen(const char *cp)
290 {
291 size_t rsz;
292 int skip, sz;
293
294 /*
295 * Account for escaped sequences within string length
296 * calculations. This follows the logic in term_strlen() as we
297 * must calculate the width of produced strings.
298 * Assume that characters are always width of "1". This is
299 * hacky, but it gets the job done for approximation of widths.
300 */
301
302 sz = 0;
303 skip = 0;
304 while (1) {
305 rsz = strcspn(cp, "\\");
306 if (rsz) {
307 cp += rsz;
308 if (skip) {
309 skip = 0;
310 rsz--;
311 }
312 sz += rsz;
313 }
314 if ('\0' == *cp)
315 break;
316 cp++;
317 switch (mandoc_escape(&cp, NULL, NULL)) {
318 case ESCAPE_ERROR:
319 return(sz);
320 case ESCAPE_UNICODE:
321 /* FALLTHROUGH */
322 case ESCAPE_NUMBERED:
323 /* FALLTHROUGH */
324 case ESCAPE_SPECIAL:
325 if (skip)
326 skip = 0;
327 else
328 sz++;
329 break;
330 case ESCAPE_SKIPCHAR:
331 skip = 1;
332 break;
333 default:
334 break;
335 }
336 }
337 return(sz);
338 }
339
340 static int
341 print_escape(char c)
342 {
343
344 switch (c) {
345 case '<':
346 printf("&lt;");
347 break;
348 case '>':
349 printf("&gt;");
350 break;
351 case '&':
352 printf("&amp;");
353 break;
354 case '"':
355 printf("&quot;");
356 break;
357 case ASCII_NBRSP:
358 putchar('-');
359 break;
360 case ASCII_HYPH:
361 putchar('-');
362 /* FALLTHROUGH */
363 case ASCII_BREAK:
364 break;
365 default:
366 return(0);
367 }
368 return(1);
369 }
370
371 static int
372 print_encode(struct html *h, const char *p, int norecurse)
373 {
374 size_t sz;
375 int c, len, nospace;
376 const char *seq;
377 enum mandoc_esc esc;
378 static const char rejs[9] = { '\\', '<', '>', '&', '"',
379 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
380
381 nospace = 0;
382
383 while ('\0' != *p) {
384 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
385 h->flags &= ~HTML_SKIPCHAR;
386 p++;
387 continue;
388 }
389
390 sz = strcspn(p, rejs);
391
392 fwrite(p, 1, sz, stdout);
393 p += (int)sz;
394
395 if ('\0' == *p)
396 break;
397
398 if (print_escape(*p++))
399 continue;
400
401 esc = mandoc_escape(&p, &seq, &len);
402 if (ESCAPE_ERROR == esc)
403 break;
404
405 switch (esc) {
406 case ESCAPE_FONT:
407 /* FALLTHROUGH */
408 case ESCAPE_FONTPREV:
409 /* FALLTHROUGH */
410 case ESCAPE_FONTBOLD:
411 /* FALLTHROUGH */
412 case ESCAPE_FONTITALIC:
413 /* FALLTHROUGH */
414 case ESCAPE_FONTBI:
415 /* FALLTHROUGH */
416 case ESCAPE_FONTROMAN:
417 if (0 == norecurse)
418 print_metaf(h, esc);
419 continue;
420 case ESCAPE_SKIPCHAR:
421 h->flags |= HTML_SKIPCHAR;
422 continue;
423 default:
424 break;
425 }
426
427 if (h->flags & HTML_SKIPCHAR) {
428 h->flags &= ~HTML_SKIPCHAR;
429 continue;
430 }
431
432 switch (esc) {
433 case ESCAPE_UNICODE:
434 /* Skip past "u" header. */
435 c = mchars_num2uc(seq + 1, len - 1);
436 if ('\0' != c)
437 printf("&#x%x;", c);
438 break;
439 case ESCAPE_NUMBERED:
440 c = mchars_num2char(seq, len);
441 if ( ! ('\0' == c || print_escape(c)))
442 putchar(c);
443 break;
444 case ESCAPE_SPECIAL:
445 c = mchars_spec2cp(h->symtab, seq, len);
446 if (c > 0)
447 printf("&#%d;", c);
448 else if (-1 == c && 1 == len &&
449 !print_escape(*seq))
450 putchar((int)*seq);
451 break;
452 case ESCAPE_NOSPACE:
453 if ('\0' == *p)
454 nospace = 1;
455 break;
456 default:
457 break;
458 }
459 }
460
461 return(nospace);
462 }
463
/*
 * Print one attribute as ` key="val"'.  The value goes through
 * print_encode() with font changes disabled, so that no elements
 * are opened inside the attribute.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
471
472 struct tag *
473 print_otag(struct html *h, enum htmltag tag,
474 int sz, const struct htmlpair *p)
475 {
476 int i;
477 struct tag *t;
478
479 /* Push this tags onto the stack of open scopes. */
480
481 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
482 t = mandoc_malloc(sizeof(struct tag));
483 t->tag = tag;
484 t->next = h->tags.head;
485 h->tags.head = t;
486 } else
487 t = NULL;
488
489 if ( ! (HTML_NOSPACE & h->flags))
490 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
491 /* Manage keeps! */
492 if ( ! (HTML_KEEP & h->flags)) {
493 if (HTML_PREKEEP & h->flags)
494 h->flags |= HTML_KEEP;
495 putchar(' ');
496 } else
497 printf("&#160;");
498 }
499
500 if ( ! (h->flags & HTML_NONOSPACE))
501 h->flags &= ~HTML_NOSPACE;
502 else
503 h->flags |= HTML_NOSPACE;
504
505 /* Print out the tag name and attributes. */
506
507 printf("<%s", htmltags[tag].name);
508 for (i = 0; i < sz; i++)
509 print_attr(h, htmlattrs[p[i].key], p[i].val);
510
511 /* Accommodate for "well-formed" singleton escaping. */
512
513 if (HTML_AUTOCLOSE & htmltags[tag].flags)
514 putchar('/');
515
516 putchar('>');
517
518 h->flags |= HTML_NOSPACE;
519
520 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
521 putchar('\n');
522
523 return(t);
524 }
525
526 static void
527 print_ctag(struct html *h, enum htmltag tag)
528 {
529
530 printf("</%s>", htmltags[tag].name);
531 if (HTML_CLRLINE & htmltags[tag].flags) {
532 h->flags |= HTML_NOSPACE;
533 putchar('\n');
534 }
535 }
536
/*
 * Print the document type declaration on a line of its own.
 * The output state is not consulted.
 */
void
print_gen_decls(struct html *h)
{

	(void)h;
	fputs("<!DOCTYPE html>\n", stdout);
}
543
544 void
545 print_text(struct html *h, const char *word)
546 {
547
548 if ( ! (HTML_NOSPACE & h->flags)) {
549 /* Manage keeps! */
550 if ( ! (HTML_KEEP & h->flags)) {
551 if (HTML_PREKEEP & h->flags)
552 h->flags |= HTML_KEEP;
553 putchar(' ');
554 } else
555 printf("&#160;");
556 }
557
558 assert(NULL == h->metaf);
559 switch (h->metac) {
560 case HTMLFONT_ITALIC:
561 h->metaf = print_otag(h, TAG_I, 0, NULL);
562 break;
563 case HTMLFONT_BOLD:
564 h->metaf = print_otag(h, TAG_B, 0, NULL);
565 break;
566 case HTMLFONT_BI:
567 h->metaf = print_otag(h, TAG_B, 0, NULL);
568 print_otag(h, TAG_I, 0, NULL);
569 break;
570 default:
571 break;
572 }
573
574 assert(word);
575 if ( ! print_encode(h, word, 0)) {
576 if ( ! (h->flags & HTML_NONOSPACE))
577 h->flags &= ~HTML_NOSPACE;
578 } else
579 h->flags |= HTML_NOSPACE;
580
581 if (h->metaf) {
582 print_tagq(h, h->metaf);
583 h->metaf = NULL;
584 }
585
586 h->flags &= ~HTML_IGNDELIM;
587 }
588
589 void
590 print_tagq(struct html *h, const struct tag *until)
591 {
592 struct tag *tag;
593
594 while ((tag = h->tags.head) != NULL) {
595 /*
596 * Remember to close out and nullify the current
597 * meta-font and table, if applicable.
598 */
599 if (tag == h->metaf)
600 h->metaf = NULL;
601 if (tag == h->tblt)
602 h->tblt = NULL;
603 print_ctag(h, tag->tag);
604 h->tags.head = tag->next;
605 free(tag);
606 if (until && tag == until)
607 return;
608 }
609 }
610
611 void
612 print_stagq(struct html *h, const struct tag *suntil)
613 {
614 struct tag *tag;
615
616 while ((tag = h->tags.head) != NULL) {
617 if (suntil && tag == suntil)
618 return;
619 /*
620 * Remember to close out and nullify the current
621 * meta-font and table, if applicable.
622 */
623 if (tag == h->metaf)
624 h->metaf = NULL;
625 if (tag == h->tblt)
626 h->tblt = NULL;
627 print_ctag(h, tag->tag);
628 h->tags.head = tag->next;
629 free(tag);
630 }
631 }
632
633 void
634 print_paragraph(struct html *h)
635 {
636 struct tag *t;
637 struct htmlpair tag;
638
639 PAIR_CLASS_INIT(&tag, "spacer");
640 t = print_otag(h, TAG_DIV, 1, &tag);
641 print_tagq(h, t);
642 }
643
644
645 void
646 bufinit(struct html *h)
647 {
648
649 h->buf[0] = '\0';
650 h->buflen = 0;
651 }
652
/*
 * Append one style declaration of the form "key:val;" to the
 * scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*parts[4];
	int		 i;

	parts[0] = key;
	parts[1] = ":";
	parts[2] = val;
	parts[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, parts[i]);
}
662
663 void
664 bufcat(struct html *h, const char *p)
665 {
666
667 /*
668 * XXX This is broken and not easy to fix.
669 * When using the -Oincludes option, buffmt_includes()
670 * may pass in strings overrunning BUFSIZ, causing a crash.
671 */
672
673 h->buflen = strlcat(h->buf, p, BUFSIZ);
674 assert(h->buflen < BUFSIZ);
675 }
676
677 void
678 bufcat_fmt(struct html *h, const char *fmt, ...)
679 {
680 va_list ap;
681
682 va_start(ap, fmt);
683 (void)vsnprintf(h->buf + (int)h->buflen,
684 BUFSIZ - h->buflen - 1, fmt, ap);
685 va_end(ap);
686 h->buflen = strlen(h->buf);
687 }
688
689 static void
690 bufncat(struct html *h, const char *p, size_t sz)
691 {
692
693 assert(h->buflen + sz + 1 < BUFSIZ);
694 strncat(h->buf, p, sz);
695 h->buflen += sz;
696 }
697
698 void
699 buffmt_includes(struct html *h, const char *name)
700 {
701 const char *p, *pp;
702
703 pp = h->base_includes;
704
705 bufinit(h);
706 while (NULL != (p = strchr(pp, '%'))) {
707 bufncat(h, pp, (size_t)(p - pp));
708 switch (*(p + 1)) {
709 case'I':
710 bufcat(h, name);
711 break;
712 default:
713 bufncat(h, p, 2);
714 break;
715 }
716 pp = p + 2;
717 }
718 if (pp)
719 bufcat(h, pp);
720 }
721
722 void
723 buffmt_man(struct html *h, const char *name, const char *sec)
724 {
725 const char *p, *pp;
726
727 pp = h->base_man;
728
729 bufinit(h);
730 while (NULL != (p = strchr(pp, '%'))) {
731 bufncat(h, pp, (size_t)(p - pp));
732 switch (*(p + 1)) {
733 case 'S':
734 bufcat(h, sec ? sec : "1");
735 break;
736 case 'N':
737 bufcat_fmt(h, "%s", name);
738 break;
739 default:
740 bufncat(h, p, 2);
741 break;
742 }
743 pp = p + 2;
744 }
745 if (pp)
746 bufcat(h, pp);
747 }
748
749 void
750 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
751 {
752 double v;
753
754 v = su->scale;
755 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
756 v = 1.0;
757 else if (SCALE_BU == su->unit)
758 v /= 24.0;
759
760 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
761 }
762
/*
 * Append src to the scratch buffer in a form that is safe to use
 * as an identifier: every byte is written as two hexadecimal
 * digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed,
	 * bytes above 0x7f would otherwise be sign-extended and
	 * printed as eight digits instead of two.
	 */
	for (; *src != '\0'; src++)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src);
}