]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Kick out "summary" attribute, which isn't HTML5.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.167 2014/09/27 09:05:57 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "libmandoc.h"
34 #include "out.h"
35 #include "html.h"
36 #include "main.h"
37
/*
 * Layout flags stored in htmldata.flags.
 */
#define	HTML_CLRLINE	 (1 << 0) /* Tag is followed by a newline. */
#define	HTML_NOSTACK	 (1 << 1) /* Tag is not pushed on the open-tag stack. */
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/*
 * Static description of one HTML element: the element name as it is
 * written to the output and a bitmask of the HTML_* flags above.
 */
struct	htmldata {
	const char	*name;
	int		 flags;
};
45
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
72 {"pre", HTML_CLRLINE }, /* TAG_PRE */
73 {"b", 0 }, /* TAG_B */
74 {"i", 0 }, /* TAG_I */
75 {"code", 0 }, /* TAG_CODE */
76 {"small", 0 }, /* TAG_SMALL */
77 {"style", HTML_CLRLINE}, /* TAG_STYLE */
78 };
79
80 static const char *const htmlattrs[ATTR_MAX] = {
81 "name", /* ATTR_NAME */
82 "rel", /* ATTR_REL */
83 "href", /* ATTR_HREF */
84 "type", /* ATTR_TYPE */
85 "media", /* ATTR_MEDIA */
86 "class", /* ATTR_CLASS */
87 "style", /* ATTR_STYLE */
88 "width", /* ATTR_WIDTH */
89 "id", /* ATTR_ID */
90 "align", /* ATTR_ALIGN */
91 "colspan", /* ATTR_COLSPAN */
92 "charset", /* ATTR_CHARSET */
93 };
94
95 static const char *const roffscales[SCALE_MAX] = {
96 "cm", /* SCALE_CM */
97 "in", /* SCALE_IN */
98 "pc", /* SCALE_PC */
99 "pt", /* SCALE_PT */
100 "em", /* SCALE_EM */
101 "em", /* SCALE_MM */
102 "ex", /* SCALE_EN */
103 "ex", /* SCALE_BU */
104 "em", /* SCALE_VS */
105 "ex", /* SCALE_FS */
106 };
107
/* Forward declarations of the file-local helpers. */
static	void	 bufncat(struct html *h, const char *p, size_t sz);
static	void	 print_ctag(struct html *h, enum htmltag tag);
static	int	 print_escape(char c);
static	int	 print_encode(struct html *h, const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
static	void	 print_attr(struct html *h, const char *key, const char *val);
static	void	*ml_alloc(char *outopts, enum htmltype type);
115
116
117 static void *
118 ml_alloc(char *outopts, enum htmltype type)
119 {
120 struct html *h;
121 const char *toks[5];
122 char *v;
123
124 toks[0] = "style";
125 toks[1] = "man";
126 toks[2] = "includes";
127 toks[3] = "fragment";
128 toks[4] = NULL;
129
130 h = mandoc_calloc(1, sizeof(struct html));
131
132 h->type = type;
133 h->tags.head = NULL;
134 h->symtab = mchars_alloc();
135
136 while (outopts && *outopts)
137 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
138 case 0:
139 h->style = v;
140 break;
141 case 1:
142 h->base_man = v;
143 break;
144 case 2:
145 h->base_includes = v;
146 break;
147 case 3:
148 h->oflags |= HTML_FRAGMENT;
149 break;
150 default:
151 break;
152 }
153
154 return(h);
155 }
156
157 void *
158 html_alloc(char *outopts)
159 {
160
161 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
162 }
163
164 void *
165 xhtml_alloc(char *outopts)
166 {
167
168 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
169 }
170
171 void
172 html_free(void *p)
173 {
174 struct tag *tag;
175 struct html *h;
176
177 h = (struct html *)p;
178
179 while ((tag = h->tags.head) != NULL) {
180 h->tags.head = tag->next;
181 free(tag);
182 }
183
184 if (h->symtab)
185 mchars_free(h->symtab);
186
187 free(h);
188 }
189
190 void
191 print_gen_head(struct html *h)
192 {
193 struct htmlpair tag[4];
194 struct tag *t;
195
196 tag[0].key = ATTR_CHARSET;
197 tag[0].val = "utf-8";
198 print_otag(h, TAG_META, 1, tag);
199
200 t = print_otag(h, TAG_STYLE, 0, NULL);
201 print_text(h, "table.head, table.foot { width: 100%; }\n");
202 print_tagq(h, t);
203
204 if (h->style) {
205 tag[0].key = ATTR_REL;
206 tag[0].val = "stylesheet";
207 tag[1].key = ATTR_HREF;
208 tag[1].val = h->style;
209 tag[2].key = ATTR_TYPE;
210 tag[2].val = "text/css";
211 tag[3].key = ATTR_MEDIA;
212 tag[3].val = "all";
213 print_otag(h, TAG_LINK, 4, tag);
214 }
215 }
216
217 static void
218 print_metaf(struct html *h, enum mandoc_esc deco)
219 {
220 enum htmlfont font;
221
222 switch (deco) {
223 case ESCAPE_FONTPREV:
224 font = h->metal;
225 break;
226 case ESCAPE_FONTITALIC:
227 font = HTMLFONT_ITALIC;
228 break;
229 case ESCAPE_FONTBOLD:
230 font = HTMLFONT_BOLD;
231 break;
232 case ESCAPE_FONTBI:
233 font = HTMLFONT_BI;
234 break;
235 case ESCAPE_FONT:
236 /* FALLTHROUGH */
237 case ESCAPE_FONTROMAN:
238 font = HTMLFONT_NONE;
239 break;
240 default:
241 abort();
242 /* NOTREACHED */
243 }
244
245 if (h->metaf) {
246 print_tagq(h, h->metaf);
247 h->metaf = NULL;
248 }
249
250 h->metal = h->metac;
251 h->metac = font;
252
253 switch (font) {
254 case HTMLFONT_ITALIC:
255 h->metaf = print_otag(h, TAG_I, 0, NULL);
256 break;
257 case HTMLFONT_BOLD:
258 h->metaf = print_otag(h, TAG_B, 0, NULL);
259 break;
260 case HTMLFONT_BI:
261 h->metaf = print_otag(h, TAG_B, 0, NULL);
262 print_otag(h, TAG_I, 0, NULL);
263 break;
264 default:
265 break;
266 }
267 }
268
269 int
270 html_strlen(const char *cp)
271 {
272 size_t rsz;
273 int skip, sz;
274
275 /*
276 * Account for escaped sequences within string length
277 * calculations. This follows the logic in term_strlen() as we
278 * must calculate the width of produced strings.
279 * Assume that characters are always width of "1". This is
280 * hacky, but it gets the job done for approximation of widths.
281 */
282
283 sz = 0;
284 skip = 0;
285 while (1) {
286 rsz = strcspn(cp, "\\");
287 if (rsz) {
288 cp += rsz;
289 if (skip) {
290 skip = 0;
291 rsz--;
292 }
293 sz += rsz;
294 }
295 if ('\0' == *cp)
296 break;
297 cp++;
298 switch (mandoc_escape(&cp, NULL, NULL)) {
299 case ESCAPE_ERROR:
300 return(sz);
301 case ESCAPE_UNICODE:
302 /* FALLTHROUGH */
303 case ESCAPE_NUMBERED:
304 /* FALLTHROUGH */
305 case ESCAPE_SPECIAL:
306 if (skip)
307 skip = 0;
308 else
309 sz++;
310 break;
311 case ESCAPE_SKIPCHAR:
312 skip = 1;
313 break;
314 default:
315 break;
316 }
317 }
318 return(sz);
319 }
320
321 static int
322 print_escape(char c)
323 {
324
325 switch (c) {
326 case '<':
327 printf("&lt;");
328 break;
329 case '>':
330 printf("&gt;");
331 break;
332 case '&':
333 printf("&amp;");
334 break;
335 case '"':
336 printf("&quot;");
337 break;
338 case ASCII_NBRSP:
339 putchar('-');
340 break;
341 case ASCII_HYPH:
342 putchar('-');
343 /* FALLTHROUGH */
344 case ASCII_BREAK:
345 break;
346 default:
347 return(0);
348 }
349 return(1);
350 }
351
352 static int
353 print_encode(struct html *h, const char *p, int norecurse)
354 {
355 size_t sz;
356 int c, len, nospace;
357 const char *seq;
358 enum mandoc_esc esc;
359 static const char rejs[9] = { '\\', '<', '>', '&', '"',
360 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
361
362 nospace = 0;
363
364 while ('\0' != *p) {
365 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
366 h->flags &= ~HTML_SKIPCHAR;
367 p++;
368 continue;
369 }
370
371 sz = strcspn(p, rejs);
372
373 fwrite(p, 1, sz, stdout);
374 p += (int)sz;
375
376 if ('\0' == *p)
377 break;
378
379 if (print_escape(*p++))
380 continue;
381
382 esc = mandoc_escape(&p, &seq, &len);
383 if (ESCAPE_ERROR == esc)
384 break;
385
386 switch (esc) {
387 case ESCAPE_FONT:
388 /* FALLTHROUGH */
389 case ESCAPE_FONTPREV:
390 /* FALLTHROUGH */
391 case ESCAPE_FONTBOLD:
392 /* FALLTHROUGH */
393 case ESCAPE_FONTITALIC:
394 /* FALLTHROUGH */
395 case ESCAPE_FONTBI:
396 /* FALLTHROUGH */
397 case ESCAPE_FONTROMAN:
398 if (0 == norecurse)
399 print_metaf(h, esc);
400 continue;
401 case ESCAPE_SKIPCHAR:
402 h->flags |= HTML_SKIPCHAR;
403 continue;
404 default:
405 break;
406 }
407
408 if (h->flags & HTML_SKIPCHAR) {
409 h->flags &= ~HTML_SKIPCHAR;
410 continue;
411 }
412
413 switch (esc) {
414 case ESCAPE_UNICODE:
415 /* Skip past "u" header. */
416 c = mchars_num2uc(seq + 1, len - 1);
417 if ('\0' != c)
418 printf("&#x%x;", c);
419 break;
420 case ESCAPE_NUMBERED:
421 c = mchars_num2char(seq, len);
422 if ( ! ('\0' == c || print_escape(c)))
423 putchar(c);
424 break;
425 case ESCAPE_SPECIAL:
426 c = mchars_spec2cp(h->symtab, seq, len);
427 if (c > 0)
428 printf("&#%d;", c);
429 else if (-1 == c && 1 == len &&
430 !print_escape(*seq))
431 putchar((int)*seq);
432 break;
433 case ESCAPE_NOSPACE:
434 if ('\0' == *p)
435 nospace = 1;
436 break;
437 default:
438 break;
439 }
440 }
441
442 return(nospace);
443 }
444
/*
 * Print one attribute as ` key="val"`.  The value is passed through
 * print_encode() with font changes disabled.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
452
453 struct tag *
454 print_otag(struct html *h, enum htmltag tag,
455 int sz, const struct htmlpair *p)
456 {
457 int i;
458 struct tag *t;
459
460 /* Push this tags onto the stack of open scopes. */
461
462 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
463 t = mandoc_malloc(sizeof(struct tag));
464 t->tag = tag;
465 t->next = h->tags.head;
466 h->tags.head = t;
467 } else
468 t = NULL;
469
470 if ( ! (HTML_NOSPACE & h->flags))
471 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
472 /* Manage keeps! */
473 if ( ! (HTML_KEEP & h->flags)) {
474 if (HTML_PREKEEP & h->flags)
475 h->flags |= HTML_KEEP;
476 putchar(' ');
477 } else
478 printf("&#160;");
479 }
480
481 if ( ! (h->flags & HTML_NONOSPACE))
482 h->flags &= ~HTML_NOSPACE;
483 else
484 h->flags |= HTML_NOSPACE;
485
486 /* Print out the tag name and attributes. */
487
488 printf("<%s", htmltags[tag].name);
489 for (i = 0; i < sz; i++)
490 print_attr(h, htmlattrs[p[i].key], p[i].val);
491
492 /* Accommodate for XML "well-formed" singleton escaping. */
493
494 if (HTML_AUTOCLOSE & htmltags[tag].flags)
495 switch (h->type) {
496 case HTML_XHTML_1_0_STRICT:
497 putchar('/');
498 break;
499 default:
500 break;
501 }
502
503 putchar('>');
504
505 h->flags |= HTML_NOSPACE;
506
507 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
508 putchar('\n');
509
510 return(t);
511 }
512
513 static void
514 print_ctag(struct html *h, enum htmltag tag)
515 {
516
517 printf("</%s>", htmltags[tag].name);
518 if (HTML_CLRLINE & htmltags[tag].flags) {
519 h->flags |= HTML_NOSPACE;
520 putchar('\n');
521 }
522 }
523
/*
 * Print the document type declaration, followed by a newline.
 * The formatter state is not consulted.
 */
void
print_gen_decls(struct html *h)
{

	fputs("<!DOCTYPE html>\n", stdout);
}
530
531 void
532 print_text(struct html *h, const char *word)
533 {
534
535 if ( ! (HTML_NOSPACE & h->flags)) {
536 /* Manage keeps! */
537 if ( ! (HTML_KEEP & h->flags)) {
538 if (HTML_PREKEEP & h->flags)
539 h->flags |= HTML_KEEP;
540 putchar(' ');
541 } else
542 printf("&#160;");
543 }
544
545 assert(NULL == h->metaf);
546 switch (h->metac) {
547 case HTMLFONT_ITALIC:
548 h->metaf = print_otag(h, TAG_I, 0, NULL);
549 break;
550 case HTMLFONT_BOLD:
551 h->metaf = print_otag(h, TAG_B, 0, NULL);
552 break;
553 case HTMLFONT_BI:
554 h->metaf = print_otag(h, TAG_B, 0, NULL);
555 print_otag(h, TAG_I, 0, NULL);
556 break;
557 default:
558 break;
559 }
560
561 assert(word);
562 if ( ! print_encode(h, word, 0)) {
563 if ( ! (h->flags & HTML_NONOSPACE))
564 h->flags &= ~HTML_NOSPACE;
565 } else
566 h->flags |= HTML_NOSPACE;
567
568 if (h->metaf) {
569 print_tagq(h, h->metaf);
570 h->metaf = NULL;
571 }
572
573 h->flags &= ~HTML_IGNDELIM;
574 }
575
576 void
577 print_tagq(struct html *h, const struct tag *until)
578 {
579 struct tag *tag;
580
581 while ((tag = h->tags.head) != NULL) {
582 /*
583 * Remember to close out and nullify the current
584 * meta-font and table, if applicable.
585 */
586 if (tag == h->metaf)
587 h->metaf = NULL;
588 if (tag == h->tblt)
589 h->tblt = NULL;
590 print_ctag(h, tag->tag);
591 h->tags.head = tag->next;
592 free(tag);
593 if (until && tag == until)
594 return;
595 }
596 }
597
598 void
599 print_stagq(struct html *h, const struct tag *suntil)
600 {
601 struct tag *tag;
602
603 while ((tag = h->tags.head) != NULL) {
604 if (suntil && tag == suntil)
605 return;
606 /*
607 * Remember to close out and nullify the current
608 * meta-font and table, if applicable.
609 */
610 if (tag == h->metaf)
611 h->metaf = NULL;
612 if (tag == h->tblt)
613 h->tblt = NULL;
614 print_ctag(h, tag->tag);
615 h->tags.head = tag->next;
616 free(tag);
617 }
618 }
619
620 void
621 bufinit(struct html *h)
622 {
623
624 h->buf[0] = '\0';
625 h->buflen = 0;
626 }
627
/*
 * Append one CSS declaration of the form "key:val;" to the scratch
 * buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*piece[4];
	int		 i;

	piece[0] = key;
	piece[1] = ":";
	piece[2] = val;
	piece[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, piece[i]);
}
637
638 void
639 bufcat(struct html *h, const char *p)
640 {
641
642 /*
643 * XXX This is broken and not easy to fix.
644 * When using the -Oincludes option, buffmt_includes()
645 * may pass in strings overrunning BUFSIZ, causing a crash.
646 */
647
648 h->buflen = strlcat(h->buf, p, BUFSIZ);
649 assert(h->buflen < BUFSIZ);
650 }
651
652 void
653 bufcat_fmt(struct html *h, const char *fmt, ...)
654 {
655 va_list ap;
656
657 va_start(ap, fmt);
658 (void)vsnprintf(h->buf + (int)h->buflen,
659 BUFSIZ - h->buflen - 1, fmt, ap);
660 va_end(ap);
661 h->buflen = strlen(h->buf);
662 }
663
664 static void
665 bufncat(struct html *h, const char *p, size_t sz)
666 {
667
668 assert(h->buflen + sz + 1 < BUFSIZ);
669 strncat(h->buf, p, sz);
670 h->buflen += sz;
671 }
672
673 void
674 buffmt_includes(struct html *h, const char *name)
675 {
676 const char *p, *pp;
677
678 pp = h->base_includes;
679
680 bufinit(h);
681 while (NULL != (p = strchr(pp, '%'))) {
682 bufncat(h, pp, (size_t)(p - pp));
683 switch (*(p + 1)) {
684 case'I':
685 bufcat(h, name);
686 break;
687 default:
688 bufncat(h, p, 2);
689 break;
690 }
691 pp = p + 2;
692 }
693 if (pp)
694 bufcat(h, pp);
695 }
696
697 void
698 buffmt_man(struct html *h, const char *name, const char *sec)
699 {
700 const char *p, *pp;
701
702 pp = h->base_man;
703
704 bufinit(h);
705 while (NULL != (p = strchr(pp, '%'))) {
706 bufncat(h, pp, (size_t)(p - pp));
707 switch (*(p + 1)) {
708 case 'S':
709 bufcat(h, sec ? sec : "1");
710 break;
711 case 'N':
712 bufcat_fmt(h, "%s", name);
713 break;
714 default:
715 bufncat(h, p, 2);
716 break;
717 }
718 pp = p + 2;
719 }
720 if (pp)
721 bufcat(h, pp);
722 }
723
724 void
725 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
726 {
727 double v;
728
729 v = su->scale;
730 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
731 v = 1.0;
732 else if (SCALE_BU == su->unit)
733 v /= 24.0;
734
735 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
736 }
737
/*
 * Append "src" to the scratch buffer as an identifier, writing every
 * byte as two lower-case hexadecimal digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed, a
	 * byte above 0x7f would otherwise be sign-extended and come
	 * out as eight hex digits instead of two.
	 */
	for ( ; '\0' != *src; src++)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src);
}