]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Show the flags MDOC_DELIMO, MDOC_DELIMC, MDOC_EOS, and MAN_EOS.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.187 2015/09/26 00:54:03 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "out.h"
34 #include "html.h"
35 #include "manconf.h"
36 #include "main.h"
37
/*
 * Per-element output flags, as consumed by print_otag() and print_ctag():
 * HTML_CLRLINE   - no separating space before the tag, newline after it.
 * HTML_NOSTACK   - the element is not pushed onto the open-tag stack.
 * HTML_AUTOCLOSE - the opening tag is written in self-closing form.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/* Static description of one HTML element: its name and HTML_* flags. */
struct	htmldata {
	const char	 *name;
	int		  flags;
};
45
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"pre", HTML_CLRLINE }, /* TAG_PRE */
72 {"b", 0 }, /* TAG_B */
73 {"i", 0 }, /* TAG_I */
74 {"code", 0 }, /* TAG_CODE */
75 {"small", 0 }, /* TAG_SMALL */
76 {"style", HTML_CLRLINE}, /* TAG_STYLE */
77 {"math", HTML_CLRLINE}, /* TAG_MATH */
78 {"mrow", 0}, /* TAG_MROW */
79 {"mi", 0}, /* TAG_MI */
80 {"mo", 0}, /* TAG_MO */
81 {"msup", 0}, /* TAG_MSUP */
82 {"msub", 0}, /* TAG_MSUB */
83 {"msubsup", 0}, /* TAG_MSUBSUP */
84 {"mfrac", 0}, /* TAG_MFRAC */
85 {"msqrt", 0}, /* TAG_MSQRT */
86 {"mfenced", 0}, /* TAG_MFENCED */
87 {"mtable", 0}, /* TAG_MTABLE */
88 {"mtr", 0}, /* TAG_MTR */
89 {"mtd", 0}, /* TAG_MTD */
90 {"munderover", 0}, /* TAG_MUNDEROVER */
91 {"munder", 0}, /* TAG_MUNDER*/
92 {"mover", 0}, /* TAG_MOVER*/
93 };
94
/*
 * Attribute names as written into the output.  print_otag() indexes
 * this table with the key of each struct htmlpair it is handed, so the
 * entries must stay in the order of the ATTR_* constants named in the
 * trailing comments.
 */
static	const char	*const htmlattrs[ATTR_MAX] = {
	"name", /* ATTR_NAME */
	"rel", /* ATTR_REL */
	"href", /* ATTR_HREF */
	"type", /* ATTR_TYPE */
	"media", /* ATTR_MEDIA */
	"class", /* ATTR_CLASS */
	"style", /* ATTR_STYLE */
	"id", /* ATTR_ID */
	"colspan", /* ATTR_COLSPAN */
	"charset", /* ATTR_CHARSET */
	"open", /* ATTR_OPEN */
	"close", /* ATTR_CLOSE */
	"mathvariant", /* ATTR_MATHVARIANT */
};
110
/*
 * Unit suffix that bufcat_su() appends for each roff scaling unit;
 * the table is indexed by the unit member of struct roffsu.
 * Several roff units share one suffix here: bufcat_su() divides
 * SCALE_MM values by 100 and SCALE_BU values by 24 before printing
 * them with the suffix listed below.
 */
static	const char	*const roffscales[SCALE_MAX] = {
	"cm", /* SCALE_CM */
	"in", /* SCALE_IN */
	"pc", /* SCALE_PC */
	"pt", /* SCALE_PT */
	"em", /* SCALE_EM */
	"em", /* SCALE_MM */
	"ex", /* SCALE_EN */
	"ex", /* SCALE_BU */
	"em", /* SCALE_VS */
	"ex", /* SCALE_FS */
};
123
/* Forward declarations of the file-local helpers defined below. */
static	void	 bufncat(struct html *h, const char *p, size_t sz);
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_escape(char c);
static	int	 print_encode(struct html *h, const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
static	void	 print_attr(struct html *h, const char *key,
			const char *val);
130
131
132 void *
133 html_alloc(const struct mchars *mchars, const struct manoutput *outopts)
134 {
135 struct html *h;
136
137 h = mandoc_calloc(1, sizeof(struct html));
138
139 h->tags.head = NULL;
140 h->symtab = mchars;
141
142 h->style = outopts->style;
143 h->base_man = outopts->man;
144 h->base_includes = outopts->includes;
145 if (outopts->fragment)
146 h->oflags |= HTML_FRAGMENT;
147
148 return(h);
149 }
150
151 void
152 html_free(void *p)
153 {
154 struct tag *tag;
155 struct html *h;
156
157 h = (struct html *)p;
158
159 while ((tag = h->tags.head) != NULL) {
160 h->tags.head = tag->next;
161 free(tag);
162 }
163
164 free(h);
165 }
166
167 void
168 print_gen_head(struct html *h)
169 {
170 struct htmlpair tag[4];
171 struct tag *t;
172
173 tag[0].key = ATTR_CHARSET;
174 tag[0].val = "utf-8";
175 print_otag(h, TAG_META, 1, tag);
176
177 /*
178 * Print a default style-sheet.
179 */
180 t = print_otag(h, TAG_STYLE, 0, NULL);
181 print_text(h, "table.head, table.foot { width: 100%; }\n"
182 "td.head-rtitle, td.foot-os { text-align: right; }\n"
183 "td.head-vol { text-align: center; }\n"
184 "table.foot td { width: 50%; }\n"
185 "table.head td { width: 33%; }\n"
186 "div.spacer { margin: 1em 0; }\n");
187 print_tagq(h, t);
188
189 if (h->style) {
190 tag[0].key = ATTR_REL;
191 tag[0].val = "stylesheet";
192 tag[1].key = ATTR_HREF;
193 tag[1].val = h->style;
194 tag[2].key = ATTR_TYPE;
195 tag[2].val = "text/css";
196 tag[3].key = ATTR_MEDIA;
197 tag[3].val = "all";
198 print_otag(h, TAG_LINK, 4, tag);
199 }
200 }
201
202 static void
203 print_metaf(struct html *h, enum mandoc_esc deco)
204 {
205 enum htmlfont font;
206
207 switch (deco) {
208 case ESCAPE_FONTPREV:
209 font = h->metal;
210 break;
211 case ESCAPE_FONTITALIC:
212 font = HTMLFONT_ITALIC;
213 break;
214 case ESCAPE_FONTBOLD:
215 font = HTMLFONT_BOLD;
216 break;
217 case ESCAPE_FONTBI:
218 font = HTMLFONT_BI;
219 break;
220 case ESCAPE_FONT:
221 /* FALLTHROUGH */
222 case ESCAPE_FONTROMAN:
223 font = HTMLFONT_NONE;
224 break;
225 default:
226 abort();
227 }
228
229 if (h->metaf) {
230 print_tagq(h, h->metaf);
231 h->metaf = NULL;
232 }
233
234 h->metal = h->metac;
235 h->metac = font;
236
237 switch (font) {
238 case HTMLFONT_ITALIC:
239 h->metaf = print_otag(h, TAG_I, 0, NULL);
240 break;
241 case HTMLFONT_BOLD:
242 h->metaf = print_otag(h, TAG_B, 0, NULL);
243 break;
244 case HTMLFONT_BI:
245 h->metaf = print_otag(h, TAG_B, 0, NULL);
246 print_otag(h, TAG_I, 0, NULL);
247 break;
248 default:
249 break;
250 }
251 }
252
253 int
254 html_strlen(const char *cp)
255 {
256 size_t rsz;
257 int skip, sz;
258
259 /*
260 * Account for escaped sequences within string length
261 * calculations. This follows the logic in term_strlen() as we
262 * must calculate the width of produced strings.
263 * Assume that characters are always width of "1". This is
264 * hacky, but it gets the job done for approximation of widths.
265 */
266
267 sz = 0;
268 skip = 0;
269 while (1) {
270 rsz = strcspn(cp, "\\");
271 if (rsz) {
272 cp += rsz;
273 if (skip) {
274 skip = 0;
275 rsz--;
276 }
277 sz += rsz;
278 }
279 if ('\0' == *cp)
280 break;
281 cp++;
282 switch (mandoc_escape(&cp, NULL, NULL)) {
283 case ESCAPE_ERROR:
284 return(sz);
285 case ESCAPE_UNICODE:
286 /* FALLTHROUGH */
287 case ESCAPE_NUMBERED:
288 /* FALLTHROUGH */
289 case ESCAPE_SPECIAL:
290 /* FALLTHROUGH */
291 case ESCAPE_OVERSTRIKE:
292 if (skip)
293 skip = 0;
294 else
295 sz++;
296 break;
297 case ESCAPE_SKIPCHAR:
298 skip = 1;
299 break;
300 default:
301 break;
302 }
303 }
304 return(sz);
305 }
306
307 static int
308 print_escape(char c)
309 {
310
311 switch (c) {
312 case '<':
313 printf("&lt;");
314 break;
315 case '>':
316 printf("&gt;");
317 break;
318 case '&':
319 printf("&amp;");
320 break;
321 case '"':
322 printf("&quot;");
323 break;
324 case ASCII_NBRSP:
325 putchar('-');
326 break;
327 case ASCII_HYPH:
328 putchar('-');
329 /* FALLTHROUGH */
330 case ASCII_BREAK:
331 break;
332 default:
333 return(0);
334 }
335 return(1);
336 }
337
338 static int
339 print_encode(struct html *h, const char *p, int norecurse)
340 {
341 size_t sz;
342 int c, len, nospace;
343 const char *seq;
344 enum mandoc_esc esc;
345 static const char rejs[9] = { '\\', '<', '>', '&', '"',
346 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
347
348 nospace = 0;
349
350 while ('\0' != *p) {
351 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
352 h->flags &= ~HTML_SKIPCHAR;
353 p++;
354 continue;
355 }
356
357 sz = strcspn(p, rejs);
358
359 fwrite(p, 1, sz, stdout);
360 p += (int)sz;
361
362 if ('\0' == *p)
363 break;
364
365 if (print_escape(*p++))
366 continue;
367
368 esc = mandoc_escape(&p, &seq, &len);
369 if (ESCAPE_ERROR == esc)
370 break;
371
372 switch (esc) {
373 case ESCAPE_FONT:
374 /* FALLTHROUGH */
375 case ESCAPE_FONTPREV:
376 /* FALLTHROUGH */
377 case ESCAPE_FONTBOLD:
378 /* FALLTHROUGH */
379 case ESCAPE_FONTITALIC:
380 /* FALLTHROUGH */
381 case ESCAPE_FONTBI:
382 /* FALLTHROUGH */
383 case ESCAPE_FONTROMAN:
384 if (0 == norecurse)
385 print_metaf(h, esc);
386 continue;
387 case ESCAPE_SKIPCHAR:
388 h->flags |= HTML_SKIPCHAR;
389 continue;
390 default:
391 break;
392 }
393
394 if (h->flags & HTML_SKIPCHAR) {
395 h->flags &= ~HTML_SKIPCHAR;
396 continue;
397 }
398
399 switch (esc) {
400 case ESCAPE_UNICODE:
401 /* Skip past "u" header. */
402 c = mchars_num2uc(seq + 1, len - 1);
403 break;
404 case ESCAPE_NUMBERED:
405 c = mchars_num2char(seq, len);
406 if (c < 0)
407 continue;
408 break;
409 case ESCAPE_SPECIAL:
410 c = mchars_spec2cp(h->symtab, seq, len);
411 if (c <= 0)
412 continue;
413 break;
414 case ESCAPE_NOSPACE:
415 if ('\0' == *p)
416 nospace = 1;
417 continue;
418 case ESCAPE_OVERSTRIKE:
419 if (len == 0)
420 continue;
421 c = seq[len - 1];
422 break;
423 default:
424 continue;
425 }
426 if ((c < 0x20 && c != 0x09) ||
427 (c > 0x7E && c < 0xA0))
428 c = 0xFFFD;
429 if (c > 0x7E)
430 printf("&#%d;", c);
431 else if ( ! print_escape(c))
432 putchar(c);
433 }
434
435 return(nospace);
436 }
437
/*
 * Print one attribute in the form ` key="val"`, with a leading space.
 * The value goes through print_encode() with norecurse set, so font
 * escapes inside it do not open any elements.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
445
446 struct tag *
447 print_otag(struct html *h, enum htmltag tag,
448 int sz, const struct htmlpair *p)
449 {
450 int i;
451 struct tag *t;
452
453 /* Push this tags onto the stack of open scopes. */
454
455 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
456 t = mandoc_malloc(sizeof(struct tag));
457 t->tag = tag;
458 t->next = h->tags.head;
459 h->tags.head = t;
460 } else
461 t = NULL;
462
463 if ( ! (HTML_NOSPACE & h->flags))
464 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
465 /* Manage keeps! */
466 if ( ! (HTML_KEEP & h->flags)) {
467 if (HTML_PREKEEP & h->flags)
468 h->flags |= HTML_KEEP;
469 putchar(' ');
470 } else
471 printf("&#160;");
472 }
473
474 if ( ! (h->flags & HTML_NONOSPACE))
475 h->flags &= ~HTML_NOSPACE;
476 else
477 h->flags |= HTML_NOSPACE;
478
479 /* Print out the tag name and attributes. */
480
481 printf("<%s", htmltags[tag].name);
482 for (i = 0; i < sz; i++)
483 print_attr(h, htmlattrs[p[i].key], p[i].val);
484
485 /* Accommodate for "well-formed" singleton escaping. */
486
487 if (HTML_AUTOCLOSE & htmltags[tag].flags)
488 putchar('/');
489
490 putchar('>');
491
492 h->flags |= HTML_NOSPACE;
493
494 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
495 putchar('\n');
496
497 return(t);
498 }
499
500 static void
501 print_ctag(struct html *h, struct tag *tag)
502 {
503
504 /*
505 * Remember to close out and nullify the current
506 * meta-font and table, if applicable.
507 */
508 if (tag == h->metaf)
509 h->metaf = NULL;
510 if (tag == h->tblt)
511 h->tblt = NULL;
512
513 printf("</%s>", htmltags[tag->tag].name);
514 if (HTML_CLRLINE & htmltags[tag->tag].flags) {
515 h->flags |= HTML_NOSPACE;
516 putchar('\n');
517 }
518
519 h->tags.head = tag->next;
520 free(tag);
521 }
522
/*
 * Print the document type declaration on a line of its own.
 * The formatter state is not used.
 */
void
print_gen_decls(struct html *h)
{

	fputs("<!DOCTYPE html>\n", stdout);
}
529
530 void
531 print_text(struct html *h, const char *word)
532 {
533
534 if ( ! (HTML_NOSPACE & h->flags)) {
535 /* Manage keeps! */
536 if ( ! (HTML_KEEP & h->flags)) {
537 if (HTML_PREKEEP & h->flags)
538 h->flags |= HTML_KEEP;
539 putchar(' ');
540 } else
541 printf("&#160;");
542 }
543
544 assert(NULL == h->metaf);
545 switch (h->metac) {
546 case HTMLFONT_ITALIC:
547 h->metaf = print_otag(h, TAG_I, 0, NULL);
548 break;
549 case HTMLFONT_BOLD:
550 h->metaf = print_otag(h, TAG_B, 0, NULL);
551 break;
552 case HTMLFONT_BI:
553 h->metaf = print_otag(h, TAG_B, 0, NULL);
554 print_otag(h, TAG_I, 0, NULL);
555 break;
556 default:
557 break;
558 }
559
560 assert(word);
561 if ( ! print_encode(h, word, 0)) {
562 if ( ! (h->flags & HTML_NONOSPACE))
563 h->flags &= ~HTML_NOSPACE;
564 h->flags &= ~HTML_NONEWLINE;
565 } else
566 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
567
568 if (h->metaf) {
569 print_tagq(h, h->metaf);
570 h->metaf = NULL;
571 }
572
573 h->flags &= ~HTML_IGNDELIM;
574 }
575
576 void
577 print_tagq(struct html *h, const struct tag *until)
578 {
579 struct tag *tag;
580
581 while ((tag = h->tags.head) != NULL) {
582 print_ctag(h, tag);
583 if (until && tag == until)
584 return;
585 }
586 }
587
588 void
589 print_stagq(struct html *h, const struct tag *suntil)
590 {
591 struct tag *tag;
592
593 while ((tag = h->tags.head) != NULL) {
594 if (suntil && tag == suntil)
595 return;
596 print_ctag(h, tag);
597 }
598 }
599
600 void
601 print_paragraph(struct html *h)
602 {
603 struct tag *t;
604 struct htmlpair tag;
605
606 PAIR_CLASS_INIT(&tag, "spacer");
607 t = print_otag(h, TAG_DIV, 1, &tag);
608 print_tagq(h, t);
609 }
610
611
612 void
613 bufinit(struct html *h)
614 {
615
616 h->buf[0] = '\0';
617 h->buflen = 0;
618 }
619
/*
 * Append one declaration of the form "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*parts[4];
	size_t		 i;

	parts[0] = key;
	parts[1] = ":";
	parts[2] = val;
	parts[3] = ";";

	for (i = 0; i < sizeof(parts) / sizeof(parts[0]); i++)
		bufcat(h, parts[i]);
}
629
630 void
631 bufcat(struct html *h, const char *p)
632 {
633
634 /*
635 * XXX This is broken and not easy to fix.
636 * When using the -Oincludes option, buffmt_includes()
637 * may pass in strings overrunning BUFSIZ, causing a crash.
638 */
639
640 h->buflen = strlcat(h->buf, p, BUFSIZ);
641 assert(h->buflen < BUFSIZ);
642 }
643
644 void
645 bufcat_fmt(struct html *h, const char *fmt, ...)
646 {
647 va_list ap;
648
649 va_start(ap, fmt);
650 (void)vsnprintf(h->buf + (int)h->buflen,
651 BUFSIZ - h->buflen - 1, fmt, ap);
652 va_end(ap);
653 h->buflen = strlen(h->buf);
654 }
655
656 static void
657 bufncat(struct html *h, const char *p, size_t sz)
658 {
659
660 assert(h->buflen + sz + 1 < BUFSIZ);
661 strncat(h->buf, p, sz);
662 h->buflen += sz;
663 }
664
665 void
666 buffmt_includes(struct html *h, const char *name)
667 {
668 const char *p, *pp;
669
670 pp = h->base_includes;
671
672 bufinit(h);
673 while (NULL != (p = strchr(pp, '%'))) {
674 bufncat(h, pp, (size_t)(p - pp));
675 switch (*(p + 1)) {
676 case'I':
677 bufcat(h, name);
678 break;
679 default:
680 bufncat(h, p, 2);
681 break;
682 }
683 pp = p + 2;
684 }
685 if (pp)
686 bufcat(h, pp);
687 }
688
689 void
690 buffmt_man(struct html *h, const char *name, const char *sec)
691 {
692 const char *p, *pp;
693
694 pp = h->base_man;
695
696 bufinit(h);
697 while (NULL != (p = strchr(pp, '%'))) {
698 bufncat(h, pp, (size_t)(p - pp));
699 switch (*(p + 1)) {
700 case 'S':
701 bufcat(h, sec ? sec : "1");
702 break;
703 case 'N':
704 bufcat_fmt(h, "%s", name);
705 break;
706 default:
707 bufncat(h, p, 2);
708 break;
709 }
710 pp = p + 2;
711 }
712 if (pp)
713 bufcat(h, pp);
714 }
715
716 void
717 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
718 {
719 double v;
720
721 v = su->scale;
722 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
723 v = 1.0;
724 else if (SCALE_BU == su->unit)
725 v /= 24.0;
726
727 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
728 }
729
/*
 * Append src to the scratch buffer as an identifier, encoding every
 * byte as exactly two lowercase hexadecimal digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed, bytes
	 * of 0x80 and above would otherwise be sign-extended and come
	 * out as eight hex digits instead of two.
	 */
	while (*src != '\0')
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src++);
}