]> git.cameronkatri.com Git - mandoc.git/blob - html.c
no more _subdir; Jan Stary <hans at stare dot cz>
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.186 2015/03/27 21:33:20 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "out.h"
34 #include "html.h"
35 #include "manconf.h"
36 #include "main.h"
37
/*
 * Per-element formatting flags, stored in htmldata.flags:
 * HTML_CLRLINE   - print_otag() and print_ctag() write a newline after
 *                  the tag, and print_otag() writes no leading space.
 * HTML_NOSTACK   - print_otag() does not push the element onto the
 *                  stack of open tags.
 * HTML_AUTOCLOSE - print_otag() writes the tag in "<name/>" form.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/* Static description of one HTML element. */
struct	htmldata {
	const char	 *name;		/* element name as written */
	int		  flags;	/* bitwise OR of the HTML_* flags above */
};
45
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"pre", HTML_CLRLINE }, /* TAG_PRE */
72 {"b", 0 }, /* TAG_B */
73 {"i", 0 }, /* TAG_I */
74 {"code", 0 }, /* TAG_CODE */
75 {"small", 0 }, /* TAG_SMALL */
76 {"style", HTML_CLRLINE}, /* TAG_STYLE */
77 {"math", HTML_CLRLINE}, /* TAG_MATH */
78 {"mrow", 0}, /* TAG_MROW */
79 {"mi", 0}, /* TAG_MI */
80 {"mo", 0}, /* TAG_MO */
81 {"msup", 0}, /* TAG_MSUP */
82 {"msub", 0}, /* TAG_MSUB */
83 {"msubsup", 0}, /* TAG_MSUBSUP */
84 {"mfrac", 0}, /* TAG_MFRAC */
85 {"msqrt", 0}, /* TAG_MSQRT */
86 {"mfenced", 0}, /* TAG_MFENCED */
87 {"mtable", 0}, /* TAG_MTABLE */
88 {"mtr", 0}, /* TAG_MTR */
89 {"mtd", 0}, /* TAG_MTD */
90 {"munderover", 0}, /* TAG_MUNDEROVER */
91 {"munder", 0}, /* TAG_MUNDER*/
92 {"mover", 0}, /* TAG_MOVER*/
93 };
94
95 static const char *const htmlattrs[ATTR_MAX] = {
96 "name", /* ATTR_NAME */
97 "rel", /* ATTR_REL */
98 "href", /* ATTR_HREF */
99 "type", /* ATTR_TYPE */
100 "media", /* ATTR_MEDIA */
101 "class", /* ATTR_CLASS */
102 "style", /* ATTR_STYLE */
103 "id", /* ATTR_ID */
104 "colspan", /* ATTR_COLSPAN */
105 "charset", /* ATTR_CHARSET */
106 "open", /* ATTR_OPEN */
107 "close", /* ATTR_CLOSE */
108 "mathvariant", /* ATTR_MATHVARIANT */
109 };
110
111 static const char *const roffscales[SCALE_MAX] = {
112 "cm", /* SCALE_CM */
113 "in", /* SCALE_IN */
114 "pc", /* SCALE_PC */
115 "pt", /* SCALE_PT */
116 "em", /* SCALE_EM */
117 "em", /* SCALE_MM */
118 "ex", /* SCALE_EN */
119 "ex", /* SCALE_BU */
120 "em", /* SCALE_VS */
121 "ex", /* SCALE_FS */
122 };
123
/* File-local helpers: buffer handling. */
static	void	 bufncat(struct html *h, const char *p, size_t sz);

/* File-local helpers: tag and text output. */
static	void	 print_attr(struct html *h, const char *key,
			const char *val);
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_encode(struct html *h, const char *p,
			int norecurse);
static	int	 print_escape(char c);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
130
131
132 void *
133 html_alloc(const struct mchars *mchars, const struct manoutput *outopts)
134 {
135 struct html *h;
136
137 h = mandoc_calloc(1, sizeof(struct html));
138
139 h->tags.head = NULL;
140 h->symtab = mchars;
141
142 h->style = outopts->style;
143 h->base_man = outopts->man;
144 h->base_includes = outopts->includes;
145 if (outopts->fragment)
146 h->oflags |= HTML_FRAGMENT;
147
148 return(h);
149 }
150
151 void
152 html_free(void *p)
153 {
154 struct tag *tag;
155 struct html *h;
156
157 h = (struct html *)p;
158
159 while ((tag = h->tags.head) != NULL) {
160 h->tags.head = tag->next;
161 free(tag);
162 }
163
164 free(h);
165 }
166
167 void
168 print_gen_head(struct html *h)
169 {
170 struct htmlpair tag[4];
171 struct tag *t;
172
173 tag[0].key = ATTR_CHARSET;
174 tag[0].val = "utf-8";
175 print_otag(h, TAG_META, 1, tag);
176
177 /*
178 * Print a default style-sheet.
179 */
180 t = print_otag(h, TAG_STYLE, 0, NULL);
181 print_text(h, "table.head, table.foot { width: 100%; }\n"
182 "td.head-rtitle, td.foot-os { text-align: right; }\n"
183 "td.head-vol { text-align: center; }\n"
184 "table.foot td { width: 50%; }\n"
185 "table.head td { width: 33%; }\n"
186 "div.spacer { margin: 1em 0; }\n");
187 print_tagq(h, t);
188
189 if (h->style) {
190 tag[0].key = ATTR_REL;
191 tag[0].val = "stylesheet";
192 tag[1].key = ATTR_HREF;
193 tag[1].val = h->style;
194 tag[2].key = ATTR_TYPE;
195 tag[2].val = "text/css";
196 tag[3].key = ATTR_MEDIA;
197 tag[3].val = "all";
198 print_otag(h, TAG_LINK, 4, tag);
199 }
200 }
201
202 static void
203 print_metaf(struct html *h, enum mandoc_esc deco)
204 {
205 enum htmlfont font;
206
207 switch (deco) {
208 case ESCAPE_FONTPREV:
209 font = h->metal;
210 break;
211 case ESCAPE_FONTITALIC:
212 font = HTMLFONT_ITALIC;
213 break;
214 case ESCAPE_FONTBOLD:
215 font = HTMLFONT_BOLD;
216 break;
217 case ESCAPE_FONTBI:
218 font = HTMLFONT_BI;
219 break;
220 case ESCAPE_FONT:
221 /* FALLTHROUGH */
222 case ESCAPE_FONTROMAN:
223 font = HTMLFONT_NONE;
224 break;
225 default:
226 abort();
227 /* NOTREACHED */
228 }
229
230 if (h->metaf) {
231 print_tagq(h, h->metaf);
232 h->metaf = NULL;
233 }
234
235 h->metal = h->metac;
236 h->metac = font;
237
238 switch (font) {
239 case HTMLFONT_ITALIC:
240 h->metaf = print_otag(h, TAG_I, 0, NULL);
241 break;
242 case HTMLFONT_BOLD:
243 h->metaf = print_otag(h, TAG_B, 0, NULL);
244 break;
245 case HTMLFONT_BI:
246 h->metaf = print_otag(h, TAG_B, 0, NULL);
247 print_otag(h, TAG_I, 0, NULL);
248 break;
249 default:
250 break;
251 }
252 }
253
254 int
255 html_strlen(const char *cp)
256 {
257 size_t rsz;
258 int skip, sz;
259
260 /*
261 * Account for escaped sequences within string length
262 * calculations. This follows the logic in term_strlen() as we
263 * must calculate the width of produced strings.
264 * Assume that characters are always width of "1". This is
265 * hacky, but it gets the job done for approximation of widths.
266 */
267
268 sz = 0;
269 skip = 0;
270 while (1) {
271 rsz = strcspn(cp, "\\");
272 if (rsz) {
273 cp += rsz;
274 if (skip) {
275 skip = 0;
276 rsz--;
277 }
278 sz += rsz;
279 }
280 if ('\0' == *cp)
281 break;
282 cp++;
283 switch (mandoc_escape(&cp, NULL, NULL)) {
284 case ESCAPE_ERROR:
285 return(sz);
286 case ESCAPE_UNICODE:
287 /* FALLTHROUGH */
288 case ESCAPE_NUMBERED:
289 /* FALLTHROUGH */
290 case ESCAPE_SPECIAL:
291 /* FALLTHROUGH */
292 case ESCAPE_OVERSTRIKE:
293 if (skip)
294 skip = 0;
295 else
296 sz++;
297 break;
298 case ESCAPE_SKIPCHAR:
299 skip = 1;
300 break;
301 default:
302 break;
303 }
304 }
305 return(sz);
306 }
307
308 static int
309 print_escape(char c)
310 {
311
312 switch (c) {
313 case '<':
314 printf("&lt;");
315 break;
316 case '>':
317 printf("&gt;");
318 break;
319 case '&':
320 printf("&amp;");
321 break;
322 case '"':
323 printf("&quot;");
324 break;
325 case ASCII_NBRSP:
326 putchar('-');
327 break;
328 case ASCII_HYPH:
329 putchar('-');
330 /* FALLTHROUGH */
331 case ASCII_BREAK:
332 break;
333 default:
334 return(0);
335 }
336 return(1);
337 }
338
339 static int
340 print_encode(struct html *h, const char *p, int norecurse)
341 {
342 size_t sz;
343 int c, len, nospace;
344 const char *seq;
345 enum mandoc_esc esc;
346 static const char rejs[9] = { '\\', '<', '>', '&', '"',
347 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
348
349 nospace = 0;
350
351 while ('\0' != *p) {
352 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
353 h->flags &= ~HTML_SKIPCHAR;
354 p++;
355 continue;
356 }
357
358 sz = strcspn(p, rejs);
359
360 fwrite(p, 1, sz, stdout);
361 p += (int)sz;
362
363 if ('\0' == *p)
364 break;
365
366 if (print_escape(*p++))
367 continue;
368
369 esc = mandoc_escape(&p, &seq, &len);
370 if (ESCAPE_ERROR == esc)
371 break;
372
373 switch (esc) {
374 case ESCAPE_FONT:
375 /* FALLTHROUGH */
376 case ESCAPE_FONTPREV:
377 /* FALLTHROUGH */
378 case ESCAPE_FONTBOLD:
379 /* FALLTHROUGH */
380 case ESCAPE_FONTITALIC:
381 /* FALLTHROUGH */
382 case ESCAPE_FONTBI:
383 /* FALLTHROUGH */
384 case ESCAPE_FONTROMAN:
385 if (0 == norecurse)
386 print_metaf(h, esc);
387 continue;
388 case ESCAPE_SKIPCHAR:
389 h->flags |= HTML_SKIPCHAR;
390 continue;
391 default:
392 break;
393 }
394
395 if (h->flags & HTML_SKIPCHAR) {
396 h->flags &= ~HTML_SKIPCHAR;
397 continue;
398 }
399
400 switch (esc) {
401 case ESCAPE_UNICODE:
402 /* Skip past "u" header. */
403 c = mchars_num2uc(seq + 1, len - 1);
404 break;
405 case ESCAPE_NUMBERED:
406 c = mchars_num2char(seq, len);
407 if (c < 0)
408 continue;
409 break;
410 case ESCAPE_SPECIAL:
411 c = mchars_spec2cp(h->symtab, seq, len);
412 if (c <= 0)
413 continue;
414 break;
415 case ESCAPE_NOSPACE:
416 if ('\0' == *p)
417 nospace = 1;
418 continue;
419 case ESCAPE_OVERSTRIKE:
420 if (len == 0)
421 continue;
422 c = seq[len - 1];
423 break;
424 default:
425 continue;
426 }
427 if ((c < 0x20 && c != 0x09) ||
428 (c > 0x7E && c < 0xA0))
429 c = 0xFFFD;
430 if (c > 0x7E)
431 printf("&#%d;", c);
432 else if ( ! print_escape(c))
433 putchar(c);
434 }
435
436 return(nospace);
437 }
438
/*
 * Write one attribute in the form ' key="val"'.
 * The value goes through print_encode() with font changes disabled,
 * so markup characters in it are escaped and no tags are opened
 * inside the attribute.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	printf("%s=\"", key);
	(void)print_encode(h, val, 1);
	putchar('"');
}
446
447 struct tag *
448 print_otag(struct html *h, enum htmltag tag,
449 int sz, const struct htmlpair *p)
450 {
451 int i;
452 struct tag *t;
453
454 /* Push this tags onto the stack of open scopes. */
455
456 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
457 t = mandoc_malloc(sizeof(struct tag));
458 t->tag = tag;
459 t->next = h->tags.head;
460 h->tags.head = t;
461 } else
462 t = NULL;
463
464 if ( ! (HTML_NOSPACE & h->flags))
465 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
466 /* Manage keeps! */
467 if ( ! (HTML_KEEP & h->flags)) {
468 if (HTML_PREKEEP & h->flags)
469 h->flags |= HTML_KEEP;
470 putchar(' ');
471 } else
472 printf("&#160;");
473 }
474
475 if ( ! (h->flags & HTML_NONOSPACE))
476 h->flags &= ~HTML_NOSPACE;
477 else
478 h->flags |= HTML_NOSPACE;
479
480 /* Print out the tag name and attributes. */
481
482 printf("<%s", htmltags[tag].name);
483 for (i = 0; i < sz; i++)
484 print_attr(h, htmlattrs[p[i].key], p[i].val);
485
486 /* Accommodate for "well-formed" singleton escaping. */
487
488 if (HTML_AUTOCLOSE & htmltags[tag].flags)
489 putchar('/');
490
491 putchar('>');
492
493 h->flags |= HTML_NOSPACE;
494
495 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
496 putchar('\n');
497
498 return(t);
499 }
500
501 static void
502 print_ctag(struct html *h, struct tag *tag)
503 {
504
505 /*
506 * Remember to close out and nullify the current
507 * meta-font and table, if applicable.
508 */
509 if (tag == h->metaf)
510 h->metaf = NULL;
511 if (tag == h->tblt)
512 h->tblt = NULL;
513
514 printf("</%s>", htmltags[tag->tag].name);
515 if (HTML_CLRLINE & htmltags[tag->tag].flags) {
516 h->flags |= HTML_NOSPACE;
517 putchar('\n');
518 }
519
520 h->tags.head = tag->next;
521 free(tag);
522 }
523
/*
 * Write the document type declaration, followed by a newline.
 * The formatter state is not used.
 */
void
print_gen_decls(struct html *h)
{

	(void)h;
	fputs("<!DOCTYPE html>\n", stdout);
}
530
531 void
532 print_text(struct html *h, const char *word)
533 {
534
535 if ( ! (HTML_NOSPACE & h->flags)) {
536 /* Manage keeps! */
537 if ( ! (HTML_KEEP & h->flags)) {
538 if (HTML_PREKEEP & h->flags)
539 h->flags |= HTML_KEEP;
540 putchar(' ');
541 } else
542 printf("&#160;");
543 }
544
545 assert(NULL == h->metaf);
546 switch (h->metac) {
547 case HTMLFONT_ITALIC:
548 h->metaf = print_otag(h, TAG_I, 0, NULL);
549 break;
550 case HTMLFONT_BOLD:
551 h->metaf = print_otag(h, TAG_B, 0, NULL);
552 break;
553 case HTMLFONT_BI:
554 h->metaf = print_otag(h, TAG_B, 0, NULL);
555 print_otag(h, TAG_I, 0, NULL);
556 break;
557 default:
558 break;
559 }
560
561 assert(word);
562 if ( ! print_encode(h, word, 0)) {
563 if ( ! (h->flags & HTML_NONOSPACE))
564 h->flags &= ~HTML_NOSPACE;
565 h->flags &= ~HTML_NONEWLINE;
566 } else
567 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
568
569 if (h->metaf) {
570 print_tagq(h, h->metaf);
571 h->metaf = NULL;
572 }
573
574 h->flags &= ~HTML_IGNDELIM;
575 }
576
577 void
578 print_tagq(struct html *h, const struct tag *until)
579 {
580 struct tag *tag;
581
582 while ((tag = h->tags.head) != NULL) {
583 print_ctag(h, tag);
584 if (until && tag == until)
585 return;
586 }
587 }
588
589 void
590 print_stagq(struct html *h, const struct tag *suntil)
591 {
592 struct tag *tag;
593
594 while ((tag = h->tags.head) != NULL) {
595 if (suntil && tag == suntil)
596 return;
597 print_ctag(h, tag);
598 }
599 }
600
601 void
602 print_paragraph(struct html *h)
603 {
604 struct tag *t;
605 struct htmlpair tag;
606
607 PAIR_CLASS_INIT(&tag, "spacer");
608 t = print_otag(h, TAG_DIV, 1, &tag);
609 print_tagq(h, t);
610 }
611
612
613 void
614 bufinit(struct html *h)
615 {
616
617 h->buf[0] = '\0';
618 h->buflen = 0;
619 }
620
/*
 * Append one style declaration of the form "key:val;" to the
 * scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*parts[4];
	int		 i;

	parts[0] = key;
	parts[1] = ":";
	parts[2] = val;
	parts[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, parts[i]);
}
630
631 void
632 bufcat(struct html *h, const char *p)
633 {
634
635 /*
636 * XXX This is broken and not easy to fix.
637 * When using the -Oincludes option, buffmt_includes()
638 * may pass in strings overrunning BUFSIZ, causing a crash.
639 */
640
641 h->buflen = strlcat(h->buf, p, BUFSIZ);
642 assert(h->buflen < BUFSIZ);
643 }
644
645 void
646 bufcat_fmt(struct html *h, const char *fmt, ...)
647 {
648 va_list ap;
649
650 va_start(ap, fmt);
651 (void)vsnprintf(h->buf + (int)h->buflen,
652 BUFSIZ - h->buflen - 1, fmt, ap);
653 va_end(ap);
654 h->buflen = strlen(h->buf);
655 }
656
657 static void
658 bufncat(struct html *h, const char *p, size_t sz)
659 {
660
661 assert(h->buflen + sz + 1 < BUFSIZ);
662 strncat(h->buf, p, sz);
663 h->buflen += sz;
664 }
665
666 void
667 buffmt_includes(struct html *h, const char *name)
668 {
669 const char *p, *pp;
670
671 pp = h->base_includes;
672
673 bufinit(h);
674 while (NULL != (p = strchr(pp, '%'))) {
675 bufncat(h, pp, (size_t)(p - pp));
676 switch (*(p + 1)) {
677 case'I':
678 bufcat(h, name);
679 break;
680 default:
681 bufncat(h, p, 2);
682 break;
683 }
684 pp = p + 2;
685 }
686 if (pp)
687 bufcat(h, pp);
688 }
689
690 void
691 buffmt_man(struct html *h, const char *name, const char *sec)
692 {
693 const char *p, *pp;
694
695 pp = h->base_man;
696
697 bufinit(h);
698 while (NULL != (p = strchr(pp, '%'))) {
699 bufncat(h, pp, (size_t)(p - pp));
700 switch (*(p + 1)) {
701 case 'S':
702 bufcat(h, sec ? sec : "1");
703 break;
704 case 'N':
705 bufcat_fmt(h, "%s", name);
706 break;
707 default:
708 bufncat(h, p, 2);
709 break;
710 }
711 pp = p + 2;
712 }
713 if (pp)
714 bufcat(h, pp);
715 }
716
717 void
718 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
719 {
720 double v;
721
722 v = su->scale;
723 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
724 v = 1.0;
725 else if (SCALE_BU == su->unit)
726 v /= 24.0;
727
728 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
729 }
730
/*
 * Append src to the scratch buffer in a form usable as an id
 * attribute: every byte is written as two lowercase hexadecimal
 * digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed, a
	 * byte of 0x80 or above would otherwise be sign-extended and
	 * printed as eight digits instead of two.
	 */
	for ( ; *src != '\0'; src++)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src);
}