]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Reduce the amount of code by moving the three copies of the ohash
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.190 2015/10/12 00:15:31 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "out.h"
34 #include "html.h"
35 #include "manconf.h"
36 #include "main.h"
37
/*
 * Output properties of an HTML element, combined into
 * the "flags" member of struct htmldata.
 */
#define	HTML_CLRLINE	 0x01	/* Newline after the tag, no space before it. */
#define	HTML_NOSTACK	 0x02	/* Not pushed onto the stack of open tags. */
#define	HTML_AUTOCLOSE	 0x04	/* Tag has auto-closure. */

/*
 * Static description of one HTML element.
 */
struct htmldata {
	const char	*name;	/* Element name as printed in the tag. */
	int		 flags;	/* Bitwise OR of the HTML_* bits above. */
};
45
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"pre", HTML_CLRLINE }, /* TAG_PRE */
72 {"b", 0 }, /* TAG_B */
73 {"i", 0 }, /* TAG_I */
74 {"code", 0 }, /* TAG_CODE */
75 {"small", 0 }, /* TAG_SMALL */
76 {"style", HTML_CLRLINE}, /* TAG_STYLE */
77 {"math", HTML_CLRLINE}, /* TAG_MATH */
78 {"mrow", 0}, /* TAG_MROW */
79 {"mi", 0}, /* TAG_MI */
80 {"mo", 0}, /* TAG_MO */
81 {"msup", 0}, /* TAG_MSUP */
82 {"msub", 0}, /* TAG_MSUB */
83 {"msubsup", 0}, /* TAG_MSUBSUP */
84 {"mfrac", 0}, /* TAG_MFRAC */
85 {"msqrt", 0}, /* TAG_MSQRT */
86 {"mfenced", 0}, /* TAG_MFENCED */
87 {"mtable", 0}, /* TAG_MTABLE */
88 {"mtr", 0}, /* TAG_MTR */
89 {"mtd", 0}, /* TAG_MTD */
90 {"munderover", 0}, /* TAG_MUNDEROVER */
91 {"munder", 0}, /* TAG_MUNDER*/
92 {"mover", 0}, /* TAG_MOVER*/
93 };
94
95 static const char *const htmlattrs[ATTR_MAX] = {
96 "name", /* ATTR_NAME */
97 "rel", /* ATTR_REL */
98 "href", /* ATTR_HREF */
99 "type", /* ATTR_TYPE */
100 "media", /* ATTR_MEDIA */
101 "class", /* ATTR_CLASS */
102 "style", /* ATTR_STYLE */
103 "id", /* ATTR_ID */
104 "colspan", /* ATTR_COLSPAN */
105 "charset", /* ATTR_CHARSET */
106 "open", /* ATTR_OPEN */
107 "close", /* ATTR_CLOSE */
108 "mathvariant", /* ATTR_MATHVARIANT */
109 };
110
111 static const char *const roffscales[SCALE_MAX] = {
112 "cm", /* SCALE_CM */
113 "in", /* SCALE_IN */
114 "pc", /* SCALE_PC */
115 "pt", /* SCALE_PT */
116 "em", /* SCALE_EM */
117 "em", /* SCALE_MM */
118 "ex", /* SCALE_EN */
119 "ex", /* SCALE_BU */
120 "em", /* SCALE_VS */
121 "ex", /* SCALE_FS */
122 };
123
/* Helpers private to this file. */
static	void	 print_attr(struct html *h, const char *key,
			const char *val);
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_encode(struct html *h, const char *p,
			int norecurse);
static	int	 print_escape(char c);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
static	void	 bufncat(struct html *h, const char *p, size_t sz);
130
131
132 void *
133 html_alloc(const struct mchars *mchars, const struct manoutput *outopts)
134 {
135 struct html *h;
136
137 h = mandoc_calloc(1, sizeof(struct html));
138
139 h->tags.head = NULL;
140 h->symtab = mchars;
141
142 h->style = outopts->style;
143 h->base_man = outopts->man;
144 h->base_includes = outopts->includes;
145 if (outopts->fragment)
146 h->oflags |= HTML_FRAGMENT;
147
148 return h;
149 }
150
151 void
152 html_free(void *p)
153 {
154 struct tag *tag;
155 struct html *h;
156
157 h = (struct html *)p;
158
159 while ((tag = h->tags.head) != NULL) {
160 h->tags.head = tag->next;
161 free(tag);
162 }
163
164 free(h);
165 }
166
167 void
168 print_gen_head(struct html *h)
169 {
170 struct htmlpair tag[4];
171 struct tag *t;
172
173 tag[0].key = ATTR_CHARSET;
174 tag[0].val = "utf-8";
175 print_otag(h, TAG_META, 1, tag);
176
177 /*
178 * Print a default style-sheet.
179 */
180 t = print_otag(h, TAG_STYLE, 0, NULL);
181 print_text(h, "table.head, table.foot { width: 100%; }\n"
182 "td.head-rtitle, td.foot-os { text-align: right; }\n"
183 "td.head-vol { text-align: center; }\n"
184 "table.foot td { width: 50%; }\n"
185 "table.head td { width: 33%; }\n"
186 "div.spacer { margin: 1em 0; }\n");
187 print_tagq(h, t);
188
189 if (h->style) {
190 tag[0].key = ATTR_REL;
191 tag[0].val = "stylesheet";
192 tag[1].key = ATTR_HREF;
193 tag[1].val = h->style;
194 tag[2].key = ATTR_TYPE;
195 tag[2].val = "text/css";
196 tag[3].key = ATTR_MEDIA;
197 tag[3].val = "all";
198 print_otag(h, TAG_LINK, 4, tag);
199 }
200 }
201
202 static void
203 print_metaf(struct html *h, enum mandoc_esc deco)
204 {
205 enum htmlfont font;
206
207 switch (deco) {
208 case ESCAPE_FONTPREV:
209 font = h->metal;
210 break;
211 case ESCAPE_FONTITALIC:
212 font = HTMLFONT_ITALIC;
213 break;
214 case ESCAPE_FONTBOLD:
215 font = HTMLFONT_BOLD;
216 break;
217 case ESCAPE_FONTBI:
218 font = HTMLFONT_BI;
219 break;
220 case ESCAPE_FONT:
221 case ESCAPE_FONTROMAN:
222 font = HTMLFONT_NONE;
223 break;
224 default:
225 abort();
226 }
227
228 if (h->metaf) {
229 print_tagq(h, h->metaf);
230 h->metaf = NULL;
231 }
232
233 h->metal = h->metac;
234 h->metac = font;
235
236 switch (font) {
237 case HTMLFONT_ITALIC:
238 h->metaf = print_otag(h, TAG_I, 0, NULL);
239 break;
240 case HTMLFONT_BOLD:
241 h->metaf = print_otag(h, TAG_B, 0, NULL);
242 break;
243 case HTMLFONT_BI:
244 h->metaf = print_otag(h, TAG_B, 0, NULL);
245 print_otag(h, TAG_I, 0, NULL);
246 break;
247 default:
248 break;
249 }
250 }
251
252 int
253 html_strlen(const char *cp)
254 {
255 size_t rsz;
256 int skip, sz;
257
258 /*
259 * Account for escaped sequences within string length
260 * calculations. This follows the logic in term_strlen() as we
261 * must calculate the width of produced strings.
262 * Assume that characters are always width of "1". This is
263 * hacky, but it gets the job done for approximation of widths.
264 */
265
266 sz = 0;
267 skip = 0;
268 while (1) {
269 rsz = strcspn(cp, "\\");
270 if (rsz) {
271 cp += rsz;
272 if (skip) {
273 skip = 0;
274 rsz--;
275 }
276 sz += rsz;
277 }
278 if ('\0' == *cp)
279 break;
280 cp++;
281 switch (mandoc_escape(&cp, NULL, NULL)) {
282 case ESCAPE_ERROR:
283 return sz;
284 case ESCAPE_UNICODE:
285 case ESCAPE_NUMBERED:
286 case ESCAPE_SPECIAL:
287 case ESCAPE_OVERSTRIKE:
288 if (skip)
289 skip = 0;
290 else
291 sz++;
292 break;
293 case ESCAPE_SKIPCHAR:
294 skip = 1;
295 break;
296 default:
297 break;
298 }
299 }
300 return sz;
301 }
302
303 static int
304 print_escape(char c)
305 {
306
307 switch (c) {
308 case '<':
309 printf("&lt;");
310 break;
311 case '>':
312 printf("&gt;");
313 break;
314 case '&':
315 printf("&amp;");
316 break;
317 case '"':
318 printf("&quot;");
319 break;
320 case ASCII_NBRSP:
321 printf("&nbsp;");
322 break;
323 case ASCII_HYPH:
324 putchar('-');
325 break;
326 case ASCII_BREAK:
327 break;
328 default:
329 return 0;
330 }
331 return 1;
332 }
333
334 static int
335 print_encode(struct html *h, const char *p, int norecurse)
336 {
337 size_t sz;
338 int c, len, nospace;
339 const char *seq;
340 enum mandoc_esc esc;
341 static const char rejs[9] = { '\\', '<', '>', '&', '"',
342 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
343
344 nospace = 0;
345
346 while ('\0' != *p) {
347 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
348 h->flags &= ~HTML_SKIPCHAR;
349 p++;
350 continue;
351 }
352
353 sz = strcspn(p, rejs);
354
355 fwrite(p, 1, sz, stdout);
356 p += (int)sz;
357
358 if ('\0' == *p)
359 break;
360
361 if (print_escape(*p++))
362 continue;
363
364 esc = mandoc_escape(&p, &seq, &len);
365 if (ESCAPE_ERROR == esc)
366 break;
367
368 switch (esc) {
369 case ESCAPE_FONT:
370 case ESCAPE_FONTPREV:
371 case ESCAPE_FONTBOLD:
372 case ESCAPE_FONTITALIC:
373 case ESCAPE_FONTBI:
374 case ESCAPE_FONTROMAN:
375 if (0 == norecurse)
376 print_metaf(h, esc);
377 continue;
378 case ESCAPE_SKIPCHAR:
379 h->flags |= HTML_SKIPCHAR;
380 continue;
381 default:
382 break;
383 }
384
385 if (h->flags & HTML_SKIPCHAR) {
386 h->flags &= ~HTML_SKIPCHAR;
387 continue;
388 }
389
390 switch (esc) {
391 case ESCAPE_UNICODE:
392 /* Skip past "u" header. */
393 c = mchars_num2uc(seq + 1, len - 1);
394 break;
395 case ESCAPE_NUMBERED:
396 c = mchars_num2char(seq, len);
397 if (c < 0)
398 continue;
399 break;
400 case ESCAPE_SPECIAL:
401 c = mchars_spec2cp(h->symtab, seq, len);
402 if (c <= 0)
403 continue;
404 break;
405 case ESCAPE_NOSPACE:
406 if ('\0' == *p)
407 nospace = 1;
408 continue;
409 case ESCAPE_OVERSTRIKE:
410 if (len == 0)
411 continue;
412 c = seq[len - 1];
413 break;
414 default:
415 continue;
416 }
417 if ((c < 0x20 && c != 0x09) ||
418 (c > 0x7E && c < 0xA0))
419 c = 0xFFFD;
420 if (c > 0x7E)
421 printf("&#%d;", c);
422 else if ( ! print_escape(c))
423 putchar(c);
424 }
425
426 return nospace;
427 }
428
/*
 * Print one attribute as: space, key, equals sign, quoted value.
 * The value passes through print_encode() with font escapes
 * disabled, so no markup can be opened inside the attribute.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	printf(" %s=\"", key);
	(void)print_encode(h, val, 1);
	putchar('"');
}
436
437 struct tag *
438 print_otag(struct html *h, enum htmltag tag,
439 int sz, const struct htmlpair *p)
440 {
441 int i;
442 struct tag *t;
443
444 /* Push this tags onto the stack of open scopes. */
445
446 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
447 t = mandoc_malloc(sizeof(struct tag));
448 t->tag = tag;
449 t->next = h->tags.head;
450 h->tags.head = t;
451 } else
452 t = NULL;
453
454 if ( ! (HTML_NOSPACE & h->flags))
455 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
456 /* Manage keeps! */
457 if ( ! (HTML_KEEP & h->flags)) {
458 if (HTML_PREKEEP & h->flags)
459 h->flags |= HTML_KEEP;
460 putchar(' ');
461 } else
462 printf("&#160;");
463 }
464
465 if ( ! (h->flags & HTML_NONOSPACE))
466 h->flags &= ~HTML_NOSPACE;
467 else
468 h->flags |= HTML_NOSPACE;
469
470 /* Print out the tag name and attributes. */
471
472 printf("<%s", htmltags[tag].name);
473 for (i = 0; i < sz; i++)
474 print_attr(h, htmlattrs[p[i].key], p[i].val);
475
476 /* Accommodate for "well-formed" singleton escaping. */
477
478 if (HTML_AUTOCLOSE & htmltags[tag].flags)
479 putchar('/');
480
481 putchar('>');
482
483 h->flags |= HTML_NOSPACE;
484
485 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
486 putchar('\n');
487
488 return t;
489 }
490
491 static void
492 print_ctag(struct html *h, struct tag *tag)
493 {
494
495 /*
496 * Remember to close out and nullify the current
497 * meta-font and table, if applicable.
498 */
499 if (tag == h->metaf)
500 h->metaf = NULL;
501 if (tag == h->tblt)
502 h->tblt = NULL;
503
504 printf("</%s>", htmltags[tag->tag].name);
505 if (HTML_CLRLINE & htmltags[tag->tag].flags) {
506 h->flags |= HTML_NOSPACE;
507 putchar('\n');
508 }
509
510 h->tags.head = tag->next;
511 free(tag);
512 }
513
/*
 * Print the document type declaration, followed by a newline.
 * The formatter state is not consulted.
 */
void
print_gen_decls(struct html *h)
{

	fputs("<!DOCTYPE html>\n", stdout);
}
520
521 void
522 print_text(struct html *h, const char *word)
523 {
524
525 if ( ! (HTML_NOSPACE & h->flags)) {
526 /* Manage keeps! */
527 if ( ! (HTML_KEEP & h->flags)) {
528 if (HTML_PREKEEP & h->flags)
529 h->flags |= HTML_KEEP;
530 putchar(' ');
531 } else
532 printf("&#160;");
533 }
534
535 assert(NULL == h->metaf);
536 switch (h->metac) {
537 case HTMLFONT_ITALIC:
538 h->metaf = print_otag(h, TAG_I, 0, NULL);
539 break;
540 case HTMLFONT_BOLD:
541 h->metaf = print_otag(h, TAG_B, 0, NULL);
542 break;
543 case HTMLFONT_BI:
544 h->metaf = print_otag(h, TAG_B, 0, NULL);
545 print_otag(h, TAG_I, 0, NULL);
546 break;
547 default:
548 break;
549 }
550
551 assert(word);
552 if ( ! print_encode(h, word, 0)) {
553 if ( ! (h->flags & HTML_NONOSPACE))
554 h->flags &= ~HTML_NOSPACE;
555 h->flags &= ~HTML_NONEWLINE;
556 } else
557 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
558
559 if (h->metaf) {
560 print_tagq(h, h->metaf);
561 h->metaf = NULL;
562 }
563
564 h->flags &= ~HTML_IGNDELIM;
565 }
566
567 void
568 print_tagq(struct html *h, const struct tag *until)
569 {
570 struct tag *tag;
571
572 while ((tag = h->tags.head) != NULL) {
573 print_ctag(h, tag);
574 if (until && tag == until)
575 return;
576 }
577 }
578
579 void
580 print_stagq(struct html *h, const struct tag *suntil)
581 {
582 struct tag *tag;
583
584 while ((tag = h->tags.head) != NULL) {
585 if (suntil && tag == suntil)
586 return;
587 print_ctag(h, tag);
588 }
589 }
590
591 void
592 print_paragraph(struct html *h)
593 {
594 struct tag *t;
595 struct htmlpair tag;
596
597 PAIR_CLASS_INIT(&tag, "spacer");
598 t = print_otag(h, TAG_DIV, 1, &tag);
599 print_tagq(h, t);
600 }
601
602
603 void
604 bufinit(struct html *h)
605 {
606
607 h->buf[0] = '\0';
608 h->buflen = 0;
609 }
610
/*
 * Append one style declaration of the form "key:val;"
 * to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*part[4];
	size_t		 i;

	part[0] = key;
	part[1] = ":";
	part[2] = val;
	part[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, part[i]);
}
620
621 void
622 bufcat(struct html *h, const char *p)
623 {
624
625 /*
626 * XXX This is broken and not easy to fix.
627 * When using the -Oincludes option, buffmt_includes()
628 * may pass in strings overrunning BUFSIZ, causing a crash.
629 */
630
631 h->buflen = strlcat(h->buf, p, BUFSIZ);
632 assert(h->buflen < BUFSIZ);
633 }
634
635 void
636 bufcat_fmt(struct html *h, const char *fmt, ...)
637 {
638 va_list ap;
639
640 va_start(ap, fmt);
641 (void)vsnprintf(h->buf + (int)h->buflen,
642 BUFSIZ - h->buflen - 1, fmt, ap);
643 va_end(ap);
644 h->buflen = strlen(h->buf);
645 }
646
647 static void
648 bufncat(struct html *h, const char *p, size_t sz)
649 {
650
651 assert(h->buflen + sz + 1 < BUFSIZ);
652 strncat(h->buf, p, sz);
653 h->buflen += sz;
654 }
655
656 void
657 buffmt_includes(struct html *h, const char *name)
658 {
659 const char *p, *pp;
660
661 pp = h->base_includes;
662
663 bufinit(h);
664 while (NULL != (p = strchr(pp, '%'))) {
665 bufncat(h, pp, (size_t)(p - pp));
666 switch (*(p + 1)) {
667 case'I':
668 bufcat(h, name);
669 break;
670 default:
671 bufncat(h, p, 2);
672 break;
673 }
674 pp = p + 2;
675 }
676 if (pp)
677 bufcat(h, pp);
678 }
679
680 void
681 buffmt_man(struct html *h, const char *name, const char *sec)
682 {
683 const char *p, *pp;
684
685 pp = h->base_man;
686
687 bufinit(h);
688 while (NULL != (p = strchr(pp, '%'))) {
689 bufncat(h, pp, (size_t)(p - pp));
690 switch (*(p + 1)) {
691 case 'S':
692 bufcat(h, sec ? sec : "1");
693 break;
694 case 'N':
695 bufcat_fmt(h, "%s", name);
696 break;
697 default:
698 bufncat(h, p, 2);
699 break;
700 }
701 pp = p + 2;
702 }
703 if (pp)
704 bufcat(h, pp);
705 }
706
707 void
708 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
709 {
710 double v;
711
712 v = su->scale;
713 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
714 v = 1.0;
715 else if (SCALE_BU == su->unit)
716 v /= 24.0;
717
718 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
719 }
720
/*
 * Append an identifier derived from src to the scratch buffer:
 * every byte of src is written as two lowercase hexadecimal digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Convert by way of unsigned char: where plain char is
	 * signed, a byte with the high bit set would be sign-extended
	 * and printed as eight digits ("ffffffXX") instead of two.
	 */
	for (; *src != '\0'; src++)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src);
}