]> git.cameronkatri.com Git - mandoc.git/blob - html.c
fd696284da16c411505bcc8ae920e964da4af817
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.144 2011/05/17 11:50:20 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc.h"
34 #include "libmandoc.h"
35 #include "out.h"
36 #include "html.h"
37 #include "main.h"
38
/*
 * Flag bits stored in htmldata.flags.
 */
#define	HTML_CLRLINE	 (1 << 0) /* Tag is followed by a newline. */
#define	HTML_NOSTACK	 (1 << 1) /* print_otag() does not push it. */
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/*
 * Static description of one element type: the name written between
 * the angle brackets and the HTML_* flag bits defined above.
 */
struct	htmldata {
	const char	*name;
	int		 flags;
};
46
47 static const struct htmldata htmltags[TAG_MAX] = {
48 {"html", HTML_CLRLINE}, /* TAG_HTML */
49 {"head", HTML_CLRLINE}, /* TAG_HEAD */
50 {"body", HTML_CLRLINE}, /* TAG_BODY */
51 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 {"title", HTML_CLRLINE}, /* TAG_TITLE */
53 {"div", HTML_CLRLINE}, /* TAG_DIV */
54 {"h1", 0}, /* TAG_H1 */
55 {"h2", 0}, /* TAG_H2 */
56 {"span", 0}, /* TAG_SPAN */
57 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
58 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"a", 0}, /* TAG_A */
60 {"table", HTML_CLRLINE}, /* TAG_TABLE */
61 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
62 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
63 {"tr", HTML_CLRLINE}, /* TAG_TR */
64 {"td", HTML_CLRLINE}, /* TAG_TD */
65 {"li", HTML_CLRLINE}, /* TAG_LI */
66 {"ul", HTML_CLRLINE}, /* TAG_UL */
67 {"ol", HTML_CLRLINE}, /* TAG_OL */
68 {"dl", HTML_CLRLINE}, /* TAG_DL */
69 {"dt", HTML_CLRLINE}, /* TAG_DT */
70 {"dd", HTML_CLRLINE}, /* TAG_DD */
71 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
72 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
73 {"pre", HTML_CLRLINE }, /* TAG_PRE */
74 {"b", 0 }, /* TAG_B */
75 {"i", 0 }, /* TAG_I */
76 {"code", 0 }, /* TAG_CODE */
77 {"small", 0 }, /* TAG_SMALL */
78 };
79
80 static const char *const htmlattrs[ATTR_MAX] = {
81 "http-equiv", /* ATTR_HTTPEQUIV */
82 "content", /* ATTR_CONTENT */
83 "name", /* ATTR_NAME */
84 "rel", /* ATTR_REL */
85 "href", /* ATTR_HREF */
86 "type", /* ATTR_TYPE */
87 "media", /* ATTR_MEDIA */
88 "class", /* ATTR_CLASS */
89 "style", /* ATTR_STYLE */
90 "width", /* ATTR_WIDTH */
91 "id", /* ATTR_ID */
92 "summary", /* ATTR_SUMMARY */
93 "align", /* ATTR_ALIGN */
94 "colspan", /* ATTR_COLSPAN */
95 };
96
97 static const char *const roffscales[SCALE_MAX] = {
98 "cm", /* SCALE_CM */
99 "in", /* SCALE_IN */
100 "pc", /* SCALE_PC */
101 "pt", /* SCALE_PT */
102 "em", /* SCALE_EM */
103 "em", /* SCALE_MM */
104 "ex", /* SCALE_EN */
105 "ex", /* SCALE_BU */
106 "em", /* SCALE_VS */
107 "ex", /* SCALE_FS */
108 };
109
/* File-local helpers, in alphabetical order. */
static	void	 bufncat(struct html *h, const char *p, size_t sz);
static	void	*ml_alloc(char *outopts, enum htmltype type);
static	void	 print_attr(struct html *h,
			const char *key, const char *val);
static	void	 print_ctag(struct html *h, enum htmltag tag);
static	int	 print_encode(struct html *h,
			const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
static	void	 print_res(struct html *h, const char *p, size_t len);
static	void	 print_spec(struct html *h, const char *p, size_t len);
118
119 static void *
120 ml_alloc(char *outopts, enum htmltype type)
121 {
122 struct html *h;
123 const char *toks[4];
124 char *v;
125
126 toks[0] = "style";
127 toks[1] = "man";
128 toks[2] = "includes";
129 toks[3] = NULL;
130
131 h = mandoc_calloc(1, sizeof(struct html));
132
133 h->type = type;
134 h->tags.head = NULL;
135 h->symtab = mchars_alloc();
136
137 while (outopts && *outopts)
138 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
139 case (0):
140 h->style = v;
141 break;
142 case (1):
143 h->base_man = v;
144 break;
145 case (2):
146 h->base_includes = v;
147 break;
148 default:
149 break;
150 }
151
152 return(h);
153 }
154
155 void *
156 html_alloc(char *outopts)
157 {
158
159 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
160 }
161
162
163 void *
164 xhtml_alloc(char *outopts)
165 {
166
167 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
168 }
169
170
171 void
172 html_free(void *p)
173 {
174 struct tag *tag;
175 struct html *h;
176
177 h = (struct html *)p;
178
179 while ((tag = h->tags.head) != NULL) {
180 h->tags.head = tag->next;
181 free(tag);
182 }
183
184 if (h->symtab)
185 mchars_free(h->symtab);
186
187 free(h);
188 }
189
190
191 void
192 print_gen_head(struct html *h)
193 {
194 struct htmlpair tag[4];
195
196 tag[0].key = ATTR_HTTPEQUIV;
197 tag[0].val = "Content-Type";
198 tag[1].key = ATTR_CONTENT;
199 tag[1].val = "text/html; charset=utf-8";
200 print_otag(h, TAG_META, 2, tag);
201
202 tag[0].key = ATTR_NAME;
203 tag[0].val = "resource-type";
204 tag[1].key = ATTR_CONTENT;
205 tag[1].val = "document";
206 print_otag(h, TAG_META, 2, tag);
207
208 if (h->style) {
209 tag[0].key = ATTR_REL;
210 tag[0].val = "stylesheet";
211 tag[1].key = ATTR_HREF;
212 tag[1].val = h->style;
213 tag[2].key = ATTR_TYPE;
214 tag[2].val = "text/css";
215 tag[3].key = ATTR_MEDIA;
216 tag[3].val = "all";
217 print_otag(h, TAG_LINK, 4, tag);
218 }
219 }
220
221 static void
222 print_spec(struct html *h, const char *p, size_t len)
223 {
224 int cp;
225 const char *rhs;
226 size_t sz;
227
228 if ((cp = mchars_spec2cp(h->symtab, p, len)) > 0) {
229 printf("&#%d;", cp);
230 return;
231 } else if (-1 == cp && 1 == len) {
232 fwrite(p, 1, len, stdout);
233 return;
234 } else if (-1 == cp)
235 return;
236
237 if (NULL != (rhs = mchars_spec2str(h->symtab, p, len, &sz)))
238 fwrite(rhs, 1, sz, stdout);
239 }
240
241
242 static void
243 print_res(struct html *h, const char *p, size_t len)
244 {
245 int cp;
246 const char *rhs;
247 size_t sz;
248
249 if ((cp = mchars_res2cp(h->symtab, p, len)) > 0) {
250 printf("&#%d;", cp);
251 return;
252 } else if (-1 == cp)
253 return;
254
255 if (NULL != (rhs = mchars_res2str(h->symtab, p, len, &sz)))
256 fwrite(rhs, 1, sz, stdout);
257 }
258
259
260 static void
261 print_metaf(struct html *h, enum mandoc_esc deco)
262 {
263 enum htmlfont font;
264
265 switch (deco) {
266 case (ESCAPE_FONTPREV):
267 font = h->metal;
268 break;
269 case (ESCAPE_FONTITALIC):
270 font = HTMLFONT_ITALIC;
271 break;
272 case (ESCAPE_FONTBOLD):
273 font = HTMLFONT_BOLD;
274 break;
275 case (ESCAPE_FONTROMAN):
276 font = HTMLFONT_NONE;
277 break;
278 default:
279 abort();
280 /* NOTREACHED */
281 }
282
283 if (h->metaf) {
284 print_tagq(h, h->metaf);
285 h->metaf = NULL;
286 }
287
288 h->metal = h->metac;
289 h->metac = font;
290
291 if (HTMLFONT_NONE != font)
292 h->metaf = HTMLFONT_BOLD == font ?
293 print_otag(h, TAG_B, 0, NULL) :
294 print_otag(h, TAG_I, 0, NULL);
295 }
296
297 int
298 html_strlen(const char *cp)
299 {
300 int ssz, sz;
301 const char *seq, *p;
302
303 /*
304 * Account for escaped sequences within string length
305 * calculations. This follows the logic in term_strlen() as we
306 * must calculate the width of produced strings.
307 * Assume that characters are always width of "1". This is
308 * hacky, but it gets the job done for approximation of widths.
309 */
310
311 sz = 0;
312 while (NULL != (p = strchr(cp, '\\'))) {
313 sz += (int)(p - cp);
314 ++cp;
315 switch (mandoc_escape(&cp, &seq, &ssz)) {
316 case (ESCAPE_ERROR):
317 return(sz);
318 case (ESCAPE_UNICODE):
319 /* FALLTHROUGH */
320 case (ESCAPE_NUMBERED):
321 /* FALLTHROUGH */
322 case (ESCAPE_PREDEF):
323 /* FALLTHROUGH */
324 case (ESCAPE_SPECIAL):
325 sz++;
326 break;
327 default:
328 break;
329 }
330 }
331
332 assert(sz >= 0);
333 return(sz + strlen(cp));
334 }
335
336 static int
337 print_encode(struct html *h, const char *p, int norecurse)
338 {
339 size_t sz;
340 int c, len, nospace;
341 const char *seq;
342 enum mandoc_esc esc;
343 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
344
345 nospace = 0;
346
347 while ('\0' != *p) {
348 sz = strcspn(p, rejs);
349
350 fwrite(p, 1, sz, stdout);
351 p += (int)sz;
352
353 if ('\0' == *p)
354 break;
355
356 switch (*p++) {
357 case ('<'):
358 printf("&lt;");
359 continue;
360 case ('>'):
361 printf("&gt;");
362 continue;
363 case ('&'):
364 printf("&amp;");
365 continue;
366 case (ASCII_HYPH):
367 putchar('-');
368 continue;
369 default:
370 break;
371 }
372
373 esc = mandoc_escape(&p, &seq, &len);
374 if (ESCAPE_ERROR == esc)
375 break;
376
377 switch (esc) {
378 case (ESCAPE_UNICODE):
379 /* Skip passed "u" header. */
380 c = mchars_num2uc(seq + 1, len - 1);
381 if ('\0' != c)
382 printf("&#x%x;", c);
383 break;
384 case (ESCAPE_NUMBERED):
385 c = mchars_num2char(seq, len);
386 if ('\0' != c)
387 putchar(c);
388 break;
389 case (ESCAPE_PREDEF):
390 print_res(h, seq, len);
391 break;
392 case (ESCAPE_SPECIAL):
393 print_spec(h, seq, len);
394 break;
395 case (ESCAPE_FONTPREV):
396 /* FALLTHROUGH */
397 case (ESCAPE_FONTBOLD):
398 /* FALLTHROUGH */
399 case (ESCAPE_FONTITALIC):
400 /* FALLTHROUGH */
401 case (ESCAPE_FONTROMAN):
402 if (norecurse)
403 break;
404 print_metaf(h, esc);
405 break;
406 case (ESCAPE_NOSPACE):
407 if ('\0' == *p)
408 nospace = 1;
409 break;
410 default:
411 break;
412 }
413 }
414
415 return(nospace);
416 }
417
418
/*
 * Write one attribute as ` key="val"`, with a leading blank.
 * The value goes through print_encode() with font escapes disabled;
 * the key is written verbatim.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{
	printf(" %s=", key);
	putchar('"');
	(void)print_encode(h, val, 1);
	putchar('"');
}
426
427
428 struct tag *
429 print_otag(struct html *h, enum htmltag tag,
430 int sz, const struct htmlpair *p)
431 {
432 int i;
433 struct tag *t;
434
435 /* Push this tags onto the stack of open scopes. */
436
437 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
438 t = mandoc_malloc(sizeof(struct tag));
439 t->tag = tag;
440 t->next = h->tags.head;
441 h->tags.head = t;
442 } else
443 t = NULL;
444
445 if ( ! (HTML_NOSPACE & h->flags))
446 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
447 /* Manage keeps! */
448 if ( ! (HTML_KEEP & h->flags)) {
449 if (HTML_PREKEEP & h->flags)
450 h->flags |= HTML_KEEP;
451 putchar(' ');
452 } else
453 printf("&#160;");
454 }
455
456 if ( ! (h->flags & HTML_NONOSPACE))
457 h->flags &= ~HTML_NOSPACE;
458 else
459 h->flags |= HTML_NOSPACE;
460
461 /* Print out the tag name and attributes. */
462
463 printf("<%s", htmltags[tag].name);
464 for (i = 0; i < sz; i++)
465 print_attr(h, htmlattrs[p[i].key], p[i].val);
466
467 /* Add non-overridable attributes. */
468
469 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
470 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
471 print_attr(h, "xml:lang", "en");
472 print_attr(h, "lang", "en");
473 }
474
475 /* Accommodate for XML "well-formed" singleton escaping. */
476
477 if (HTML_AUTOCLOSE & htmltags[tag].flags)
478 switch (h->type) {
479 case (HTML_XHTML_1_0_STRICT):
480 putchar('/');
481 break;
482 default:
483 break;
484 }
485
486 putchar('>');
487
488 h->flags |= HTML_NOSPACE;
489
490 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
491 putchar('\n');
492
493 return(t);
494 }
495
496
497 static void
498 print_ctag(struct html *h, enum htmltag tag)
499 {
500
501 printf("</%s>", htmltags[tag].name);
502 if (HTML_CLRLINE & htmltags[tag].flags) {
503 h->flags |= HTML_NOSPACE;
504 putchar('\n');
505 }
506 }
507
508 void
509 print_gen_decls(struct html *h)
510 {
511 const char *doctype;
512 const char *dtd;
513 const char *name;
514
515 switch (h->type) {
516 case (HTML_HTML_4_01_STRICT):
517 name = "HTML";
518 doctype = "-//W3C//DTD HTML 4.01//EN";
519 dtd = "http://www.w3.org/TR/html4/strict.dtd";
520 break;
521 default:
522 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
523 name = "html";
524 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
525 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
526 break;
527 }
528
529 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
530 name, doctype, dtd);
531 }
532
533 void
534 print_text(struct html *h, const char *word)
535 {
536
537 if ( ! (HTML_NOSPACE & h->flags)) {
538 /* Manage keeps! */
539 if ( ! (HTML_KEEP & h->flags)) {
540 if (HTML_PREKEEP & h->flags)
541 h->flags |= HTML_KEEP;
542 putchar(' ');
543 } else
544 printf("&#160;");
545 }
546
547 assert(NULL == h->metaf);
548 if (HTMLFONT_NONE != h->metac)
549 h->metaf = HTMLFONT_BOLD == h->metac ?
550 print_otag(h, TAG_B, 0, NULL) :
551 print_otag(h, TAG_I, 0, NULL);
552
553 assert(word);
554 if ( ! print_encode(h, word, 0))
555 if ( ! (h->flags & HTML_NONOSPACE))
556 h->flags &= ~HTML_NOSPACE;
557
558 if (h->metaf) {
559 print_tagq(h, h->metaf);
560 h->metaf = NULL;
561 }
562
563 h->flags &= ~HTML_IGNDELIM;
564 }
565
566
567 void
568 print_tagq(struct html *h, const struct tag *until)
569 {
570 struct tag *tag;
571
572 while ((tag = h->tags.head) != NULL) {
573 /*
574 * Remember to close out and nullify the current
575 * meta-font and table, if applicable.
576 */
577 if (tag == h->metaf)
578 h->metaf = NULL;
579 if (tag == h->tblt)
580 h->tblt = NULL;
581 print_ctag(h, tag->tag);
582 h->tags.head = tag->next;
583 free(tag);
584 if (until && tag == until)
585 return;
586 }
587 }
588
589
590 void
591 print_stagq(struct html *h, const struct tag *suntil)
592 {
593 struct tag *tag;
594
595 while ((tag = h->tags.head) != NULL) {
596 if (suntil && tag == suntil)
597 return;
598 /*
599 * Remember to close out and nullify the current
600 * meta-font and table, if applicable.
601 */
602 if (tag == h->metaf)
603 h->metaf = NULL;
604 if (tag == h->tblt)
605 h->tblt = NULL;
606 print_ctag(h, tag->tag);
607 h->tags.head = tag->next;
608 free(tag);
609 }
610 }
611
612 void
613 bufinit(struct html *h)
614 {
615
616 h->buf[0] = '\0';
617 h->buflen = 0;
618 }
619
/*
 * Append "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*parts[4];
	int		 i;

	parts[0] = key;
	parts[1] = ":";
	parts[2] = val;
	parts[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, parts[i]);
}
629
630 void
631 bufcat(struct html *h, const char *p)
632 {
633
634 h->buflen = strlcat(h->buf, p, BUFSIZ);
635 assert(h->buflen < BUFSIZ);
636 h->buflen--;
637 }
638
639 void
640 bufcat_fmt(struct html *h, const char *fmt, ...)
641 {
642 va_list ap;
643
644 va_start(ap, fmt);
645 (void)vsnprintf(h->buf + (int)h->buflen,
646 BUFSIZ - h->buflen - 1, fmt, ap);
647 va_end(ap);
648 h->buflen = strlen(h->buf);
649 }
650
651 static void
652 bufncat(struct html *h, const char *p, size_t sz)
653 {
654
655 assert(h->buflen + sz + 1 < BUFSIZ);
656 strncat(h->buf, p, sz);
657 h->buflen += sz;
658 }
659
660 void
661 buffmt_includes(struct html *h, const char *name)
662 {
663 const char *p, *pp;
664
665 pp = h->base_includes;
666
667 bufinit(h);
668 while (NULL != (p = strchr(pp, '%'))) {
669 bufncat(h, pp, (size_t)(p - pp));
670 switch (*(p + 1)) {
671 case('I'):
672 bufcat(h, name);
673 break;
674 default:
675 bufncat(h, p, 2);
676 break;
677 }
678 pp = p + 2;
679 }
680 if (pp)
681 bufcat(h, pp);
682 }
683
684 void
685 buffmt_man(struct html *h,
686 const char *name, const char *sec)
687 {
688 const char *p, *pp;
689
690 pp = h->base_man;
691
692 bufinit(h);
693 while (NULL != (p = strchr(pp, '%'))) {
694 bufncat(h, pp, (size_t)(p - pp));
695 switch (*(p + 1)) {
696 case('S'):
697 bufcat(h, sec ? sec : "1");
698 break;
699 case('N'):
700 bufcat_fmt(h, name);
701 break;
702 default:
703 bufncat(h, p, 2);
704 break;
705 }
706 pp = p + 2;
707 }
708 if (pp)
709 bufcat(h, pp);
710 }
711
712 void
713 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
714 {
715 double v;
716
717 v = su->scale;
718 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
719 v = 1.0;
720
721 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
722 }
723
/*
 * Append src to the scratch buffer as an identifier: every byte is
 * written as two lower-case hexadecimal digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed, a
	 * byte >= 0x80 would be sign-extended and print as
	 * "ffffffxx" rather than two digits.
	 */
	while ('\0' != *src)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src++);
}