]> git.cameronkatri.com Git - mandoc.git/blob - html.c
d99bd58fbdc28e083b30567fa0481cddda98c3db
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.121 2010/12/22 11:15:16 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mandoc.h"
33 #include "out.h"
34 #include "chars.h"
35 #include "html.h"
36 #include "main.h"
37
/*
 * Per-element output flags, stored in htmldata.flags:
 *   HTML_CLRLINE   - print_otag()/print_ctag() emit a newline after
 *                    the tag and print_otag() emits no leading space.
 *   HTML_NOSTACK   - print_otag() does not push the element onto the
 *                    stack of open tags.
 *   HTML_AUTOCLOSE - the element has no closing tag; print_otag()
 *                    writes it as <name/> for XHTML output.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2)

/*
 * Static description of one HTML element: the name written into the
 * markup and a bitmask of the HTML_* flags above.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
};
45
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
72 {"pre", HTML_CLRLINE }, /* TAG_PRE */
73 {"b", 0 }, /* TAG_B */
74 {"i", 0 }, /* TAG_I */
75 {"u", 0 }, /* TAG_U */
76 {"code", 0 }, /* TAG_CODE */
77 };
78
79 static const char *const htmlfonts[HTMLFONT_MAX] = {
80 "roman",
81 "bold",
82 "italic"
83 };
84
85 static const char *const htmlattrs[ATTR_MAX] = {
86 "http-equiv", /* ATTR_HTTPEQUIV */
87 "content", /* ATTR_CONTENT */
88 "name", /* ATTR_NAME */
89 "rel", /* ATTR_REL */
90 "href", /* ATTR_HREF */
91 "type", /* ATTR_TYPE */
92 "media", /* ATTR_MEDIA */
93 "class", /* ATTR_CLASS */
94 "style", /* ATTR_STYLE */
95 "width", /* ATTR_WIDTH */
96 "id", /* ATTR_ID */
97 "summary", /* ATTR_SUMMARY */
98 "align", /* ATTR_ALIGN */
99 };
100
/* Forward declarations of the file-local helpers defined below. */
static	void	 *ml_alloc(char *outopts, enum htmltype type);
static	void	  print_attr(struct html *h,
			const char *key, const char *val);
static	void	  print_ctag(struct html *h, enum htmltag tag);
static	void	  print_doctype(struct html *h);
static	int	  print_encode(struct html *h,
			const char *p, int norecurse);
static	void	  print_metaf(struct html *h, enum roffdeco deco);
static	void	  print_res(struct html *h, const char *p, size_t len);
static	void	  print_spec(struct html *h, enum roffdeco d,
			const char *p, size_t len);
static	void	  print_xmltype(struct html *h);
112
113
114 static void *
115 ml_alloc(char *outopts, enum htmltype type)
116 {
117 struct html *h;
118 const char *toks[4];
119 char *v;
120
121 toks[0] = "style";
122 toks[1] = "man";
123 toks[2] = "includes";
124 toks[3] = NULL;
125
126 h = calloc(1, sizeof(struct html));
127 if (NULL == h) {
128 perror(NULL);
129 exit((int)MANDOCLEVEL_SYSERR);
130 }
131
132 h->type = type;
133 h->tags.head = NULL;
134 h->symtab = chars_init(CHARS_HTML);
135
136 while (outopts && *outopts)
137 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
138 case (0):
139 h->style = v;
140 break;
141 case (1):
142 h->base_man = v;
143 break;
144 case (2):
145 h->base_includes = v;
146 break;
147 default:
148 break;
149 }
150
151 return(h);
152 }
153
154 void *
155 html_alloc(char *outopts)
156 {
157
158 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
159 }
160
161
162 void *
163 xhtml_alloc(char *outopts)
164 {
165
166 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
167 }
168
169
170 void
171 html_free(void *p)
172 {
173 struct tag *tag;
174 struct html *h;
175
176 h = (struct html *)p;
177
178 while ((tag = h->tags.head) != NULL) {
179 h->tags.head = tag->next;
180 free(tag);
181 }
182
183 if (h->symtab)
184 chars_free(h->symtab);
185
186 free(h);
187 }
188
189
190 void
191 print_gen_head(struct html *h)
192 {
193 struct htmlpair tag[4];
194
195 tag[0].key = ATTR_HTTPEQUIV;
196 tag[0].val = "Content-Type";
197 tag[1].key = ATTR_CONTENT;
198 tag[1].val = "text/html; charset=utf-8";
199 print_otag(h, TAG_META, 2, tag);
200
201 tag[0].key = ATTR_NAME;
202 tag[0].val = "resource-type";
203 tag[1].key = ATTR_CONTENT;
204 tag[1].val = "document";
205 print_otag(h, TAG_META, 2, tag);
206
207 if (h->style) {
208 tag[0].key = ATTR_REL;
209 tag[0].val = "stylesheet";
210 tag[1].key = ATTR_HREF;
211 tag[1].val = h->style;
212 tag[2].key = ATTR_TYPE;
213 tag[2].val = "text/css";
214 tag[3].key = ATTR_MEDIA;
215 tag[3].val = "all";
216 print_otag(h, TAG_LINK, 4, tag);
217 }
218 }
219
220
221 static void
222 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
223 {
224 int cp;
225 const char *rhs;
226 size_t sz;
227
228 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
229 printf("&#%d;", cp);
230 return;
231 } else if (-1 == cp && DECO_SSPECIAL == d) {
232 fwrite(p, 1, len, stdout);
233 return;
234 } else if (-1 == cp)
235 return;
236
237 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
238 fwrite(rhs, 1, sz, stdout);
239 }
240
241
242 static void
243 print_res(struct html *h, const char *p, size_t len)
244 {
245 int cp;
246 const char *rhs;
247 size_t sz;
248
249 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
250 printf("&#%d;", cp);
251 return;
252 } else if (-1 == cp)
253 return;
254
255 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
256 fwrite(rhs, 1, sz, stdout);
257 }
258
259
260 struct tag *
261 print_ofont(struct html *h, enum htmlfont font)
262 {
263 struct htmlpair tag;
264
265 h->metal = h->metac;
266 h->metac = font;
267
268 /* FIXME: DECO_ROMAN should just close out preexisting. */
269
270 if (h->metaf && h->tags.head == h->metaf)
271 print_tagq(h, h->metaf);
272
273 PAIR_CLASS_INIT(&tag, htmlfonts[font]);
274 h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
275 return(h->metaf);
276 }
277
278
279 static void
280 print_metaf(struct html *h, enum roffdeco deco)
281 {
282 enum htmlfont font;
283
284 switch (deco) {
285 case (DECO_PREVIOUS):
286 font = h->metal;
287 break;
288 case (DECO_ITALIC):
289 font = HTMLFONT_ITALIC;
290 break;
291 case (DECO_BOLD):
292 font = HTMLFONT_BOLD;
293 break;
294 case (DECO_ROMAN):
295 font = HTMLFONT_NONE;
296 break;
297 default:
298 abort();
299 /* NOTREACHED */
300 }
301
302 (void)print_ofont(h, font);
303 }
304
305
306 static int
307 print_encode(struct html *h, const char *p, int norecurse)
308 {
309 size_t sz;
310 int len, nospace;
311 const char *seq;
312 enum roffdeco deco;
313 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
314
315 nospace = 0;
316
317 for (; *p; p++) {
318 sz = strcspn(p, rejs);
319
320 fwrite(p, 1, sz, stdout);
321 p += /* LINTED */
322 sz;
323
324 if ('<' == *p) {
325 printf("&lt;");
326 continue;
327 } else if ('>' == *p) {
328 printf("&gt;");
329 continue;
330 } else if ('&' == *p) {
331 printf("&amp;");
332 continue;
333 } else if (ASCII_HYPH == *p) {
334 /*
335 * Note: "soft hyphens" aren't graphically
336 * displayed when not breaking the text; we want
337 * them to be displayed.
338 */
339 /*printf("&#173;");*/
340 putchar('-');
341 continue;
342 } else if ('\0' == *p)
343 break;
344
345 seq = ++p;
346 len = a2roffdeco(&deco, &seq, &sz);
347
348 switch (deco) {
349 case (DECO_RESERVED):
350 print_res(h, seq, sz);
351 break;
352 case (DECO_SSPECIAL):
353 /* FALLTHROUGH */
354 case (DECO_SPECIAL):
355 print_spec(h, deco, seq, sz);
356 break;
357 case (DECO_PREVIOUS):
358 /* FALLTHROUGH */
359 case (DECO_BOLD):
360 /* FALLTHROUGH */
361 case (DECO_ITALIC):
362 /* FALLTHROUGH */
363 case (DECO_ROMAN):
364 if (norecurse)
365 break;
366 print_metaf(h, deco);
367 break;
368 default:
369 break;
370 }
371
372 p += len - 1;
373
374 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
375 nospace = 1;
376 }
377
378 return(nospace);
379 }
380
381
/*
 * Print one attribute as ` key="val"`.  The value is passed through
 * print_encode() with font escapes suppressed; the key is written
 * unchanged.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('\"');
}
389
390
391 struct tag *
392 print_otag(struct html *h, enum htmltag tag,
393 int sz, const struct htmlpair *p)
394 {
395 int i;
396 struct tag *t;
397
398 /* Push this tags onto the stack of open scopes. */
399
400 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
401 t = malloc(sizeof(struct tag));
402 if (NULL == t) {
403 perror(NULL);
404 exit((int)MANDOCLEVEL_SYSERR);
405 }
406 t->tag = tag;
407 t->next = h->tags.head;
408 h->tags.head = t;
409 } else
410 t = NULL;
411
412 if ( ! (HTML_NOSPACE & h->flags))
413 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
414 /* Manage keeps! */
415 if ( ! (HTML_KEEP & h->flags)) {
416 if (HTML_PREKEEP & h->flags)
417 h->flags |= HTML_KEEP;
418 putchar(' ');
419 } else
420 printf("&#160;");
421 }
422
423 if ( ! (h->flags & HTML_NONOSPACE))
424 h->flags &= ~HTML_NOSPACE;
425 else
426 h->flags |= HTML_NOSPACE;
427
428 /* Print out the tag name and attributes. */
429
430 printf("<%s", htmltags[tag].name);
431 for (i = 0; i < sz; i++)
432 print_attr(h, htmlattrs[p[i].key], p[i].val);
433
434 /* Add non-overridable attributes. */
435
436 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
437 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
438 print_attr(h, "xml:lang", "en");
439 print_attr(h, "lang", "en");
440 }
441
442 /* Accomodate for XML "well-formed" singleton escaping. */
443
444 if (HTML_AUTOCLOSE & htmltags[tag].flags)
445 switch (h->type) {
446 case (HTML_XHTML_1_0_STRICT):
447 putchar('/');
448 break;
449 default:
450 break;
451 }
452
453 putchar('>');
454
455 h->flags |= HTML_NOSPACE;
456
457 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
458 putchar('\n');
459
460 return(t);
461 }
462
463
464 static void
465 print_ctag(struct html *h, enum htmltag tag)
466 {
467
468 printf("</%s>", htmltags[tag].name);
469 if (HTML_CLRLINE & htmltags[tag].flags) {
470 h->flags |= HTML_NOSPACE;
471 putchar('\n');
472 }
473 }
474
475
/*
 * Print the declarations that precede the root element: the XML
 * declaration (written for XHTML only) followed by the DOCTYPE.
 */
void
print_gen_decls(struct html *h)
{

	print_xmltype(h);	/* May print nothing. */
	print_doctype(h);
}
483
484
485 static void
486 print_xmltype(struct html *h)
487 {
488
489 if (HTML_XHTML_1_0_STRICT == h->type)
490 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
491 }
492
493
494 static void
495 print_doctype(struct html *h)
496 {
497 const char *doctype;
498 const char *dtd;
499 const char *name;
500
501 switch (h->type) {
502 case (HTML_HTML_4_01_STRICT):
503 name = "HTML";
504 doctype = "-//W3C//DTD HTML 4.01//EN";
505 dtd = "http://www.w3.org/TR/html4/strict.dtd";
506 break;
507 default:
508 name = "html";
509 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
510 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
511 break;
512 }
513
514 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
515 name, doctype, dtd);
516 }
517
518
519 void
520 print_text(struct html *h, const char *word)
521 {
522
523 if (word[0] && '\0' == word[1])
524 switch (word[0]) {
525 case('.'):
526 /* FALLTHROUGH */
527 case(','):
528 /* FALLTHROUGH */
529 case(';'):
530 /* FALLTHROUGH */
531 case(':'):
532 /* FALLTHROUGH */
533 case('?'):
534 /* FALLTHROUGH */
535 case('!'):
536 /* FALLTHROUGH */
537 case(')'):
538 /* FALLTHROUGH */
539 case(']'):
540 if ( ! (HTML_IGNDELIM & h->flags))
541 h->flags |= HTML_NOSPACE;
542 break;
543 default:
544 break;
545 }
546
547 if ( ! (HTML_NOSPACE & h->flags)) {
548 /* Manage keeps! */
549 if ( ! (HTML_KEEP & h->flags)) {
550 if (HTML_PREKEEP & h->flags)
551 h->flags |= HTML_KEEP;
552 putchar(' ');
553 } else
554 printf("&#160;");
555 }
556
557 assert(word);
558 if ( ! print_encode(h, word, 0))
559 if ( ! (h->flags & HTML_NONOSPACE))
560 h->flags &= ~HTML_NOSPACE;
561
562 h->flags &= ~HTML_IGNDELIM;
563
564 /*
565 * Note that we don't process the pipe: the parser sees it as
566 * punctuation, but we don't in terms of typography.
567 */
568 if (word[0] && '\0' == word[1])
569 switch (word[0]) {
570 case('('):
571 /* FALLTHROUGH */
572 case('['):
573 h->flags |= HTML_NOSPACE;
574 break;
575 default:
576 break;
577 }
578 }
579
580
581 void
582 print_tagq(struct html *h, const struct tag *until)
583 {
584 struct tag *tag;
585
586 while ((tag = h->tags.head) != NULL) {
587 if (tag == h->metaf)
588 h->metaf = NULL;
589 print_ctag(h, tag->tag);
590 h->tags.head = tag->next;
591 free(tag);
592 if (until && tag == until)
593 return;
594 }
595 }
596
597
598 void
599 print_stagq(struct html *h, const struct tag *suntil)
600 {
601 struct tag *tag;
602
603 while ((tag = h->tags.head) != NULL) {
604 if (suntil && tag == suntil)
605 return;
606 if (tag == h->metaf)
607 h->metaf = NULL;
608 print_ctag(h, tag->tag);
609 h->tags.head = tag->next;
610 free(tag);
611 }
612 }
613
614
615 void
616 bufinit(struct html *h)
617 {
618
619 h->buf[0] = '\0';
620 h->buflen = 0;
621 }
622
623
/*
 * Append the declaration "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
633
634
/*
 * Append the NUL-terminated string "p" to the scratch buffer; see
 * bufncat() for the handling of a full buffer.
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
641
642
643 void
644 buffmt(struct html *h, const char *fmt, ...)
645 {
646 va_list ap;
647
648 va_start(ap, fmt);
649 (void)vsnprintf(h->buf + (int)h->buflen,
650 BUFSIZ - h->buflen - 1, fmt, ap);
651 va_end(ap);
652 h->buflen = strlen(h->buf);
653 }
654
655
656 void
657 bufncat(struct html *h, const char *p, size_t sz)
658 {
659
660 if (h->buflen + sz > BUFSIZ - 1)
661 sz = BUFSIZ - 1 - h->buflen;
662
663 (void)strncat(h->buf, p, sz);
664 h->buflen += sz;
665 }
666
667
668 void
669 buffmt_includes(struct html *h, const char *name)
670 {
671 const char *p, *pp;
672
673 pp = h->base_includes;
674
675 while (NULL != (p = strchr(pp, '%'))) {
676 bufncat(h, pp, (size_t)(p - pp));
677 switch (*(p + 1)) {
678 case('I'):
679 bufcat(h, name);
680 break;
681 default:
682 bufncat(h, p, 2);
683 break;
684 }
685 pp = p + 2;
686 }
687 if (pp)
688 bufcat(h, pp);
689 }
690
691
692 void
693 buffmt_man(struct html *h,
694 const char *name, const char *sec)
695 {
696 const char *p, *pp;
697
698 pp = h->base_man;
699
700 /* LINTED */
701 while (NULL != (p = strchr(pp, '%'))) {
702 bufncat(h, pp, (size_t)(p - pp));
703 switch (*(p + 1)) {
704 case('S'):
705 bufcat(h, sec ? sec : "1");
706 break;
707 case('N'):
708 buffmt(h, name);
709 break;
710 default:
711 bufncat(h, p, 2);
712 break;
713 }
714 pp = p + 2;
715 }
716 if (pp)
717 bufcat(h, pp);
718 }
719
720
721 void
722 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
723 {
724 double v;
725 const char *u;
726
727 v = su->scale;
728
729 switch (su->unit) {
730 case (SCALE_CM):
731 u = "cm";
732 break;
733 case (SCALE_IN):
734 u = "in";
735 break;
736 case (SCALE_PC):
737 u = "pc";
738 break;
739 case (SCALE_PT):
740 u = "pt";
741 break;
742 case (SCALE_EM):
743 u = "em";
744 break;
745 case (SCALE_MM):
746 if (0 == (v /= 100))
747 v = 1;
748 u = "em";
749 break;
750 case (SCALE_EN):
751 u = "ex";
752 break;
753 case (SCALE_BU):
754 u = "ex";
755 break;
756 case (SCALE_VS):
757 u = "em";
758 break;
759 default:
760 u = "ex";
761 break;
762 }
763
764 /*
765 * XXX: the CSS spec isn't clear as to which types accept
766 * integer or real numbers, so we just make them all decimals.
767 */
768 buffmt(h, "%s: %.2f%s;", p, v, u);
769 }
770
771
/*
 * Append an identifier built from "src" to the NUL-terminated string
 * in "dst", a buffer of "sz" bytes in total.
 *
 * The appended text is the letter 'x' followed by two lower-case
 * hexadecimal digits per byte of "src".  Bytes are converted as
 * unsigned values, so bytes >= 0x80 yield two digits as well.  If the
 * buffer runs out, the text is cut after the last complete pair; the
 * result is always NUL-terminated.
 *
 * "sz" must be non-zero and at least three bytes must be free behind
 * the existing contents of "dst" (both are asserted).
 */
void
html_idcat(char *dst, const char *src, int sz)
{

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/* Each pair needs three bytes: two digits and the NUL. */

	for ( ; *src != '\0' && sz > 2; src++) {
		(void)snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		dst += 2;
		sz -= 2;
	}
}