]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Move mdoc_isdelim() into mandoc.h as mandoc_isdelim(). This allows the
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.129 2011/03/17 09:16:38 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc.h"
34 #include "out.h"
35 #include "chars.h"
36 #include "html.h"
37 #include "main.h"
38
/*
 * Bits for htmldata.flags.
 *
 * HTML_CLRLINE:   print_otag() and print_ctag() emit a newline after
 *                 the tag, and print_otag() emits no leading space.
 * HTML_NOSTACK:   print_otag() does not push the tag on the stack of
 *                 open scopes, so it is never closed later.
 * HTML_AUTOCLOSE: the tag closes itself; in XHTML mode print_otag()
 *                 writes "/" before the final ">".
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/* One row of the tag table: the element name and its flag bits. */
struct	htmldata {
	const char	 *name;		/* element name as printed */
	int		  flags;	/* HTML_CLRLINE et al. */
};
46
47 static const struct htmldata htmltags[TAG_MAX] = {
48 {"html", HTML_CLRLINE}, /* TAG_HTML */
49 {"head", HTML_CLRLINE}, /* TAG_HEAD */
50 {"body", HTML_CLRLINE}, /* TAG_BODY */
51 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 {"title", HTML_CLRLINE}, /* TAG_TITLE */
53 {"div", HTML_CLRLINE}, /* TAG_DIV */
54 {"h1", 0}, /* TAG_H1 */
55 {"h2", 0}, /* TAG_H2 */
56 {"span", 0}, /* TAG_SPAN */
57 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
58 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"a", 0}, /* TAG_A */
60 {"table", HTML_CLRLINE}, /* TAG_TABLE */
61 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
62 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
63 {"tr", HTML_CLRLINE}, /* TAG_TR */
64 {"td", HTML_CLRLINE}, /* TAG_TD */
65 {"li", HTML_CLRLINE}, /* TAG_LI */
66 {"ul", HTML_CLRLINE}, /* TAG_UL */
67 {"ol", HTML_CLRLINE}, /* TAG_OL */
68 {"dl", HTML_CLRLINE}, /* TAG_DL */
69 {"dt", HTML_CLRLINE}, /* TAG_DT */
70 {"dd", HTML_CLRLINE}, /* TAG_DD */
71 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
72 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
73 {"pre", HTML_CLRLINE }, /* TAG_PRE */
74 {"b", 0 }, /* TAG_B */
75 {"i", 0 }, /* TAG_I */
76 {"code", 0 }, /* TAG_CODE */
77 {"small", 0 }, /* TAG_SMALL */
78 };
79
80 static const char *const htmlattrs[ATTR_MAX] = {
81 "http-equiv", /* ATTR_HTTPEQUIV */
82 "content", /* ATTR_CONTENT */
83 "name", /* ATTR_NAME */
84 "rel", /* ATTR_REL */
85 "href", /* ATTR_HREF */
86 "type", /* ATTR_TYPE */
87 "media", /* ATTR_MEDIA */
88 "class", /* ATTR_CLASS */
89 "style", /* ATTR_STYLE */
90 "width", /* ATTR_WIDTH */
91 "id", /* ATTR_ID */
92 "summary", /* ATTR_SUMMARY */
93 "align", /* ATTR_ALIGN */
94 "colspan", /* ATTR_COLSPAN */
95 };
96
/* Forward declarations of the file-local helpers defined below. */
static	void	*ml_alloc(char *outopts, enum htmltype type);
static	void	 print_attr(struct html *h,
			const char *key, const char *val);
static	void	 print_ctag(struct html *h, enum htmltag tag);
static	void	 print_doctype(struct html *h);
static	int	 print_encode(struct html *h, const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum roffdeco deco);
static	void	 print_num(struct html *h, const char *p, size_t len);
static	void	 print_res(struct html *h, const char *p, size_t len);
static	void	 print_spec(struct html *h, enum roffdeco d,
			const char *p, size_t len);
static	void	 print_xmltype(struct html *h);
109
110
111 static void *
112 ml_alloc(char *outopts, enum htmltype type)
113 {
114 struct html *h;
115 const char *toks[4];
116 char *v;
117
118 toks[0] = "style";
119 toks[1] = "man";
120 toks[2] = "includes";
121 toks[3] = NULL;
122
123 h = mandoc_calloc(1, sizeof(struct html));
124
125 h->type = type;
126 h->tags.head = NULL;
127 h->symtab = chars_init(CHARS_HTML);
128
129 while (outopts && *outopts)
130 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
131 case (0):
132 h->style = v;
133 break;
134 case (1):
135 h->base_man = v;
136 break;
137 case (2):
138 h->base_includes = v;
139 break;
140 default:
141 break;
142 }
143
144 return(h);
145 }
146
147 void *
148 html_alloc(char *outopts)
149 {
150
151 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
152 }
153
154
155 void *
156 xhtml_alloc(char *outopts)
157 {
158
159 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
160 }
161
162
163 void
164 html_free(void *p)
165 {
166 struct tag *tag;
167 struct html *h;
168
169 h = (struct html *)p;
170
171 while ((tag = h->tags.head) != NULL) {
172 h->tags.head = tag->next;
173 free(tag);
174 }
175
176 if (h->symtab)
177 chars_free(h->symtab);
178
179 free(h);
180 }
181
182
183 void
184 print_gen_head(struct html *h)
185 {
186 struct htmlpair tag[4];
187
188 tag[0].key = ATTR_HTTPEQUIV;
189 tag[0].val = "Content-Type";
190 tag[1].key = ATTR_CONTENT;
191 tag[1].val = "text/html; charset=utf-8";
192 print_otag(h, TAG_META, 2, tag);
193
194 tag[0].key = ATTR_NAME;
195 tag[0].val = "resource-type";
196 tag[1].key = ATTR_CONTENT;
197 tag[1].val = "document";
198 print_otag(h, TAG_META, 2, tag);
199
200 if (h->style) {
201 tag[0].key = ATTR_REL;
202 tag[0].val = "stylesheet";
203 tag[1].key = ATTR_HREF;
204 tag[1].val = h->style;
205 tag[2].key = ATTR_TYPE;
206 tag[2].val = "text/css";
207 tag[3].key = ATTR_MEDIA;
208 tag[3].val = "all";
209 print_otag(h, TAG_LINK, 4, tag);
210 }
211 }
212
/*
 * Print the character a numbered escape of length len at p maps to,
 * as reported by chars_num2char(); print nothing when that lookup
 * yields NULL.  The h argument is not used.
 */
/* ARGSUSED */
static void
print_num(struct html *h, const char *p, size_t len)
{
	const char	*ch;

	ch = chars_num2char(p, len);
	if (NULL == ch)
		return;

	putchar((int)*ch);
}
223
224 static void
225 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
226 {
227 int cp;
228 const char *rhs;
229 size_t sz;
230
231 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
232 printf("&#%d;", cp);
233 return;
234 } else if (-1 == cp && DECO_SSPECIAL == d) {
235 fwrite(p, 1, len, stdout);
236 return;
237 } else if (-1 == cp)
238 return;
239
240 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
241 fwrite(rhs, 1, sz, stdout);
242 }
243
244
245 static void
246 print_res(struct html *h, const char *p, size_t len)
247 {
248 int cp;
249 const char *rhs;
250 size_t sz;
251
252 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
253 printf("&#%d;", cp);
254 return;
255 } else if (-1 == cp)
256 return;
257
258 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
259 fwrite(rhs, 1, sz, stdout);
260 }
261
262
263 static void
264 print_metaf(struct html *h, enum roffdeco deco)
265 {
266 enum htmlfont font;
267
268 switch (deco) {
269 case (DECO_PREVIOUS):
270 font = h->metal;
271 break;
272 case (DECO_ITALIC):
273 font = HTMLFONT_ITALIC;
274 break;
275 case (DECO_BOLD):
276 font = HTMLFONT_BOLD;
277 break;
278 case (DECO_ROMAN):
279 font = HTMLFONT_NONE;
280 break;
281 default:
282 abort();
283 /* NOTREACHED */
284 }
285
286 if (h->metaf) {
287 print_tagq(h, h->metaf);
288 h->metaf = NULL;
289 }
290
291 h->metal = h->metac;
292 h->metac = font;
293
294 if (HTMLFONT_NONE != font)
295 h->metaf = HTMLFONT_BOLD == font ?
296 print_otag(h, TAG_B, 0, NULL) :
297 print_otag(h, TAG_I, 0, NULL);
298 }
299
300
301 static int
302 print_encode(struct html *h, const char *p, int norecurse)
303 {
304 size_t sz;
305 int len, nospace;
306 const char *seq;
307 enum roffdeco deco;
308 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
309
310 nospace = 0;
311
312 for (; *p; p++) {
313 sz = strcspn(p, rejs);
314
315 fwrite(p, 1, sz, stdout);
316 p += /* LINTED */
317 sz;
318
319 if ('<' == *p) {
320 printf("&lt;");
321 continue;
322 } else if ('>' == *p) {
323 printf("&gt;");
324 continue;
325 } else if ('&' == *p) {
326 printf("&amp;");
327 continue;
328 } else if (ASCII_HYPH == *p) {
329 /*
330 * Note: "soft hyphens" aren't graphically
331 * displayed when not breaking the text; we want
332 * them to be displayed.
333 */
334 /*printf("&#173;");*/
335 putchar('-');
336 continue;
337 } else if ('\0' == *p)
338 break;
339
340 seq = ++p;
341 len = a2roffdeco(&deco, &seq, &sz);
342
343 switch (deco) {
344 case (DECO_NUMBERED):
345 print_num(h, seq, sz);
346 break;
347 case (DECO_RESERVED):
348 print_res(h, seq, sz);
349 break;
350 case (DECO_SSPECIAL):
351 /* FALLTHROUGH */
352 case (DECO_SPECIAL):
353 print_spec(h, deco, seq, sz);
354 break;
355 case (DECO_PREVIOUS):
356 /* FALLTHROUGH */
357 case (DECO_BOLD):
358 /* FALLTHROUGH */
359 case (DECO_ITALIC):
360 /* FALLTHROUGH */
361 case (DECO_ROMAN):
362 if (norecurse)
363 break;
364 print_metaf(h, deco);
365 break;
366 default:
367 break;
368 }
369
370 p += len - 1;
371
372 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
373 nospace = 1;
374 }
375
376 return(nospace);
377 }
378
379
/*
 * Print one attribute as ` key="val"`.  The value goes through
 * print_encode() with font escapes disabled; its result is ignored.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	printf(" %s=\"", key);
	(void)print_encode(h, val, 1);
	putchar('"');
}
387
388
389 struct tag *
390 print_otag(struct html *h, enum htmltag tag,
391 int sz, const struct htmlpair *p)
392 {
393 int i;
394 struct tag *t;
395
396 /* Push this tags onto the stack of open scopes. */
397
398 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
399 t = mandoc_malloc(sizeof(struct tag));
400 t->tag = tag;
401 t->next = h->tags.head;
402 h->tags.head = t;
403 } else
404 t = NULL;
405
406 if ( ! (HTML_NOSPACE & h->flags))
407 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
408 /* Manage keeps! */
409 if ( ! (HTML_KEEP & h->flags)) {
410 if (HTML_PREKEEP & h->flags)
411 h->flags |= HTML_KEEP;
412 putchar(' ');
413 } else
414 printf("&#160;");
415 }
416
417 if ( ! (h->flags & HTML_NONOSPACE))
418 h->flags &= ~HTML_NOSPACE;
419 else
420 h->flags |= HTML_NOSPACE;
421
422 /* Print out the tag name and attributes. */
423
424 printf("<%s", htmltags[tag].name);
425 for (i = 0; i < sz; i++)
426 print_attr(h, htmlattrs[p[i].key], p[i].val);
427
428 /* Add non-overridable attributes. */
429
430 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
431 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
432 print_attr(h, "xml:lang", "en");
433 print_attr(h, "lang", "en");
434 }
435
436 /* Accomodate for XML "well-formed" singleton escaping. */
437
438 if (HTML_AUTOCLOSE & htmltags[tag].flags)
439 switch (h->type) {
440 case (HTML_XHTML_1_0_STRICT):
441 putchar('/');
442 break;
443 default:
444 break;
445 }
446
447 putchar('>');
448
449 h->flags |= HTML_NOSPACE;
450
451 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
452 putchar('\n');
453
454 return(t);
455 }
456
457
458 static void
459 print_ctag(struct html *h, enum htmltag tag)
460 {
461
462 printf("</%s>", htmltags[tag].name);
463 if (HTML_CLRLINE & htmltags[tag].flags) {
464 h->flags |= HTML_NOSPACE;
465 putchar('\n');
466 }
467 }
468
469
/*
 * Print the document prologue: the XML declaration (XHTML only, see
 * print_xmltype()) followed by the DOCTYPE declaration.
 */
void
print_gen_decls(struct html *h)
{

	/* Order matters: the XML declaration must come first. */
	print_xmltype(h);
	print_doctype(h);
}
477
478
479 static void
480 print_xmltype(struct html *h)
481 {
482
483 if (HTML_XHTML_1_0_STRICT == h->type)
484 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
485 }
486
487
488 static void
489 print_doctype(struct html *h)
490 {
491 const char *doctype;
492 const char *dtd;
493 const char *name;
494
495 switch (h->type) {
496 case (HTML_HTML_4_01_STRICT):
497 name = "HTML";
498 doctype = "-//W3C//DTD HTML 4.01//EN";
499 dtd = "http://www.w3.org/TR/html4/strict.dtd";
500 break;
501 default:
502 name = "html";
503 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
504 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
505 break;
506 }
507
508 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
509 name, doctype, dtd);
510 }
511
512
513 void
514 print_text(struct html *h, const char *word)
515 {
516
517 if (DELIM_CLOSE == mandoc_isdelim(word))
518 if ( ! (HTML_IGNDELIM & h->flags))
519 h->flags |= HTML_NOSPACE;
520
521 if ( ! (HTML_NOSPACE & h->flags)) {
522 /* Manage keeps! */
523 if ( ! (HTML_KEEP & h->flags)) {
524 if (HTML_PREKEEP & h->flags)
525 h->flags |= HTML_KEEP;
526 putchar(' ');
527 } else
528 printf("&#160;");
529 }
530
531 assert(NULL == h->metaf);
532 if (HTMLFONT_NONE != h->metac)
533 h->metaf = HTMLFONT_BOLD == h->metac ?
534 print_otag(h, TAG_B, 0, NULL) :
535 print_otag(h, TAG_I, 0, NULL);
536
537 assert(word);
538 if ( ! print_encode(h, word, 0))
539 if ( ! (h->flags & HTML_NONOSPACE))
540 h->flags &= ~HTML_NOSPACE;
541
542 if (h->metaf) {
543 print_tagq(h, h->metaf);
544 h->metaf = NULL;
545 }
546
547 h->flags &= ~HTML_IGNDELIM;
548
549 if (DELIM_OPEN == mandoc_isdelim(word))
550 h->flags |= HTML_NOSPACE;
551 }
552
553
554 void
555 print_tagq(struct html *h, const struct tag *until)
556 {
557 struct tag *tag;
558
559 while ((tag = h->tags.head) != NULL) {
560 /*
561 * Remember to close out and nullify the current
562 * meta-font and table, if applicable.
563 */
564 if (tag == h->metaf)
565 h->metaf = NULL;
566 if (tag == h->tblt)
567 h->tblt = NULL;
568 print_ctag(h, tag->tag);
569 h->tags.head = tag->next;
570 free(tag);
571 if (until && tag == until)
572 return;
573 }
574 }
575
576
577 void
578 print_stagq(struct html *h, const struct tag *suntil)
579 {
580 struct tag *tag;
581
582 while ((tag = h->tags.head) != NULL) {
583 if (suntil && tag == suntil)
584 return;
585 /*
586 * Remember to close out and nullify the current
587 * meta-font and table, if applicable.
588 */
589 if (tag == h->metaf)
590 h->metaf = NULL;
591 if (tag == h->tblt)
592 h->tblt = NULL;
593 print_ctag(h, tag->tag);
594 h->tags.head = tag->next;
595 free(tag);
596 }
597 }
598
599
600 void
601 bufinit(struct html *h)
602 {
603
604 h->buf[0] = '\0';
605 h->buflen = 0;
606 }
607
608
/*
 * Append one "key:val;" style declaration to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	/* Property name and its separator. */
	bufcat(h, key);
	bufncat(h, ":", 1);

	/* Property value and the terminator. */
	bufcat(h, val);
	bufncat(h, ";", 1);
}
618
619
/*
 * Append the whole NUL-terminated string p to the scratch buffer by
 * way of bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
	size_t	 len;

	len = strlen(p);
	bufncat(h, p, len);
}
626
627
628 void
629 buffmt(struct html *h, const char *fmt, ...)
630 {
631 va_list ap;
632
633 va_start(ap, fmt);
634 (void)vsnprintf(h->buf + (int)h->buflen,
635 BUFSIZ - h->buflen - 1, fmt, ap);
636 va_end(ap);
637 h->buflen = strlen(h->buf);
638 }
639
640
641 void
642 bufncat(struct html *h, const char *p, size_t sz)
643 {
644
645 if (h->buflen + sz > BUFSIZ - 1)
646 sz = BUFSIZ - 1 - h->buflen;
647
648 (void)strncat(h->buf, p, sz);
649 h->buflen += sz;
650 }
651
652
653 void
654 buffmt_includes(struct html *h, const char *name)
655 {
656 const char *p, *pp;
657
658 pp = h->base_includes;
659
660 while (NULL != (p = strchr(pp, '%'))) {
661 bufncat(h, pp, (size_t)(p - pp));
662 switch (*(p + 1)) {
663 case('I'):
664 bufcat(h, name);
665 break;
666 default:
667 bufncat(h, p, 2);
668 break;
669 }
670 pp = p + 2;
671 }
672 if (pp)
673 bufcat(h, pp);
674 }
675
676
677 void
678 buffmt_man(struct html *h,
679 const char *name, const char *sec)
680 {
681 const char *p, *pp;
682
683 pp = h->base_man;
684
685 /* LINTED */
686 while (NULL != (p = strchr(pp, '%'))) {
687 bufncat(h, pp, (size_t)(p - pp));
688 switch (*(p + 1)) {
689 case('S'):
690 bufcat(h, sec ? sec : "1");
691 break;
692 case('N'):
693 buffmt(h, name);
694 break;
695 default:
696 bufncat(h, p, 2);
697 break;
698 }
699 pp = p + 2;
700 }
701 if (pp)
702 bufcat(h, pp);
703 }
704
705
706 void
707 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
708 {
709 double v;
710 const char *u;
711
712 v = su->scale;
713
714 switch (su->unit) {
715 case (SCALE_CM):
716 u = "cm";
717 break;
718 case (SCALE_IN):
719 u = "in";
720 break;
721 case (SCALE_PC):
722 u = "pc";
723 break;
724 case (SCALE_PT):
725 u = "pt";
726 break;
727 case (SCALE_EM):
728 u = "em";
729 break;
730 case (SCALE_MM):
731 if (0 == (v /= 100))
732 v = 1;
733 u = "em";
734 break;
735 case (SCALE_EN):
736 u = "ex";
737 break;
738 case (SCALE_BU):
739 u = "ex";
740 break;
741 case (SCALE_VS):
742 u = "em";
743 break;
744 default:
745 u = "ex";
746 break;
747 }
748
749 /*
750 * XXX: the CSS spec isn't clear as to which types accept
751 * integer or real numbers, so we just make them all decimals.
752 */
753 buffmt(h, "%s: %.2f%s;", p, v, u);
754 }
755
756
/*
 * Append src to the identifier being built in dst, writing every
 * byte of src as two lower-case hexadecimal digits.
 *
 * dst must hold a NUL-terminated string and sz is the size of the
 * dst buffer in bytes (more than 2).  A leading '#' in dst is skipped
 * over.  If the identifier is still empty at that point, it is
 * started with the letter 'x'.  Only whole bytes are encoded:
 * encoding stops as soon as fewer than three bytes of room (two
 * digits plus the terminator) remain, so dst always stays
 * NUL-terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz > 2);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/* Skip the fragment marker of a "#id" style reference. */

	if ('#' == *dst) {
		dst++;
		sz--;
	}

	/* An identifier must not start with a digit: lead with 'x'. */

	if ('\0' == *dst) {
		*dst++ = 'x';
		*dst = '\0';
		sz--;
	}

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	for ( ; *src != '\0' && sz > 2; src++) {
		/*
		 * Go through unsigned char so that bytes >= 0x80 are
		 * not sign-extended into eight hex digits, and so that
		 * the argument really is the unsigned int "%x" wants.
		 */
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		if (ssz < 0)
			break;
		sz -= ssz;
		dst += ssz;
	}
}