]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Implement the \N'number' (numbered character) roff escape sequence.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.126 2011/01/30 16:05:37 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc.h"
34 #include "out.h"
35 #include "chars.h"
36 #include "html.h"
37 #include "main.h"
38
/*
 * Per-element flag bits stored in htmldata.flags.
 */
#define	HTML_CLRLINE	 (1 << 0) /* Tag is followed by a newline. */
#define	HTML_NOSTACK	 (1 << 1) /* Tag is not pushed on the open-tag stack. */
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/*
 * Static description of one HTML element: the name printed between
 * the angle brackets and the HTML_* flag bits that apply to it.
 */
struct	htmldata {
	const char	 *name;		/* element name, e.g. "div" */
	int		  flags;	/* OR of the HTML_* bits above */
};
46
47 static const struct htmldata htmltags[TAG_MAX] = {
48 {"html", HTML_CLRLINE}, /* TAG_HTML */
49 {"head", HTML_CLRLINE}, /* TAG_HEAD */
50 {"body", HTML_CLRLINE}, /* TAG_BODY */
51 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 {"title", HTML_CLRLINE}, /* TAG_TITLE */
53 {"div", HTML_CLRLINE}, /* TAG_DIV */
54 {"h1", 0}, /* TAG_H1 */
55 {"h2", 0}, /* TAG_H2 */
56 {"span", 0}, /* TAG_SPAN */
57 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
58 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"a", 0}, /* TAG_A */
60 {"table", HTML_CLRLINE}, /* TAG_TABLE */
61 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
62 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
63 {"tr", HTML_CLRLINE}, /* TAG_TR */
64 {"td", HTML_CLRLINE}, /* TAG_TD */
65 {"li", HTML_CLRLINE}, /* TAG_LI */
66 {"ul", HTML_CLRLINE}, /* TAG_UL */
67 {"ol", HTML_CLRLINE}, /* TAG_OL */
68 {"dl", HTML_CLRLINE}, /* TAG_DL */
69 {"dt", HTML_CLRLINE}, /* TAG_DT */
70 {"dd", HTML_CLRLINE}, /* TAG_DD */
71 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
72 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
73 {"pre", HTML_CLRLINE }, /* TAG_PRE */
74 {"b", 0 }, /* TAG_B */
75 {"i", 0 }, /* TAG_I */
76 {"code", 0 }, /* TAG_CODE */
77 {"small", 0 }, /* TAG_SMALL */
78 };
79
80 static const char *const htmlattrs[ATTR_MAX] = {
81 "http-equiv", /* ATTR_HTTPEQUIV */
82 "content", /* ATTR_CONTENT */
83 "name", /* ATTR_NAME */
84 "rel", /* ATTR_REL */
85 "href", /* ATTR_HREF */
86 "type", /* ATTR_TYPE */
87 "media", /* ATTR_MEDIA */
88 "class", /* ATTR_CLASS */
89 "style", /* ATTR_STYLE */
90 "width", /* ATTR_WIDTH */
91 "id", /* ATTR_ID */
92 "summary", /* ATTR_SUMMARY */
93 "align", /* ATTR_ALIGN */
94 "colspan", /* ATTR_COLSPAN */
95 };
96
/* Allocation shared by html_alloc() and xhtml_alloc(). */
static	void	 *ml_alloc(char *outopts, enum htmltype type);

/* Document prologue. */
static	void	  print_doctype(struct html *h);
static	void	  print_xmltype(struct html *h);

/* Tag and attribute output. */
static	void	  print_attr(struct html *h,
			const char *key, const char *val);
static	void	  print_ctag(struct html *h, enum htmltag tag);

/* Text encoding and roff escape handling. */
static	int	  print_encode(struct html *h, const char *p, int norecurse);
static	void	  print_metaf(struct html *h, enum roffdeco deco);
static	void	  print_num(struct html *h, const char *p, size_t len);
static	void	  print_res(struct html *h, const char *p, size_t len);
static	void	  print_spec(struct html *h, enum roffdeco d,
			const char *p, size_t len);
109
110
111 static void *
112 ml_alloc(char *outopts, enum htmltype type)
113 {
114 struct html *h;
115 const char *toks[4];
116 char *v;
117
118 toks[0] = "style";
119 toks[1] = "man";
120 toks[2] = "includes";
121 toks[3] = NULL;
122
123 h = calloc(1, sizeof(struct html));
124 if (NULL == h) {
125 perror(NULL);
126 exit((int)MANDOCLEVEL_SYSERR);
127 }
128
129 h->type = type;
130 h->tags.head = NULL;
131 h->symtab = chars_init(CHARS_HTML);
132
133 while (outopts && *outopts)
134 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
135 case (0):
136 h->style = v;
137 break;
138 case (1):
139 h->base_man = v;
140 break;
141 case (2):
142 h->base_includes = v;
143 break;
144 default:
145 break;
146 }
147
148 return(h);
149 }
150
151 void *
152 html_alloc(char *outopts)
153 {
154
155 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
156 }
157
158
159 void *
160 xhtml_alloc(char *outopts)
161 {
162
163 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
164 }
165
166
167 void
168 html_free(void *p)
169 {
170 struct tag *tag;
171 struct html *h;
172
173 h = (struct html *)p;
174
175 while ((tag = h->tags.head) != NULL) {
176 h->tags.head = tag->next;
177 free(tag);
178 }
179
180 if (h->symtab)
181 chars_free(h->symtab);
182
183 free(h);
184 }
185
186
187 void
188 print_gen_head(struct html *h)
189 {
190 struct htmlpair tag[4];
191
192 tag[0].key = ATTR_HTTPEQUIV;
193 tag[0].val = "Content-Type";
194 tag[1].key = ATTR_CONTENT;
195 tag[1].val = "text/html; charset=utf-8";
196 print_otag(h, TAG_META, 2, tag);
197
198 tag[0].key = ATTR_NAME;
199 tag[0].val = "resource-type";
200 tag[1].key = ATTR_CONTENT;
201 tag[1].val = "document";
202 print_otag(h, TAG_META, 2, tag);
203
204 if (h->style) {
205 tag[0].key = ATTR_REL;
206 tag[0].val = "stylesheet";
207 tag[1].key = ATTR_HREF;
208 tag[1].val = h->style;
209 tag[2].key = ATTR_TYPE;
210 tag[2].val = "text/css";
211 tag[3].key = ATTR_MEDIA;
212 tag[3].val = "all";
213 print_otag(h, TAG_LINK, 4, tag);
214 }
215 }
216
217
/*
 * Print the character selected by a numbered-character escape.
 *
 * "p"/"len" delimit the escape argument; chars_num2char() turns it
 * into a pointer to the resulting character, or NULL when there is
 * nothing to print.  The character is written to stdout.
 *
 * The characters that are significant in HTML markup are written as
 * entities: emitting them raw would let an escape such as \N'60'
 * inject a literal '<' into the document (or a '"' into an attribute
 * value).  Every other character is written unchanged.
 */
static void
print_num(struct html *h, const char *p, size_t len)
{
	const char	*rhs;

	rhs = chars_num2char(p, len);
	if (NULL == rhs)
		return;

	switch (*rhs) {
	case ('<'):
		printf("&lt;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('"'):
		printf("&quot;");
		break;
	default:
		putchar((int)*rhs);
		break;
	}
}
227
228
229 static void
230 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
231 {
232 int cp;
233 const char *rhs;
234 size_t sz;
235
236 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
237 printf("&#%d;", cp);
238 return;
239 } else if (-1 == cp && DECO_SSPECIAL == d) {
240 fwrite(p, 1, len, stdout);
241 return;
242 } else if (-1 == cp)
243 return;
244
245 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
246 fwrite(rhs, 1, sz, stdout);
247 }
248
249
250 static void
251 print_res(struct html *h, const char *p, size_t len)
252 {
253 int cp;
254 const char *rhs;
255 size_t sz;
256
257 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
258 printf("&#%d;", cp);
259 return;
260 } else if (-1 == cp)
261 return;
262
263 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
264 fwrite(rhs, 1, sz, stdout);
265 }
266
267
268 static void
269 print_metaf(struct html *h, enum roffdeco deco)
270 {
271 enum htmlfont font;
272
273 switch (deco) {
274 case (DECO_PREVIOUS):
275 font = h->metal;
276 break;
277 case (DECO_ITALIC):
278 font = HTMLFONT_ITALIC;
279 break;
280 case (DECO_BOLD):
281 font = HTMLFONT_BOLD;
282 break;
283 case (DECO_ROMAN):
284 font = HTMLFONT_NONE;
285 break;
286 default:
287 abort();
288 /* NOTREACHED */
289 }
290
291 if (h->metaf) {
292 print_tagq(h, h->metaf);
293 h->metaf = NULL;
294 }
295
296 h->metal = h->metac;
297 h->metac = font;
298
299 if (HTMLFONT_NONE != font)
300 h->metaf = HTMLFONT_BOLD == font ?
301 print_otag(h, TAG_B, 0, NULL) :
302 print_otag(h, TAG_I, 0, NULL);
303 }
304
305
306 static int
307 print_encode(struct html *h, const char *p, int norecurse)
308 {
309 size_t sz;
310 int len, nospace;
311 const char *seq;
312 enum roffdeco deco;
313 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
314
315 nospace = 0;
316
317 for (; *p; p++) {
318 sz = strcspn(p, rejs);
319
320 fwrite(p, 1, sz, stdout);
321 p += /* LINTED */
322 sz;
323
324 if ('<' == *p) {
325 printf("&lt;");
326 continue;
327 } else if ('>' == *p) {
328 printf("&gt;");
329 continue;
330 } else if ('&' == *p) {
331 printf("&amp;");
332 continue;
333 } else if (ASCII_HYPH == *p) {
334 /*
335 * Note: "soft hyphens" aren't graphically
336 * displayed when not breaking the text; we want
337 * them to be displayed.
338 */
339 /*printf("&#173;");*/
340 putchar('-');
341 continue;
342 } else if ('\0' == *p)
343 break;
344
345 seq = ++p;
346 len = a2roffdeco(&deco, &seq, &sz);
347
348 switch (deco) {
349 case (DECO_NUMBERED):
350 print_num(h, seq, sz);
351 break;
352 case (DECO_RESERVED):
353 print_res(h, seq, sz);
354 break;
355 case (DECO_SSPECIAL):
356 /* FALLTHROUGH */
357 case (DECO_SPECIAL):
358 print_spec(h, deco, seq, sz);
359 break;
360 case (DECO_PREVIOUS):
361 /* FALLTHROUGH */
362 case (DECO_BOLD):
363 /* FALLTHROUGH */
364 case (DECO_ITALIC):
365 /* FALLTHROUGH */
366 case (DECO_ROMAN):
367 if (norecurse)
368 break;
369 print_metaf(h, deco);
370 break;
371 default:
372 break;
373 }
374
375 p += len - 1;
376
377 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
378 nospace = 1;
379 }
380
381 return(nospace);
382 }
383
384
/*
 * Write one attribute as ` key="val"`.  The value goes through
 * print_encode() with font escapes disabled; its return value is of
 * no interest here.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	printf(" %s=\"", key);
	(void)print_encode(h, val, 1);
	putchar('"');
}
392
393
394 struct tag *
395 print_otag(struct html *h, enum htmltag tag,
396 int sz, const struct htmlpair *p)
397 {
398 int i;
399 struct tag *t;
400
401 /* Push this tags onto the stack of open scopes. */
402
403 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
404 t = malloc(sizeof(struct tag));
405 if (NULL == t) {
406 perror(NULL);
407 exit((int)MANDOCLEVEL_SYSERR);
408 }
409 t->tag = tag;
410 t->next = h->tags.head;
411 h->tags.head = t;
412 } else
413 t = NULL;
414
415 if ( ! (HTML_NOSPACE & h->flags))
416 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
417 /* Manage keeps! */
418 if ( ! (HTML_KEEP & h->flags)) {
419 if (HTML_PREKEEP & h->flags)
420 h->flags |= HTML_KEEP;
421 putchar(' ');
422 } else
423 printf("&#160;");
424 }
425
426 if ( ! (h->flags & HTML_NONOSPACE))
427 h->flags &= ~HTML_NOSPACE;
428 else
429 h->flags |= HTML_NOSPACE;
430
431 /* Print out the tag name and attributes. */
432
433 printf("<%s", htmltags[tag].name);
434 for (i = 0; i < sz; i++)
435 print_attr(h, htmlattrs[p[i].key], p[i].val);
436
437 /* Add non-overridable attributes. */
438
439 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
440 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
441 print_attr(h, "xml:lang", "en");
442 print_attr(h, "lang", "en");
443 }
444
445 /* Accomodate for XML "well-formed" singleton escaping. */
446
447 if (HTML_AUTOCLOSE & htmltags[tag].flags)
448 switch (h->type) {
449 case (HTML_XHTML_1_0_STRICT):
450 putchar('/');
451 break;
452 default:
453 break;
454 }
455
456 putchar('>');
457
458 h->flags |= HTML_NOSPACE;
459
460 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
461 putchar('\n');
462
463 return(t);
464 }
465
466
467 static void
468 print_ctag(struct html *h, enum htmltag tag)
469 {
470
471 printf("</%s>", htmltags[tag].name);
472 if (HTML_CLRLINE & htmltags[tag].flags) {
473 h->flags |= HTML_NOSPACE;
474 putchar('\n');
475 }
476 }
477
478
/*
 * Emit the document prologue: the XML declaration (written by
 * print_xmltype() for XHTML only) followed by the DOCTYPE.
 */
void
print_gen_decls(struct html *h)
{

	print_xmltype(h);	/* must precede the DOCTYPE */
	print_doctype(h);
}
486
487
488 static void
489 print_xmltype(struct html *h)
490 {
491
492 if (HTML_XHTML_1_0_STRICT == h->type)
493 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
494 }
495
496
497 static void
498 print_doctype(struct html *h)
499 {
500 const char *doctype;
501 const char *dtd;
502 const char *name;
503
504 switch (h->type) {
505 case (HTML_HTML_4_01_STRICT):
506 name = "HTML";
507 doctype = "-//W3C//DTD HTML 4.01//EN";
508 dtd = "http://www.w3.org/TR/html4/strict.dtd";
509 break;
510 default:
511 name = "html";
512 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
513 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
514 break;
515 }
516
517 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
518 name, doctype, dtd);
519 }
520
521
522 void
523 print_text(struct html *h, const char *word)
524 {
525
526 if (word[0] && '\0' == word[1])
527 switch (word[0]) {
528 case('.'):
529 /* FALLTHROUGH */
530 case(','):
531 /* FALLTHROUGH */
532 case(';'):
533 /* FALLTHROUGH */
534 case(':'):
535 /* FALLTHROUGH */
536 case('?'):
537 /* FALLTHROUGH */
538 case('!'):
539 /* FALLTHROUGH */
540 case(')'):
541 /* FALLTHROUGH */
542 case(']'):
543 if ( ! (HTML_IGNDELIM & h->flags))
544 h->flags |= HTML_NOSPACE;
545 break;
546 default:
547 break;
548 }
549
550 if ( ! (HTML_NOSPACE & h->flags)) {
551 /* Manage keeps! */
552 if ( ! (HTML_KEEP & h->flags)) {
553 if (HTML_PREKEEP & h->flags)
554 h->flags |= HTML_KEEP;
555 putchar(' ');
556 } else
557 printf("&#160;");
558 }
559
560 assert(NULL == h->metaf);
561 if (HTMLFONT_NONE != h->metac)
562 h->metaf = HTMLFONT_BOLD == h->metac ?
563 print_otag(h, TAG_B, 0, NULL) :
564 print_otag(h, TAG_I, 0, NULL);
565
566 assert(word);
567 if ( ! print_encode(h, word, 0))
568 if ( ! (h->flags & HTML_NONOSPACE))
569 h->flags &= ~HTML_NOSPACE;
570
571 if (h->metaf) {
572 print_tagq(h, h->metaf);
573 h->metaf = NULL;
574 }
575
576 h->flags &= ~HTML_IGNDELIM;
577
578 /*
579 * Note that we don't process the pipe: the parser sees it as
580 * punctuation, but we don't in terms of typography.
581 */
582 if (word[0] && '\0' == word[1])
583 switch (word[0]) {
584 case('('):
585 /* FALLTHROUGH */
586 case('['):
587 h->flags |= HTML_NOSPACE;
588 break;
589 default:
590 break;
591 }
592 }
593
594
595 void
596 print_tagq(struct html *h, const struct tag *until)
597 {
598 struct tag *tag;
599
600 while ((tag = h->tags.head) != NULL) {
601 /*
602 * Remember to close out and nullify the current
603 * meta-font and table, if applicable.
604 */
605 if (tag == h->metaf)
606 h->metaf = NULL;
607 if (tag == h->tblt)
608 h->tblt = NULL;
609 print_ctag(h, tag->tag);
610 h->tags.head = tag->next;
611 free(tag);
612 if (until && tag == until)
613 return;
614 }
615 }
616
617
618 void
619 print_stagq(struct html *h, const struct tag *suntil)
620 {
621 struct tag *tag;
622
623 while ((tag = h->tags.head) != NULL) {
624 if (suntil && tag == suntil)
625 return;
626 /*
627 * Remember to close out and nullify the current
628 * meta-font and table, if applicable.
629 */
630 if (tag == h->metaf)
631 h->metaf = NULL;
632 if (tag == h->tblt)
633 h->tblt = NULL;
634 print_ctag(h, tag->tag);
635 h->tags.head = tag->next;
636 free(tag);
637 }
638 }
639
640
641 void
642 bufinit(struct html *h)
643 {
644
645 h->buf[0] = '\0';
646 h->buflen = 0;
647 }
648
649
/*
 * Append the CSS declaration "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufncat(h, key, strlen(key));
	bufcat(h, ":");
	bufncat(h, val, strlen(val));
	bufcat(h, ";");
}
659
660
/*
 * Append the whole NUL-terminated string "p" to the scratch buffer;
 * any truncation is left to bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
667
668
669 void
670 buffmt(struct html *h, const char *fmt, ...)
671 {
672 va_list ap;
673
674 va_start(ap, fmt);
675 (void)vsnprintf(h->buf + (int)h->buflen,
676 BUFSIZ - h->buflen - 1, fmt, ap);
677 va_end(ap);
678 h->buflen = strlen(h->buf);
679 }
680
681
682 void
683 bufncat(struct html *h, const char *p, size_t sz)
684 {
685
686 if (h->buflen + sz > BUFSIZ - 1)
687 sz = BUFSIZ - 1 - h->buflen;
688
689 (void)strncat(h->buf, p, sz);
690 h->buflen += sz;
691 }
692
693
694 void
695 buffmt_includes(struct html *h, const char *name)
696 {
697 const char *p, *pp;
698
699 pp = h->base_includes;
700
701 while (NULL != (p = strchr(pp, '%'))) {
702 bufncat(h, pp, (size_t)(p - pp));
703 switch (*(p + 1)) {
704 case('I'):
705 bufcat(h, name);
706 break;
707 default:
708 bufncat(h, p, 2);
709 break;
710 }
711 pp = p + 2;
712 }
713 if (pp)
714 bufcat(h, pp);
715 }
716
717
718 void
719 buffmt_man(struct html *h,
720 const char *name, const char *sec)
721 {
722 const char *p, *pp;
723
724 pp = h->base_man;
725
726 /* LINTED */
727 while (NULL != (p = strchr(pp, '%'))) {
728 bufncat(h, pp, (size_t)(p - pp));
729 switch (*(p + 1)) {
730 case('S'):
731 bufcat(h, sec ? sec : "1");
732 break;
733 case('N'):
734 buffmt(h, name);
735 break;
736 default:
737 bufncat(h, p, 2);
738 break;
739 }
740 pp = p + 2;
741 }
742 if (pp)
743 bufcat(h, pp);
744 }
745
746
747 void
748 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
749 {
750 double v;
751 const char *u;
752
753 v = su->scale;
754
755 switch (su->unit) {
756 case (SCALE_CM):
757 u = "cm";
758 break;
759 case (SCALE_IN):
760 u = "in";
761 break;
762 case (SCALE_PC):
763 u = "pc";
764 break;
765 case (SCALE_PT):
766 u = "pt";
767 break;
768 case (SCALE_EM):
769 u = "em";
770 break;
771 case (SCALE_MM):
772 if (0 == (v /= 100))
773 v = 1;
774 u = "em";
775 break;
776 case (SCALE_EN):
777 u = "ex";
778 break;
779 case (SCALE_BU):
780 u = "ex";
781 break;
782 case (SCALE_VS):
783 u = "em";
784 break;
785 default:
786 u = "ex";
787 break;
788 }
789
790 /*
791 * XXX: the CSS spec isn't clear as to which types accept
792 * integer or real numbers, so we just make them all decimals.
793 */
794 buffmt(h, "%s: %.2f%s;", p, v, u);
795 }
796
797
/*
 * Append "src", encoded as two lower-case hex digits per byte, to
 * the identifier held in "dst".  "sz" is the number of bytes
 * available at "dst", including the text already stored there and
 * the terminating NUL; it must be greater than 2.
 *
 * A leading '#' in "dst" is stepped over.  If the identifier is
 * still empty after that, an 'x' is stored first, because the hex
 * text that follows may begin with a digit.
 *
 * Each byte is converted through unsigned char: a plain char with
 * the high bit set would otherwise be sign-extended by the default
 * argument promotions and printed as eight hex digits.  A byte is
 * only appended while both of its digits and the NUL still fit, so
 * the result never ends in half a pair.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz > 2);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/* We can't start with a number (bah). */

	if ('#' == *dst) {
		dst++;
		sz--;
	}
	if ('\0' == *dst) {
		*dst++ = 'x';
		*dst = '\0';
		sz--;
	}

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	for ( ; *src != '\0' && sz > 2; src++) {
		ssz = snprintf(dst, (size_t)sz, "%.2x",
				(unsigned int)(unsigned char)*src);
		if (ssz < 0)
			break;
		sz -= ssz;
		dst += ssz;
	}
}