]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Make lint shut up a little bit.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.127 2011/03/15 16:23:51 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc.h"
34 #include "out.h"
35 #include "chars.h"
36 #include "html.h"
37 #include "main.h"
38
/*
 * Flag bits stored in htmldata.flags.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/*
 * Static description of one element: the name printed between the
 * angle brackets and a bitwise OR of the HTML_* flag bits above.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
};
46
47 static const struct htmldata htmltags[TAG_MAX] = {
48 {"html", HTML_CLRLINE}, /* TAG_HTML */
49 {"head", HTML_CLRLINE}, /* TAG_HEAD */
50 {"body", HTML_CLRLINE}, /* TAG_BODY */
51 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 {"title", HTML_CLRLINE}, /* TAG_TITLE */
53 {"div", HTML_CLRLINE}, /* TAG_DIV */
54 {"h1", 0}, /* TAG_H1 */
55 {"h2", 0}, /* TAG_H2 */
56 {"span", 0}, /* TAG_SPAN */
57 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
58 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"a", 0}, /* TAG_A */
60 {"table", HTML_CLRLINE}, /* TAG_TABLE */
61 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
62 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
63 {"tr", HTML_CLRLINE}, /* TAG_TR */
64 {"td", HTML_CLRLINE}, /* TAG_TD */
65 {"li", HTML_CLRLINE}, /* TAG_LI */
66 {"ul", HTML_CLRLINE}, /* TAG_UL */
67 {"ol", HTML_CLRLINE}, /* TAG_OL */
68 {"dl", HTML_CLRLINE}, /* TAG_DL */
69 {"dt", HTML_CLRLINE}, /* TAG_DT */
70 {"dd", HTML_CLRLINE}, /* TAG_DD */
71 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
72 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
73 {"pre", HTML_CLRLINE }, /* TAG_PRE */
74 {"b", 0 }, /* TAG_B */
75 {"i", 0 }, /* TAG_I */
76 {"code", 0 }, /* TAG_CODE */
77 {"small", 0 }, /* TAG_SMALL */
78 };
79
80 static const char *const htmlattrs[ATTR_MAX] = {
81 "http-equiv", /* ATTR_HTTPEQUIV */
82 "content", /* ATTR_CONTENT */
83 "name", /* ATTR_NAME */
84 "rel", /* ATTR_REL */
85 "href", /* ATTR_HREF */
86 "type", /* ATTR_TYPE */
87 "media", /* ATTR_MEDIA */
88 "class", /* ATTR_CLASS */
89 "style", /* ATTR_STYLE */
90 "width", /* ATTR_WIDTH */
91 "id", /* ATTR_ID */
92 "summary", /* ATTR_SUMMARY */
93 "align", /* ATTR_ALIGN */
94 "colspan", /* ATTR_COLSPAN */
95 };
96
/*
 * Forward declarations of the file-local helpers.
 */
static	void	 *ml_alloc(char *outopts, enum htmltype type);
static	void	  print_attr(struct html *h,
			const char *key, const char *val);
static	void	  print_ctag(struct html *h, enum htmltag tag);
static	void	  print_doctype(struct html *h);
static	int	  print_encode(struct html *h,
			const char *p, int norecurse);
static	void	  print_metaf(struct html *h, enum roffdeco deco);
static	void	  print_num(struct html *h, const char *p, size_t len);
static	void	  print_res(struct html *h, const char *p, size_t len);
static	void	  print_spec(struct html *h, enum roffdeco d,
			const char *p, size_t len);
static	void	  print_xmltype(struct html *h);
109
110
111 static void *
112 ml_alloc(char *outopts, enum htmltype type)
113 {
114 struct html *h;
115 const char *toks[4];
116 char *v;
117
118 toks[0] = "style";
119 toks[1] = "man";
120 toks[2] = "includes";
121 toks[3] = NULL;
122
123 h = calloc(1, sizeof(struct html));
124 if (NULL == h) {
125 perror(NULL);
126 exit((int)MANDOCLEVEL_SYSERR);
127 }
128
129 h->type = type;
130 h->tags.head = NULL;
131 h->symtab = chars_init(CHARS_HTML);
132
133 while (outopts && *outopts)
134 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
135 case (0):
136 h->style = v;
137 break;
138 case (1):
139 h->base_man = v;
140 break;
141 case (2):
142 h->base_includes = v;
143 break;
144 default:
145 break;
146 }
147
148 return(h);
149 }
150
151 void *
152 html_alloc(char *outopts)
153 {
154
155 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
156 }
157
158
159 void *
160 xhtml_alloc(char *outopts)
161 {
162
163 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
164 }
165
166
167 void
168 html_free(void *p)
169 {
170 struct tag *tag;
171 struct html *h;
172
173 h = (struct html *)p;
174
175 while ((tag = h->tags.head) != NULL) {
176 h->tags.head = tag->next;
177 free(tag);
178 }
179
180 if (h->symtab)
181 chars_free(h->symtab);
182
183 free(h);
184 }
185
186
187 void
188 print_gen_head(struct html *h)
189 {
190 struct htmlpair tag[4];
191
192 tag[0].key = ATTR_HTTPEQUIV;
193 tag[0].val = "Content-Type";
194 tag[1].key = ATTR_CONTENT;
195 tag[1].val = "text/html; charset=utf-8";
196 print_otag(h, TAG_META, 2, tag);
197
198 tag[0].key = ATTR_NAME;
199 tag[0].val = "resource-type";
200 tag[1].key = ATTR_CONTENT;
201 tag[1].val = "document";
202 print_otag(h, TAG_META, 2, tag);
203
204 if (h->style) {
205 tag[0].key = ATTR_REL;
206 tag[0].val = "stylesheet";
207 tag[1].key = ATTR_HREF;
208 tag[1].val = h->style;
209 tag[2].key = ATTR_TYPE;
210 tag[2].val = "text/css";
211 tag[3].key = ATTR_MEDIA;
212 tag[3].val = "all";
213 print_otag(h, TAG_LINK, 4, tag);
214 }
215 }
216
/*
 * Print the single character that chars_num2char() yields for the
 * numbered escape "p" of length "len"; print nothing if it yields
 * NULL.  The output state "h" is not used.
 */
/* ARGSUSED */
static void
print_num(struct html *h, const char *p, size_t len)
{
	const char	*c;

	c = chars_num2char(p, len);
	if (NULL == c)
		return;

	putchar((int)*c);
}
227
228 static void
229 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
230 {
231 int cp;
232 const char *rhs;
233 size_t sz;
234
235 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
236 printf("&#%d;", cp);
237 return;
238 } else if (-1 == cp && DECO_SSPECIAL == d) {
239 fwrite(p, 1, len, stdout);
240 return;
241 } else if (-1 == cp)
242 return;
243
244 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
245 fwrite(rhs, 1, sz, stdout);
246 }
247
248
249 static void
250 print_res(struct html *h, const char *p, size_t len)
251 {
252 int cp;
253 const char *rhs;
254 size_t sz;
255
256 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
257 printf("&#%d;", cp);
258 return;
259 } else if (-1 == cp)
260 return;
261
262 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
263 fwrite(rhs, 1, sz, stdout);
264 }
265
266
267 static void
268 print_metaf(struct html *h, enum roffdeco deco)
269 {
270 enum htmlfont font;
271
272 switch (deco) {
273 case (DECO_PREVIOUS):
274 font = h->metal;
275 break;
276 case (DECO_ITALIC):
277 font = HTMLFONT_ITALIC;
278 break;
279 case (DECO_BOLD):
280 font = HTMLFONT_BOLD;
281 break;
282 case (DECO_ROMAN):
283 font = HTMLFONT_NONE;
284 break;
285 default:
286 abort();
287 /* NOTREACHED */
288 }
289
290 if (h->metaf) {
291 print_tagq(h, h->metaf);
292 h->metaf = NULL;
293 }
294
295 h->metal = h->metac;
296 h->metac = font;
297
298 if (HTMLFONT_NONE != font)
299 h->metaf = HTMLFONT_BOLD == font ?
300 print_otag(h, TAG_B, 0, NULL) :
301 print_otag(h, TAG_I, 0, NULL);
302 }
303
304
305 static int
306 print_encode(struct html *h, const char *p, int norecurse)
307 {
308 size_t sz;
309 int len, nospace;
310 const char *seq;
311 enum roffdeco deco;
312 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
313
314 nospace = 0;
315
316 for (; *p; p++) {
317 sz = strcspn(p, rejs);
318
319 fwrite(p, 1, sz, stdout);
320 p += /* LINTED */
321 sz;
322
323 if ('<' == *p) {
324 printf("&lt;");
325 continue;
326 } else if ('>' == *p) {
327 printf("&gt;");
328 continue;
329 } else if ('&' == *p) {
330 printf("&amp;");
331 continue;
332 } else if (ASCII_HYPH == *p) {
333 /*
334 * Note: "soft hyphens" aren't graphically
335 * displayed when not breaking the text; we want
336 * them to be displayed.
337 */
338 /*printf("&#173;");*/
339 putchar('-');
340 continue;
341 } else if ('\0' == *p)
342 break;
343
344 seq = ++p;
345 len = a2roffdeco(&deco, &seq, &sz);
346
347 switch (deco) {
348 case (DECO_NUMBERED):
349 print_num(h, seq, sz);
350 break;
351 case (DECO_RESERVED):
352 print_res(h, seq, sz);
353 break;
354 case (DECO_SSPECIAL):
355 /* FALLTHROUGH */
356 case (DECO_SPECIAL):
357 print_spec(h, deco, seq, sz);
358 break;
359 case (DECO_PREVIOUS):
360 /* FALLTHROUGH */
361 case (DECO_BOLD):
362 /* FALLTHROUGH */
363 case (DECO_ITALIC):
364 /* FALLTHROUGH */
365 case (DECO_ROMAN):
366 if (norecurse)
367 break;
368 print_metaf(h, deco);
369 break;
370 default:
371 break;
372 }
373
374 p += len - 1;
375
376 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
377 nospace = 1;
378 }
379
380 return(nospace);
381 }
382
383
/*
 * Write one attribute as: space, "key", equals sign, and "val" in
 * double quotes.  The value goes through print_encode() with
 * recursion disabled, so font escapes inside it open no elements;
 * the key is written verbatim.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
391
392
393 struct tag *
394 print_otag(struct html *h, enum htmltag tag,
395 int sz, const struct htmlpair *p)
396 {
397 int i;
398 struct tag *t;
399
400 /* Push this tags onto the stack of open scopes. */
401
402 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
403 t = malloc(sizeof(struct tag));
404 if (NULL == t) {
405 perror(NULL);
406 exit((int)MANDOCLEVEL_SYSERR);
407 }
408 t->tag = tag;
409 t->next = h->tags.head;
410 h->tags.head = t;
411 } else
412 t = NULL;
413
414 if ( ! (HTML_NOSPACE & h->flags))
415 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
416 /* Manage keeps! */
417 if ( ! (HTML_KEEP & h->flags)) {
418 if (HTML_PREKEEP & h->flags)
419 h->flags |= HTML_KEEP;
420 putchar(' ');
421 } else
422 printf("&#160;");
423 }
424
425 if ( ! (h->flags & HTML_NONOSPACE))
426 h->flags &= ~HTML_NOSPACE;
427 else
428 h->flags |= HTML_NOSPACE;
429
430 /* Print out the tag name and attributes. */
431
432 printf("<%s", htmltags[tag].name);
433 for (i = 0; i < sz; i++)
434 print_attr(h, htmlattrs[p[i].key], p[i].val);
435
436 /* Add non-overridable attributes. */
437
438 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
439 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
440 print_attr(h, "xml:lang", "en");
441 print_attr(h, "lang", "en");
442 }
443
444 /* Accomodate for XML "well-formed" singleton escaping. */
445
446 if (HTML_AUTOCLOSE & htmltags[tag].flags)
447 switch (h->type) {
448 case (HTML_XHTML_1_0_STRICT):
449 putchar('/');
450 break;
451 default:
452 break;
453 }
454
455 putchar('>');
456
457 h->flags |= HTML_NOSPACE;
458
459 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
460 putchar('\n');
461
462 return(t);
463 }
464
465
466 static void
467 print_ctag(struct html *h, enum htmltag tag)
468 {
469
470 printf("</%s>", htmltags[tag].name);
471 if (HTML_CLRLINE & htmltags[tag].flags) {
472 h->flags |= HTML_NOSPACE;
473 putchar('\n');
474 }
475 }
476
477
/*
 * Print the document prologue: first the XML declaration (which
 * print_xmltype() writes for XHTML only), then the DOCTYPE.
 */
void
print_gen_decls(struct html *h)
{

	print_xmltype(h);	/* must precede the DOCTYPE */
	print_doctype(h);
}
485
486
487 static void
488 print_xmltype(struct html *h)
489 {
490
491 if (HTML_XHTML_1_0_STRICT == h->type)
492 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
493 }
494
495
496 static void
497 print_doctype(struct html *h)
498 {
499 const char *doctype;
500 const char *dtd;
501 const char *name;
502
503 switch (h->type) {
504 case (HTML_HTML_4_01_STRICT):
505 name = "HTML";
506 doctype = "-//W3C//DTD HTML 4.01//EN";
507 dtd = "http://www.w3.org/TR/html4/strict.dtd";
508 break;
509 default:
510 name = "html";
511 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
512 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
513 break;
514 }
515
516 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
517 name, doctype, dtd);
518 }
519
520
521 void
522 print_text(struct html *h, const char *word)
523 {
524
525 if (word[0] && '\0' == word[1])
526 switch (word[0]) {
527 case('.'):
528 /* FALLTHROUGH */
529 case(','):
530 /* FALLTHROUGH */
531 case(';'):
532 /* FALLTHROUGH */
533 case(':'):
534 /* FALLTHROUGH */
535 case('?'):
536 /* FALLTHROUGH */
537 case('!'):
538 /* FALLTHROUGH */
539 case(')'):
540 /* FALLTHROUGH */
541 case(']'):
542 if ( ! (HTML_IGNDELIM & h->flags))
543 h->flags |= HTML_NOSPACE;
544 break;
545 default:
546 break;
547 }
548
549 if ( ! (HTML_NOSPACE & h->flags)) {
550 /* Manage keeps! */
551 if ( ! (HTML_KEEP & h->flags)) {
552 if (HTML_PREKEEP & h->flags)
553 h->flags |= HTML_KEEP;
554 putchar(' ');
555 } else
556 printf("&#160;");
557 }
558
559 assert(NULL == h->metaf);
560 if (HTMLFONT_NONE != h->metac)
561 h->metaf = HTMLFONT_BOLD == h->metac ?
562 print_otag(h, TAG_B, 0, NULL) :
563 print_otag(h, TAG_I, 0, NULL);
564
565 assert(word);
566 if ( ! print_encode(h, word, 0))
567 if ( ! (h->flags & HTML_NONOSPACE))
568 h->flags &= ~HTML_NOSPACE;
569
570 if (h->metaf) {
571 print_tagq(h, h->metaf);
572 h->metaf = NULL;
573 }
574
575 h->flags &= ~HTML_IGNDELIM;
576
577 /*
578 * Note that we don't process the pipe: the parser sees it as
579 * punctuation, but we don't in terms of typography.
580 */
581 if (word[0] && '\0' == word[1])
582 switch (word[0]) {
583 case('('):
584 /* FALLTHROUGH */
585 case('['):
586 h->flags |= HTML_NOSPACE;
587 break;
588 default:
589 break;
590 }
591 }
592
593
594 void
595 print_tagq(struct html *h, const struct tag *until)
596 {
597 struct tag *tag;
598
599 while ((tag = h->tags.head) != NULL) {
600 /*
601 * Remember to close out and nullify the current
602 * meta-font and table, if applicable.
603 */
604 if (tag == h->metaf)
605 h->metaf = NULL;
606 if (tag == h->tblt)
607 h->tblt = NULL;
608 print_ctag(h, tag->tag);
609 h->tags.head = tag->next;
610 free(tag);
611 if (until && tag == until)
612 return;
613 }
614 }
615
616
617 void
618 print_stagq(struct html *h, const struct tag *suntil)
619 {
620 struct tag *tag;
621
622 while ((tag = h->tags.head) != NULL) {
623 if (suntil && tag == suntil)
624 return;
625 /*
626 * Remember to close out and nullify the current
627 * meta-font and table, if applicable.
628 */
629 if (tag == h->metaf)
630 h->metaf = NULL;
631 if (tag == h->tblt)
632 h->tblt = NULL;
633 print_ctag(h, tag->tag);
634 h->tags.head = tag->next;
635 free(tag);
636 }
637 }
638
639
640 void
641 bufinit(struct html *h)
642 {
643
644 h->buf[0] = '\0';
645 h->buflen = 0;
646 }
647
648
/*
 * Append "key:val;" to the scratch buffer.  Key and value are
 * appended verbatim.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
658
659
/*
 * Append the NUL-terminated string "p" to the scratch buffer;
 * bufncat() does the work and the truncation.
 */
void
bufcat(struct html *h, const char *p)
{
	size_t	 len;

	len = strlen(p);
	bufncat(h, p, len);
}
666
667
668 void
669 buffmt(struct html *h, const char *fmt, ...)
670 {
671 va_list ap;
672
673 va_start(ap, fmt);
674 (void)vsnprintf(h->buf + (int)h->buflen,
675 BUFSIZ - h->buflen - 1, fmt, ap);
676 va_end(ap);
677 h->buflen = strlen(h->buf);
678 }
679
680
681 void
682 bufncat(struct html *h, const char *p, size_t sz)
683 {
684
685 if (h->buflen + sz > BUFSIZ - 1)
686 sz = BUFSIZ - 1 - h->buflen;
687
688 (void)strncat(h->buf, p, sz);
689 h->buflen += sz;
690 }
691
692
693 void
694 buffmt_includes(struct html *h, const char *name)
695 {
696 const char *p, *pp;
697
698 pp = h->base_includes;
699
700 while (NULL != (p = strchr(pp, '%'))) {
701 bufncat(h, pp, (size_t)(p - pp));
702 switch (*(p + 1)) {
703 case('I'):
704 bufcat(h, name);
705 break;
706 default:
707 bufncat(h, p, 2);
708 break;
709 }
710 pp = p + 2;
711 }
712 if (pp)
713 bufcat(h, pp);
714 }
715
716
717 void
718 buffmt_man(struct html *h,
719 const char *name, const char *sec)
720 {
721 const char *p, *pp;
722
723 pp = h->base_man;
724
725 /* LINTED */
726 while (NULL != (p = strchr(pp, '%'))) {
727 bufncat(h, pp, (size_t)(p - pp));
728 switch (*(p + 1)) {
729 case('S'):
730 bufcat(h, sec ? sec : "1");
731 break;
732 case('N'):
733 buffmt(h, name);
734 break;
735 default:
736 bufncat(h, p, 2);
737 break;
738 }
739 pp = p + 2;
740 }
741 if (pp)
742 bufcat(h, pp);
743 }
744
745
746 void
747 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
748 {
749 double v;
750 const char *u;
751
752 v = su->scale;
753
754 switch (su->unit) {
755 case (SCALE_CM):
756 u = "cm";
757 break;
758 case (SCALE_IN):
759 u = "in";
760 break;
761 case (SCALE_PC):
762 u = "pc";
763 break;
764 case (SCALE_PT):
765 u = "pt";
766 break;
767 case (SCALE_EM):
768 u = "em";
769 break;
770 case (SCALE_MM):
771 if (0 == (v /= 100))
772 v = 1;
773 u = "em";
774 break;
775 case (SCALE_EN):
776 u = "ex";
777 break;
778 case (SCALE_BU):
779 u = "ex";
780 break;
781 case (SCALE_VS):
782 u = "em";
783 break;
784 default:
785 u = "ex";
786 break;
787 }
788
789 /*
790 * XXX: the CSS spec isn't clear as to which types accept
791 * integer or real numbers, so we just make them all decimals.
792 */
793 buffmt(h, "%s: %.2f%s;", p, v, u);
794 }
795
796
/*
 * Append "src" to the identifier in "dst", writing every byte of
 * "src" as two lower-case hexadecimal digits so that the result
 * consists of characters valid in an id attribute.
 *
 * "sz" is the size of the "dst" buffer and must exceed 2.  A leading
 * '#' in "dst" is skipped, and if nothing follows it an 'x' is
 * inserted so that the identifier does not begin with a digit.
 * Output stops when the buffer is full; "dst" stays NUL-terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz > 2);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/* We can't start with a number (bah). */

	if ('#' == *dst) {
		dst++;
		sz--;
	}
	if ('\0' == *dst) {
		*dst++ = 'x';
		*dst = '\0';
		sz--;
	}

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	for ( ; *src != '\0' && sz > 1; src++) {
		/*
		 * Go through unsigned char: a plain char may be signed,
		 * and a negative value would be sign-extended by the
		 * integer promotions and print as eight digits.
		 */
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);

		/* Error or truncated output: the buffer is exhausted. */
		if (ssz < 0 || ssz >= sz)
			break;

		sz -= ssz;
		dst += ssz;
	}
}