]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Remove all references to ESCAPE_PREDEF, which is now not exposed passed
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.146 2011/05/24 21:31:23 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc.h"
34 #include "libmandoc.h"
35 #include "out.h"
36 #include "html.h"
37 #include "main.h"
38
/*
 * Per-element output behaviour flags, stored in htmldata.flags.
 */
#define	HTML_CLRLINE	 (1 << 0) /* Newline after the tag, no space before. */
#define	HTML_NOSTACK	 (1 << 1) /* Never pushed onto the open-tag stack. */
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/*
 * Static description of one HTML element: the literal tag name written
 * to the output and a bit-set of the HTML_* flags above.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
};
46
47 static const struct htmldata htmltags[TAG_MAX] = {
48 {"html", HTML_CLRLINE}, /* TAG_HTML */
49 {"head", HTML_CLRLINE}, /* TAG_HEAD */
50 {"body", HTML_CLRLINE}, /* TAG_BODY */
51 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 {"title", HTML_CLRLINE}, /* TAG_TITLE */
53 {"div", HTML_CLRLINE}, /* TAG_DIV */
54 {"h1", 0}, /* TAG_H1 */
55 {"h2", 0}, /* TAG_H2 */
56 {"span", 0}, /* TAG_SPAN */
57 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
58 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"a", 0}, /* TAG_A */
60 {"table", HTML_CLRLINE}, /* TAG_TABLE */
61 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
62 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
63 {"tr", HTML_CLRLINE}, /* TAG_TR */
64 {"td", HTML_CLRLINE}, /* TAG_TD */
65 {"li", HTML_CLRLINE}, /* TAG_LI */
66 {"ul", HTML_CLRLINE}, /* TAG_UL */
67 {"ol", HTML_CLRLINE}, /* TAG_OL */
68 {"dl", HTML_CLRLINE}, /* TAG_DL */
69 {"dt", HTML_CLRLINE}, /* TAG_DT */
70 {"dd", HTML_CLRLINE}, /* TAG_DD */
71 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
72 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
73 {"pre", HTML_CLRLINE }, /* TAG_PRE */
74 {"b", 0 }, /* TAG_B */
75 {"i", 0 }, /* TAG_I */
76 {"code", 0 }, /* TAG_CODE */
77 {"small", 0 }, /* TAG_SMALL */
78 };
79
80 static const char *const htmlattrs[ATTR_MAX] = {
81 "http-equiv", /* ATTR_HTTPEQUIV */
82 "content", /* ATTR_CONTENT */
83 "name", /* ATTR_NAME */
84 "rel", /* ATTR_REL */
85 "href", /* ATTR_HREF */
86 "type", /* ATTR_TYPE */
87 "media", /* ATTR_MEDIA */
88 "class", /* ATTR_CLASS */
89 "style", /* ATTR_STYLE */
90 "width", /* ATTR_WIDTH */
91 "id", /* ATTR_ID */
92 "summary", /* ATTR_SUMMARY */
93 "align", /* ATTR_ALIGN */
94 "colspan", /* ATTR_COLSPAN */
95 };
96
97 static const char *const roffscales[SCALE_MAX] = {
98 "cm", /* SCALE_CM */
99 "in", /* SCALE_IN */
100 "pc", /* SCALE_PC */
101 "pt", /* SCALE_PT */
102 "em", /* SCALE_EM */
103 "em", /* SCALE_MM */
104 "ex", /* SCALE_EN */
105 "ex", /* SCALE_BU */
106 "em", /* SCALE_VS */
107 "ex", /* SCALE_FS */
108 };
109
/* Forward declarations of the file-local helpers. */

/* Allocation. */
static	void	 *ml_alloc(char *, enum htmltype);

/* Scratch-buffer handling. */
static	void	  bufncat(struct html *, const char *, size_t);

/* Output primitives. */
static	void	  print_attr(struct html *, const char *, const char *);
static	void	  print_ctag(struct html *, enum htmltag);
static	int	  print_encode(struct html *, const char *, int);
static	void	  print_metaf(struct html *, enum mandoc_esc);
static	void	  print_spec(struct html *, const char *, size_t);
117
118 static void *
119 ml_alloc(char *outopts, enum htmltype type)
120 {
121 struct html *h;
122 const char *toks[4];
123 char *v;
124
125 toks[0] = "style";
126 toks[1] = "man";
127 toks[2] = "includes";
128 toks[3] = NULL;
129
130 h = mandoc_calloc(1, sizeof(struct html));
131
132 h->type = type;
133 h->tags.head = NULL;
134 h->symtab = mchars_alloc();
135
136 while (outopts && *outopts)
137 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
138 case (0):
139 h->style = v;
140 break;
141 case (1):
142 h->base_man = v;
143 break;
144 case (2):
145 h->base_includes = v;
146 break;
147 default:
148 break;
149 }
150
151 return(h);
152 }
153
154 void *
155 html_alloc(char *outopts)
156 {
157
158 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
159 }
160
161
162 void *
163 xhtml_alloc(char *outopts)
164 {
165
166 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
167 }
168
169
170 void
171 html_free(void *p)
172 {
173 struct tag *tag;
174 struct html *h;
175
176 h = (struct html *)p;
177
178 while ((tag = h->tags.head) != NULL) {
179 h->tags.head = tag->next;
180 free(tag);
181 }
182
183 if (h->symtab)
184 mchars_free(h->symtab);
185
186 free(h);
187 }
188
189
190 void
191 print_gen_head(struct html *h)
192 {
193 struct htmlpair tag[4];
194
195 tag[0].key = ATTR_HTTPEQUIV;
196 tag[0].val = "Content-Type";
197 tag[1].key = ATTR_CONTENT;
198 tag[1].val = "text/html; charset=utf-8";
199 print_otag(h, TAG_META, 2, tag);
200
201 tag[0].key = ATTR_NAME;
202 tag[0].val = "resource-type";
203 tag[1].key = ATTR_CONTENT;
204 tag[1].val = "document";
205 print_otag(h, TAG_META, 2, tag);
206
207 if (h->style) {
208 tag[0].key = ATTR_REL;
209 tag[0].val = "stylesheet";
210 tag[1].key = ATTR_HREF;
211 tag[1].val = h->style;
212 tag[2].key = ATTR_TYPE;
213 tag[2].val = "text/css";
214 tag[3].key = ATTR_MEDIA;
215 tag[3].val = "all";
216 print_otag(h, TAG_LINK, 4, tag);
217 }
218 }
219
220 static void
221 print_spec(struct html *h, const char *p, size_t len)
222 {
223 int cp;
224 const char *rhs;
225 size_t sz;
226
227 if ((cp = mchars_spec2cp(h->symtab, p, len)) > 0) {
228 printf("&#%d;", cp);
229 return;
230 } else if (-1 == cp && 1 == len) {
231 fwrite(p, 1, len, stdout);
232 return;
233 } else if (-1 == cp)
234 return;
235
236 if (NULL != (rhs = mchars_spec2str(h->symtab, p, len, &sz)))
237 fwrite(rhs, 1, sz, stdout);
238 }
239
240 static void
241 print_metaf(struct html *h, enum mandoc_esc deco)
242 {
243 enum htmlfont font;
244
245 switch (deco) {
246 case (ESCAPE_FONTPREV):
247 font = h->metal;
248 break;
249 case (ESCAPE_FONTITALIC):
250 font = HTMLFONT_ITALIC;
251 break;
252 case (ESCAPE_FONTBOLD):
253 font = HTMLFONT_BOLD;
254 break;
255 case (ESCAPE_FONT):
256 /* FALLTHROUGH */
257 case (ESCAPE_FONTROMAN):
258 font = HTMLFONT_NONE;
259 break;
260 default:
261 abort();
262 /* NOTREACHED */
263 }
264
265 if (h->metaf) {
266 print_tagq(h, h->metaf);
267 h->metaf = NULL;
268 }
269
270 h->metal = h->metac;
271 h->metac = font;
272
273 if (HTMLFONT_NONE != font)
274 h->metaf = HTMLFONT_BOLD == font ?
275 print_otag(h, TAG_B, 0, NULL) :
276 print_otag(h, TAG_I, 0, NULL);
277 }
278
279 int
280 html_strlen(const char *cp)
281 {
282 int ssz, sz;
283 const char *seq, *p;
284
285 /*
286 * Account for escaped sequences within string length
287 * calculations. This follows the logic in term_strlen() as we
288 * must calculate the width of produced strings.
289 * Assume that characters are always width of "1". This is
290 * hacky, but it gets the job done for approximation of widths.
291 */
292
293 sz = 0;
294 while (NULL != (p = strchr(cp, '\\'))) {
295 sz += (int)(p - cp);
296 ++cp;
297 switch (mandoc_escape(&cp, &seq, &ssz)) {
298 case (ESCAPE_ERROR):
299 return(sz);
300 case (ESCAPE_UNICODE):
301 /* FALLTHROUGH */
302 case (ESCAPE_NUMBERED):
303 /* FALLTHROUGH */
304 case (ESCAPE_SPECIAL):
305 sz++;
306 break;
307 default:
308 break;
309 }
310 }
311
312 assert(sz >= 0);
313 return(sz + strlen(cp));
314 }
315
316 static int
317 print_encode(struct html *h, const char *p, int norecurse)
318 {
319 size_t sz;
320 int c, len, nospace;
321 const char *seq;
322 enum mandoc_esc esc;
323 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
324
325 nospace = 0;
326
327 while ('\0' != *p) {
328 sz = strcspn(p, rejs);
329
330 fwrite(p, 1, sz, stdout);
331 p += (int)sz;
332
333 if ('\0' == *p)
334 break;
335
336 switch (*p++) {
337 case ('<'):
338 printf("&lt;");
339 continue;
340 case ('>'):
341 printf("&gt;");
342 continue;
343 case ('&'):
344 printf("&amp;");
345 continue;
346 case (ASCII_HYPH):
347 putchar('-');
348 continue;
349 default:
350 break;
351 }
352
353 esc = mandoc_escape(&p, &seq, &len);
354 if (ESCAPE_ERROR == esc)
355 break;
356
357 switch (esc) {
358 case (ESCAPE_UNICODE):
359 /* Skip passed "u" header. */
360 c = mchars_num2uc(seq + 1, len - 1);
361 if ('\0' != c)
362 printf("&#x%x;", c);
363 break;
364 case (ESCAPE_NUMBERED):
365 c = mchars_num2char(seq, len);
366 if ('\0' != c)
367 putchar(c);
368 break;
369 case (ESCAPE_SPECIAL):
370 print_spec(h, seq, len);
371 break;
372 case (ESCAPE_FONT):
373 /* FALLTHROUGH */
374 case (ESCAPE_FONTPREV):
375 /* FALLTHROUGH */
376 case (ESCAPE_FONTBOLD):
377 /* FALLTHROUGH */
378 case (ESCAPE_FONTITALIC):
379 /* FALLTHROUGH */
380 case (ESCAPE_FONTROMAN):
381 if (norecurse)
382 break;
383 print_metaf(h, esc);
384 break;
385 case (ESCAPE_NOSPACE):
386 if ('\0' == *p)
387 nospace = 1;
388 break;
389 default:
390 break;
391 }
392 }
393
394 return(nospace);
395 }
396
397
/*
 * Write one attribute as ` key="val"`.  The value goes through
 * print_encode() with font escapes disabled; the key is written as-is.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
405
406
407 struct tag *
408 print_otag(struct html *h, enum htmltag tag,
409 int sz, const struct htmlpair *p)
410 {
411 int i;
412 struct tag *t;
413
414 /* Push this tags onto the stack of open scopes. */
415
416 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
417 t = mandoc_malloc(sizeof(struct tag));
418 t->tag = tag;
419 t->next = h->tags.head;
420 h->tags.head = t;
421 } else
422 t = NULL;
423
424 if ( ! (HTML_NOSPACE & h->flags))
425 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
426 /* Manage keeps! */
427 if ( ! (HTML_KEEP & h->flags)) {
428 if (HTML_PREKEEP & h->flags)
429 h->flags |= HTML_KEEP;
430 putchar(' ');
431 } else
432 printf("&#160;");
433 }
434
435 if ( ! (h->flags & HTML_NONOSPACE))
436 h->flags &= ~HTML_NOSPACE;
437 else
438 h->flags |= HTML_NOSPACE;
439
440 /* Print out the tag name and attributes. */
441
442 printf("<%s", htmltags[tag].name);
443 for (i = 0; i < sz; i++)
444 print_attr(h, htmlattrs[p[i].key], p[i].val);
445
446 /* Add non-overridable attributes. */
447
448 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
449 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
450 print_attr(h, "xml:lang", "en");
451 print_attr(h, "lang", "en");
452 }
453
454 /* Accommodate for XML "well-formed" singleton escaping. */
455
456 if (HTML_AUTOCLOSE & htmltags[tag].flags)
457 switch (h->type) {
458 case (HTML_XHTML_1_0_STRICT):
459 putchar('/');
460 break;
461 default:
462 break;
463 }
464
465 putchar('>');
466
467 h->flags |= HTML_NOSPACE;
468
469 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
470 putchar('\n');
471
472 return(t);
473 }
474
475
476 static void
477 print_ctag(struct html *h, enum htmltag tag)
478 {
479
480 printf("</%s>", htmltags[tag].name);
481 if (HTML_CLRLINE & htmltags[tag].flags) {
482 h->flags |= HTML_NOSPACE;
483 putchar('\n');
484 }
485 }
486
487 void
488 print_gen_decls(struct html *h)
489 {
490 const char *doctype;
491 const char *dtd;
492 const char *name;
493
494 switch (h->type) {
495 case (HTML_HTML_4_01_STRICT):
496 name = "HTML";
497 doctype = "-//W3C//DTD HTML 4.01//EN";
498 dtd = "http://www.w3.org/TR/html4/strict.dtd";
499 break;
500 default:
501 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
502 name = "html";
503 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
504 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
505 break;
506 }
507
508 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
509 name, doctype, dtd);
510 }
511
512 void
513 print_text(struct html *h, const char *word)
514 {
515
516 if ( ! (HTML_NOSPACE & h->flags)) {
517 /* Manage keeps! */
518 if ( ! (HTML_KEEP & h->flags)) {
519 if (HTML_PREKEEP & h->flags)
520 h->flags |= HTML_KEEP;
521 putchar(' ');
522 } else
523 printf("&#160;");
524 }
525
526 assert(NULL == h->metaf);
527 if (HTMLFONT_NONE != h->metac)
528 h->metaf = HTMLFONT_BOLD == h->metac ?
529 print_otag(h, TAG_B, 0, NULL) :
530 print_otag(h, TAG_I, 0, NULL);
531
532 assert(word);
533 if ( ! print_encode(h, word, 0))
534 if ( ! (h->flags & HTML_NONOSPACE))
535 h->flags &= ~HTML_NOSPACE;
536
537 if (h->metaf) {
538 print_tagq(h, h->metaf);
539 h->metaf = NULL;
540 }
541
542 h->flags &= ~HTML_IGNDELIM;
543 }
544
545
546 void
547 print_tagq(struct html *h, const struct tag *until)
548 {
549 struct tag *tag;
550
551 while ((tag = h->tags.head) != NULL) {
552 /*
553 * Remember to close out and nullify the current
554 * meta-font and table, if applicable.
555 */
556 if (tag == h->metaf)
557 h->metaf = NULL;
558 if (tag == h->tblt)
559 h->tblt = NULL;
560 print_ctag(h, tag->tag);
561 h->tags.head = tag->next;
562 free(tag);
563 if (until && tag == until)
564 return;
565 }
566 }
567
568
569 void
570 print_stagq(struct html *h, const struct tag *suntil)
571 {
572 struct tag *tag;
573
574 while ((tag = h->tags.head) != NULL) {
575 if (suntil && tag == suntil)
576 return;
577 /*
578 * Remember to close out and nullify the current
579 * meta-font and table, if applicable.
580 */
581 if (tag == h->metaf)
582 h->metaf = NULL;
583 if (tag == h->tblt)
584 h->tblt = NULL;
585 print_ctag(h, tag->tag);
586 h->tags.head = tag->next;
587 free(tag);
588 }
589 }
590
591 void
592 bufinit(struct html *h)
593 {
594
595 h->buf[0] = '\0';
596 h->buflen = 0;
597 }
598
/*
 * Append one style declaration of the form "key:val;" to the scratch
 * buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*parts[4];
	int		 i;

	parts[0] = key;
	parts[1] = ":";
	parts[2] = val;
	parts[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, parts[i]);
}
608
609 void
610 bufcat(struct html *h, const char *p)
611 {
612
613 h->buflen = strlcat(h->buf, p, BUFSIZ);
614 assert(h->buflen < BUFSIZ);
615 h->buflen--;
616 }
617
618 void
619 bufcat_fmt(struct html *h, const char *fmt, ...)
620 {
621 va_list ap;
622
623 va_start(ap, fmt);
624 (void)vsnprintf(h->buf + (int)h->buflen,
625 BUFSIZ - h->buflen - 1, fmt, ap);
626 va_end(ap);
627 h->buflen = strlen(h->buf);
628 }
629
630 static void
631 bufncat(struct html *h, const char *p, size_t sz)
632 {
633
634 assert(h->buflen + sz + 1 < BUFSIZ);
635 strncat(h->buf, p, sz);
636 h->buflen += sz;
637 }
638
639 void
640 buffmt_includes(struct html *h, const char *name)
641 {
642 const char *p, *pp;
643
644 pp = h->base_includes;
645
646 bufinit(h);
647 while (NULL != (p = strchr(pp, '%'))) {
648 bufncat(h, pp, (size_t)(p - pp));
649 switch (*(p + 1)) {
650 case('I'):
651 bufcat(h, name);
652 break;
653 default:
654 bufncat(h, p, 2);
655 break;
656 }
657 pp = p + 2;
658 }
659 if (pp)
660 bufcat(h, pp);
661 }
662
663 void
664 buffmt_man(struct html *h,
665 const char *name, const char *sec)
666 {
667 const char *p, *pp;
668
669 pp = h->base_man;
670
671 bufinit(h);
672 while (NULL != (p = strchr(pp, '%'))) {
673 bufncat(h, pp, (size_t)(p - pp));
674 switch (*(p + 1)) {
675 case('S'):
676 bufcat(h, sec ? sec : "1");
677 break;
678 case('N'):
679 bufcat_fmt(h, name);
680 break;
681 default:
682 bufncat(h, p, 2);
683 break;
684 }
685 pp = p + 2;
686 }
687 if (pp)
688 bufcat(h, pp);
689 }
690
691 void
692 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
693 {
694 double v;
695
696 v = su->scale;
697 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
698 v = 1.0;
699
700 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
701 }
702
/*
 * Append "src" to the scratch buffer as an identifier, encoding every
 * byte as two lowercase hexadecimal digits.
 * Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/*
	 * Go through unsigned char: where plain char is signed, a
	 * byte above 0x7f would otherwise be sign-extended and come
	 * out as eight digits ("ffffffXX") instead of two.
	 */
	while ('\0' != *src)
		bufcat_fmt(h, "%.2x",
		    (unsigned int)(unsigned char)*src++);
}