]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Fixed Makefile for `make lint' dep. on config.h
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.93 2010/01/29 14:39:37 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "out.h"
33 #include "chars.h"
34 #include "html.h"
35 #include "main.h"
36
/*
 * Convert a pointer-to-const into a plain "void *" by round-tripping
 * the address through uintptr_t.  The pointed-to object is unchanged;
 * only the qualifier is dropped.
 */
#define	UNCONST(ptr)	((void *)(uintptr_t)(const void *)(ptr))
38
/*
 * Per-tag behaviour bits stored in htmldata.flags.
 *
 * HTML_CLRLINE:   print_otag() emits no leading space before the tag and
 *                 print_ctag() follows the closing tag with a newline.
 * HTML_NOSTACK:   print_otag() does not push the tag on the open-tag
 *                 stack, so no closing tag is ever printed for it.
 * HTML_AUTOCLOSE: print_otag() writes "/>" instead of ">" when the
 *                 output type is XHTML.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/*
 * Static description of one element type: its name as written in the
 * markup and a bitwise OR of the HTML_* flags above.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
};
46
47 static const struct htmldata htmltags[TAG_MAX] = {
48 {"html", HTML_CLRLINE}, /* TAG_HTML */
49 {"head", HTML_CLRLINE}, /* TAG_HEAD */
50 {"body", HTML_CLRLINE}, /* TAG_BODY */
51 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 {"title", HTML_CLRLINE}, /* TAG_TITLE */
53 {"div", HTML_CLRLINE}, /* TAG_DIV */
54 {"h1", 0}, /* TAG_H1 */
55 {"h2", 0}, /* TAG_H2 */
56 {"span", 0}, /* TAG_SPAN */
57 {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"a", 0}, /* TAG_A */
60 {"table", HTML_CLRLINE}, /* TAG_TABLE */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 };
68
69 static const char *const htmlfonts[HTMLFONT_MAX] = {
70 "roman",
71 "bold",
72 "italic"
73 };
74
75 static const char *const htmlattrs[ATTR_MAX] = {
76 "http-equiv",
77 "content",
78 "name",
79 "rel",
80 "href",
81 "type",
82 "media",
83 "class",
84 "style",
85 "width",
86 "valign",
87 "target",
88 "id",
89 "summary",
90 };
91
/* Allocation. */
static	void		 *ml_alloc(char *outopts, enum htmltype type);

/* Document prologue. */
static	void		  print_xmltype(struct html *h);
static	void		  print_doctype(struct html *h);

/* Tags. */
static	void		  print_ctag(struct html *h, enum htmltag tag);

/* Text and escape-sequence output. */
static	int		  print_encode(struct html *h,
				const char *p, int norecurse);
static	void		  print_metaf(struct html *h, enum roffdeco deco);
static	void		  print_spec(struct html *h,
				const char *p, size_t len);
static	void		  print_res(struct html *h,
				const char *p, size_t len);

100
101
102 static void *
103 ml_alloc(char *outopts, enum htmltype type)
104 {
105 struct html *h;
106 const char *toks[4];
107 char *v;
108
109 toks[0] = "style";
110 toks[1] = "man";
111 toks[2] = "includes";
112 toks[3] = NULL;
113
114 h = calloc(1, sizeof(struct html));
115 if (NULL == h) {
116 perror(NULL);
117 exit(EXIT_FAILURE);
118 }
119
120 h->type = type;
121 h->tags.head = NULL;
122 h->ords.head = NULL;
123 h->symtab = chars_init(CHARS_HTML);
124
125 while (outopts && *outopts)
126 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
127 case (0):
128 h->style = v;
129 break;
130 case (1):
131 h->base_man = v;
132 break;
133 case (2):
134 h->base_includes = v;
135 break;
136 default:
137 break;
138 }
139
140 return(h);
141 }
142
143 void *
144 html_alloc(char *outopts)
145 {
146
147 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
148 }
149
150
151 void *
152 xhtml_alloc(char *outopts)
153 {
154
155 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
156 }
157
158
159 void
160 html_free(void *p)
161 {
162 struct tag *tag;
163 struct ord *ord;
164 struct html *h;
165
166 h = (struct html *)p;
167
168 while ((ord = h->ords.head) != NULL) {
169 h->ords.head = ord->next;
170 free(ord);
171 }
172
173 while ((tag = h->tags.head) != NULL) {
174 h->tags.head = tag->next;
175 free(tag);
176 }
177
178 if (h->symtab)
179 chars_free(h->symtab);
180
181 free(h);
182 }
183
184
185 void
186 print_gen_head(struct html *h)
187 {
188 struct htmlpair tag[4];
189
190 tag[0].key = ATTR_HTTPEQUIV;
191 tag[0].val = "Content-Type";
192 tag[1].key = ATTR_CONTENT;
193 tag[1].val = "text/html; charset=utf-8";
194 print_otag(h, TAG_META, 2, tag);
195
196 tag[0].key = ATTR_NAME;
197 tag[0].val = "resource-type";
198 tag[1].key = ATTR_CONTENT;
199 tag[1].val = "document";
200 print_otag(h, TAG_META, 2, tag);
201
202 if (h->style) {
203 tag[0].key = ATTR_REL;
204 tag[0].val = "stylesheet";
205 tag[1].key = ATTR_HREF;
206 tag[1].val = h->style;
207 tag[2].key = ATTR_TYPE;
208 tag[2].val = "text/css";
209 tag[3].key = ATTR_MEDIA;
210 tag[3].val = "all";
211 print_otag(h, TAG_LINK, 4, tag);
212 }
213 }
214
215
216 static void
217 print_spec(struct html *h, const char *p, size_t len)
218 {
219 const char *rhs;
220 size_t sz;
221
222 rhs = chars_a2ascii(h->symtab, p, len, &sz);
223
224 if (NULL == rhs)
225 return;
226 fwrite(rhs, 1, sz, stdout);
227 }
228
229
230 static void
231 print_res(struct html *h, const char *p, size_t len)
232 {
233 const char *rhs;
234 size_t sz;
235
236 rhs = chars_a2res(h->symtab, p, len, &sz);
237
238 if (NULL == rhs)
239 return;
240 fwrite(rhs, 1, sz, stdout);
241 }
242
243
244 struct tag *
245 print_ofont(struct html *h, enum htmlfont font)
246 {
247 struct htmlpair tag;
248
249 h->metal = h->metac;
250 h->metac = font;
251
252 /* FIXME: DECO_ROMAN should just close out preexisting. */
253
254 if (h->metaf && h->tags.head == h->metaf)
255 print_tagq(h, h->metaf);
256
257 PAIR_CLASS_INIT(&tag, htmlfonts[font]);
258 h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
259 return(h->metaf);
260 }
261
262
263 static void
264 print_metaf(struct html *h, enum roffdeco deco)
265 {
266 enum htmlfont font;
267
268 switch (deco) {
269 case (DECO_PREVIOUS):
270 font = h->metal;
271 break;
272 case (DECO_ITALIC):
273 font = HTMLFONT_ITALIC;
274 break;
275 case (DECO_BOLD):
276 font = HTMLFONT_BOLD;
277 break;
278 case (DECO_ROMAN):
279 font = HTMLFONT_NONE;
280 break;
281 default:
282 abort();
283 /* NOTREACHED */
284 }
285
286 (void)print_ofont(h, font);
287 }
288
289
290 static int
291 print_encode(struct html *h, const char *p, int norecurse)
292 {
293 size_t sz;
294 int len, nospace;
295 const char *seq;
296 enum roffdeco deco;
297
298 nospace = 0;
299
300 for (; *p; p++) {
301 sz = strcspn(p, "\\<>&");
302
303 fwrite(p, 1, sz, stdout);
304 p += /* LINTED */
305 sz;
306
307 if ('<' == *p) {
308 printf("&lt;");
309 continue;
310 } else if ('>' == *p) {
311 printf("&gt;");
312 continue;
313 } else if ('&' == *p) {
314 printf("&amp;");
315 continue;
316 } else if ('\0' == *p)
317 break;
318
319 seq = ++p;
320 len = a2roffdeco(&deco, &seq, &sz);
321
322 switch (deco) {
323 case (DECO_RESERVED):
324 print_res(h, seq, sz);
325 break;
326 case (DECO_SPECIAL):
327 print_spec(h, seq, sz);
328 break;
329 case (DECO_PREVIOUS):
330 /* FALLTHROUGH */
331 case (DECO_BOLD):
332 /* FALLTHROUGH */
333 case (DECO_ITALIC):
334 /* FALLTHROUGH */
335 case (DECO_ROMAN):
336 if (norecurse)
337 break;
338 print_metaf(h, deco);
339 break;
340 default:
341 break;
342 }
343
344 p += len - 1;
345
346 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
347 nospace = 1;
348 }
349
350 return(nospace);
351 }
352
353
354 struct tag *
355 print_otag(struct html *h, enum htmltag tag,
356 int sz, const struct htmlpair *p)
357 {
358 int i;
359 struct tag *t;
360
361 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
362 t = malloc(sizeof(struct tag));
363 if (NULL == t) {
364 perror(NULL);
365 exit(EXIT_FAILURE);
366 }
367 t->tag = tag;
368 t->next = h->tags.head;
369 h->tags.head = t;
370 } else
371 t = NULL;
372
373 if ( ! (HTML_NOSPACE & h->flags))
374 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
375 putchar(' ');
376
377 printf("<%s", htmltags[tag].name);
378 for (i = 0; i < sz; i++) {
379 printf(" %s=\"", htmlattrs[p[i].key]);
380 assert(p->val);
381 (void)print_encode(h, p[i].val, 1);
382 putchar('\"');
383 }
384
385 if (HTML_AUTOCLOSE & htmltags[tag].flags)
386 switch (h->type) {
387 case (HTML_XHTML_1_0_STRICT):
388 putchar('/');
389 break;
390 default:
391 break;
392 }
393
394 putchar('>');
395
396 h->flags |= HTML_NOSPACE;
397 return(t);
398 }
399
400
401 static void
402 print_ctag(struct html *h, enum htmltag tag)
403 {
404
405 printf("</%s>", htmltags[tag].name);
406 if (HTML_CLRLINE & htmltags[tag].flags) {
407 h->flags |= HTML_NOSPACE;
408 putchar('\n');
409 }
410 }
411
412
/*
 * Print the document prologue: the XML declaration (XHTML only, see
 * print_xmltype()) followed by the DOCTYPE declaration.
 */
void
print_gen_decls(struct html *h)
{

	/* Order matters: an XML declaration must come first. */
	print_xmltype(h);
	print_doctype(h);
}
420
421
422 static void
423 print_xmltype(struct html *h)
424 {
425 const char *decl;
426
427 switch (h->type) {
428 case (HTML_XHTML_1_0_STRICT):
429 decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
430 break;
431 default:
432 decl = NULL;
433 break;
434 }
435
436 if (NULL == decl)
437 return;
438
439 printf("%s\n", decl);
440 }
441
442
443 static void
444 print_doctype(struct html *h)
445 {
446 const char *doctype;
447 const char *dtd;
448
449 switch (h->type) {
450 case (HTML_HTML_4_01_STRICT):
451 doctype = "-//W3C//DTD HTML 4.01//EN";
452 dtd = "http://www.w3.org/TR/html4/strict.dtd";
453 break;
454 default:
455 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
456 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
457 break;
458 }
459
460 printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">\n", doctype, dtd);
461 }
462
463
464 void
465 print_text(struct html *h, const char *p)
466 {
467
468 if (*p && 0 == *(p + 1))
469 switch (*p) {
470 case('.'):
471 /* FALLTHROUGH */
472 case(','):
473 /* FALLTHROUGH */
474 case(';'):
475 /* FALLTHROUGH */
476 case(':'):
477 /* FALLTHROUGH */
478 case('?'):
479 /* FALLTHROUGH */
480 case('!'):
481 /* FALLTHROUGH */
482 case(')'):
483 /* FALLTHROUGH */
484 case(']'):
485 /* FALLTHROUGH */
486 case('}'):
487 if ( ! (HTML_IGNDELIM & h->flags))
488 h->flags |= HTML_NOSPACE;
489 break;
490 default:
491 break;
492 }
493
494 if ( ! (h->flags & HTML_NOSPACE))
495 putchar(' ');
496
497 assert(p);
498 if ( ! print_encode(h, p, 0))
499 h->flags &= ~HTML_NOSPACE;
500
501 if (*p && 0 == *(p + 1))
502 switch (*p) {
503 case('('):
504 /* FALLTHROUGH */
505 case('['):
506 /* FALLTHROUGH */
507 case('{'):
508 h->flags |= HTML_NOSPACE;
509 break;
510 default:
511 break;
512 }
513 }
514
515
516 void
517 print_tagq(struct html *h, const struct tag *until)
518 {
519 struct tag *tag;
520
521 while ((tag = h->tags.head) != NULL) {
522 if (tag == h->metaf)
523 h->metaf = NULL;
524 print_ctag(h, tag->tag);
525 h->tags.head = tag->next;
526 free(tag);
527 if (until && tag == until)
528 return;
529 }
530 }
531
532
533 void
534 print_stagq(struct html *h, const struct tag *suntil)
535 {
536 struct tag *tag;
537
538 while ((tag = h->tags.head) != NULL) {
539 if (suntil && tag == suntil)
540 return;
541 if (tag == h->metaf)
542 h->metaf = NULL;
543 print_ctag(h, tag->tag);
544 h->tags.head = tag->next;
545 free(tag);
546 }
547 }
548
549
550 void
551 bufinit(struct html *h)
552 {
553
554 h->buf[0] = '\0';
555 h->buflen = 0;
556 }
557
558
/*
 * Append one CSS declaration, "key:val;", to the scratch buffer.
 * Neither string is escaped or validated.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
568
569
/*
 * Append the NUL-terminated string "p" to the scratch buffer.
 * Truncation is handled by bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
576
577
578 void
579 buffmt(struct html *h, const char *fmt, ...)
580 {
581 va_list ap;
582
583 va_start(ap, fmt);
584 (void)vsnprintf(h->buf + (int)h->buflen,
585 BUFSIZ - h->buflen - 1, fmt, ap);
586 va_end(ap);
587 h->buflen = strlen(h->buf);
588 }
589
590
591 void
592 bufncat(struct html *h, const char *p, size_t sz)
593 {
594
595 if (h->buflen + sz > BUFSIZ - 1)
596 sz = BUFSIZ - 1 - h->buflen;
597
598 (void)strncat(h->buf, p, sz);
599 h->buflen += sz;
600 }
601
602
603 void
604 buffmt_includes(struct html *h, const char *name)
605 {
606 const char *p, *pp;
607
608 pp = h->base_includes;
609
610 while (NULL != (p = strchr(pp, '%'))) {
611 bufncat(h, pp, (size_t)(p - pp));
612 switch (*(p + 1)) {
613 case('I'):
614 bufcat(h, name);
615 break;
616 default:
617 bufncat(h, p, 2);
618 break;
619 }
620 pp = p + 2;
621 }
622 if (pp)
623 bufcat(h, pp);
624 }
625
626
627 void
628 buffmt_man(struct html *h,
629 const char *name, const char *sec)
630 {
631 const char *p, *pp;
632
633 pp = h->base_man;
634
635 /* LINTED */
636 while (NULL != (p = strchr(pp, '%'))) {
637 bufncat(h, pp, (size_t)(p - pp));
638 switch (*(p + 1)) {
639 case('S'):
640 bufcat(h, sec ? sec : "1");
641 break;
642 case('N'):
643 buffmt(h, name);
644 break;
645 default:
646 bufncat(h, p, 2);
647 break;
648 }
649 pp = p + 2;
650 }
651 if (pp)
652 bufcat(h, pp);
653 }
654
655
656 void
657 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
658 {
659 double v;
660 const char *u;
661
662 v = su->scale;
663
664 switch (su->unit) {
665 case (SCALE_CM):
666 u = "cm";
667 break;
668 case (SCALE_IN):
669 u = "in";
670 break;
671 case (SCALE_PC):
672 u = "pc";
673 break;
674 case (SCALE_PT):
675 u = "pt";
676 break;
677 case (SCALE_EM):
678 u = "em";
679 break;
680 case (SCALE_MM):
681 if (0 == (v /= 100))
682 v = 1;
683 u = "em";
684 break;
685 case (SCALE_EN):
686 u = "ex";
687 break;
688 case (SCALE_BU):
689 u = "ex";
690 break;
691 case (SCALE_VS):
692 u = "em";
693 break;
694 default:
695 u = "ex";
696 break;
697 }
698
699 if (su->pt)
700 buffmt(h, "%s: %f%s;", p, v, u);
701 else
702 /* LINTED */
703 buffmt(h, "%s: %d%s;", p, (int)v, u);
704 }
705
706
/*
 * Append an identifier-safe encoding of "src" to the NUL-terminated
 * string already in "dst": the letter 'x' followed by two lower-case
 * hexadecimal digits per byte of "src".
 *
 * "sz" is the total size of the "dst" buffer in bytes.  There must be
 * room for at least the 'x', one more byte and the terminator after
 * the existing contents (asserted).  If "src" does not fit, encoding
 * stops at the last byte that fits as a complete digit pair; "dst" is
 * always left NUL-terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/*
	 * Each byte needs two digits plus the terminator.  Go through
	 * unsigned char so that bytes >= 0x80 do not sign-extend into
	 * eight hex digits where plain char is signed.
	 */
	for ( ; *src != '\0' && sz > 2; src++) {
		(void)snprintf(dst, (size_t)sz, "%.2x",
				(unsigned int)(unsigned char)*src);
		sz -= 2;
		dst += 2;
	}
}