Protected example.style.css from cascading styles.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.83 2009/11/10 16:32:00 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <sys/types.h>
18
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27
28 #include "out.h"
29 #include "chars.h"
30 #include "html.h"
31 #include "main.h"
32
/*
 * Drop the const qualifier from a pointer by round-tripping it through
 * uintptr_t.  Used in html_alloc() to hand the const token table to
 * getsubopt().
 */
#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))

/* The two quoted strings printed by print_gen_doctype() in the DOCTYPE line. */
#define DOCTYPE "-//W3C//DTD HTML 4.01//EN"
#define DTD "http://www.w3.org/TR/html4/strict.dtd"
37
/*
 * Static description of one HTML element, as stored in htmltags[]:
 * the element name and a set of layout/stacking flags.
 */
struct htmldata {
	const char *name; /* element name printed inside <...> and </...> */
	int flags; /* bitwise OR of the HTML_* flags below */
	/*
	 * HTML_CLRLINE: print_otag() emits no leading space and sets
	 * HTML_NEWLINE; print_ctag() follows the closing tag with '\n'.
	 * HTML_NOSTACK: print_otag() does not push the element onto the
	 * open-tag stack (it returns NULL for such elements).
	 */
#define HTML_CLRLINE (1 << 0)
#define HTML_NOSTACK (1 << 1)
};
44
/*
 * Element table, indexed by enum htmltag (see print_otag() and
 * print_ctag()).  The order of the rows must match the order of the
 * enumerators; the trailing comment on each row names the enumerator
 * it is expected to correspond to.
 */
static const struct htmldata htmltags[TAG_MAX] = {
	{"html", HTML_CLRLINE}, /* TAG_HTML */
	{"head", HTML_CLRLINE}, /* TAG_HEAD */
	{"body", HTML_CLRLINE}, /* TAG_BODY */
	{"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
	{"title", HTML_CLRLINE}, /* TAG_TITLE */
	{"div", HTML_CLRLINE}, /* TAG_DIV */
	{"h1", 0}, /* TAG_H1 */
	{"h2", 0}, /* TAG_H2 */
	{"p", HTML_CLRLINE}, /* TAG_P */
	{"span", 0}, /* TAG_SPAN */
	{"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
	{"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BR -- NOTE(review): was mislabelled TAG_LINK; confirm the enumerator name in html.h */
	{"a", 0}, /* TAG_A */
	{"table", HTML_CLRLINE}, /* TAG_TABLE */
	{"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
	{"tr", HTML_CLRLINE}, /* TAG_TR */
	{"td", HTML_CLRLINE}, /* TAG_TD */
	{"li", HTML_CLRLINE}, /* TAG_LI */
	{"ul", HTML_CLRLINE}, /* TAG_UL */
	{"ol", HTML_CLRLINE}, /* TAG_OL */
	{"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
};
68
/*
 * Attribute names, indexed by the "key" member of struct htmlpair
 * (see the attribute loop in print_otag()).  The row order presumably
 * mirrors the ATTR_* enumerators declared elsewhere -- confirm against
 * html.h before adding or reordering entries.
 */
static const char *const htmlattrs[ATTR_MAX] = {
	"http-equiv",
	"content",
	"name",
	"rel",
	"href",
	"type",
	"media",
	"class",
	"style",
	"width",
	"valign",
	"target",
	"id",
	"summary",
};
85
/*
 * Explicit declaration of getsubopt() for Linux builds.
 * NOTE(review): presumably needed because <stdlib.h> does not expose it
 * under the feature-test macros this file is compiled with -- confirm.
 */
#ifdef __linux__
extern int getsubopt(char **, char * const *, char **);
#endif


/* File-local helpers; definitions follow below. */
static void print_spec(struct html *, const char *, size_t);
static void print_res(struct html *, const char *, size_t);
static void print_ctag(struct html *, enum htmltag);
static void print_encode(struct html *, const char *);
95
96
97 void *
98 html_alloc(char *outopts)
99 {
100 struct html *h;
101 const char *toks[4];
102 char *v;
103
104 toks[0] = "style";
105 toks[1] = "man";
106 toks[2] = "includes";
107 toks[3] = NULL;
108
109 h = calloc(1, sizeof(struct html));
110 if (NULL == h) {
111 perror(NULL);
112 exit(EXIT_FAILURE);
113 }
114
115 h->tags.head = NULL;
116 h->ords.head = NULL;
117 h->symtab = chars_init(CHARS_HTML);
118
119 while (outopts && *outopts)
120 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
121 case (0):
122 h->style = v;
123 break;
124 case (1):
125 h->base_man = v;
126 break;
127 case (2):
128 h->base_includes = v;
129 break;
130 default:
131 break;
132 }
133
134 return(h);
135 }
136
137
138 void
139 html_free(void *p)
140 {
141 struct tag *tag;
142 struct ord *ord;
143 struct html *h;
144
145 h = (struct html *)p;
146
147 while ((ord = h->ords.head) != NULL) {
148 h->ords.head = ord->next;
149 free(ord);
150 }
151
152 while ((tag = h->tags.head) != NULL) {
153 h->tags.head = tag->next;
154 free(tag);
155 }
156
157 if (h->symtab)
158 chars_free(h->symtab);
159
160 free(h);
161 }
162
163
164 void
165 print_gen_head(struct html *h)
166 {
167 struct htmlpair tag[4];
168
169 tag[0].key = ATTR_HTTPEQUIV;
170 tag[0].val = "Content-Type";
171 tag[1].key = ATTR_CONTENT;
172 tag[1].val = "text/html; charset=utf-8";
173 print_otag(h, TAG_META, 2, tag);
174
175 tag[0].key = ATTR_NAME;
176 tag[0].val = "resource-type";
177 tag[1].key = ATTR_CONTENT;
178 tag[1].val = "document";
179 print_otag(h, TAG_META, 2, tag);
180
181 if (h->style) {
182 tag[0].key = ATTR_REL;
183 tag[0].val = "stylesheet";
184 tag[1].key = ATTR_HREF;
185 tag[1].val = h->style;
186 tag[2].key = ATTR_TYPE;
187 tag[2].val = "text/css";
188 tag[3].key = ATTR_MEDIA;
189 tag[3].val = "all";
190 print_otag(h, TAG_LINK, 4, tag);
191 }
192 }
193
194
195 static void
196 print_spec(struct html *h, const char *p, size_t len)
197 {
198 const char *rhs;
199 size_t sz;
200
201 rhs = chars_a2ascii(h->symtab, p, len, &sz);
202
203 if (NULL == rhs)
204 return;
205 fwrite(rhs, 1, sz, stdout);
206 }
207
208
209 static void
210 print_res(struct html *h, const char *p, size_t len)
211 {
212 const char *rhs;
213 size_t sz;
214
215 rhs = chars_a2res(h->symtab, p, len, &sz);
216
217 if (NULL == rhs)
218 return;
219 fwrite(rhs, 1, sz, stdout);
220 }
221
222
223 static void
224 print_encode(struct html *h, const char *p)
225 {
226 size_t sz;
227 int len;
228 const char *seq;
229 enum roffdeco deco;
230
231 for (; *p; p++) {
232 sz = strcspn(p, "\\<>&");
233
234 fwrite(p, 1, sz, stdout);
235 p += /* LINTED */
236 sz;
237
238 if ('<' == *p) {
239 printf("&lt;");
240 continue;
241 } else if ('>' == *p) {
242 printf("&gt;");
243 continue;
244 } else if ('&' == *p) {
245 printf("&amp;");
246 continue;
247 } else if ('\0' == *p)
248 break;
249
250 seq = ++p;
251 len = a2roffdeco(&deco, &seq, &sz);
252
253 switch (deco) {
254 case (DECO_RESERVED):
255 print_res(h, seq, sz);
256 break;
257 case (DECO_SPECIAL):
258 print_spec(h, seq, sz);
259 break;
260 default:
261 break;
262 }
263
264 p += len - 1;
265 }
266 }
267
268
269 struct tag *
270 print_otag(struct html *h, enum htmltag tag,
271 int sz, const struct htmlpair *p)
272 {
273 int i;
274 struct tag *t;
275
276 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
277 t = malloc(sizeof(struct tag));
278 if (NULL == t) {
279 perror(NULL);
280 exit(EXIT_FAILURE);
281 }
282 t->tag = tag;
283 t->next = h->tags.head;
284 h->tags.head = t;
285 } else
286 t = NULL;
287
288 if ( ! (HTML_NOSPACE & h->flags))
289 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
290 putchar(' ');
291
292 printf("<%s", htmltags[tag].name);
293 for (i = 0; i < sz; i++) {
294 printf(" %s=\"", htmlattrs[p[i].key]);
295 assert(p->val);
296 print_encode(h, p[i].val);
297 putchar('\"');
298 }
299 putchar('>');
300
301 h->flags |= HTML_NOSPACE;
302 if (HTML_CLRLINE & htmltags[tag].flags)
303 h->flags |= HTML_NEWLINE;
304 else
305 h->flags &= ~HTML_NEWLINE;
306
307 return(t);
308 }
309
310
311 /* ARGSUSED */
312 static void
313 print_ctag(struct html *h, enum htmltag tag)
314 {
315
316 printf("</%s>", htmltags[tag].name);
317 if (HTML_CLRLINE & htmltags[tag].flags) {
318 h->flags |= HTML_NOSPACE;
319 h->flags |= HTML_NEWLINE;
320 putchar('\n');
321 } else
322 h->flags &= ~HTML_NEWLINE;
323 }
324
325
326 /* ARGSUSED */
327 void
328 print_gen_doctype(struct html *h)
329 {
330
331 printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
332 }
333
334
335 void
336 print_text(struct html *h, const char *p)
337 {
338
339 if (*p && 0 == *(p + 1))
340 switch (*p) {
341 case('.'):
342 /* FALLTHROUGH */
343 case(','):
344 /* FALLTHROUGH */
345 case(';'):
346 /* FALLTHROUGH */
347 case(':'):
348 /* FALLTHROUGH */
349 case('?'):
350 /* FALLTHROUGH */
351 case('!'):
352 /* FALLTHROUGH */
353 case(')'):
354 /* FALLTHROUGH */
355 case(']'):
356 /* FALLTHROUGH */
357 case('}'):
358 if ( ! (HTML_IGNDELIM & h->flags))
359 h->flags |= HTML_NOSPACE;
360 break;
361 default:
362 break;
363 }
364
365 if ( ! (h->flags & HTML_NOSPACE))
366 putchar(' ');
367
368 h->flags &= ~HTML_NOSPACE;
369 h->flags &= ~HTML_NEWLINE;
370
371 if (p)
372 print_encode(h, p);
373
374 if (*p && 0 == *(p + 1))
375 switch (*p) {
376 case('('):
377 /* FALLTHROUGH */
378 case('['):
379 /* FALLTHROUGH */
380 case('{'):
381 h->flags |= HTML_NOSPACE;
382 break;
383 default:
384 break;
385 }
386 }
387
388
389 void
390 print_tagq(struct html *h, const struct tag *until)
391 {
392 struct tag *tag;
393
394 while ((tag = h->tags.head) != NULL) {
395 print_ctag(h, tag->tag);
396 h->tags.head = tag->next;
397 free(tag);
398 if (until && tag == until)
399 return;
400 }
401 }
402
403
404 void
405 print_stagq(struct html *h, const struct tag *suntil)
406 {
407 struct tag *tag;
408
409 while ((tag = h->tags.head) != NULL) {
410 if (suntil && tag == suntil)
411 return;
412 print_ctag(h, tag->tag);
413 h->tags.head = tag->next;
414 free(tag);
415 }
416 }
417
418
419 void
420 bufinit(struct html *h)
421 {
422
423 h->buf[0] = '\0';
424 h->buflen = 0;
425 }
426
427
/*
 * Append one "key:val;" declaration to the scratch buffer in "h".
 * Each separator is a one-character string, so appending it with
 * bufcat() is the same as the explicit length-1 bufncat() call.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
437
438
/* Append the whole NUL-terminated string "p" to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	const size_t len = strlen(p);

	bufncat(h, p, len);
}
445
446
447 void
448 buffmt(struct html *h, const char *fmt, ...)
449 {
450 va_list ap;
451
452 va_start(ap, fmt);
453 (void)vsnprintf(h->buf + (int)h->buflen,
454 BUFSIZ - h->buflen - 1, fmt, ap);
455 va_end(ap);
456 h->buflen = strlen(h->buf);
457 }
458
459
460 void
461 bufncat(struct html *h, const char *p, size_t sz)
462 {
463
464 if (h->buflen + sz > BUFSIZ - 1)
465 sz = BUFSIZ - 1 - h->buflen;
466
467 (void)strncat(h->buf, p, sz);
468 h->buflen += sz;
469 }
470
471
472 void
473 buffmt_includes(struct html *h, const char *name)
474 {
475 const char *p, *pp;
476
477 pp = h->base_includes;
478
479 while (NULL != (p = strchr(pp, '%'))) {
480 bufncat(h, pp, (size_t)(p - pp));
481 switch (*(p + 1)) {
482 case('I'):
483 bufcat(h, name);
484 break;
485 default:
486 bufncat(h, p, 2);
487 break;
488 }
489 pp = p + 2;
490 }
491 if (pp)
492 bufcat(h, pp);
493 }
494
495
496 void
497 buffmt_man(struct html *h,
498 const char *name, const char *sec)
499 {
500 const char *p, *pp;
501
502 pp = h->base_man;
503
504 /* LINTED */
505 while (NULL != (p = strchr(pp, '%'))) {
506 bufncat(h, pp, (size_t)(p - pp));
507 switch (*(p + 1)) {
508 case('S'):
509 bufcat(h, sec ? sec : "1");
510 break;
511 case('N'):
512 buffmt(h, name);
513 break;
514 default:
515 bufncat(h, p, 2);
516 break;
517 }
518 pp = p + 2;
519 }
520 if (pp)
521 bufcat(h, pp);
522 }
523
524
525 void
526 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
527 {
528 double v;
529 const char *u;
530
531 v = su->scale;
532
533 switch (su->unit) {
534 case (SCALE_CM):
535 u = "cm";
536 break;
537 case (SCALE_IN):
538 u = "in";
539 break;
540 case (SCALE_PC):
541 u = "pc";
542 break;
543 case (SCALE_PT):
544 u = "pt";
545 break;
546 case (SCALE_EM):
547 u = "em";
548 break;
549 case (SCALE_MM):
550 if (0 == (v /= 100))
551 v = 1;
552 u = "em";
553 break;
554 case (SCALE_EN):
555 u = "ex";
556 break;
557 case (SCALE_BU):
558 u = "ex";
559 break;
560 case (SCALE_VS):
561 u = "em";
562 break;
563 default:
564 u = "ex";
565 break;
566 }
567
568 if (su->pt)
569 buffmt(h, "%s: %f%s;", p, v, u);
570 else
571 /* LINTED */
572 buffmt(h, "%s: %d%s;", p, (int)v, u);
573 }
574
575
/*
 * Append an HTML-safe identifier derived from "src" to the string
 * already held in "dst".
 *
 * The identifier is the letter 'x' followed by two lowercase hex digits
 * for every byte of "src".  "sz" is the total size of the "dst" buffer
 * in bytes; the result is always NUL-terminated, and encoding stops
 * when there is no room left for another complete two-digit pair.
 * At least three bytes must remain free after the existing contents
 * (asserted).
 *
 * Each byte is converted through unsigned char before formatting:
 * passing a plain (possibly signed) char to "%x" sign-extends bytes
 * >= 0x80 and prints them as "ffffffXX".
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int ssz;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/* Need room for two digits plus the terminator each time. */
	for ( ; *src != '\0' && sz > 2; src++) {
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		if (ssz != 2)
			break;
		sz -= ssz;
		dst += ssz;
	}
}