]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Fixed \c support for all input and output modes (documented in mandoc_char.7).
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.84 2009/11/12 08:21:05 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <sys/types.h>
18
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27
28 #include "out.h"
29 #include "chars.h"
30 #include "html.h"
31 #include "main.h"
32
33 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
34
35 #define DOCTYPE "-//W3C//DTD HTML 4.01//EN"
36 #define DTD "http://www.w3.org/TR/html4/strict.dtd"
37
/*
 * Layout flags for an HTML element (see struct htmldata).
 * HTML_CLRLINE: print_otag() emits no leading space before the tag
 * and print_ctag() follows the closing tag with a newline.
 * HTML_NOSTACK: print_otag() does not push the tag onto the
 * open-tag stack, so no closing tag is ever printed for it.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)

/*
 * Static description of one HTML element.
 */
struct	htmldata {
	const char	 *name;		/* element name as written to output */
	int		  flags;	/* bitwise OR of the HTML_* flags above */
};
44
45 static const struct htmldata htmltags[TAG_MAX] = {
46 {"html", HTML_CLRLINE}, /* TAG_HTML */
47 {"head", HTML_CLRLINE}, /* TAG_HEAD */
48 {"body", HTML_CLRLINE}, /* TAG_BODY */
49 {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
50 {"title", HTML_CLRLINE}, /* TAG_TITLE */
51 {"div", HTML_CLRLINE}, /* TAG_DIV */
52 {"h1", 0}, /* TAG_H1 */
53 {"h2", 0}, /* TAG_H2 */
54 {"p", HTML_CLRLINE}, /* TAG_P */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61 {"tr", HTML_CLRLINE}, /* TAG_TR */
62 {"td", HTML_CLRLINE}, /* TAG_TD */
63 {"li", HTML_CLRLINE}, /* TAG_LI */
64 {"ul", HTML_CLRLINE}, /* TAG_UL */
65 {"ol", HTML_CLRLINE}, /* TAG_OL */
66 {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
67 };
68
/*
 * Attribute names as written to the output.  print_otag() indexes this
 * table with the "key" member of each struct htmlpair it is given, so
 * the order here presumably mirrors the ATTR_* enumeration declared
 * elsewhere (NOTE(review): confirm against the header before
 * reordering or inserting entries).
 */
static const char *const htmlattrs[ATTR_MAX] = {
	"http-equiv",
	"content",
	"name",
	"rel",
	"href",
	"type",
	"media",
	"class",
	"style",
	"width",
	"valign",
	"target",
	"id",
	"summary",
};
85
86 #ifdef __linux__
87 extern int getsubopt(char **, char * const *, char **);
88 #endif
89
90
91 static void print_spec(struct html *, const char *, size_t);
92 static void print_res(struct html *, const char *, size_t);
93 static void print_ctag(struct html *, enum htmltag);
94 static void print_encode(struct html *, const char *);
95
96
97 void *
98 html_alloc(char *outopts)
99 {
100 struct html *h;
101 const char *toks[4];
102 char *v;
103
104 toks[0] = "style";
105 toks[1] = "man";
106 toks[2] = "includes";
107 toks[3] = NULL;
108
109 h = calloc(1, sizeof(struct html));
110 if (NULL == h) {
111 perror(NULL);
112 exit(EXIT_FAILURE);
113 }
114
115 h->tags.head = NULL;
116 h->ords.head = NULL;
117 h->symtab = chars_init(CHARS_HTML);
118
119 while (outopts && *outopts)
120 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
121 case (0):
122 h->style = v;
123 break;
124 case (1):
125 h->base_man = v;
126 break;
127 case (2):
128 h->base_includes = v;
129 break;
130 default:
131 break;
132 }
133
134 return(h);
135 }
136
137
138 void
139 html_free(void *p)
140 {
141 struct tag *tag;
142 struct ord *ord;
143 struct html *h;
144
145 h = (struct html *)p;
146
147 while ((ord = h->ords.head) != NULL) {
148 h->ords.head = ord->next;
149 free(ord);
150 }
151
152 while ((tag = h->tags.head) != NULL) {
153 h->tags.head = tag->next;
154 free(tag);
155 }
156
157 if (h->symtab)
158 chars_free(h->symtab);
159
160 free(h);
161 }
162
163
164 void
165 print_gen_head(struct html *h)
166 {
167 struct htmlpair tag[4];
168
169 tag[0].key = ATTR_HTTPEQUIV;
170 tag[0].val = "Content-Type";
171 tag[1].key = ATTR_CONTENT;
172 tag[1].val = "text/html; charset=utf-8";
173 print_otag(h, TAG_META, 2, tag);
174
175 tag[0].key = ATTR_NAME;
176 tag[0].val = "resource-type";
177 tag[1].key = ATTR_CONTENT;
178 tag[1].val = "document";
179 print_otag(h, TAG_META, 2, tag);
180
181 if (h->style) {
182 tag[0].key = ATTR_REL;
183 tag[0].val = "stylesheet";
184 tag[1].key = ATTR_HREF;
185 tag[1].val = h->style;
186 tag[2].key = ATTR_TYPE;
187 tag[2].val = "text/css";
188 tag[3].key = ATTR_MEDIA;
189 tag[3].val = "all";
190 print_otag(h, TAG_LINK, 4, tag);
191 }
192 }
193
194
195 static void
196 print_spec(struct html *h, const char *p, size_t len)
197 {
198 const char *rhs;
199 size_t sz;
200
201 rhs = chars_a2ascii(h->symtab, p, len, &sz);
202
203 if (NULL == rhs)
204 return;
205 fwrite(rhs, 1, sz, stdout);
206 }
207
208
209 static void
210 print_res(struct html *h, const char *p, size_t len)
211 {
212 const char *rhs;
213 size_t sz;
214
215 rhs = chars_a2res(h->symtab, p, len, &sz);
216
217 if (NULL == rhs)
218 return;
219 fwrite(rhs, 1, sz, stdout);
220 }
221
222
223 static void
224 print_encode(struct html *h, const char *p)
225 {
226 size_t sz;
227 int len;
228 const char *seq;
229 enum roffdeco deco;
230
231 for (; *p; p++) {
232 sz = strcspn(p, "\\<>&");
233
234 fwrite(p, 1, sz, stdout);
235 p += /* LINTED */
236 sz;
237
238 if ('<' == *p) {
239 printf("&lt;");
240 continue;
241 } else if ('>' == *p) {
242 printf("&gt;");
243 continue;
244 } else if ('&' == *p) {
245 printf("&amp;");
246 continue;
247 } else if ('\0' == *p)
248 break;
249
250 seq = ++p;
251 len = a2roffdeco(&deco, &seq, &sz);
252
253 switch (deco) {
254 case (DECO_RESERVED):
255 print_res(h, seq, sz);
256 break;
257 case (DECO_SPECIAL):
258 print_spec(h, seq, sz);
259 break;
260 default:
261 break;
262 }
263
264 p += len - 1;
265
266 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
267 h->flags |= HTML_NOSPACE;
268 }
269 }
270
271
272 struct tag *
273 print_otag(struct html *h, enum htmltag tag,
274 int sz, const struct htmlpair *p)
275 {
276 int i;
277 struct tag *t;
278
279 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
280 t = malloc(sizeof(struct tag));
281 if (NULL == t) {
282 perror(NULL);
283 exit(EXIT_FAILURE);
284 }
285 t->tag = tag;
286 t->next = h->tags.head;
287 h->tags.head = t;
288 } else
289 t = NULL;
290
291 if ( ! (HTML_NOSPACE & h->flags))
292 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
293 putchar(' ');
294
295 printf("<%s", htmltags[tag].name);
296 for (i = 0; i < sz; i++) {
297 printf(" %s=\"", htmlattrs[p[i].key]);
298 assert(p->val);
299 print_encode(h, p[i].val);
300 putchar('\"');
301 }
302 putchar('>');
303
304 h->flags |= HTML_NOSPACE;
305 if (HTML_CLRLINE & htmltags[tag].flags)
306 h->flags |= HTML_NEWLINE;
307 else
308 h->flags &= ~HTML_NEWLINE;
309
310 return(t);
311 }
312
313
314 /* ARGSUSED */
315 static void
316 print_ctag(struct html *h, enum htmltag tag)
317 {
318
319 printf("</%s>", htmltags[tag].name);
320 if (HTML_CLRLINE & htmltags[tag].flags) {
321 h->flags |= HTML_NOSPACE;
322 h->flags |= HTML_NEWLINE;
323 putchar('\n');
324 } else
325 h->flags &= ~HTML_NEWLINE;
326 }
327
328
329 /* ARGSUSED */
330 void
331 print_gen_doctype(struct html *h)
332 {
333
334 printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
335 }
336
337
338 void
339 print_text(struct html *h, const char *p)
340 {
341
342 if (*p && 0 == *(p + 1))
343 switch (*p) {
344 case('.'):
345 /* FALLTHROUGH */
346 case(','):
347 /* FALLTHROUGH */
348 case(';'):
349 /* FALLTHROUGH */
350 case(':'):
351 /* FALLTHROUGH */
352 case('?'):
353 /* FALLTHROUGH */
354 case('!'):
355 /* FALLTHROUGH */
356 case(')'):
357 /* FALLTHROUGH */
358 case(']'):
359 /* FALLTHROUGH */
360 case('}'):
361 if ( ! (HTML_IGNDELIM & h->flags))
362 h->flags |= HTML_NOSPACE;
363 break;
364 default:
365 break;
366 }
367
368 if ( ! (h->flags & HTML_NOSPACE))
369 putchar(' ');
370
371 h->flags &= ~HTML_NOSPACE;
372 h->flags &= ~HTML_NEWLINE;
373
374 if (p)
375 print_encode(h, p);
376
377 if (*p && 0 == *(p + 1))
378 switch (*p) {
379 case('('):
380 /* FALLTHROUGH */
381 case('['):
382 /* FALLTHROUGH */
383 case('{'):
384 h->flags |= HTML_NOSPACE;
385 break;
386 default:
387 break;
388 }
389 }
390
391
392 void
393 print_tagq(struct html *h, const struct tag *until)
394 {
395 struct tag *tag;
396
397 while ((tag = h->tags.head) != NULL) {
398 print_ctag(h, tag->tag);
399 h->tags.head = tag->next;
400 free(tag);
401 if (until && tag == until)
402 return;
403 }
404 }
405
406
407 void
408 print_stagq(struct html *h, const struct tag *suntil)
409 {
410 struct tag *tag;
411
412 while ((tag = h->tags.head) != NULL) {
413 if (suntil && tag == suntil)
414 return;
415 print_ctag(h, tag->tag);
416 h->tags.head = tag->next;
417 free(tag);
418 }
419 }
420
421
422 void
423 bufinit(struct html *h)
424 {
425
426 h->buf[0] = '\0';
427 h->buflen = 0;
428 }
429
430
/*
 * Append "key:val;" to the scratch buffer in "h".
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
440
441
/*
 * Append the NUL-terminated string "p" to the scratch buffer in "h"
 * by handing its full length to bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
448
449
450 void
451 buffmt(struct html *h, const char *fmt, ...)
452 {
453 va_list ap;
454
455 va_start(ap, fmt);
456 (void)vsnprintf(h->buf + (int)h->buflen,
457 BUFSIZ - h->buflen - 1, fmt, ap);
458 va_end(ap);
459 h->buflen = strlen(h->buf);
460 }
461
462
463 void
464 bufncat(struct html *h, const char *p, size_t sz)
465 {
466
467 if (h->buflen + sz > BUFSIZ - 1)
468 sz = BUFSIZ - 1 - h->buflen;
469
470 (void)strncat(h->buf, p, sz);
471 h->buflen += sz;
472 }
473
474
475 void
476 buffmt_includes(struct html *h, const char *name)
477 {
478 const char *p, *pp;
479
480 pp = h->base_includes;
481
482 while (NULL != (p = strchr(pp, '%'))) {
483 bufncat(h, pp, (size_t)(p - pp));
484 switch (*(p + 1)) {
485 case('I'):
486 bufcat(h, name);
487 break;
488 default:
489 bufncat(h, p, 2);
490 break;
491 }
492 pp = p + 2;
493 }
494 if (pp)
495 bufcat(h, pp);
496 }
497
498
499 void
500 buffmt_man(struct html *h,
501 const char *name, const char *sec)
502 {
503 const char *p, *pp;
504
505 pp = h->base_man;
506
507 /* LINTED */
508 while (NULL != (p = strchr(pp, '%'))) {
509 bufncat(h, pp, (size_t)(p - pp));
510 switch (*(p + 1)) {
511 case('S'):
512 bufcat(h, sec ? sec : "1");
513 break;
514 case('N'):
515 buffmt(h, name);
516 break;
517 default:
518 bufncat(h, p, 2);
519 break;
520 }
521 pp = p + 2;
522 }
523 if (pp)
524 bufcat(h, pp);
525 }
526
527
528 void
529 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
530 {
531 double v;
532 const char *u;
533
534 v = su->scale;
535
536 switch (su->unit) {
537 case (SCALE_CM):
538 u = "cm";
539 break;
540 case (SCALE_IN):
541 u = "in";
542 break;
543 case (SCALE_PC):
544 u = "pc";
545 break;
546 case (SCALE_PT):
547 u = "pt";
548 break;
549 case (SCALE_EM):
550 u = "em";
551 break;
552 case (SCALE_MM):
553 if (0 == (v /= 100))
554 v = 1;
555 u = "em";
556 break;
557 case (SCALE_EN):
558 u = "ex";
559 break;
560 case (SCALE_BU):
561 u = "ex";
562 break;
563 case (SCALE_VS):
564 u = "em";
565 break;
566 default:
567 u = "ex";
568 break;
569 }
570
571 if (su->pt)
572 buffmt(h, "%s: %f%s;", p, v, u);
573 else
574 /* LINTED */
575 buffmt(h, "%s: %d%s;", p, (int)v, u);
576 }
577
578
/*
 * Append an identifier derived from "src" to the NUL-terminated string
 * in "dst", whose total capacity is "sz" bytes.  The appended text is
 * the letter 'x' followed by two lower-case hexadecimal digits for each
 * byte of "src".
 *
 * "sz" must be nonzero and at least three bytes must remain after the
 * existing content of "dst" (both are asserted).  If the buffer is too
 * small for all of "src", the output is cut at a whole-byte boundary,
 * so no half-written digit pair is ever produced; "dst" is always left
 * NUL-terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/* Each byte needs two digits plus room for the terminator. */

	for ( ; *src != '\0' && sz > 2; src++) {
		/*
		 * Go through unsigned char: a negative plain char would
		 * otherwise be sign-extended and print as eight digits.
		 */
		(void)snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		dst += 2;
		sz -= 2;
	}
}