]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Accent marks sync'd with current groff.
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.51 2009/09/21 14:56:56 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <sys/types.h>
18 #include <sys/queue.h>
19
20 #include <assert.h>
21 #include <err.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26
27 #include "chars.h"
28 #include "html.h"
29
/*
 * Public identifier and DTD location written into the <!DOCTYPE>
 * declaration by print_gen_doctype().
 */
#define	DOCTYPE		"-//W3C//DTD HTML 4.01//EN"
#define	DTD		"http://www.w3.org/TR/html4/strict.dtd"
32
/*
 * Static description of one HTML element: the name printed between
 * the angle brackets and a bitmask of output properties.
 */
struct	htmldata {
	char		 *name;		/* element name, e.g. "div" */
	int		  flags;	/* bitwise OR of the values below */
/* Tag gets no leading space; HTML_NEWLINE is set after printing it. */
#define	HTML_CLRLINE	 (1 << 0)
/* print_otag() does not push the tag onto the open-tag stack. */
#define	HTML_NOSTACK	 (1 << 1)
};
39
40 static const struct htmldata htmltags[TAG_MAX] = {
41 {"html", HTML_CLRLINE}, /* TAG_HTML */
42 {"head", HTML_CLRLINE}, /* TAG_HEAD */
43 {"body", HTML_CLRLINE}, /* TAG_BODY */
44 {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
45 {"title", HTML_CLRLINE}, /* TAG_TITLE */
46 {"div", HTML_CLRLINE}, /* TAG_DIV */
47 {"h1", 0}, /* TAG_H1 */
48 {"h2", 0}, /* TAG_H2 */
49 {"p", HTML_CLRLINE}, /* TAG_P */
50 {"span", 0}, /* TAG_SPAN */
51 {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
52 {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
53 {"a", 0}, /* TAG_A */
54 {"table", HTML_CLRLINE}, /* TAG_TABLE */
55 {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
56 {"tr", HTML_CLRLINE}, /* TAG_TR */
57 {"td", HTML_CLRLINE}, /* TAG_TD */
58 {"li", HTML_CLRLINE}, /* TAG_LI */
59 {"ul", HTML_CLRLINE}, /* TAG_UL */
60 {"ol", HTML_CLRLINE}, /* TAG_OL */
61 {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
62 };
63
64 static const char *const htmlattrs[ATTR_MAX] = {
65 "http-equiv",
66 "content",
67 "name",
68 "rel",
69 "href",
70 "type",
71 "media",
72 "class",
73 "style",
74 "width",
75 "valign",
76 };
77
/*
 * Explicit prototype for getsubopt(3) on Linux, where this file does
 * not rely on the system headers to declare it.
 */
#ifdef __linux__
extern	int	getsubopt(char **optionp, char * const *tokens,
			char **valuep);
#endif
81
82 void *
83 html_alloc(char *outopts)
84 {
85 struct html *h;
86 char *toks[3], *v;
87
88 toks[0] = "style";
89 toks[1] = "base";
90 toks[2] = NULL;
91
92 if (NULL == (h = calloc(1, sizeof(struct html))))
93 return(NULL);
94
95 SLIST_INIT(&h->tags);
96 SLIST_INIT(&h->ords);
97
98 if (NULL == (h->symtab = chars_init(CHARS_HTML))) {
99 free(h);
100 return(NULL);
101 }
102
103 while (outopts && *outopts)
104 switch (getsubopt(&outopts, toks, &v)) {
105 case (0):
106 h->style = v;
107 break;
108 case (1):
109 h->base = v;
110 break;
111 default:
112 break;
113 }
114
115 return(h);
116 }
117
118
119 void
120 html_free(void *p)
121 {
122 struct tag *tag;
123 struct ord *ord;
124 struct html *h;
125
126 h = (struct html *)p;
127
128 while ( ! SLIST_EMPTY(&h->ords)) {
129 ord = SLIST_FIRST(&h->ords);
130 SLIST_REMOVE_HEAD(&h->ords, entry);
131 free(ord);
132 }
133
134 while ( ! SLIST_EMPTY(&h->tags)) {
135 tag = SLIST_FIRST(&h->tags);
136 SLIST_REMOVE_HEAD(&h->tags, entry);
137 free(tag);
138 }
139
140 if (h->symtab)
141 chars_free(h->symtab);
142 free(h);
143 }
144
145
146 void
147 print_gen_head(struct html *h)
148 {
149 struct htmlpair tag[4];
150
151 tag[0].key = ATTR_HTTPEQUIV;
152 tag[0].val = "Content-Type";
153 tag[1].key = ATTR_CONTENT;
154 tag[1].val = "text/html; charset=utf-8";
155 print_otag(h, TAG_META, 2, tag);
156
157 tag[0].key = ATTR_NAME;
158 tag[0].val = "resource-type";
159 tag[1].key = ATTR_CONTENT;
160 tag[1].val = "document";
161 print_otag(h, TAG_META, 2, tag);
162
163 if (h->style) {
164 tag[0].key = ATTR_REL;
165 tag[0].val = "stylesheet";
166 tag[1].key = ATTR_HREF;
167 tag[1].val = h->style;
168 tag[2].key = ATTR_TYPE;
169 tag[2].val = "text/css";
170 tag[3].key = ATTR_MEDIA;
171 tag[3].val = "all";
172 print_otag(h, TAG_LINK, 4, tag);
173 }
174
175 if (h->base) {
176 tag[0].key = ATTR_HREF;
177 tag[1].val = h->base;
178 print_otag(h, TAG_BASE, 1, tag);
179 }
180 }
181
182
183 static void
184 print_spec(struct html *h, const char *p, int len)
185 {
186 const char *rhs;
187 int i;
188 size_t sz;
189
190 rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
191
192 if (NULL == rhs)
193 return;
194 for (i = 0; i < (int)sz; i++)
195 putchar(rhs[i]);
196 }
197
198
199 static void
200 print_res(struct html *h, const char *p, int len)
201 {
202 const char *rhs;
203 int i;
204 size_t sz;
205
206 rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
207
208 if (NULL == rhs)
209 return;
210 for (i = 0; i < (int)sz; i++)
211 putchar(rhs[i]);
212 }
213
214
/*
 * Handle one backslash escape sequence.
 *
 * On entry *p points at the backslash.  On return *p points at the
 * last character consumed by the sequence, or at the terminating NUL
 * if the sequence was cut short by the end of the string.
 *
 * Recognised forms (after the backslash):
 *   (xx      two-character special character
 *   [name]   named special character
 *   *x       one-character reserved word
 *   *(xx     two-character reserved word
 *   *[name]  named reserved word
 *   fX       font escape; the letter is consumed, nothing is printed
 *   x        anything else: one-character special character
 */
static void
print_escape(struct html *h, const char **p)
{
	const char	*cp;
	int		 len, reserved;

	cp = *p + 1;
	reserved = 0;

	switch (*cp) {
	case ('\0'):
		*p = cp;
		return;
	case ('('):
		cp++;
		if ('\0' == cp[0]) {
			*p = cp;
			return;
		}
		if ('\0' == cp[1]) {
			*p = cp + 1;
			return;
		}
		print_spec(h, cp, 2);
		*p = cp + 1;
		return;
	case ('*'):
		cp++;
		if ('\0' == *cp) {
			*p = cp;
			return;
		}
		if ('(' == *cp) {
			cp++;
			if ('\0' == cp[0]) {
				*p = cp;
				return;
			}
			if ('\0' == cp[1]) {
				*p = cp + 1;
				return;
			}
			print_res(h, cp, 2);
			*p = cp + 1;
			return;
		}
		if ('[' != *cp) {
			print_res(h, cp, 1);
			*p = cp;
			return;
		}
		/* "*[": bracketed reserved word, scanned below. */
		reserved = 1;
		break;
	case ('f'):
		/*
		 * Font selection (B, I, P, R, ...) is not implemented
		 * yet (TODO): swallow the selector, or stop at the NUL.
		 */
		*p = cp + 1;
		return;
	case ('['):
		break;
	default:
		print_spec(h, cp, 1);
		*p = cp;
		return;
	}

	/* Here cp rests on '['; measure the name up to the ']'. */

	cp++;
	for (len = 0; '\0' != cp[len] && ']' != cp[len]; len++)
		/* Loop... */ ;

	if ('\0' == cp[len]) {
		/* Unterminated bracket: print nothing. */
		*p = cp + len;
		return;
	}

	if (reserved)
		print_res(h, cp, len);
	else
		print_spec(h, cp, len);

	*p = cp + len;
}
313
314
/*
 * Write the string `p' to standard output, expanding backslash escape
 * sequences via print_escape() and replacing the HTML metacharacters
 * '<', '>' and '&' with their entity references.
 *
 * NOTE(review): double quotes are passed through unchanged although
 * print_otag() uses this function for attribute values; confirm that
 * callers never pass values containing '"'.
 */
static void
print_encode(struct html *h, const char *p)
{

	for (; *p; p++) {
		if ('\\' == *p) {
			print_escape(h, &p);
			/*
			 * A truncated escape (e.g. a trailing
			 * backslash) leaves the cursor on the NUL
			 * terminator.  Stop here: the loop increment
			 * would otherwise step past the end of the
			 * string.
			 */
			if ('\0' == *p)
				break;
			continue;
		}
		switch (*p) {
		case ('<'):
			printf("&lt;");
			break;
		case ('>'):
			printf("&gt;");
			break;
		case ('&'):
			printf("&amp;");
			break;
		default:
			putchar(*p);
			break;
		}
	}
}
340
341
342 struct tag *
343 print_otag(struct html *h, enum htmltag tag,
344 int sz, const struct htmlpair *p)
345 {
346 int i;
347 struct tag *t;
348
349 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
350 if (NULL == (t = malloc(sizeof(struct tag))))
351 err(EXIT_FAILURE, "malloc");
352 t->tag = tag;
353 SLIST_INSERT_HEAD(&h->tags, t, entry);
354 } else
355 t = NULL;
356
357 if ( ! (HTML_NOSPACE & h->flags))
358 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
359 printf(" ");
360
361 printf("<%s", htmltags[tag].name);
362 for (i = 0; i < sz; i++) {
363 printf(" %s=\"", htmlattrs[p[i].key]);
364 assert(p->val);
365 print_encode(h, p[i].val);
366 printf("\"");
367 }
368 printf(">");
369
370 h->flags |= HTML_NOSPACE;
371 if (HTML_CLRLINE & htmltags[tag].flags)
372 h->flags |= HTML_NEWLINE;
373 else
374 h->flags &= ~HTML_NEWLINE;
375
376 return(t);
377 }
378
379
380 /* ARGSUSED */
381 static void
382 print_ctag(struct html *h, enum htmltag tag)
383 {
384
385 printf("</%s>", htmltags[tag].name);
386 if (HTML_CLRLINE & htmltags[tag].flags)
387 h->flags |= HTML_NOSPACE;
388 if (HTML_CLRLINE & htmltags[tag].flags)
389 h->flags |= HTML_NEWLINE;
390 else
391 h->flags &= ~HTML_NEWLINE;
392 }
393
394
395 /* ARGSUSED */
396 void
397 print_gen_doctype(struct html *h)
398 {
399
400 printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
401 }
402
403
404 void
405 print_text(struct html *h, const char *p)
406 {
407
408 if (*p && 0 == *(p + 1))
409 switch (*p) {
410 case('.'):
411 /* FALLTHROUGH */
412 case(','):
413 /* FALLTHROUGH */
414 case(';'):
415 /* FALLTHROUGH */
416 case(':'):
417 /* FALLTHROUGH */
418 case('?'):
419 /* FALLTHROUGH */
420 case('!'):
421 /* FALLTHROUGH */
422 case(')'):
423 /* FALLTHROUGH */
424 case(']'):
425 /* FALLTHROUGH */
426 case('}'):
427 h->flags |= HTML_NOSPACE;
428 break;
429 default:
430 break;
431 }
432
433 if ( ! (h->flags & HTML_NOSPACE))
434 printf(" ");
435
436 h->flags &= ~HTML_NOSPACE;
437 h->flags &= ~HTML_NEWLINE;
438
439 if (p)
440 print_encode(h, p);
441
442 if (*p && 0 == *(p + 1))
443 switch (*p) {
444 case('('):
445 /* FALLTHROUGH */
446 case('['):
447 /* FALLTHROUGH */
448 case('{'):
449 h->flags |= HTML_NOSPACE;
450 break;
451 default:
452 break;
453 }
454 }
455
456
457 void
458 print_tagq(struct html *h, const struct tag *until)
459 {
460 struct tag *tag;
461
462 while ( ! SLIST_EMPTY(&h->tags)) {
463 tag = SLIST_FIRST(&h->tags);
464 print_ctag(h, tag->tag);
465 SLIST_REMOVE_HEAD(&h->tags, entry);
466 free(tag);
467 if (until && tag == until)
468 return;
469 }
470 }
471
472
473 void
474 print_stagq(struct html *h, const struct tag *suntil)
475 {
476 struct tag *tag;
477
478 while ( ! SLIST_EMPTY(&h->tags)) {
479 tag = SLIST_FIRST(&h->tags);
480 if (suntil && tag == suntil)
481 return;
482 print_ctag(h, tag->tag);
483 SLIST_REMOVE_HEAD(&h->tags, entry);
484 free(tag);
485 }
486 }