]> git.cameronkatri.com Git - mandoc.git/blob - html.c
While the HTML standard allows multiple <h1> elements in the same
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.278 2022/07/06 14:34:59 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Common functions for mandoc(1) HTML formatters.
20 * For use by individual formatters and by the main program.
21 */
22 #include "config.h"
23
24 #include <sys/types.h>
25 #include <sys/stat.h>
26
27 #include <assert.h>
28 #include <ctype.h>
29 #include <stdarg.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "mandoc_aux.h"
38 #include "mandoc_ohash.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "out.h"
42 #include "html.h"
43 #include "manconf.h"
44 #include "main.h"
45
/*
 * Formatting flags of HTML element types.
 * They are combined with bitwise OR in the "flags" member
 * of struct htmldata.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Convenience combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/*
 * Static description of one HTML element type.
 */
struct htmldata {
	const char	 *name;   /* Element name as printed inside tags. */
	int		  flags;  /* Bitwise OR of the HTML_* flags above. */
};
62
/*
 * Table of the HTML element types this formatter can emit:
 * the element name and its formatting flags (see struct htmldata).
 * The table has TAG_MAX entries and is indexed with tag numbers,
 * for example as htmltags[tag].flags in print_otag() and as
 * htmltags[tag->tag].name in print_ctag().
 * NOTE(review): the entries presumably have to stay in the same
 * order as the constants of enum htmltag, which is declared
 * outside this file - confirm before reordering or inserting.
 * Elements with flags 0 (the MathML ones) get no automatic line
 * breaks or indentation and are not treated as phrasing content.
 */
static const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"meta",	HTML_NOSTACK | HTML_NLALL},
	{"link",	HTML_NOSTACK | HTML_NLALL},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"title",	HTML_NLAROUND},
	{"body",	HTML_NLALL},
	{"main",	HTML_NLALL},
	{"div",		HTML_NLAROUND},
	{"section",	HTML_NLALL},
	{"nav",		HTML_NLALL},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
	{"h3",		HTML_TOPHRASE | HTML_NLAROUND},
	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
	{"hr",		HTML_INPHRASE | HTML_NOSTACK},
	{"mark",	HTML_INPHRASE },
	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
117
/*
 * Avoid duplicate HTML id= attributes.
 * Each id= string handed out by html_make_id() with a non-zero
 * "unique" argument gets one entry in the id_unique hash table.
 * The table is keyed on the "id" member: it is initialized with
 * offsetof(struct id_entry, id) in html_alloc() and html_reset().
 * The entries are allocated in html_make_id() and freed in
 * html_reset_internal().
 */

struct	id_entry {
	int	 ord;	/* Ordinal number of the latest occurrence. */
	char	 id[];	/* The id= attribute without any ordinal suffix. */
};
/* File-scope table; one instance is shared by the whole file. */
static struct ohash	 id_unique;
125
/* State management and low level output helpers. */
static	void	 html_reset_internal(struct html *);
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Element closing, escaping, encoding, hyperlinks, and fonts. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
137
138
139 void *
140 html_alloc(const struct manoutput *outopts)
141 {
142 struct html *h;
143
144 h = mandoc_calloc(1, sizeof(struct html));
145
146 h->tag = NULL;
147 h->metac = h->metal = ESCAPE_FONTROMAN;
148 h->style = outopts->style;
149 if ((h->base_man1 = outopts->man) == NULL)
150 h->base_man2 = NULL;
151 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
152 *h->base_man2++ = '\0';
153 h->base_includes = outopts->includes;
154 if (outopts->fragment)
155 h->oflags |= HTML_FRAGMENT;
156 if (outopts->toc)
157 h->oflags |= HTML_TOC;
158
159 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
160
161 return h;
162 }
163
164 static void
165 html_reset_internal(struct html *h)
166 {
167 struct tag *tag;
168 struct id_entry *entry;
169 unsigned int slot;
170
171 while ((tag = h->tag) != NULL) {
172 h->tag = tag->next;
173 free(tag);
174 }
175 entry = ohash_first(&id_unique, &slot);
176 while (entry != NULL) {
177 free(entry);
178 entry = ohash_next(&id_unique, &slot);
179 }
180 ohash_delete(&id_unique);
181 }
182
183 void
184 html_reset(void *p)
185 {
186 html_reset_internal(p);
187 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
188 }
189
/*
 * Destroy the formatter state p (a struct html):
 * drop the per-document data, then free the state itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
196
197 void
198 print_gen_head(struct html *h)
199 {
200 struct tag *t;
201
202 print_otag(h, TAG_META, "?", "charset", "utf-8");
203 print_otag(h, TAG_META, "??", "name", "viewport",
204 "content", "width=device-width, initial-scale=1.0");
205 if (h->style != NULL) {
206 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
207 h->style, "type", "text/css", "media", "all");
208 return;
209 }
210
211 /*
212 * Print a minimal embedded style sheet.
213 */
214
215 t = print_otag(h, TAG_STYLE, "");
216 print_text(h, "table.head, table.foot { width: 100%; }");
217 print_endline(h);
218 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
219 print_endline(h);
220 print_text(h, "td.head-vol { text-align: center; }");
221 print_endline(h);
222 print_text(h, ".Nd, .Bf, .Op { display: inline; }");
223 print_endline(h);
224 print_text(h, ".Pa, .Ad { font-style: italic; }");
225 print_endline(h);
226 print_text(h, ".Ms { font-weight: bold; }");
227 print_endline(h);
228 print_text(h, ".Bl-diag ");
229 print_byte(h, '>');
230 print_text(h, " dt { font-weight: bold; }");
231 print_endline(h);
232 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
233 "{ font-weight: bold; font-family: inherit; }");
234 print_tagq(h, t);
235 }
236
237 int
238 html_setfont(struct html *h, enum mandoc_esc font)
239 {
240 switch (font) {
241 case ESCAPE_FONTPREV:
242 font = h->metal;
243 break;
244 case ESCAPE_FONTITALIC:
245 case ESCAPE_FONTBOLD:
246 case ESCAPE_FONTBI:
247 case ESCAPE_FONTROMAN:
248 case ESCAPE_FONTCR:
249 case ESCAPE_FONTCB:
250 case ESCAPE_FONTCI:
251 break;
252 case ESCAPE_FONT:
253 font = ESCAPE_FONTROMAN;
254 break;
255 default:
256 return 0;
257 }
258 h->metal = h->metac;
259 h->metac = font;
260 return 1;
261 }
262
263 static void
264 print_metaf(struct html *h)
265 {
266 if (h->metaf) {
267 print_tagq(h, h->metaf);
268 h->metaf = NULL;
269 }
270 switch (h->metac) {
271 case ESCAPE_FONTITALIC:
272 h->metaf = print_otag(h, TAG_I, "");
273 break;
274 case ESCAPE_FONTBOLD:
275 h->metaf = print_otag(h, TAG_B, "");
276 break;
277 case ESCAPE_FONTBI:
278 h->metaf = print_otag(h, TAG_B, "");
279 print_otag(h, TAG_I, "");
280 break;
281 case ESCAPE_FONTCR:
282 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
283 break;
284 case ESCAPE_FONTCB:
285 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
286 print_otag(h, TAG_B, "");
287 break;
288 case ESCAPE_FONTCI:
289 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
290 print_otag(h, TAG_I, "");
291 break;
292 default:
293 break;
294 }
295 }
296
297 void
298 html_close_paragraph(struct html *h)
299 {
300 struct tag *this, *next;
301 int flags;
302
303 this = h->tag;
304 for (;;) {
305 next = this->next;
306 flags = htmltags[this->tag].flags;
307 if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
308 print_ctag(h, this);
309 if ((flags & HTML_INPHRASE) == 0)
310 break;
311 this = next;
312 }
313 }
314
315 /*
316 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
317 * TOKEN_NONE does not switch. The old mode is returned.
318 */
319 enum roff_tok
320 html_fillmode(struct html *h, enum roff_tok want)
321 {
322 struct tag *t;
323 enum roff_tok had;
324
325 for (t = h->tag; t != NULL; t = t->next)
326 if (t->tag == TAG_PRE)
327 break;
328
329 had = t == NULL ? ROFF_fi : ROFF_nf;
330
331 if (want != had) {
332 switch (want) {
333 case ROFF_fi:
334 print_tagq(h, t);
335 break;
336 case ROFF_nf:
337 html_close_paragraph(h);
338 print_otag(h, TAG_PRE, "");
339 break;
340 case TOKEN_NONE:
341 break;
342 default:
343 abort();
344 }
345 }
346 return had;
347 }
348
349 /*
350 * Allocate a string to be used for the "id=" attribute of an HTML
351 * element and/or as a segment identifier for a URI in an <a> element.
352 * The function may fail and return NULL if the node lacks text data
353 * to create the attribute from.
354 * The caller is responsible for free(3)ing the returned string.
355 *
356 * If the "unique" argument is non-zero, the "id_unique" ohash table
357 * is used for de-duplication. If the "unique" argument is 1,
358 * it is the first time the function is called for this tag and
359 * location, so if an ordinal suffix is needed, it is incremented.
360 * If the "unique" argument is 2, it is the second time the function
361 * is called for this tag and location, so the ordinal suffix
362 * remains unchanged.
363 */
364 char *
365 html_make_id(const struct roff_node *n, int unique)
366 {
367 const struct roff_node *nch;
368 struct id_entry *entry;
369 char *buf, *cp;
370 size_t len;
371 unsigned int slot;
372
373 if (n->tag != NULL)
374 buf = mandoc_strdup(n->tag);
375 else {
376 switch (n->tok) {
377 case MDOC_Sh:
378 case MDOC_Ss:
379 case MDOC_Sx:
380 case MAN_SH:
381 case MAN_SS:
382 for (nch = n->child; nch != NULL; nch = nch->next)
383 if (nch->type != ROFFT_TEXT)
384 return NULL;
385 buf = NULL;
386 deroff(&buf, n);
387 if (buf == NULL)
388 return NULL;
389 break;
390 default:
391 if (n->child == NULL || n->child->type != ROFFT_TEXT)
392 return NULL;
393 buf = mandoc_strdup(n->child->string);
394 break;
395 }
396 }
397
398 /*
399 * In ID attributes, only use ASCII characters that are
400 * permitted in URL-fragment strings according to the
401 * explicit list at:
402 * https://url.spec.whatwg.org/#url-fragment-string
403 * In addition, reserve '~' for ordinal suffixes.
404 */
405
406 for (cp = buf; *cp != '\0'; cp++)
407 if (isalnum((unsigned char)*cp) == 0 &&
408 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
409 *cp = '_';
410
411 if (unique == 0)
412 return buf;
413
414 /* Avoid duplicate HTML id= attributes. */
415
416 slot = ohash_qlookup(&id_unique, buf);
417 if ((entry = ohash_find(&id_unique, slot)) == NULL) {
418 len = strlen(buf) + 1;
419 entry = mandoc_malloc(sizeof(*entry) + len);
420 entry->ord = 1;
421 memcpy(entry->id, buf, len);
422 ohash_insert(&id_unique, slot, entry);
423 } else if (unique == 1)
424 entry->ord++;
425
426 if (entry->ord > 1) {
427 cp = buf;
428 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
429 free(cp);
430 }
431 return buf;
432 }
433
434 static int
435 print_escape(struct html *h, char c)
436 {
437
438 switch (c) {
439 case '<':
440 print_word(h, "&lt;");
441 break;
442 case '>':
443 print_word(h, "&gt;");
444 break;
445 case '&':
446 print_word(h, "&amp;");
447 break;
448 case '"':
449 print_word(h, "&quot;");
450 break;
451 case ASCII_NBRSP:
452 print_word(h, "&nbsp;");
453 break;
454 case ASCII_HYPH:
455 print_byte(h, '-');
456 break;
457 case ASCII_BREAK:
458 break;
459 default:
460 return 0;
461 }
462 return 1;
463 }
464
465 static int
466 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
467 {
468 char numbuf[16];
469 const char *seq;
470 size_t sz;
471 int c, len, breakline, nospace;
472 enum mandoc_esc esc;
473 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
474 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
475
476 if (pend == NULL)
477 pend = strchr(p, '\0');
478
479 breakline = 0;
480 nospace = 0;
481
482 while (p < pend) {
483 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
484 h->flags &= ~HTML_SKIPCHAR;
485 p++;
486 continue;
487 }
488
489 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
490 print_byte(h, *p);
491
492 if (breakline &&
493 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
494 print_otag(h, TAG_BR, "");
495 breakline = 0;
496 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
497 p++;
498 continue;
499 }
500
501 if (p >= pend)
502 break;
503
504 if (*p == ' ') {
505 print_endword(h);
506 p++;
507 continue;
508 }
509
510 if (print_escape(h, *p++))
511 continue;
512
513 esc = mandoc_escape(&p, &seq, &len);
514 switch (esc) {
515 case ESCAPE_FONT:
516 case ESCAPE_FONTPREV:
517 case ESCAPE_FONTBOLD:
518 case ESCAPE_FONTITALIC:
519 case ESCAPE_FONTBI:
520 case ESCAPE_FONTROMAN:
521 case ESCAPE_FONTCR:
522 case ESCAPE_FONTCB:
523 case ESCAPE_FONTCI:
524 if (0 == norecurse) {
525 h->flags |= HTML_NOSPACE;
526 if (html_setfont(h, esc))
527 print_metaf(h);
528 h->flags &= ~HTML_NOSPACE;
529 }
530 continue;
531 case ESCAPE_SKIPCHAR:
532 h->flags |= HTML_SKIPCHAR;
533 continue;
534 case ESCAPE_ERROR:
535 continue;
536 default:
537 break;
538 }
539
540 if (h->flags & HTML_SKIPCHAR) {
541 h->flags &= ~HTML_SKIPCHAR;
542 continue;
543 }
544
545 switch (esc) {
546 case ESCAPE_UNICODE:
547 /* Skip past "u" header. */
548 c = mchars_num2uc(seq + 1, len - 1);
549 break;
550 case ESCAPE_NUMBERED:
551 c = mchars_num2char(seq, len);
552 if (c < 0)
553 continue;
554 break;
555 case ESCAPE_SPECIAL:
556 c = mchars_spec2cp(seq, len);
557 if (c <= 0)
558 continue;
559 break;
560 case ESCAPE_UNDEF:
561 c = *seq;
562 break;
563 case ESCAPE_DEVICE:
564 print_word(h, "html");
565 continue;
566 case ESCAPE_BREAK:
567 breakline = 1;
568 continue;
569 case ESCAPE_NOSPACE:
570 if ('\0' == *p)
571 nospace = 1;
572 continue;
573 case ESCAPE_OVERSTRIKE:
574 if (len == 0)
575 continue;
576 c = seq[len - 1];
577 break;
578 default:
579 continue;
580 }
581 if ((c < 0x20 && c != 0x09) ||
582 (c > 0x7E && c < 0xA0))
583 c = 0xFFFD;
584 if (c > 0x7E) {
585 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
586 print_word(h, numbuf);
587 } else if (print_escape(h, c) == 0)
588 print_byte(h, c);
589 }
590
591 return nospace;
592 }
593
594 static void
595 print_href(struct html *h, const char *name, const char *sec, int man)
596 {
597 struct stat sb;
598 const char *p, *pp;
599 char *filename;
600
601 if (man) {
602 pp = h->base_man1;
603 if (h->base_man2 != NULL) {
604 mandoc_asprintf(&filename, "%s.%s", name, sec);
605 if (stat(filename, &sb) == -1)
606 pp = h->base_man2;
607 free(filename);
608 }
609 } else
610 pp = h->base_includes;
611
612 while ((p = strchr(pp, '%')) != NULL) {
613 print_encode(h, pp, p, 1);
614 if (man && p[1] == 'S') {
615 if (sec == NULL)
616 print_byte(h, '1');
617 else
618 print_encode(h, sec, NULL, 1);
619 } else if ((man && p[1] == 'N') ||
620 (man == 0 && p[1] == 'I'))
621 print_encode(h, name, NULL, 1);
622 else
623 print_encode(h, p, p + 2, 1);
624 pp = p + 2;
625 }
626 if (*pp != '\0')
627 print_encode(h, pp, NULL, 1);
628 }
629
630 struct tag *
631 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
632 {
633 va_list ap;
634 struct tag *t;
635 const char *attr;
636 char *arg1, *arg2;
637 int style_written, tflags;
638
639 tflags = htmltags[tag].flags;
640
641 /* Flow content is not allowed in phrasing context. */
642
643 if ((tflags & HTML_INPHRASE) == 0) {
644 for (t = h->tag; t != NULL; t = t->next) {
645 if (t->closed)
646 continue;
647 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
648 break;
649 }
650
651 /*
652 * Always wrap phrasing elements in a paragraph
653 * unless already contained in some flow container;
654 * never put them directly into a section.
655 */
656
657 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
658 print_otag(h, TAG_P, "c", "Pp");
659
660 /* Push this tag onto the stack of open scopes. */
661
662 if ((tflags & HTML_NOSTACK) == 0) {
663 t = mandoc_malloc(sizeof(struct tag));
664 t->tag = tag;
665 t->next = h->tag;
666 t->refcnt = 0;
667 t->closed = 0;
668 h->tag = t;
669 } else
670 t = NULL;
671
672 if (tflags & HTML_NLBEFORE)
673 print_endline(h);
674 if (h->col == 0)
675 print_indent(h);
676 else if ((h->flags & HTML_NOSPACE) == 0) {
677 if (h->flags & HTML_KEEP)
678 print_word(h, "&#x00A0;");
679 else {
680 if (h->flags & HTML_PREKEEP)
681 h->flags |= HTML_KEEP;
682 print_endword(h);
683 }
684 }
685
686 if ( ! (h->flags & HTML_NONOSPACE))
687 h->flags &= ~HTML_NOSPACE;
688 else
689 h->flags |= HTML_NOSPACE;
690
691 /* Print out the tag name and attributes. */
692
693 print_byte(h, '<');
694 print_word(h, htmltags[tag].name);
695
696 va_start(ap, fmt);
697
698 while (*fmt != '\0' && *fmt != 's') {
699
700 /* Parse attributes and arguments. */
701
702 arg1 = va_arg(ap, char *);
703 arg2 = NULL;
704 switch (*fmt++) {
705 case 'c':
706 attr = "class";
707 break;
708 case 'h':
709 attr = "href";
710 break;
711 case 'i':
712 attr = "id";
713 break;
714 case 'r':
715 attr = "role";
716 break;
717 case '?':
718 attr = arg1;
719 arg1 = va_arg(ap, char *);
720 break;
721 default:
722 abort();
723 }
724 if (*fmt == 'M')
725 arg2 = va_arg(ap, char *);
726 if (arg1 == NULL)
727 continue;
728
729 /* Print the attributes. */
730
731 print_byte(h, ' ');
732 print_word(h, attr);
733 print_byte(h, '=');
734 print_byte(h, '"');
735 switch (*fmt) {
736 case 'I':
737 print_href(h, arg1, NULL, 0);
738 fmt++;
739 break;
740 case 'M':
741 print_href(h, arg1, arg2, 1);
742 fmt++;
743 break;
744 case 'R':
745 print_byte(h, '#');
746 print_encode(h, arg1, NULL, 1);
747 fmt++;
748 break;
749 default:
750 print_encode(h, arg1, NULL, 1);
751 break;
752 }
753 print_byte(h, '"');
754 }
755
756 style_written = 0;
757 while (*fmt++ == 's') {
758 arg1 = va_arg(ap, char *);
759 arg2 = va_arg(ap, char *);
760 if (arg2 == NULL)
761 continue;
762 print_byte(h, ' ');
763 if (style_written == 0) {
764 print_word(h, "style=\"");
765 style_written = 1;
766 }
767 print_word(h, arg1);
768 print_byte(h, ':');
769 print_byte(h, ' ');
770 print_word(h, arg2);
771 print_byte(h, ';');
772 }
773 if (style_written)
774 print_byte(h, '"');
775
776 va_end(ap);
777
778 /* Accommodate for "well-formed" singleton escaping. */
779
780 if (htmltags[tag].flags & HTML_NOSTACK)
781 print_byte(h, '/');
782
783 print_byte(h, '>');
784
785 if (tflags & HTML_NLBEGIN)
786 print_endline(h);
787 else
788 h->flags |= HTML_NOSPACE;
789
790 if (tflags & HTML_INDENT)
791 h->indent++;
792 if (tflags & HTML_NOINDENT)
793 h->noindent++;
794
795 return t;
796 }
797
798 /*
799 * Print an element with an optional "id=" attribute.
800 * If the element has phrasing content and an "id=" attribute,
801 * also add a permalink: outside if it can be in phrasing context,
802 * inside otherwise.
803 */
804 struct tag *
805 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
806 struct roff_node *n)
807 {
808 struct roff_node *nch;
809 struct tag *ret, *t;
810 char *id, *href;
811
812 ret = NULL;
813 id = href = NULL;
814 if (n->flags & NODE_ID)
815 id = html_make_id(n, 1);
816 if (n->flags & NODE_HREF)
817 href = id == NULL ? html_make_id(n, 2) : id;
818 if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
819 ret = print_otag(h, TAG_A, "chR", "permalink", href);
820 t = print_otag(h, elemtype, "ci", cattr, id);
821 if (ret == NULL) {
822 ret = t;
823 if (href != NULL && (nch = n->child) != NULL) {
824 /* man(7) is safe, it tags phrasing content only. */
825 if (n->tok > MDOC_MAX ||
826 htmltags[elemtype].flags & HTML_TOPHRASE)
827 nch = NULL;
828 else /* For mdoc(7), beware of nested blocks. */
829 while (nch != NULL && nch->type == ROFFT_TEXT)
830 nch = nch->next;
831 if (nch == NULL)
832 print_otag(h, TAG_A, "chR", "permalink", href);
833 }
834 }
835 free(id);
836 if (id == NULL)
837 free(href);
838 return ret;
839 }
840
841 static void
842 print_ctag(struct html *h, struct tag *tag)
843 {
844 int tflags;
845
846 if (tag->closed == 0) {
847 tag->closed = 1;
848 if (tag == h->metaf)
849 h->metaf = NULL;
850 if (tag == h->tblt)
851 h->tblt = NULL;
852
853 tflags = htmltags[tag->tag].flags;
854 if (tflags & HTML_INDENT)
855 h->indent--;
856 if (tflags & HTML_NOINDENT)
857 h->noindent--;
858 if (tflags & HTML_NLEND)
859 print_endline(h);
860 print_indent(h);
861 print_byte(h, '<');
862 print_byte(h, '/');
863 print_word(h, htmltags[tag->tag].name);
864 print_byte(h, '>');
865 if (tflags & HTML_NLAFTER)
866 print_endline(h);
867 }
868 if (tag->refcnt == 0) {
869 h->tag = tag->next;
870 free(tag);
871 }
872 }
873
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
880
881 void
882 print_gen_comment(struct html *h, struct roff_node *n)
883 {
884 int wantblank;
885
886 print_word(h, "<!-- This is an automatically generated file."
887 " Do not edit.");
888 h->indent = 1;
889 wantblank = 0;
890 while (n != NULL && n->type == ROFFT_COMMENT) {
891 if (strstr(n->string, "-->") == NULL &&
892 (wantblank || *n->string != '\0')) {
893 print_endline(h);
894 print_indent(h);
895 print_word(h, n->string);
896 wantblank = *n->string != '\0';
897 }
898 n = n->next;
899 }
900 if (wantblank)
901 print_endline(h);
902 print_word(h, " -->");
903 print_endline(h);
904 h->indent = 0;
905 }
906
/*
 * Print a text word that is not associated with any syntax tree
 * node and hence never gets a permalink.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*no_node;

	no_node = NULL;
	print_tagged_text(h, word, no_node);
}
912
913 void
914 print_tagged_text(struct html *h, const char *word, struct roff_node *n)
915 {
916 struct tag *t;
917 char *href;
918
919 /*
920 * Always wrap text in a paragraph unless already contained in
921 * some flow container; never put it directly into a section.
922 */
923
924 if (h->tag->tag == TAG_SECTION)
925 print_otag(h, TAG_P, "c", "Pp");
926
927 /* Output whitespace before this text? */
928
929 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
930 if ( ! (HTML_KEEP & h->flags)) {
931 if (HTML_PREKEEP & h->flags)
932 h->flags |= HTML_KEEP;
933 print_endword(h);
934 } else
935 print_word(h, "&#x00A0;");
936 }
937
938 /*
939 * Optionally switch fonts, optionally write a permalink, then
940 * print the text, optionally surrounded by HTML whitespace.
941 */
942
943 assert(h->metaf == NULL);
944 print_metaf(h);
945 print_indent(h);
946
947 if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
948 t = print_otag(h, TAG_A, "chR", "permalink", href);
949 free(href);
950 } else
951 t = NULL;
952
953 if ( ! print_encode(h, word, NULL, 0)) {
954 if ( ! (h->flags & HTML_NONOSPACE))
955 h->flags &= ~HTML_NOSPACE;
956 h->flags &= ~HTML_NONEWLINE;
957 } else
958 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
959
960 if (h->metaf != NULL) {
961 print_tagq(h, h->metaf);
962 h->metaf = NULL;
963 } else if (t != NULL)
964 print_tagq(h, t);
965
966 h->flags &= ~HTML_IGNDELIM;
967 }
968
969 void
970 print_tagq(struct html *h, const struct tag *until)
971 {
972 struct tag *this, *next;
973
974 for (this = h->tag; this != NULL; this = next) {
975 next = this == until ? NULL : this->next;
976 print_ctag(h, this);
977 }
978 }
979
980 /*
981 * Close out all open elements up to but excluding suntil.
982 * Note that a paragraph just inside stays open together with it
983 * because paragraphs include subsequent phrasing content.
984 */
985 void
986 print_stagq(struct html *h, const struct tag *suntil)
987 {
988 struct tag *this, *next;
989
990 for (this = h->tag; this != NULL; this = next) {
991 next = this->next;
992 if (this == suntil || (next == suntil &&
993 (this->tag == TAG_P || this->tag == TAG_PRE)))
994 break;
995 print_ctag(h, this);
996 }
997 }
998
999
1000 /***********************************************************************
1001 * Low level output functions.
1002 * They implement line breaking using a short static buffer.
1003 ***********************************************************************/
1004
1005 /*
1006 * Buffer one HTML output byte.
1007 * If the buffer is full, flush and deactivate it and start a new line.
1008 * If the buffer is inactive, print directly.
1009 */
1010 static void
1011 print_byte(struct html *h, char c)
1012 {
1013 if ((h->flags & HTML_BUFFER) == 0) {
1014 putchar(c);
1015 h->col++;
1016 return;
1017 }
1018
1019 if (h->col + h->bufcol < sizeof(h->buf)) {
1020 h->buf[h->bufcol++] = c;
1021 return;
1022 }
1023
1024 putchar('\n');
1025 h->col = 0;
1026 print_indent(h);
1027 putchar(' ');
1028 putchar(' ');
1029 fwrite(h->buf, h->bufcol, 1, stdout);
1030 putchar(c);
1031 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1032 h->bufcol = 0;
1033 h->flags &= ~HTML_BUFFER;
1034 }
1035
1036 /*
1037 * If something was printed on the current output line, end it.
1038 * Not to be called right after print_indent().
1039 */
1040 void
1041 print_endline(struct html *h)
1042 {
1043 if (h->col == 0)
1044 return;
1045
1046 if (h->bufcol) {
1047 putchar(' ');
1048 fwrite(h->buf, h->bufcol, 1, stdout);
1049 h->bufcol = 0;
1050 }
1051 putchar('\n');
1052 h->col = 0;
1053 h->flags |= HTML_NOSPACE;
1054 h->flags &= ~HTML_BUFFER;
1055 }
1056
1057 /*
1058 * Flush the HTML output buffer.
1059 * If it is inactive, activate it.
1060 */
1061 static void
1062 print_endword(struct html *h)
1063 {
1064 if (h->noindent) {
1065 print_byte(h, ' ');
1066 return;
1067 }
1068
1069 if ((h->flags & HTML_BUFFER) == 0) {
1070 h->col++;
1071 h->flags |= HTML_BUFFER;
1072 } else if (h->bufcol) {
1073 putchar(' ');
1074 fwrite(h->buf, h->bufcol, 1, stdout);
1075 h->col += h->bufcol + 1;
1076 }
1077 h->bufcol = 0;
1078 }
1079
1080 /*
1081 * If at the beginning of a new output line,
1082 * perform indentation and mark the line as containing output.
1083 * Make sure to really produce some output right afterwards,
1084 * but do not use print_otag() for producing it.
1085 */
1086 static void
1087 print_indent(struct html *h)
1088 {
1089 size_t i;
1090
1091 if (h->col || h->noindent)
1092 return;
1093
1094 h->col = h->indent * 2;
1095 for (i = 0; i < h->col; i++)
1096 putchar(' ');
1097 }
1098
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No escaping is performed.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}