]> git.cameronkatri.com Git - mandoc.git/blob - html.c
print a BAGARG message if -T markdown is requested on man(7) input;
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.274 2021/08/10 12:55:03 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Common functions for mandoc(1) HTML formatters.
19 * For use by individual formatters and by the main program.
20 */
21 #include "config.h"
22
23 #include <sys/types.h>
24 #include <sys/stat.h>
25
26 #include <assert.h>
27 #include <ctype.h>
28 #include <stdarg.h>
29 #include <stddef.h>
30 #include <stdio.h>
31 #include <stdint.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc_ohash.h"
38 #include "mandoc.h"
39 #include "roff.h"
40 #include "out.h"
41 #include "html.h"
42 #include "manconf.h"
43 #include "main.h"
44
/*
 * Static description of one HTML element type:
 * the element name and a bit mask of nesting and formatting properties.
 */
struct htmldata {
	const char	 *name;		/* Element name as printed in tags. */
	int		  flags;	/* Bitwise OR of the HTML_* masks below. */
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
/* Convenience combinations of the line break masks. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */
};
61
62 static const struct htmldata htmltags[TAG_MAX] = {
63 {"html", HTML_NLALL},
64 {"head", HTML_NLALL | HTML_INDENT},
65 {"meta", HTML_NOSTACK | HTML_NLALL},
66 {"link", HTML_NOSTACK | HTML_NLALL},
67 {"style", HTML_NLALL | HTML_INDENT},
68 {"title", HTML_NLAROUND},
69 {"body", HTML_NLALL},
70 {"div", HTML_NLAROUND},
71 {"section", HTML_NLALL},
72 {"table", HTML_NLALL | HTML_INDENT},
73 {"tr", HTML_NLALL | HTML_INDENT},
74 {"td", HTML_NLAROUND},
75 {"li", HTML_NLAROUND | HTML_INDENT},
76 {"ul", HTML_NLALL | HTML_INDENT},
77 {"ol", HTML_NLALL | HTML_INDENT},
78 {"dl", HTML_NLALL | HTML_INDENT},
79 {"dt", HTML_NLAROUND},
80 {"dd", HTML_NLAROUND | HTML_INDENT},
81 {"h1", HTML_TOPHRASE | HTML_NLAROUND},
82 {"h2", HTML_TOPHRASE | HTML_NLAROUND},
83 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
84 {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
85 {"a", HTML_INPHRASE | HTML_TOPHRASE},
86 {"b", HTML_INPHRASE | HTML_TOPHRASE},
87 {"cite", HTML_INPHRASE | HTML_TOPHRASE},
88 {"code", HTML_INPHRASE | HTML_TOPHRASE},
89 {"i", HTML_INPHRASE | HTML_TOPHRASE},
90 {"small", HTML_INPHRASE | HTML_TOPHRASE},
91 {"span", HTML_INPHRASE | HTML_TOPHRASE},
92 {"var", HTML_INPHRASE | HTML_TOPHRASE},
93 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
94 {"mark", HTML_INPHRASE },
95 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
96 {"mrow", 0},
97 {"mi", 0},
98 {"mn", 0},
99 {"mo", 0},
100 {"msup", 0},
101 {"msub", 0},
102 {"msubsup", 0},
103 {"mfrac", 0},
104 {"msqrt", 0},
105 {"mfenced", 0},
106 {"mtable", 0},
107 {"mtr", 0},
108 {"mtd", 0},
109 {"munderover", 0},
110 {"munder", 0},
111 {"mover", 0},
112 };
113
114 /* Avoid duplicate HTML id= attributes. */
115
116 struct id_entry {
117 int ord; /* Ordinal number of the latest occurrence. */
118 char id[]; /* The id= attribute without any ordinal suffix. */
119 };
120 static struct ohash id_unique;
121
/* Life cycle. */
static	void	 html_reset_internal(struct html *h);

/* Element and text level helpers. */
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_encode(struct html *h, const char *p,
			const char *pend, int norecurse);
static	int	 print_escape(struct html *h, char c);
static	void	 print_href(struct html *h, const char *name,
			const char *sec, int man);
static	void	 print_metaf(struct html *h);

/* Low level output functions. */
static	void	 print_byte(struct html *h, char c);
static	void	 print_endword(struct html *h);
static	void	 print_indent(struct html *h);
static	void	 print_word(struct html *h, const char *cp);

133
134
135 void *
136 html_alloc(const struct manoutput *outopts)
137 {
138 struct html *h;
139
140 h = mandoc_calloc(1, sizeof(struct html));
141
142 h->tag = NULL;
143 h->metac = h->metal = ESCAPE_FONTROMAN;
144 h->style = outopts->style;
145 if ((h->base_man1 = outopts->man) == NULL)
146 h->base_man2 = NULL;
147 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
148 *h->base_man2++ = '\0';
149 h->base_includes = outopts->includes;
150 if (outopts->fragment)
151 h->oflags |= HTML_FRAGMENT;
152 if (outopts->toc)
153 h->oflags |= HTML_TOC;
154
155 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
156
157 return h;
158 }
159
160 static void
161 html_reset_internal(struct html *h)
162 {
163 struct tag *tag;
164 struct id_entry *entry;
165 unsigned int slot;
166
167 while ((tag = h->tag) != NULL) {
168 h->tag = tag->next;
169 free(tag);
170 }
171 entry = ohash_first(&id_unique, &slot);
172 while (entry != NULL) {
173 free(entry);
174 entry = ohash_next(&id_unique, &slot);
175 }
176 ohash_delete(&id_unique);
177 }
178
179 void
180 html_reset(void *p)
181 {
182 html_reset_internal(p);
183 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
184 }
185
/*
 * Release everything owned by the formatter, including the
 * struct html itself.  The argument is the pointer returned
 * from html_alloc() and must not be used afterwards.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
192
193 void
194 print_gen_head(struct html *h)
195 {
196 struct tag *t;
197
198 print_otag(h, TAG_META, "?", "charset", "utf-8");
199 print_otag(h, TAG_META, "??", "name", "viewport",
200 "content", "width=device-width, initial-scale=1.0");
201 if (h->style != NULL) {
202 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
203 h->style, "type", "text/css", "media", "all");
204 return;
205 }
206
207 /*
208 * Print a minimal embedded style sheet.
209 */
210
211 t = print_otag(h, TAG_STYLE, "");
212 print_text(h, "table.head, table.foot { width: 100%; }");
213 print_endline(h);
214 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
215 print_endline(h);
216 print_text(h, "td.head-vol { text-align: center; }");
217 print_endline(h);
218 print_text(h, ".Nd, .Bf, .Op { display: inline; }");
219 print_endline(h);
220 print_text(h, ".Pa, .Ad { font-style: italic; }");
221 print_endline(h);
222 print_text(h, ".Ms { font-weight: bold; }");
223 print_endline(h);
224 print_text(h, ".Bl-diag ");
225 print_byte(h, '>');
226 print_text(h, " dt { font-weight: bold; }");
227 print_endline(h);
228 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
229 "{ font-weight: bold; font-family: inherit; }");
230 print_tagq(h, t);
231 }
232
233 int
234 html_setfont(struct html *h, enum mandoc_esc font)
235 {
236 switch (font) {
237 case ESCAPE_FONTPREV:
238 font = h->metal;
239 break;
240 case ESCAPE_FONTITALIC:
241 case ESCAPE_FONTBOLD:
242 case ESCAPE_FONTBI:
243 case ESCAPE_FONTROMAN:
244 case ESCAPE_FONTCR:
245 case ESCAPE_FONTCB:
246 case ESCAPE_FONTCI:
247 break;
248 case ESCAPE_FONT:
249 font = ESCAPE_FONTROMAN;
250 break;
251 default:
252 return 0;
253 }
254 h->metal = h->metac;
255 h->metac = font;
256 return 1;
257 }
258
259 static void
260 print_metaf(struct html *h)
261 {
262 if (h->metaf) {
263 print_tagq(h, h->metaf);
264 h->metaf = NULL;
265 }
266 switch (h->metac) {
267 case ESCAPE_FONTITALIC:
268 h->metaf = print_otag(h, TAG_I, "");
269 break;
270 case ESCAPE_FONTBOLD:
271 h->metaf = print_otag(h, TAG_B, "");
272 break;
273 case ESCAPE_FONTBI:
274 h->metaf = print_otag(h, TAG_B, "");
275 print_otag(h, TAG_I, "");
276 break;
277 case ESCAPE_FONTCR:
278 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
279 break;
280 case ESCAPE_FONTCB:
281 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
282 print_otag(h, TAG_B, "");
283 break;
284 case ESCAPE_FONTCI:
285 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
286 print_otag(h, TAG_I, "");
287 break;
288 default:
289 break;
290 }
291 }
292
293 void
294 html_close_paragraph(struct html *h)
295 {
296 struct tag *this, *next;
297 int flags;
298
299 this = h->tag;
300 for (;;) {
301 next = this->next;
302 flags = htmltags[this->tag].flags;
303 if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
304 print_ctag(h, this);
305 if ((flags & HTML_INPHRASE) == 0)
306 break;
307 this = next;
308 }
309 }
310
311 /*
312 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
313 * TOKEN_NONE does not switch. The old mode is returned.
314 */
315 enum roff_tok
316 html_fillmode(struct html *h, enum roff_tok want)
317 {
318 struct tag *t;
319 enum roff_tok had;
320
321 for (t = h->tag; t != NULL; t = t->next)
322 if (t->tag == TAG_PRE)
323 break;
324
325 had = t == NULL ? ROFF_fi : ROFF_nf;
326
327 if (want != had) {
328 switch (want) {
329 case ROFF_fi:
330 print_tagq(h, t);
331 break;
332 case ROFF_nf:
333 html_close_paragraph(h);
334 print_otag(h, TAG_PRE, "");
335 break;
336 case TOKEN_NONE:
337 break;
338 default:
339 abort();
340 }
341 }
342 return had;
343 }
344
345 /*
346 * Allocate a string to be used for the "id=" attribute of an HTML
347 * element and/or as a segment identifier for a URI in an <a> element.
348 * The function may fail and return NULL if the node lacks text data
349 * to create the attribute from.
350 * The caller is responsible for free(3)ing the returned string.
351 *
352 * If the "unique" argument is non-zero, the "id_unique" ohash table
353 * is used for de-duplication. If the "unique" argument is 1,
354 * it is the first time the function is called for this tag and
355 * location, so if an ordinal suffix is needed, it is incremented.
356 * If the "unique" argument is 2, it is the second time the function
357 * is called for this tag and location, so the ordinal suffix
358 * remains unchanged.
359 */
360 char *
361 html_make_id(const struct roff_node *n, int unique)
362 {
363 const struct roff_node *nch;
364 struct id_entry *entry;
365 char *buf, *cp;
366 size_t len;
367 unsigned int slot;
368
369 if (n->tag != NULL)
370 buf = mandoc_strdup(n->tag);
371 else {
372 switch (n->tok) {
373 case MDOC_Sh:
374 case MDOC_Ss:
375 case MDOC_Sx:
376 case MAN_SH:
377 case MAN_SS:
378 for (nch = n->child; nch != NULL; nch = nch->next)
379 if (nch->type != ROFFT_TEXT)
380 return NULL;
381 buf = NULL;
382 deroff(&buf, n);
383 if (buf == NULL)
384 return NULL;
385 break;
386 default:
387 if (n->child == NULL || n->child->type != ROFFT_TEXT)
388 return NULL;
389 buf = mandoc_strdup(n->child->string);
390 break;
391 }
392 }
393
394 /*
395 * In ID attributes, only use ASCII characters that are
396 * permitted in URL-fragment strings according to the
397 * explicit list at:
398 * https://url.spec.whatwg.org/#url-fragment-string
399 * In addition, reserve '~' for ordinal suffixes.
400 */
401
402 for (cp = buf; *cp != '\0'; cp++)
403 if (isalnum((unsigned char)*cp) == 0 &&
404 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
405 *cp = '_';
406
407 if (unique == 0)
408 return buf;
409
410 /* Avoid duplicate HTML id= attributes. */
411
412 slot = ohash_qlookup(&id_unique, buf);
413 if ((entry = ohash_find(&id_unique, slot)) == NULL) {
414 len = strlen(buf) + 1;
415 entry = mandoc_malloc(sizeof(*entry) + len);
416 entry->ord = 1;
417 memcpy(entry->id, buf, len);
418 ohash_insert(&id_unique, slot, entry);
419 } else if (unique == 1)
420 entry->ord++;
421
422 if (entry->ord > 1) {
423 cp = buf;
424 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
425 free(cp);
426 }
427 return buf;
428 }
429
430 static int
431 print_escape(struct html *h, char c)
432 {
433
434 switch (c) {
435 case '<':
436 print_word(h, "&lt;");
437 break;
438 case '>':
439 print_word(h, "&gt;");
440 break;
441 case '&':
442 print_word(h, "&amp;");
443 break;
444 case '"':
445 print_word(h, "&quot;");
446 break;
447 case ASCII_NBRSP:
448 print_word(h, "&nbsp;");
449 break;
450 case ASCII_HYPH:
451 print_byte(h, '-');
452 break;
453 case ASCII_BREAK:
454 break;
455 default:
456 return 0;
457 }
458 return 1;
459 }
460
461 static int
462 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
463 {
464 char numbuf[16];
465 const char *seq;
466 size_t sz;
467 int c, len, breakline, nospace;
468 enum mandoc_esc esc;
469 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
470 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
471
472 if (pend == NULL)
473 pend = strchr(p, '\0');
474
475 breakline = 0;
476 nospace = 0;
477
478 while (p < pend) {
479 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
480 h->flags &= ~HTML_SKIPCHAR;
481 p++;
482 continue;
483 }
484
485 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
486 print_byte(h, *p);
487
488 if (breakline &&
489 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
490 print_otag(h, TAG_BR, "");
491 breakline = 0;
492 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
493 p++;
494 continue;
495 }
496
497 if (p >= pend)
498 break;
499
500 if (*p == ' ') {
501 print_endword(h);
502 p++;
503 continue;
504 }
505
506 if (print_escape(h, *p++))
507 continue;
508
509 esc = mandoc_escape(&p, &seq, &len);
510 switch (esc) {
511 case ESCAPE_FONT:
512 case ESCAPE_FONTPREV:
513 case ESCAPE_FONTBOLD:
514 case ESCAPE_FONTITALIC:
515 case ESCAPE_FONTBI:
516 case ESCAPE_FONTROMAN:
517 case ESCAPE_FONTCR:
518 case ESCAPE_FONTCB:
519 case ESCAPE_FONTCI:
520 if (0 == norecurse) {
521 h->flags |= HTML_NOSPACE;
522 if (html_setfont(h, esc))
523 print_metaf(h);
524 h->flags &= ~HTML_NOSPACE;
525 }
526 continue;
527 case ESCAPE_SKIPCHAR:
528 h->flags |= HTML_SKIPCHAR;
529 continue;
530 case ESCAPE_ERROR:
531 continue;
532 default:
533 break;
534 }
535
536 if (h->flags & HTML_SKIPCHAR) {
537 h->flags &= ~HTML_SKIPCHAR;
538 continue;
539 }
540
541 switch (esc) {
542 case ESCAPE_UNICODE:
543 /* Skip past "u" header. */
544 c = mchars_num2uc(seq + 1, len - 1);
545 break;
546 case ESCAPE_NUMBERED:
547 c = mchars_num2char(seq, len);
548 if (c < 0)
549 continue;
550 break;
551 case ESCAPE_SPECIAL:
552 c = mchars_spec2cp(seq, len);
553 if (c <= 0)
554 continue;
555 break;
556 case ESCAPE_UNDEF:
557 c = *seq;
558 break;
559 case ESCAPE_DEVICE:
560 print_word(h, "html");
561 continue;
562 case ESCAPE_BREAK:
563 breakline = 1;
564 continue;
565 case ESCAPE_NOSPACE:
566 if ('\0' == *p)
567 nospace = 1;
568 continue;
569 case ESCAPE_OVERSTRIKE:
570 if (len == 0)
571 continue;
572 c = seq[len - 1];
573 break;
574 default:
575 continue;
576 }
577 if ((c < 0x20 && c != 0x09) ||
578 (c > 0x7E && c < 0xA0))
579 c = 0xFFFD;
580 if (c > 0x7E) {
581 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
582 print_word(h, numbuf);
583 } else if (print_escape(h, c) == 0)
584 print_byte(h, c);
585 }
586
587 return nospace;
588 }
589
590 static void
591 print_href(struct html *h, const char *name, const char *sec, int man)
592 {
593 struct stat sb;
594 const char *p, *pp;
595 char *filename;
596
597 if (man) {
598 pp = h->base_man1;
599 if (h->base_man2 != NULL) {
600 mandoc_asprintf(&filename, "%s.%s", name, sec);
601 if (stat(filename, &sb) == -1)
602 pp = h->base_man2;
603 free(filename);
604 }
605 } else
606 pp = h->base_includes;
607
608 while ((p = strchr(pp, '%')) != NULL) {
609 print_encode(h, pp, p, 1);
610 if (man && p[1] == 'S') {
611 if (sec == NULL)
612 print_byte(h, '1');
613 else
614 print_encode(h, sec, NULL, 1);
615 } else if ((man && p[1] == 'N') ||
616 (man == 0 && p[1] == 'I'))
617 print_encode(h, name, NULL, 1);
618 else
619 print_encode(h, p, p + 2, 1);
620 pp = p + 2;
621 }
622 if (*pp != '\0')
623 print_encode(h, pp, NULL, 1);
624 }
625
626 struct tag *
627 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
628 {
629 va_list ap;
630 struct tag *t;
631 const char *attr;
632 char *arg1, *arg2;
633 int style_written, tflags;
634
635 tflags = htmltags[tag].flags;
636
637 /* Flow content is not allowed in phrasing context. */
638
639 if ((tflags & HTML_INPHRASE) == 0) {
640 for (t = h->tag; t != NULL; t = t->next) {
641 if (t->closed)
642 continue;
643 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
644 break;
645 }
646
647 /*
648 * Always wrap phrasing elements in a paragraph
649 * unless already contained in some flow container;
650 * never put them directly into a section.
651 */
652
653 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
654 print_otag(h, TAG_P, "c", "Pp");
655
656 /* Push this tag onto the stack of open scopes. */
657
658 if ((tflags & HTML_NOSTACK) == 0) {
659 t = mandoc_malloc(sizeof(struct tag));
660 t->tag = tag;
661 t->next = h->tag;
662 t->refcnt = 0;
663 t->closed = 0;
664 h->tag = t;
665 } else
666 t = NULL;
667
668 if (tflags & HTML_NLBEFORE)
669 print_endline(h);
670 if (h->col == 0)
671 print_indent(h);
672 else if ((h->flags & HTML_NOSPACE) == 0) {
673 if (h->flags & HTML_KEEP)
674 print_word(h, "&#x00A0;");
675 else {
676 if (h->flags & HTML_PREKEEP)
677 h->flags |= HTML_KEEP;
678 print_endword(h);
679 }
680 }
681
682 if ( ! (h->flags & HTML_NONOSPACE))
683 h->flags &= ~HTML_NOSPACE;
684 else
685 h->flags |= HTML_NOSPACE;
686
687 /* Print out the tag name and attributes. */
688
689 print_byte(h, '<');
690 print_word(h, htmltags[tag].name);
691
692 va_start(ap, fmt);
693
694 while (*fmt != '\0' && *fmt != 's') {
695
696 /* Parse attributes and arguments. */
697
698 arg1 = va_arg(ap, char *);
699 arg2 = NULL;
700 switch (*fmt++) {
701 case 'c':
702 attr = "class";
703 break;
704 case 'h':
705 attr = "href";
706 break;
707 case 'i':
708 attr = "id";
709 break;
710 case '?':
711 attr = arg1;
712 arg1 = va_arg(ap, char *);
713 break;
714 default:
715 abort();
716 }
717 if (*fmt == 'M')
718 arg2 = va_arg(ap, char *);
719 if (arg1 == NULL)
720 continue;
721
722 /* Print the attributes. */
723
724 print_byte(h, ' ');
725 print_word(h, attr);
726 print_byte(h, '=');
727 print_byte(h, '"');
728 switch (*fmt) {
729 case 'I':
730 print_href(h, arg1, NULL, 0);
731 fmt++;
732 break;
733 case 'M':
734 print_href(h, arg1, arg2, 1);
735 fmt++;
736 break;
737 case 'R':
738 print_byte(h, '#');
739 print_encode(h, arg1, NULL, 1);
740 fmt++;
741 break;
742 default:
743 print_encode(h, arg1, NULL, 1);
744 break;
745 }
746 print_byte(h, '"');
747 }
748
749 style_written = 0;
750 while (*fmt++ == 's') {
751 arg1 = va_arg(ap, char *);
752 arg2 = va_arg(ap, char *);
753 if (arg2 == NULL)
754 continue;
755 print_byte(h, ' ');
756 if (style_written == 0) {
757 print_word(h, "style=\"");
758 style_written = 1;
759 }
760 print_word(h, arg1);
761 print_byte(h, ':');
762 print_byte(h, ' ');
763 print_word(h, arg2);
764 print_byte(h, ';');
765 }
766 if (style_written)
767 print_byte(h, '"');
768
769 va_end(ap);
770
771 /* Accommodate for "well-formed" singleton escaping. */
772
773 if (htmltags[tag].flags & HTML_NOSTACK)
774 print_byte(h, '/');
775
776 print_byte(h, '>');
777
778 if (tflags & HTML_NLBEGIN)
779 print_endline(h);
780 else
781 h->flags |= HTML_NOSPACE;
782
783 if (tflags & HTML_INDENT)
784 h->indent++;
785 if (tflags & HTML_NOINDENT)
786 h->noindent++;
787
788 return t;
789 }
790
791 /*
792 * Print an element with an optional "id=" attribute.
793 * If the element has phrasing content and an "id=" attribute,
794 * also add a permalink: outside if it can be in phrasing context,
795 * inside otherwise.
796 */
797 struct tag *
798 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
799 struct roff_node *n)
800 {
801 struct roff_node *nch;
802 struct tag *ret, *t;
803 char *id, *href;
804
805 ret = NULL;
806 id = href = NULL;
807 if (n->flags & NODE_ID)
808 id = html_make_id(n, 1);
809 if (n->flags & NODE_HREF)
810 href = id == NULL ? html_make_id(n, 2) : id;
811 if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
812 ret = print_otag(h, TAG_A, "chR", "permalink", href);
813 t = print_otag(h, elemtype, "ci", cattr, id);
814 if (ret == NULL) {
815 ret = t;
816 if (href != NULL && (nch = n->child) != NULL) {
817 /* man(7) is safe, it tags phrasing content only. */
818 if (n->tok > MDOC_MAX ||
819 htmltags[elemtype].flags & HTML_TOPHRASE)
820 nch = NULL;
821 else /* For mdoc(7), beware of nested blocks. */
822 while (nch != NULL && nch->type == ROFFT_TEXT)
823 nch = nch->next;
824 if (nch == NULL)
825 print_otag(h, TAG_A, "chR", "permalink", href);
826 }
827 }
828 free(id);
829 if (id == NULL)
830 free(href);
831 return ret;
832 }
833
834 static void
835 print_ctag(struct html *h, struct tag *tag)
836 {
837 int tflags;
838
839 if (tag->closed == 0) {
840 tag->closed = 1;
841 if (tag == h->metaf)
842 h->metaf = NULL;
843 if (tag == h->tblt)
844 h->tblt = NULL;
845
846 tflags = htmltags[tag->tag].flags;
847 if (tflags & HTML_INDENT)
848 h->indent--;
849 if (tflags & HTML_NOINDENT)
850 h->noindent--;
851 if (tflags & HTML_NLEND)
852 print_endline(h);
853 print_indent(h);
854 print_byte(h, '<');
855 print_byte(h, '/');
856 print_word(h, htmltags[tag->tag].name);
857 print_byte(h, '>');
858 if (tflags & HTML_NLAFTER)
859 print_endline(h);
860 }
861 if (tag->refcnt == 0) {
862 h->tag = tag->next;
863 free(tag);
864 }
865 }
866
/*
 * Print the document type declaration on an output line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
873
874 void
875 print_gen_comment(struct html *h, struct roff_node *n)
876 {
877 int wantblank;
878
879 print_word(h, "<!-- This is an automatically generated file."
880 " Do not edit.");
881 h->indent = 1;
882 wantblank = 0;
883 while (n != NULL && n->type == ROFFT_COMMENT) {
884 if (strstr(n->string, "-->") == NULL &&
885 (wantblank || *n->string != '\0')) {
886 print_endline(h);
887 print_indent(h);
888 print_word(h, n->string);
889 wantblank = *n->string != '\0';
890 }
891 n = n->next;
892 }
893 if (wantblank)
894 print_endline(h);
895 print_word(h, " -->");
896 print_endline(h);
897 h->indent = 0;
898 }
899
/*
 * Print one word of text without a permalink.
 * This is print_tagged_text() without a node.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*const nonode = NULL;

	print_tagged_text(h, word, nonode);
}
905
906 void
907 print_tagged_text(struct html *h, const char *word, struct roff_node *n)
908 {
909 struct tag *t;
910 char *href;
911
912 /*
913 * Always wrap text in a paragraph unless already contained in
914 * some flow container; never put it directly into a section.
915 */
916
917 if (h->tag->tag == TAG_SECTION)
918 print_otag(h, TAG_P, "c", "Pp");
919
920 /* Output whitespace before this text? */
921
922 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
923 if ( ! (HTML_KEEP & h->flags)) {
924 if (HTML_PREKEEP & h->flags)
925 h->flags |= HTML_KEEP;
926 print_endword(h);
927 } else
928 print_word(h, "&#x00A0;");
929 }
930
931 /*
932 * Optionally switch fonts, optionally write a permalink, then
933 * print the text, optionally surrounded by HTML whitespace.
934 */
935
936 assert(h->metaf == NULL);
937 print_metaf(h);
938 print_indent(h);
939
940 if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
941 t = print_otag(h, TAG_A, "chR", "permalink", href);
942 free(href);
943 } else
944 t = NULL;
945
946 if ( ! print_encode(h, word, NULL, 0)) {
947 if ( ! (h->flags & HTML_NONOSPACE))
948 h->flags &= ~HTML_NOSPACE;
949 h->flags &= ~HTML_NONEWLINE;
950 } else
951 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
952
953 if (h->metaf != NULL) {
954 print_tagq(h, h->metaf);
955 h->metaf = NULL;
956 } else if (t != NULL)
957 print_tagq(h, t);
958
959 h->flags &= ~HTML_IGNDELIM;
960 }
961
962 void
963 print_tagq(struct html *h, const struct tag *until)
964 {
965 struct tag *this, *next;
966
967 for (this = h->tag; this != NULL; this = next) {
968 next = this == until ? NULL : this->next;
969 print_ctag(h, this);
970 }
971 }
972
973 /*
974 * Close out all open elements up to but excluding suntil.
975 * Note that a paragraph just inside stays open together with it
976 * because paragraphs include subsequent phrasing content.
977 */
978 void
979 print_stagq(struct html *h, const struct tag *suntil)
980 {
981 struct tag *this, *next;
982
983 for (this = h->tag; this != NULL; this = next) {
984 next = this->next;
985 if (this == suntil || (next == suntil &&
986 (this->tag == TAG_P || this->tag == TAG_PRE)))
987 break;
988 print_ctag(h, this);
989 }
990 }
991
992
993 /***********************************************************************
994 * Low level output functions.
995 * They implement line breaking using a short static buffer.
996 ***********************************************************************/
997
998 /*
999 * Buffer one HTML output byte.
1000 * If the buffer is full, flush and deactivate it and start a new line.
1001 * If the buffer is inactive, print directly.
1002 */
1003 static void
1004 print_byte(struct html *h, char c)
1005 {
1006 if ((h->flags & HTML_BUFFER) == 0) {
1007 putchar(c);
1008 h->col++;
1009 return;
1010 }
1011
1012 if (h->col + h->bufcol < sizeof(h->buf)) {
1013 h->buf[h->bufcol++] = c;
1014 return;
1015 }
1016
1017 putchar('\n');
1018 h->col = 0;
1019 print_indent(h);
1020 putchar(' ');
1021 putchar(' ');
1022 fwrite(h->buf, h->bufcol, 1, stdout);
1023 putchar(c);
1024 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1025 h->bufcol = 0;
1026 h->flags &= ~HTML_BUFFER;
1027 }
1028
1029 /*
1030 * If something was printed on the current output line, end it.
1031 * Not to be called right after print_indent().
1032 */
1033 void
1034 print_endline(struct html *h)
1035 {
1036 if (h->col == 0)
1037 return;
1038
1039 if (h->bufcol) {
1040 putchar(' ');
1041 fwrite(h->buf, h->bufcol, 1, stdout);
1042 h->bufcol = 0;
1043 }
1044 putchar('\n');
1045 h->col = 0;
1046 h->flags |= HTML_NOSPACE;
1047 h->flags &= ~HTML_BUFFER;
1048 }
1049
1050 /*
1051 * Flush the HTML output buffer.
1052 * If it is inactive, activate it.
1053 */
1054 static void
1055 print_endword(struct html *h)
1056 {
1057 if (h->noindent) {
1058 print_byte(h, ' ');
1059 return;
1060 }
1061
1062 if ((h->flags & HTML_BUFFER) == 0) {
1063 h->col++;
1064 h->flags |= HTML_BUFFER;
1065 } else if (h->bufcol) {
1066 putchar(' ');
1067 fwrite(h->buf, h->bufcol, 1, stdout);
1068 h->col += h->bufcol + 1;
1069 }
1070 h->bufcol = 0;
1071 }
1072
1073 /*
1074 * If at the beginning of a new output line,
1075 * perform indentation and mark the line as containing output.
1076 * Make sure to really produce some output right afterwards,
1077 * but do not use print_otag() for producing it.
1078 */
1079 static void
1080 print_indent(struct html *h)
1081 {
1082 size_t i;
1083
1084 if (h->col || h->noindent)
1085 return;
1086
1087 h->col = h->indent * 2;
1088 for (i = 0; i < h->col; i++)
1089 putchar(' ');
1090 }
1091
/*
 * Print or buffer a NUL-terminated string byte by byte,
 * depending on the current state of the HTML output buffer.
 * No encoding is applied to the string.
 */
static void
print_word(struct html *h, const char *cp)
{
	const char	*bp;

	for (bp = cp; *bp != '\0'; bp++)
		print_byte(h, *bp);
}