]> git.cameronkatri.com Git - mandoc.git/blob - html.c
Enable generation of the desired delim/basic output with groff(1).
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.261 2019/09/05 13:35:04 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "out.h"
38 #include "html.h"
39 #include "manconf.h"
40 #include "main.h"
41
/*
 * Static description of one HTML element type: the name written into
 * the markup and a bit mask steering nesting, line breaking and
 * indentation when the element is opened and closed.
 */
struct htmldata {
	const char	 *name;		/* Element name, e.g. "div". */
	int		  flags;	/* Bitwise OR of the values below. */
#define	HTML_INPHRASE	 0x001	/* Allowed inside phrasing content. */
#define	HTML_TOPHRASE	 0x002	/* Its content is phrasing content. */
#define	HTML_NOSTACK	 0x004	/* Void element: no closing tag. */
#define	HTML_NLBEFORE	 0x008	/* End the output line before "<tag". */
#define	HTML_NLBEGIN	 0x010	/* End the output line after "<tag>". */
#define	HTML_NLEND	 0x020	/* End the output line before "</tag". */
#define	HTML_NLAFTER	 0x040	/* End the output line after "</tag>". */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 0x080	/* Content is indented one more level. */
#define	HTML_NOINDENT	 0x100	/* Content is never indented. */
};
58
59 static const struct htmldata htmltags[TAG_MAX] = {
60 {"html", HTML_NLALL},
61 {"head", HTML_NLALL | HTML_INDENT},
62 {"meta", HTML_NOSTACK | HTML_NLALL},
63 {"link", HTML_NOSTACK | HTML_NLALL},
64 {"style", HTML_NLALL | HTML_INDENT},
65 {"title", HTML_NLAROUND},
66 {"body", HTML_NLALL},
67 {"div", HTML_NLAROUND},
68 {"section", HTML_NLALL},
69 {"table", HTML_NLALL | HTML_INDENT},
70 {"tr", HTML_NLALL | HTML_INDENT},
71 {"td", HTML_NLAROUND},
72 {"li", HTML_NLAROUND | HTML_INDENT},
73 {"ul", HTML_NLALL | HTML_INDENT},
74 {"ol", HTML_NLALL | HTML_INDENT},
75 {"dl", HTML_NLALL | HTML_INDENT},
76 {"dt", HTML_NLAROUND},
77 {"dd", HTML_NLAROUND | HTML_INDENT},
78 {"h1", HTML_TOPHRASE | HTML_NLAROUND},
79 {"h2", HTML_TOPHRASE | HTML_NLAROUND},
80 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
81 {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
82 {"a", HTML_INPHRASE | HTML_TOPHRASE},
83 {"b", HTML_INPHRASE | HTML_TOPHRASE},
84 {"cite", HTML_INPHRASE | HTML_TOPHRASE},
85 {"code", HTML_INPHRASE | HTML_TOPHRASE},
86 {"i", HTML_INPHRASE | HTML_TOPHRASE},
87 {"small", HTML_INPHRASE | HTML_TOPHRASE},
88 {"span", HTML_INPHRASE | HTML_TOPHRASE},
89 {"var", HTML_INPHRASE | HTML_TOPHRASE},
90 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
91 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
92 {"mrow", 0},
93 {"mi", 0},
94 {"mn", 0},
95 {"mo", 0},
96 {"msup", 0},
97 {"msub", 0},
98 {"msubsup", 0},
99 {"mfrac", 0},
100 {"msqrt", 0},
101 {"mfenced", 0},
102 {"mtable", 0},
103 {"mtr", 0},
104 {"mtd", 0},
105 {"munderover", 0},
106 {"munder", 0},
107 {"mover", 0},
108 };
109
110 /* Avoid duplicate HTML id= attributes. */
111 static struct ohash id_unique;
112
113 static void html_reset_internal(struct html *);
114 static void print_byte(struct html *, char);
115 static void print_endword(struct html *);
116 static void print_indent(struct html *);
117 static void print_word(struct html *, const char *);
118
119 static void print_ctag(struct html *, struct tag *);
120 static int print_escape(struct html *, char);
121 static int print_encode(struct html *, const char *, const char *, int);
122 static void print_href(struct html *, const char *, const char *, int);
123 static void print_metaf(struct html *);
124
125
126 void *
127 html_alloc(const struct manoutput *outopts)
128 {
129 struct html *h;
130
131 h = mandoc_calloc(1, sizeof(struct html));
132
133 h->tag = NULL;
134 h->style = outopts->style;
135 if ((h->base_man1 = outopts->man) == NULL)
136 h->base_man2 = NULL;
137 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
138 *h->base_man2++ = '\0';
139 h->base_includes = outopts->includes;
140 if (outopts->fragment)
141 h->oflags |= HTML_FRAGMENT;
142 if (outopts->toc)
143 h->oflags |= HTML_TOC;
144
145 mandoc_ohash_init(&id_unique, 4, 0);
146
147 return h;
148 }
149
150 static void
151 html_reset_internal(struct html *h)
152 {
153 struct tag *tag;
154 char *cp;
155 unsigned int slot;
156
157 while ((tag = h->tag) != NULL) {
158 h->tag = tag->next;
159 free(tag);
160 }
161 cp = ohash_first(&id_unique, &slot);
162 while (cp != NULL) {
163 free(cp);
164 cp = ohash_next(&id_unique, &slot);
165 }
166 ohash_delete(&id_unique);
167 }
168
169 void
170 html_reset(void *p)
171 {
172 html_reset_internal(p);
173 mandoc_ohash_init(&id_unique, 4, 0);
174 }
175
/*
 * Release the formatter state p, including its per-document state.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
182
183 void
184 print_gen_head(struct html *h)
185 {
186 struct tag *t;
187
188 print_otag(h, TAG_META, "?", "charset", "utf-8");
189 if (h->style != NULL) {
190 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
191 h->style, "type", "text/css", "media", "all");
192 return;
193 }
194
195 /*
196 * Print a minimal embedded style sheet.
197 */
198
199 t = print_otag(h, TAG_STYLE, "");
200 print_text(h, "table.head, table.foot { width: 100%; }");
201 print_endline(h);
202 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
203 print_endline(h);
204 print_text(h, "td.head-vol { text-align: center; }");
205 print_endline(h);
206 print_text(h, ".Nd, .Bf, .Op { display: inline; }");
207 print_endline(h);
208 print_text(h, ".Pa, .Ad { font-style: italic; }");
209 print_endline(h);
210 print_text(h, ".Ms { font-weight: bold; }");
211 print_endline(h);
212 print_text(h, ".Bl-diag ");
213 print_byte(h, '>');
214 print_text(h, " dt { font-weight: bold; }");
215 print_endline(h);
216 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
217 "{ font-weight: bold; font-family: inherit; }");
218 print_tagq(h, t);
219 }
220
221 int
222 html_setfont(struct html *h, enum mandoc_esc font)
223 {
224 switch (font) {
225 case ESCAPE_FONTPREV:
226 font = h->metal;
227 break;
228 case ESCAPE_FONTITALIC:
229 case ESCAPE_FONTBOLD:
230 case ESCAPE_FONTBI:
231 case ESCAPE_FONTCW:
232 case ESCAPE_FONTROMAN:
233 break;
234 case ESCAPE_FONT:
235 font = ESCAPE_FONTROMAN;
236 break;
237 default:
238 return 0;
239 }
240 h->metal = h->metac;
241 h->metac = font;
242 return 1;
243 }
244
245 static void
246 print_metaf(struct html *h)
247 {
248 if (h->metaf) {
249 print_tagq(h, h->metaf);
250 h->metaf = NULL;
251 }
252 switch (h->metac) {
253 case ESCAPE_FONTITALIC:
254 h->metaf = print_otag(h, TAG_I, "");
255 break;
256 case ESCAPE_FONTBOLD:
257 h->metaf = print_otag(h, TAG_B, "");
258 break;
259 case ESCAPE_FONTBI:
260 h->metaf = print_otag(h, TAG_B, "");
261 print_otag(h, TAG_I, "");
262 break;
263 case ESCAPE_FONTCW:
264 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
265 break;
266 default:
267 break;
268 }
269 }
270
271 void
272 html_close_paragraph(struct html *h)
273 {
274 struct tag *this, *next;
275 int flags;
276
277 this = h->tag;
278 for (;;) {
279 next = this->next;
280 flags = htmltags[this->tag].flags;
281 if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
282 print_ctag(h, this);
283 if ((flags & HTML_INPHRASE) == 0)
284 break;
285 this = next;
286 }
287 }
288
289 /*
290 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
291 * TOKEN_NONE does not switch. The old mode is returned.
292 */
293 enum roff_tok
294 html_fillmode(struct html *h, enum roff_tok want)
295 {
296 struct tag *t;
297 enum roff_tok had;
298
299 for (t = h->tag; t != NULL; t = t->next)
300 if (t->tag == TAG_PRE)
301 break;
302
303 had = t == NULL ? ROFF_fi : ROFF_nf;
304
305 if (want != had) {
306 switch (want) {
307 case ROFF_fi:
308 print_tagq(h, t);
309 break;
310 case ROFF_nf:
311 html_close_paragraph(h);
312 print_otag(h, TAG_PRE, "");
313 break;
314 case TOKEN_NONE:
315 break;
316 default:
317 abort();
318 }
319 }
320 return had;
321 }
322
323 char *
324 html_make_id(const struct roff_node *n, int unique)
325 {
326 const struct roff_node *nch;
327 char *buf, *bufs, *cp;
328 unsigned int slot;
329 int suffix;
330
331 for (nch = n->child; nch != NULL; nch = nch->next)
332 if (nch->type != ROFFT_TEXT)
333 return NULL;
334
335 buf = NULL;
336 deroff(&buf, n);
337 if (buf == NULL)
338 return NULL;
339
340 /*
341 * In ID attributes, only use ASCII characters that are
342 * permitted in URL-fragment strings according to the
343 * explicit list at:
344 * https://url.spec.whatwg.org/#url-fragment-string
345 */
346
347 for (cp = buf; *cp != '\0'; cp++)
348 if (isalnum((unsigned char)*cp) == 0 &&
349 strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
350 *cp = '_';
351
352 if (unique == 0)
353 return buf;
354
355 /* Avoid duplicate HTML id= attributes. */
356
357 bufs = NULL;
358 suffix = 1;
359 slot = ohash_qlookup(&id_unique, buf);
360 cp = ohash_find(&id_unique, slot);
361 if (cp != NULL) {
362 while (cp != NULL) {
363 free(bufs);
364 if (++suffix > 127) {
365 free(buf);
366 return NULL;
367 }
368 mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
369 slot = ohash_qlookup(&id_unique, bufs);
370 cp = ohash_find(&id_unique, slot);
371 }
372 free(buf);
373 buf = bufs;
374 }
375 ohash_insert(&id_unique, slot, buf);
376 return buf;
377 }
378
379 static int
380 print_escape(struct html *h, char c)
381 {
382
383 switch (c) {
384 case '<':
385 print_word(h, "&lt;");
386 break;
387 case '>':
388 print_word(h, "&gt;");
389 break;
390 case '&':
391 print_word(h, "&amp;");
392 break;
393 case '"':
394 print_word(h, "&quot;");
395 break;
396 case ASCII_NBRSP:
397 print_word(h, "&nbsp;");
398 break;
399 case ASCII_HYPH:
400 print_byte(h, '-');
401 break;
402 case ASCII_BREAK:
403 break;
404 default:
405 return 0;
406 }
407 return 1;
408 }
409
410 static int
411 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
412 {
413 char numbuf[16];
414 const char *seq;
415 size_t sz;
416 int c, len, breakline, nospace;
417 enum mandoc_esc esc;
418 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
419 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
420
421 if (pend == NULL)
422 pend = strchr(p, '\0');
423
424 breakline = 0;
425 nospace = 0;
426
427 while (p < pend) {
428 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
429 h->flags &= ~HTML_SKIPCHAR;
430 p++;
431 continue;
432 }
433
434 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
435 print_byte(h, *p);
436
437 if (breakline &&
438 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
439 print_otag(h, TAG_BR, "");
440 breakline = 0;
441 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
442 p++;
443 continue;
444 }
445
446 if (p >= pend)
447 break;
448
449 if (*p == ' ') {
450 print_endword(h);
451 p++;
452 continue;
453 }
454
455 if (print_escape(h, *p++))
456 continue;
457
458 esc = mandoc_escape(&p, &seq, &len);
459 switch (esc) {
460 case ESCAPE_FONT:
461 case ESCAPE_FONTPREV:
462 case ESCAPE_FONTBOLD:
463 case ESCAPE_FONTITALIC:
464 case ESCAPE_FONTBI:
465 case ESCAPE_FONTCW:
466 case ESCAPE_FONTROMAN:
467 if (0 == norecurse) {
468 h->flags |= HTML_NOSPACE;
469 if (html_setfont(h, esc))
470 print_metaf(h);
471 h->flags &= ~HTML_NOSPACE;
472 }
473 continue;
474 case ESCAPE_SKIPCHAR:
475 h->flags |= HTML_SKIPCHAR;
476 continue;
477 case ESCAPE_ERROR:
478 continue;
479 default:
480 break;
481 }
482
483 if (h->flags & HTML_SKIPCHAR) {
484 h->flags &= ~HTML_SKIPCHAR;
485 continue;
486 }
487
488 switch (esc) {
489 case ESCAPE_UNICODE:
490 /* Skip past "u" header. */
491 c = mchars_num2uc(seq + 1, len - 1);
492 break;
493 case ESCAPE_NUMBERED:
494 c = mchars_num2char(seq, len);
495 if (c < 0)
496 continue;
497 break;
498 case ESCAPE_SPECIAL:
499 c = mchars_spec2cp(seq, len);
500 if (c <= 0)
501 continue;
502 break;
503 case ESCAPE_UNDEF:
504 c = *seq;
505 break;
506 case ESCAPE_DEVICE:
507 print_word(h, "html");
508 continue;
509 case ESCAPE_BREAK:
510 breakline = 1;
511 continue;
512 case ESCAPE_NOSPACE:
513 if ('\0' == *p)
514 nospace = 1;
515 continue;
516 case ESCAPE_OVERSTRIKE:
517 if (len == 0)
518 continue;
519 c = seq[len - 1];
520 break;
521 default:
522 continue;
523 }
524 if ((c < 0x20 && c != 0x09) ||
525 (c > 0x7E && c < 0xA0))
526 c = 0xFFFD;
527 if (c > 0x7E) {
528 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
529 print_word(h, numbuf);
530 } else if (print_escape(h, c) == 0)
531 print_byte(h, c);
532 }
533
534 return nospace;
535 }
536
537 static void
538 print_href(struct html *h, const char *name, const char *sec, int man)
539 {
540 struct stat sb;
541 const char *p, *pp;
542 char *filename;
543
544 if (man) {
545 pp = h->base_man1;
546 if (h->base_man2 != NULL) {
547 mandoc_asprintf(&filename, "%s.%s", name, sec);
548 if (stat(filename, &sb) == -1)
549 pp = h->base_man2;
550 free(filename);
551 }
552 } else
553 pp = h->base_includes;
554
555 while ((p = strchr(pp, '%')) != NULL) {
556 print_encode(h, pp, p, 1);
557 if (man && p[1] == 'S') {
558 if (sec == NULL)
559 print_byte(h, '1');
560 else
561 print_encode(h, sec, NULL, 1);
562 } else if ((man && p[1] == 'N') ||
563 (man == 0 && p[1] == 'I'))
564 print_encode(h, name, NULL, 1);
565 else
566 print_encode(h, p, p + 2, 1);
567 pp = p + 2;
568 }
569 if (*pp != '\0')
570 print_encode(h, pp, NULL, 1);
571 }
572
573 struct tag *
574 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
575 {
576 va_list ap;
577 struct tag *t;
578 const char *attr;
579 char *arg1, *arg2;
580 int style_written, tflags;
581
582 tflags = htmltags[tag].flags;
583
584 /* Flow content is not allowed in phrasing context. */
585
586 if ((tflags & HTML_INPHRASE) == 0) {
587 for (t = h->tag; t != NULL; t = t->next) {
588 if (t->closed)
589 continue;
590 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
591 break;
592 }
593
594 /*
595 * Always wrap phrasing elements in a paragraph
596 * unless already contained in some flow container;
597 * never put them directly into a section.
598 */
599
600 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
601 print_otag(h, TAG_P, "c", "Pp");
602
603 /* Push this tag onto the stack of open scopes. */
604
605 if ((tflags & HTML_NOSTACK) == 0) {
606 t = mandoc_malloc(sizeof(struct tag));
607 t->tag = tag;
608 t->next = h->tag;
609 t->refcnt = 0;
610 t->closed = 0;
611 h->tag = t;
612 } else
613 t = NULL;
614
615 if (tflags & HTML_NLBEFORE)
616 print_endline(h);
617 if (h->col == 0)
618 print_indent(h);
619 else if ((h->flags & HTML_NOSPACE) == 0) {
620 if (h->flags & HTML_KEEP)
621 print_word(h, "&#x00A0;");
622 else {
623 if (h->flags & HTML_PREKEEP)
624 h->flags |= HTML_KEEP;
625 print_endword(h);
626 }
627 }
628
629 if ( ! (h->flags & HTML_NONOSPACE))
630 h->flags &= ~HTML_NOSPACE;
631 else
632 h->flags |= HTML_NOSPACE;
633
634 /* Print out the tag name and attributes. */
635
636 print_byte(h, '<');
637 print_word(h, htmltags[tag].name);
638
639 va_start(ap, fmt);
640
641 while (*fmt != '\0' && *fmt != 's') {
642
643 /* Parse attributes and arguments. */
644
645 arg1 = va_arg(ap, char *);
646 arg2 = NULL;
647 switch (*fmt++) {
648 case 'c':
649 attr = "class";
650 break;
651 case 'h':
652 attr = "href";
653 break;
654 case 'i':
655 attr = "id";
656 break;
657 case '?':
658 attr = arg1;
659 arg1 = va_arg(ap, char *);
660 break;
661 default:
662 abort();
663 }
664 if (*fmt == 'M')
665 arg2 = va_arg(ap, char *);
666 if (arg1 == NULL)
667 continue;
668
669 /* Print the attributes. */
670
671 print_byte(h, ' ');
672 print_word(h, attr);
673 print_byte(h, '=');
674 print_byte(h, '"');
675 switch (*fmt) {
676 case 'I':
677 print_href(h, arg1, NULL, 0);
678 fmt++;
679 break;
680 case 'M':
681 print_href(h, arg1, arg2, 1);
682 fmt++;
683 break;
684 case 'R':
685 print_byte(h, '#');
686 print_encode(h, arg1, NULL, 1);
687 fmt++;
688 break;
689 default:
690 print_encode(h, arg1, NULL, 1);
691 break;
692 }
693 print_byte(h, '"');
694 }
695
696 style_written = 0;
697 while (*fmt++ == 's') {
698 arg1 = va_arg(ap, char *);
699 arg2 = va_arg(ap, char *);
700 if (arg2 == NULL)
701 continue;
702 print_byte(h, ' ');
703 if (style_written == 0) {
704 print_word(h, "style=\"");
705 style_written = 1;
706 }
707 print_word(h, arg1);
708 print_byte(h, ':');
709 print_byte(h, ' ');
710 print_word(h, arg2);
711 print_byte(h, ';');
712 }
713 if (style_written)
714 print_byte(h, '"');
715
716 va_end(ap);
717
718 /* Accommodate for "well-formed" singleton escaping. */
719
720 if (htmltags[tag].flags & HTML_NOSTACK)
721 print_byte(h, '/');
722
723 print_byte(h, '>');
724
725 if (tflags & HTML_NLBEGIN)
726 print_endline(h);
727 else
728 h->flags |= HTML_NOSPACE;
729
730 if (tflags & HTML_INDENT)
731 h->indent++;
732 if (tflags & HTML_NOINDENT)
733 h->noindent++;
734
735 return t;
736 }
737
738 static void
739 print_ctag(struct html *h, struct tag *tag)
740 {
741 int tflags;
742
743 if (tag->closed == 0) {
744 tag->closed = 1;
745 if (tag == h->metaf)
746 h->metaf = NULL;
747 if (tag == h->tblt)
748 h->tblt = NULL;
749
750 tflags = htmltags[tag->tag].flags;
751 if (tflags & HTML_INDENT)
752 h->indent--;
753 if (tflags & HTML_NOINDENT)
754 h->noindent--;
755 if (tflags & HTML_NLEND)
756 print_endline(h);
757 print_indent(h);
758 print_byte(h, '<');
759 print_byte(h, '/');
760 print_word(h, htmltags[tag->tag].name);
761 print_byte(h, '>');
762 if (tflags & HTML_NLAFTER)
763 print_endline(h);
764 }
765 if (tag->refcnt == 0) {
766 h->tag = tag->next;
767 free(tag);
768 }
769 }
770
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
777
778 void
779 print_gen_comment(struct html *h, struct roff_node *n)
780 {
781 int wantblank;
782
783 print_word(h, "<!-- This is an automatically generated file."
784 " Do not edit.");
785 h->indent = 1;
786 wantblank = 0;
787 while (n != NULL && n->type == ROFFT_COMMENT) {
788 if (strstr(n->string, "-->") == NULL &&
789 (wantblank || *n->string != '\0')) {
790 print_endline(h);
791 print_indent(h);
792 print_word(h, n->string);
793 wantblank = *n->string != '\0';
794 }
795 n = n->next;
796 }
797 if (wantblank)
798 print_endline(h);
799 print_word(h, " -->");
800 print_endline(h);
801 h->indent = 0;
802 }
803
804 void
805 print_text(struct html *h, const char *word)
806 {
807 /*
808 * Always wrap text in a paragraph unless already contained in
809 * some flow container; never put it directly into a section.
810 */
811
812 if (h->tag->tag == TAG_SECTION)
813 print_otag(h, TAG_P, "c", "Pp");
814
815 /* Output whitespace before this text? */
816
817 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
818 if ( ! (HTML_KEEP & h->flags)) {
819 if (HTML_PREKEEP & h->flags)
820 h->flags |= HTML_KEEP;
821 print_endword(h);
822 } else
823 print_word(h, "&#x00A0;");
824 }
825
826 /*
827 * Print the text, optionally surrounded by HTML whitespace,
828 * optionally manually switching fonts before and after.
829 */
830
831 assert(h->metaf == NULL);
832 print_metaf(h);
833 print_indent(h);
834 if ( ! print_encode(h, word, NULL, 0)) {
835 if ( ! (h->flags & HTML_NONOSPACE))
836 h->flags &= ~HTML_NOSPACE;
837 h->flags &= ~HTML_NONEWLINE;
838 } else
839 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
840
841 if (h->metaf != NULL) {
842 print_tagq(h, h->metaf);
843 h->metaf = NULL;
844 }
845
846 h->flags &= ~HTML_IGNDELIM;
847 }
848
849 void
850 print_tagq(struct html *h, const struct tag *until)
851 {
852 struct tag *this, *next;
853
854 for (this = h->tag; this != NULL; this = next) {
855 next = this == until ? NULL : this->next;
856 print_ctag(h, this);
857 }
858 }
859
860 /*
861 * Close out all open elements up to but excluding suntil.
862 * Note that a paragraph just inside stays open together with it
863 * because paragraphs include subsequent phrasing content.
864 */
865 void
866 print_stagq(struct html *h, const struct tag *suntil)
867 {
868 struct tag *this, *next;
869
870 for (this = h->tag; this != NULL; this = next) {
871 next = this->next;
872 if (this == suntil || (next == suntil &&
873 (this->tag == TAG_P || this->tag == TAG_PRE)))
874 break;
875 print_ctag(h, this);
876 }
877 }
878
879
880 /***********************************************************************
881 * Low level output functions.
882 * They implement line breaking using a short static buffer.
883 ***********************************************************************/
884
885 /*
886 * Buffer one HTML output byte.
887 * If the buffer is full, flush and deactivate it and start a new line.
888 * If the buffer is inactive, print directly.
889 */
890 static void
891 print_byte(struct html *h, char c)
892 {
893 if ((h->flags & HTML_BUFFER) == 0) {
894 putchar(c);
895 h->col++;
896 return;
897 }
898
899 if (h->col + h->bufcol < sizeof(h->buf)) {
900 h->buf[h->bufcol++] = c;
901 return;
902 }
903
904 putchar('\n');
905 h->col = 0;
906 print_indent(h);
907 putchar(' ');
908 putchar(' ');
909 fwrite(h->buf, h->bufcol, 1, stdout);
910 putchar(c);
911 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
912 h->bufcol = 0;
913 h->flags &= ~HTML_BUFFER;
914 }
915
916 /*
917 * If something was printed on the current output line, end it.
918 * Not to be called right after print_indent().
919 */
920 void
921 print_endline(struct html *h)
922 {
923 if (h->col == 0)
924 return;
925
926 if (h->bufcol) {
927 putchar(' ');
928 fwrite(h->buf, h->bufcol, 1, stdout);
929 h->bufcol = 0;
930 }
931 putchar('\n');
932 h->col = 0;
933 h->flags |= HTML_NOSPACE;
934 h->flags &= ~HTML_BUFFER;
935 }
936
937 /*
938 * Flush the HTML output buffer.
939 * If it is inactive, activate it.
940 */
941 static void
942 print_endword(struct html *h)
943 {
944 if (h->noindent) {
945 print_byte(h, ' ');
946 return;
947 }
948
949 if ((h->flags & HTML_BUFFER) == 0) {
950 h->col++;
951 h->flags |= HTML_BUFFER;
952 } else if (h->bufcol) {
953 putchar(' ');
954 fwrite(h->buf, h->bufcol, 1, stdout);
955 h->col += h->bufcol + 1;
956 }
957 h->bufcol = 0;
958 }
959
960 /*
961 * If at the beginning of a new output line,
962 * perform indentation and mark the line as containing output.
963 * Make sure to really produce some output right afterwards,
964 * but do not use print_otag() for producing it.
965 */
966 static void
967 print_indent(struct html *h)
968 {
969 size_t i;
970
971 if (h->col || h->noindent)
972 return;
973
974 h->col = h->indent * 2;
975 for (i = 0; i < h->col; i++)
976 putchar(' ');
977 }
978
/*
 * Pass each byte of the NUL-terminated string cp to print_byte(),
 * which prints or buffers it depending on the current state
 * of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}