]> git.cameronkatri.com Git - mandoc.git/blob - html.c
delete the TAG_IDIV crutch, which is no longer used
[mandoc.git] / html.c
1 /* $Id: html.c,v 1.258 2019/09/01 15:12:19 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "out.h"
38 #include "html.h"
39 #include "manconf.h"
40 #include "main.h"
41
/*
 * Static description of one HTML element type:
 * the element name as it is written to the output,
 * and a bit mask built from the HTML_* properties below.
 */
struct	htmldata {
	const char	 *name;		/* Element name. */
	int		  flags;	/* Mask of the bits below. */
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */
};
58
59 static const struct htmldata htmltags[TAG_MAX] = {
60 {"html", HTML_NLALL},
61 {"head", HTML_NLALL | HTML_INDENT},
62 {"meta", HTML_NOSTACK | HTML_NLALL},
63 {"link", HTML_NOSTACK | HTML_NLALL},
64 {"style", HTML_NLALL | HTML_INDENT},
65 {"title", HTML_NLAROUND},
66 {"body", HTML_NLALL},
67 {"div", HTML_NLAROUND},
68 {"section", HTML_NLALL},
69 {"table", HTML_NLALL | HTML_INDENT},
70 {"tr", HTML_NLALL | HTML_INDENT},
71 {"td", HTML_NLAROUND},
72 {"li", HTML_NLAROUND | HTML_INDENT},
73 {"ul", HTML_NLALL | HTML_INDENT},
74 {"ol", HTML_NLALL | HTML_INDENT},
75 {"dl", HTML_NLALL | HTML_INDENT},
76 {"dt", HTML_NLAROUND},
77 {"dd", HTML_NLAROUND | HTML_INDENT},
78 {"h1", HTML_TOPHRASE | HTML_NLAROUND},
79 {"h2", HTML_TOPHRASE | HTML_NLAROUND},
80 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
81 {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
82 {"a", HTML_INPHRASE | HTML_TOPHRASE},
83 {"b", HTML_INPHRASE | HTML_TOPHRASE},
84 {"cite", HTML_INPHRASE | HTML_TOPHRASE},
85 {"code", HTML_INPHRASE | HTML_TOPHRASE},
86 {"i", HTML_INPHRASE | HTML_TOPHRASE},
87 {"small", HTML_INPHRASE | HTML_TOPHRASE},
88 {"span", HTML_INPHRASE | HTML_TOPHRASE},
89 {"var", HTML_INPHRASE | HTML_TOPHRASE},
90 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
91 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
92 {"mrow", 0},
93 {"mi", 0},
94 {"mn", 0},
95 {"mo", 0},
96 {"msup", 0},
97 {"msub", 0},
98 {"msubsup", 0},
99 {"mfrac", 0},
100 {"msqrt", 0},
101 {"mfenced", 0},
102 {"mtable", 0},
103 {"mtr", 0},
104 {"mtd", 0},
105 {"munderover", 0},
106 {"munder", 0},
107 {"mover", 0},
108 };
109
110 /* Avoid duplicate HTML id= attributes. */
111 static struct ohash id_unique;
112
113 static void html_reset_internal(struct html *);
114 static void print_byte(struct html *, char);
115 static void print_endword(struct html *);
116 static void print_indent(struct html *);
117 static void print_word(struct html *, const char *);
118
119 static void print_ctag(struct html *, struct tag *);
120 static int print_escape(struct html *, char);
121 static int print_encode(struct html *, const char *, const char *, int);
122 static void print_href(struct html *, const char *, const char *, int);
123 static void print_metaf(struct html *);
124
125
126 void *
127 html_alloc(const struct manoutput *outopts)
128 {
129 struct html *h;
130
131 h = mandoc_calloc(1, sizeof(struct html));
132
133 h->tag = NULL;
134 h->style = outopts->style;
135 if ((h->base_man1 = outopts->man) == NULL)
136 h->base_man2 = NULL;
137 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
138 *h->base_man2++ = '\0';
139 h->base_includes = outopts->includes;
140 if (outopts->fragment)
141 h->oflags |= HTML_FRAGMENT;
142 if (outopts->toc)
143 h->oflags |= HTML_TOC;
144
145 mandoc_ohash_init(&id_unique, 4, 0);
146
147 return h;
148 }
149
150 static void
151 html_reset_internal(struct html *h)
152 {
153 struct tag *tag;
154 char *cp;
155 unsigned int slot;
156
157 while ((tag = h->tag) != NULL) {
158 h->tag = tag->next;
159 free(tag);
160 }
161 cp = ohash_first(&id_unique, &slot);
162 while (cp != NULL) {
163 free(cp);
164 cp = ohash_next(&id_unique, &slot);
165 }
166 ohash_delete(&id_unique);
167 }
168
169 void
170 html_reset(void *p)
171 {
172 html_reset_internal(p);
173 mandoc_ohash_init(&id_unique, 4, 0);
174 }
175
/*
 * Release the formatter state p together with everything
 * it still holds: open elements and remembered ids.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
182
183 void
184 print_gen_head(struct html *h)
185 {
186 struct tag *t;
187
188 print_otag(h, TAG_META, "?", "charset", "utf-8");
189 if (h->style != NULL) {
190 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
191 h->style, "type", "text/css", "media", "all");
192 return;
193 }
194
195 /*
196 * Print a minimal embedded style sheet.
197 */
198
199 t = print_otag(h, TAG_STYLE, "");
200 print_text(h, "table.head, table.foot { width: 100%; }");
201 print_endline(h);
202 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
203 print_endline(h);
204 print_text(h, "td.head-vol { text-align: center; }");
205 print_endline(h);
206 print_text(h, ".Nd, .Bf, .Op { display: inline; }");
207 print_endline(h);
208 print_text(h, ".Pa, .Ad { font-style: italic; }");
209 print_endline(h);
210 print_text(h, ".Ms { font-weight: bold; }");
211 print_endline(h);
212 print_text(h, ".Bl-diag ");
213 print_byte(h, '>');
214 print_text(h, " dt { font-weight: bold; }");
215 print_endline(h);
216 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
217 "{ font-weight: bold; font-family: inherit; }");
218 print_tagq(h, t);
219 }
220
221 int
222 html_setfont(struct html *h, enum mandoc_esc font)
223 {
224 switch (font) {
225 case ESCAPE_FONTPREV:
226 font = h->metal;
227 break;
228 case ESCAPE_FONTITALIC:
229 case ESCAPE_FONTBOLD:
230 case ESCAPE_FONTBI:
231 case ESCAPE_FONTCW:
232 case ESCAPE_FONTROMAN:
233 break;
234 case ESCAPE_FONT:
235 font = ESCAPE_FONTROMAN;
236 break;
237 default:
238 return 0;
239 }
240 h->metal = h->metac;
241 h->metac = font;
242 return 1;
243 }
244
245 static void
246 print_metaf(struct html *h)
247 {
248 if (h->metaf) {
249 print_tagq(h, h->metaf);
250 h->metaf = NULL;
251 }
252 switch (h->metac) {
253 case ESCAPE_FONTITALIC:
254 h->metaf = print_otag(h, TAG_I, "");
255 break;
256 case ESCAPE_FONTBOLD:
257 h->metaf = print_otag(h, TAG_B, "");
258 break;
259 case ESCAPE_FONTBI:
260 h->metaf = print_otag(h, TAG_B, "");
261 print_otag(h, TAG_I, "");
262 break;
263 case ESCAPE_FONTCW:
264 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
265 break;
266 default:
267 break;
268 }
269 }
270
271 void
272 html_close_paragraph(struct html *h)
273 {
274 struct tag *t;
275
276 for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
277 switch(t->tag) {
278 case TAG_P:
279 case TAG_PRE:
280 print_tagq(h, t);
281 break;
282 case TAG_A:
283 print_tagq(h, t);
284 continue;
285 default:
286 continue;
287 }
288 break;
289 }
290 }
291
292 /*
293 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
294 * TOKEN_NONE does not switch. The old mode is returned.
295 */
296 enum roff_tok
297 html_fillmode(struct html *h, enum roff_tok want)
298 {
299 struct tag *t;
300 enum roff_tok had;
301
302 for (t = h->tag; t != NULL; t = t->next)
303 if (t->tag == TAG_PRE)
304 break;
305
306 had = t == NULL ? ROFF_fi : ROFF_nf;
307
308 if (want != had) {
309 switch (want) {
310 case ROFF_fi:
311 print_tagq(h, t);
312 break;
313 case ROFF_nf:
314 html_close_paragraph(h);
315 print_otag(h, TAG_PRE, "");
316 break;
317 case TOKEN_NONE:
318 break;
319 default:
320 abort();
321 }
322 }
323 return had;
324 }
325
326 char *
327 html_make_id(const struct roff_node *n, int unique)
328 {
329 const struct roff_node *nch;
330 char *buf, *bufs, *cp;
331 unsigned int slot;
332 int suffix;
333
334 for (nch = n->child; nch != NULL; nch = nch->next)
335 if (nch->type != ROFFT_TEXT)
336 return NULL;
337
338 buf = NULL;
339 deroff(&buf, n);
340 if (buf == NULL)
341 return NULL;
342
343 /*
344 * In ID attributes, only use ASCII characters that are
345 * permitted in URL-fragment strings according to the
346 * explicit list at:
347 * https://url.spec.whatwg.org/#url-fragment-string
348 */
349
350 for (cp = buf; *cp != '\0'; cp++)
351 if (isalnum((unsigned char)*cp) == 0 &&
352 strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
353 *cp = '_';
354
355 if (unique == 0)
356 return buf;
357
358 /* Avoid duplicate HTML id= attributes. */
359
360 bufs = NULL;
361 suffix = 1;
362 slot = ohash_qlookup(&id_unique, buf);
363 cp = ohash_find(&id_unique, slot);
364 if (cp != NULL) {
365 while (cp != NULL) {
366 free(bufs);
367 if (++suffix > 127) {
368 free(buf);
369 return NULL;
370 }
371 mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
372 slot = ohash_qlookup(&id_unique, bufs);
373 cp = ohash_find(&id_unique, slot);
374 }
375 free(buf);
376 buf = bufs;
377 }
378 ohash_insert(&id_unique, slot, buf);
379 return buf;
380 }
381
382 static int
383 print_escape(struct html *h, char c)
384 {
385
386 switch (c) {
387 case '<':
388 print_word(h, "&lt;");
389 break;
390 case '>':
391 print_word(h, "&gt;");
392 break;
393 case '&':
394 print_word(h, "&amp;");
395 break;
396 case '"':
397 print_word(h, "&quot;");
398 break;
399 case ASCII_NBRSP:
400 print_word(h, "&nbsp;");
401 break;
402 case ASCII_HYPH:
403 print_byte(h, '-');
404 break;
405 case ASCII_BREAK:
406 break;
407 default:
408 return 0;
409 }
410 return 1;
411 }
412
413 static int
414 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
415 {
416 char numbuf[16];
417 const char *seq;
418 size_t sz;
419 int c, len, breakline, nospace;
420 enum mandoc_esc esc;
421 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
422 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
423
424 if (pend == NULL)
425 pend = strchr(p, '\0');
426
427 breakline = 0;
428 nospace = 0;
429
430 while (p < pend) {
431 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
432 h->flags &= ~HTML_SKIPCHAR;
433 p++;
434 continue;
435 }
436
437 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
438 print_byte(h, *p);
439
440 if (breakline &&
441 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
442 print_otag(h, TAG_BR, "");
443 breakline = 0;
444 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
445 p++;
446 continue;
447 }
448
449 if (p >= pend)
450 break;
451
452 if (*p == ' ') {
453 print_endword(h);
454 p++;
455 continue;
456 }
457
458 if (print_escape(h, *p++))
459 continue;
460
461 esc = mandoc_escape(&p, &seq, &len);
462 switch (esc) {
463 case ESCAPE_FONT:
464 case ESCAPE_FONTPREV:
465 case ESCAPE_FONTBOLD:
466 case ESCAPE_FONTITALIC:
467 case ESCAPE_FONTBI:
468 case ESCAPE_FONTCW:
469 case ESCAPE_FONTROMAN:
470 if (0 == norecurse) {
471 h->flags |= HTML_NOSPACE;
472 if (html_setfont(h, esc))
473 print_metaf(h);
474 h->flags &= ~HTML_NOSPACE;
475 }
476 continue;
477 case ESCAPE_SKIPCHAR:
478 h->flags |= HTML_SKIPCHAR;
479 continue;
480 case ESCAPE_ERROR:
481 continue;
482 default:
483 break;
484 }
485
486 if (h->flags & HTML_SKIPCHAR) {
487 h->flags &= ~HTML_SKIPCHAR;
488 continue;
489 }
490
491 switch (esc) {
492 case ESCAPE_UNICODE:
493 /* Skip past "u" header. */
494 c = mchars_num2uc(seq + 1, len - 1);
495 break;
496 case ESCAPE_NUMBERED:
497 c = mchars_num2char(seq, len);
498 if (c < 0)
499 continue;
500 break;
501 case ESCAPE_SPECIAL:
502 c = mchars_spec2cp(seq, len);
503 if (c <= 0)
504 continue;
505 break;
506 case ESCAPE_UNDEF:
507 c = *seq;
508 break;
509 case ESCAPE_DEVICE:
510 print_word(h, "html");
511 continue;
512 case ESCAPE_BREAK:
513 breakline = 1;
514 continue;
515 case ESCAPE_NOSPACE:
516 if ('\0' == *p)
517 nospace = 1;
518 continue;
519 case ESCAPE_OVERSTRIKE:
520 if (len == 0)
521 continue;
522 c = seq[len - 1];
523 break;
524 default:
525 continue;
526 }
527 if ((c < 0x20 && c != 0x09) ||
528 (c > 0x7E && c < 0xA0))
529 c = 0xFFFD;
530 if (c > 0x7E) {
531 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
532 print_word(h, numbuf);
533 } else if (print_escape(h, c) == 0)
534 print_byte(h, c);
535 }
536
537 return nospace;
538 }
539
540 static void
541 print_href(struct html *h, const char *name, const char *sec, int man)
542 {
543 struct stat sb;
544 const char *p, *pp;
545 char *filename;
546
547 if (man) {
548 pp = h->base_man1;
549 if (h->base_man2 != NULL) {
550 mandoc_asprintf(&filename, "%s.%s", name, sec);
551 if (stat(filename, &sb) == -1)
552 pp = h->base_man2;
553 free(filename);
554 }
555 } else
556 pp = h->base_includes;
557
558 while ((p = strchr(pp, '%')) != NULL) {
559 print_encode(h, pp, p, 1);
560 if (man && p[1] == 'S') {
561 if (sec == NULL)
562 print_byte(h, '1');
563 else
564 print_encode(h, sec, NULL, 1);
565 } else if ((man && p[1] == 'N') ||
566 (man == 0 && p[1] == 'I'))
567 print_encode(h, name, NULL, 1);
568 else
569 print_encode(h, p, p + 2, 1);
570 pp = p + 2;
571 }
572 if (*pp != '\0')
573 print_encode(h, pp, NULL, 1);
574 }
575
576 struct tag *
577 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
578 {
579 va_list ap;
580 struct tag *t;
581 const char *attr;
582 char *arg1, *arg2;
583 int style_written, tflags;
584
585 tflags = htmltags[tag].flags;
586
587 /* Flow content is not allowed in phrasing context. */
588
589 if ((tflags & HTML_INPHRASE) == 0) {
590 for (t = h->tag; t != NULL; t = t->next) {
591 if (t->closed)
592 continue;
593 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
594 break;
595 }
596 }
597
598 /* Push this tag onto the stack of open scopes. */
599
600 if ((tflags & HTML_NOSTACK) == 0) {
601 t = mandoc_malloc(sizeof(struct tag));
602 t->tag = tag;
603 t->next = h->tag;
604 t->refcnt = 0;
605 t->closed = 0;
606 h->tag = t;
607 } else
608 t = NULL;
609
610 if (tflags & HTML_NLBEFORE)
611 print_endline(h);
612 if (h->col == 0)
613 print_indent(h);
614 else if ((h->flags & HTML_NOSPACE) == 0) {
615 if (h->flags & HTML_KEEP)
616 print_word(h, "&#x00A0;");
617 else {
618 if (h->flags & HTML_PREKEEP)
619 h->flags |= HTML_KEEP;
620 print_endword(h);
621 }
622 }
623
624 if ( ! (h->flags & HTML_NONOSPACE))
625 h->flags &= ~HTML_NOSPACE;
626 else
627 h->flags |= HTML_NOSPACE;
628
629 /* Print out the tag name and attributes. */
630
631 print_byte(h, '<');
632 print_word(h, htmltags[tag].name);
633
634 va_start(ap, fmt);
635
636 while (*fmt != '\0' && *fmt != 's') {
637
638 /* Parse attributes and arguments. */
639
640 arg1 = va_arg(ap, char *);
641 arg2 = NULL;
642 switch (*fmt++) {
643 case 'c':
644 attr = "class";
645 break;
646 case 'h':
647 attr = "href";
648 break;
649 case 'i':
650 attr = "id";
651 break;
652 case '?':
653 attr = arg1;
654 arg1 = va_arg(ap, char *);
655 break;
656 default:
657 abort();
658 }
659 if (*fmt == 'M')
660 arg2 = va_arg(ap, char *);
661 if (arg1 == NULL)
662 continue;
663
664 /* Print the attributes. */
665
666 print_byte(h, ' ');
667 print_word(h, attr);
668 print_byte(h, '=');
669 print_byte(h, '"');
670 switch (*fmt) {
671 case 'I':
672 print_href(h, arg1, NULL, 0);
673 fmt++;
674 break;
675 case 'M':
676 print_href(h, arg1, arg2, 1);
677 fmt++;
678 break;
679 case 'R':
680 print_byte(h, '#');
681 print_encode(h, arg1, NULL, 1);
682 fmt++;
683 break;
684 default:
685 print_encode(h, arg1, NULL, 1);
686 break;
687 }
688 print_byte(h, '"');
689 }
690
691 style_written = 0;
692 while (*fmt++ == 's') {
693 arg1 = va_arg(ap, char *);
694 arg2 = va_arg(ap, char *);
695 if (arg2 == NULL)
696 continue;
697 print_byte(h, ' ');
698 if (style_written == 0) {
699 print_word(h, "style=\"");
700 style_written = 1;
701 }
702 print_word(h, arg1);
703 print_byte(h, ':');
704 print_byte(h, ' ');
705 print_word(h, arg2);
706 print_byte(h, ';');
707 }
708 if (style_written)
709 print_byte(h, '"');
710
711 va_end(ap);
712
713 /* Accommodate for "well-formed" singleton escaping. */
714
715 if (htmltags[tag].flags & HTML_NOSTACK)
716 print_byte(h, '/');
717
718 print_byte(h, '>');
719
720 if (tflags & HTML_NLBEGIN)
721 print_endline(h);
722 else
723 h->flags |= HTML_NOSPACE;
724
725 if (tflags & HTML_INDENT)
726 h->indent++;
727 if (tflags & HTML_NOINDENT)
728 h->noindent++;
729
730 return t;
731 }
732
733 static void
734 print_ctag(struct html *h, struct tag *tag)
735 {
736 int tflags;
737
738 if (tag->closed == 0) {
739 tag->closed = 1;
740 if (tag == h->metaf)
741 h->metaf = NULL;
742 if (tag == h->tblt)
743 h->tblt = NULL;
744
745 tflags = htmltags[tag->tag].flags;
746 if (tflags & HTML_INDENT)
747 h->indent--;
748 if (tflags & HTML_NOINDENT)
749 h->noindent--;
750 if (tflags & HTML_NLEND)
751 print_endline(h);
752 print_indent(h);
753 print_byte(h, '<');
754 print_byte(h, '/');
755 print_word(h, htmltags[tag->tag].name);
756 print_byte(h, '>');
757 if (tflags & HTML_NLAFTER)
758 print_endline(h);
759 }
760 if (tag->refcnt == 0) {
761 h->tag = tag->next;
762 free(tag);
763 }
764 }
765
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
772
773 void
774 print_gen_comment(struct html *h, struct roff_node *n)
775 {
776 int wantblank;
777
778 print_word(h, "<!-- This is an automatically generated file."
779 " Do not edit.");
780 h->indent = 1;
781 wantblank = 0;
782 while (n != NULL && n->type == ROFFT_COMMENT) {
783 if (strstr(n->string, "-->") == NULL &&
784 (wantblank || *n->string != '\0')) {
785 print_endline(h);
786 print_indent(h);
787 print_word(h, n->string);
788 wantblank = *n->string != '\0';
789 }
790 n = n->next;
791 }
792 if (wantblank)
793 print_endline(h);
794 print_word(h, " -->");
795 print_endline(h);
796 h->indent = 0;
797 }
798
799 void
800 print_text(struct html *h, const char *word)
801 {
802 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
803 if ( ! (HTML_KEEP & h->flags)) {
804 if (HTML_PREKEEP & h->flags)
805 h->flags |= HTML_KEEP;
806 print_endword(h);
807 } else
808 print_word(h, "&#x00A0;");
809 }
810
811 assert(h->metaf == NULL);
812 print_metaf(h);
813 print_indent(h);
814 if ( ! print_encode(h, word, NULL, 0)) {
815 if ( ! (h->flags & HTML_NONOSPACE))
816 h->flags &= ~HTML_NOSPACE;
817 h->flags &= ~HTML_NONEWLINE;
818 } else
819 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
820
821 if (h->metaf != NULL) {
822 print_tagq(h, h->metaf);
823 h->metaf = NULL;
824 }
825
826 h->flags &= ~HTML_IGNDELIM;
827 }
828
829 void
830 print_tagq(struct html *h, const struct tag *until)
831 {
832 struct tag *this, *next;
833
834 for (this = h->tag; this != NULL; this = next) {
835 next = this == until ? NULL : this->next;
836 print_ctag(h, this);
837 }
838 }
839
840 /*
841 * Close out all open elements up to but excluding suntil.
842 * Note that a paragraph just inside stays open together with it
843 * because paragraphs include subsequent phrasing content.
844 */
845 void
846 print_stagq(struct html *h, const struct tag *suntil)
847 {
848 struct tag *this, *next;
849
850 for (this = h->tag; this != NULL; this = next) {
851 next = this->next;
852 if (this == suntil || (next == suntil &&
853 (this->tag == TAG_P || this->tag == TAG_PRE)))
854 break;
855 print_ctag(h, this);
856 }
857 }
858
859
860 /***********************************************************************
861 * Low level output functions.
862 * They implement line breaking using a short static buffer.
863 ***********************************************************************/
864
865 /*
866 * Buffer one HTML output byte.
867 * If the buffer is full, flush and deactivate it and start a new line.
868 * If the buffer is inactive, print directly.
869 */
870 static void
871 print_byte(struct html *h, char c)
872 {
873 if ((h->flags & HTML_BUFFER) == 0) {
874 putchar(c);
875 h->col++;
876 return;
877 }
878
879 if (h->col + h->bufcol < sizeof(h->buf)) {
880 h->buf[h->bufcol++] = c;
881 return;
882 }
883
884 putchar('\n');
885 h->col = 0;
886 print_indent(h);
887 putchar(' ');
888 putchar(' ');
889 fwrite(h->buf, h->bufcol, 1, stdout);
890 putchar(c);
891 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
892 h->bufcol = 0;
893 h->flags &= ~HTML_BUFFER;
894 }
895
896 /*
897 * If something was printed on the current output line, end it.
898 * Not to be called right after print_indent().
899 */
900 void
901 print_endline(struct html *h)
902 {
903 if (h->col == 0)
904 return;
905
906 if (h->bufcol) {
907 putchar(' ');
908 fwrite(h->buf, h->bufcol, 1, stdout);
909 h->bufcol = 0;
910 }
911 putchar('\n');
912 h->col = 0;
913 h->flags |= HTML_NOSPACE;
914 h->flags &= ~HTML_BUFFER;
915 }
916
917 /*
918 * Flush the HTML output buffer.
919 * If it is inactive, activate it.
920 */
921 static void
922 print_endword(struct html *h)
923 {
924 if (h->noindent) {
925 print_byte(h, ' ');
926 return;
927 }
928
929 if ((h->flags & HTML_BUFFER) == 0) {
930 h->col++;
931 h->flags |= HTML_BUFFER;
932 } else if (h->bufcol) {
933 putchar(' ');
934 fwrite(h->buf, h->bufcol, 1, stdout);
935 h->col += h->bufcol + 1;
936 }
937 h->bufcol = 0;
938 }
939
940 /*
941 * If at the beginning of a new output line,
942 * perform indentation and mark the line as containing output.
943 * Make sure to really produce some output right afterwards,
944 * but do not use print_otag() for producing it.
945 */
946 static void
947 print_indent(struct html *h)
948 {
949 size_t i;
950
951 if (h->col)
952 return;
953
954 if (h->noindent == 0) {
955 h->col = h->indent * 2;
956 for (i = 0; i < h->col; i++)
957 putchar(' ');
958 }
959 h->flags &= ~HTML_NOSPACE;
960 }
961
/*
 * Hand the bytes of a NUL-terminated string to print_byte()
 * one by one, such that they are printed or buffered
 * depending on the current state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}