From 94cd81895ea53a20400d13b0e9c2bcf756ed382b Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Thu, 26 Jan 2017 18:28:18 +0000 Subject: Fix -man -Thtml formatting after .nf (which has nothing to do with "literal", by the way, it means "no fill"): * Use <pre> such that whitespace is preserved.
* Preserve line breaks.
* For font alternating macros, avoid node recursion which required
scary juggling with the fill state.  Instead, simply print the text
children directly.

Missing feature first noticed by kristaps@ in 2011,
then again reported by afresh1@ in 2016,
and finally reported here: https://github.com/Debian/debiman/issues/21 ,
which i only found because of Shane Kerr's comment here:
https://plus.google.com/110314300533310775053/posts/H1eaw9Yskoc
---
 TODO       |  11 +------
 html.c     |   5 ++-
 html.h     |   3 +-
 man_html.c | 105 +++++++++++++++++++++++++++----------------------------------
 4 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/TODO b/TODO
index 30313e90..f4b9a74e 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,6 @@
 ************************************************************************
 * Official mandoc TODO.
-* $Id: TODO,v 1.224 2017/01/25 02:14:43 schwarze Exp $
+* $Id: TODO,v 1.225 2017/01/26 18:28:18 schwarze Exp $
 ************************************************************************
 
 Many issues are annotated for difficulty as follows:
@@ -193,10 +193,6 @@ are mere guesses, and some may be wrong.
   uqs@  Thu, 2 Jun 2011 11:33:35 +0200
   loc *  exist **  algo ***  size *  imp **
 
---- missing man features -----------------------------------------------
-
-- -T[x]html doesn't stipulate non-collapsing spaces in literal mode
-
 --- missing tbl features -----------------------------------------------
 
 - horizontal lines in the layout still consume data cells
@@ -408,11 +404,6 @@ are mere guesses, and some may be wrong.
 
 --- HTML issues --------------------------------------------------------
 
-- In -man -Thtml, .nf does not preserve indentation.
-  It should either convert blanks to &nbsp;
-  or use <pre> rather than <div>
(like .Bd -literal does). - Reported by afresh1@ 12 Apr 2016 14:35:45 -0700 - - .Bf at the beginning of a paragraph inserts a bogus 1ex horizontal space, see for example random(3). Introduced in http://mdocml.bsd.lv/cgi-bin/cvsweb/mdoc_html.c.diff?r1=1.91&r2=1.92 diff --git a/html.c b/html.c index cee98aea..7af682e7 100644 --- a/html.c +++ b/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.201 2017/01/25 02:14:43 schwarze Exp $ */ +/* $Id: html.c,v 1.202 2017/01/26 18:28:18 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011-2015, 2017 Ingo Schwarze @@ -114,7 +114,6 @@ static const char *const roffscales[SCALE_MAX] = { static void a2width(const char *, struct roffsu *); static void print_byte(struct html *, char); -static void print_endline(struct html *); static void print_endword(struct html *); static void print_indent(struct html *); static void print_word(struct html *, const char *); @@ -812,7 +811,7 @@ print_byte(struct html *h, char c) * If something was printed on the current output line, end it. * Not to be called right after print_indent(). 
*/ -static void +void print_endline(struct html *h) { if (h->col == 0) diff --git a/html.h b/html.h index 19532c49..a393ff62 100644 --- a/html.h +++ b/html.h @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.78 2017/01/19 16:59:30 schwarze Exp $ */ +/* $Id: html.h,v 1.79 2017/01/26 18:28:18 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2017 Ingo Schwarze @@ -127,5 +127,6 @@ void print_tblclose(struct html *); void print_tbl(struct html *, const struct tbl_span *); void print_eqn(struct html *, const struct eqn *); void print_paragraph(struct html *); +void print_endline(struct html *); int html_strlen(const char *); diff --git a/man_html.c b/man_html.c index 641e0e33..804859f8 100644 --- a/man_html.c +++ b/man_html.c @@ -1,4 +1,4 @@ -/* $Id: man_html.c,v 1.129 2017/01/21 01:20:32 schwarze Exp $ */ +/* $Id: man_html.c,v 1.130 2017/01/26 18:28:18 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze @@ -32,7 +32,6 @@ #include "html.h" #include "main.h" -/* TODO: preserve ident widths. */ /* FIXME: have PD set the default vspace width. 
*/ #define INDENT 5 @@ -43,8 +42,7 @@ struct html *h struct mhtml { - int fl; -#define MANH_LITERAL (1 << 0) /* literal context */ + struct tag *nofill; }; struct htmlman { @@ -72,9 +70,9 @@ static int man_SS_pre(MAN_ARGS); static int man_UR_pre(MAN_ARGS); static int man_alt_pre(MAN_ARGS); static int man_br_pre(MAN_ARGS); +static int man_fill_pre(MAN_ARGS); static int man_ign_pre(MAN_ARGS); static int man_in_pre(MAN_ARGS); -static int man_literal_pre(MAN_ARGS); static void man_root_post(MAN_ARGS); static void man_root_pre(MAN_ARGS); @@ -101,8 +99,8 @@ static const struct htmlman mans[MAN_MAX] = { { man_alt_pre, NULL }, /* IR */ { man_alt_pre, NULL }, /* RI */ { man_br_pre, NULL }, /* sp */ - { man_literal_pre, NULL }, /* nf */ - { man_literal_pre, NULL }, /* fi */ + { man_fill_pre, NULL }, /* nf */ + { man_fill_pre, NULL }, /* fi */ { NULL, NULL }, /* RE */ { man_RS_pre, NULL }, /* RS */ { man_ign_pre, NULL }, /* DT */ @@ -112,8 +110,8 @@ static const struct htmlman mans[MAN_MAX] = { { man_in_pre, NULL }, /* in */ { man_ign_pre, NULL }, /* ft */ { man_OP_pre, NULL }, /* OP */ - { man_literal_pre, NULL }, /* EX */ - { man_literal_pre, NULL }, /* EE */ + { man_fill_pre, NULL }, /* EX */ + { man_fill_pre, NULL }, /* EE */ { man_UR_pre, NULL }, /* UR */ { NULL, NULL }, /* UE */ { man_ign_pre, NULL }, /* ll */ @@ -150,7 +148,7 @@ html_man(void *arg, const struct roff_man *man) struct html *h; struct tag *t; - memset(&mh, 0, sizeof(mh)); + mh.nofill = NULL; h = (struct html *)arg; if ((h->oflags & HTML_FRAGMENT) == 0) { @@ -200,6 +198,8 @@ print_man_node(MAN_ARGS) child = 1; t = h->tags.head; + if (t == mh->nofill) + t = t->next; switch (n->type) { case ROFFT_TEXT: @@ -207,12 +207,11 @@ print_man_node(MAN_ARGS) print_paragraph(h); return; } - if (n->flags & NODE_LINE && (*n->string == ' ' || - (n->prev != NULL && mh->fl & MANH_LITERAL && - ! 
(h->flags & HTML_NONEWLINE)))) + if (mh->nofill == NULL && + n->flags & NODE_LINE && *n->string == ' ') print_otag(h, TAG_BR, ""); print_text(h, n->string); - return; + break; case ROFFT_EQN: print_eqn(h, n->eqn); break; @@ -252,16 +251,15 @@ print_man_node(MAN_ARGS) print_man_nodelist(man, n->child, mh, h); /* This will automatically close out any font scope. */ - print_stagq(h, t); + print_stagq(h, mh->nofill == NULL ? t : mh->nofill); - switch (n->type) { - case ROFFT_EQN: - break; - default: - if (mans[n->tok].post) - (*mans[n->tok].post)(man, n, mh, h); - break; - } + if (n->type != ROFFT_TEXT && n->type != ROFFT_EQN && + mans[n->tok].post != NULL) + (*mans[n->tok].post)(man, n, mh, h); + + if (mh->nofill != NULL && + (n->next == NULL || n->next->flags & NODE_LINE)) + print_endline(h); } static int @@ -349,13 +347,11 @@ man_br_pre(MAN_ARGS) static int man_SH_pre(MAN_ARGS) { - if (n->type == ROFFT_BLOCK) { - mh->fl &= ~MANH_LITERAL; - return 1; - } else if (n->type == ROFFT_BODY) - return 1; - - print_otag(h, TAG_H1, "c", "Sh"); + if (n->type == ROFFT_BLOCK && mh->nofill != NULL) { + print_tagq(h, mh->nofill); + mh->nofill = NULL; + } else if (n->type == ROFFT_HEAD) + print_otag(h, TAG_H1, "c", "Sh"); return 1; } @@ -363,17 +359,11 @@ static int man_alt_pre(MAN_ARGS) { const struct roff_node *nn; - int i, savelit; + int i; enum htmltag fp; struct tag *t; - if ((savelit = mh->fl & MANH_LITERAL)) - print_otag(h, TAG_BR, ""); - - mh->fl &= ~MANH_LITERAL; - for (i = 0, nn = n->child; nn; nn = nn->next, i++) { - t = NULL; switch (n->tok) { case MAN_BI: fp = i % 2 ? 
TAG_I : TAG_B; @@ -400,18 +390,14 @@ man_alt_pre(MAN_ARGS) if (i) h->flags |= HTML_NOSPACE; - if (TAG_MAX != fp) + if (fp != TAG_MAX) t = print_otag(h, fp, ""); - print_man_node(man, nn, mh, h); + print_text(h, nn->string); - if (t) + if (fp != TAG_MAX) print_tagq(h, t); } - - if (savelit) - mh->fl |= MANH_LITERAL; - return 0; } @@ -427,13 +413,11 @@ man_SM_pre(MAN_ARGS) static int man_SS_pre(MAN_ARGS) { - if (n->type == ROFFT_BLOCK) { - mh->fl &= ~MANH_LITERAL; - return 1; - } else if (n->type == ROFFT_BODY) - return 1; - - print_otag(h, TAG_H2, "c", "Ss"); + if (n->type == ROFFT_BLOCK && mh->nofill != NULL) { + print_tagq(h, mh->nofill); + mh->nofill = NULL; + } else if (n->type == ROFFT_HEAD) + print_otag(h, TAG_H2, "c", "Ss"); return 1; } @@ -552,15 +536,20 @@ man_I_pre(MAN_ARGS) } static int -man_literal_pre(MAN_ARGS) +man_fill_pre(MAN_ARGS) { - if (MAN_fi == n->tok || MAN_EE == n->tok) { - print_otag(h, TAG_BR, ""); - mh->fl &= ~MANH_LITERAL; - } else - mh->fl |= MANH_LITERAL; - + if (mh->nofill != NULL) { + print_tagq(h, mh->nofill); + mh->nofill = NULL; + } else + print_otag(h, TAG_BR, ""); + } else { + if (mh->nofill == NULL) + mh->nofill = print_otag(h, TAG_PRE, ""); + else + print_otag(h, TAG_BR, ""); + } return 0; } -- cgit v1.2.3-56-ge451