aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2019-01-18 14:36:21 +0000
committerIngo Schwarze <schwarze@openbsd.org>2019-01-18 14:36:21 +0000
commit184cfc5d3feba7f450ad72ccdfae588e143fc84a (patch)
tree78cf093bcdbd7504e582136d033b7a55b65bc2fe
parentecb77e8fa3cae394e775c369416aaf41cbd1648f (diff)
downloadmandoc-184cfc5d3feba7f450ad72ccdfae588e143fc84a.tar.gz
mandoc-184cfc5d3feba7f450ad72ccdfae588e143fc84a.tar.zst
mandoc-184cfc5d3feba7f450ad72ccdfae588e143fc84a.zip
The .UR and .MT blocks in man(7) are represented by <a> elements
which establish phrasing context, but they can contain paragraph breaks (which is relevant for terminal formatting, so we can't just change the structure of the syntax tree), which are represented by <p> elements and cannot occur inside <a>. Fix this by prematurely closing the <a> element in the HTML formatter. This means that the clickable text in HTML output is shorter than what is represented as the link text in terminal output, but in HTML, it is frankly impossible to have the clickable area of a hyperlink extend across a paragraph break. The difference in presentation is not a major problem, and besides, paragraph breaks inside .UR are rather poor style in the first place. The implementation is quite tricky. Naively closing out the <a> prematurely would result in accessing a stale pointer when later reaching the physical end of the .UR block. So this commit separates visual and structural closing of "struct tag" stack items. Visual closing means that the HTML element is closed but the "struct tag" remains on the stack, to avoid later access to a stale pointer and to avoid closing the same HTML element a second time later. This also needs reference counting of pointers to "struct tag" stack items because often more than one child holds a pointer to the same parent item, and only the outermost child can safely do the physical closing. In the whole corpus of nearly half a million manual pages on man.openbsd.org, this problem occurs in exactly one page: the groff(1) version 1.20.1 manual contained in DragonFly-3.8.2, which contains a formatting error triggering the bug.
-rw-r--r--html.c92
-rw-r--r--html.h4
-rw-r--r--man_html.c18
-rw-r--r--mdoc_html.c19
4 files changed, 74 insertions, 59 deletions
diff --git a/html.c b/html.c
index cc1bc394..18141d60 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.251 2019/01/11 12:56:42 schwarze Exp $ */
+/* $Id: html.c,v 1.252 2019/01/18 14:36:21 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
@@ -271,11 +271,19 @@ html_close_paragraph(struct html *h)
{
struct tag *t;
- for (t = h->tag; t != NULL; t = t->next) {
- if (t->tag == TAG_P || t->tag == TAG_PRE) {
+ for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
+ switch(t->tag) {
+ case TAG_P:
+ case TAG_PRE:
print_tagq(h, t);
break;
+ case TAG_A:
+ print_tagq(h, t);
+ continue;
+ default:
+ continue;
}
+ break;
}
}
@@ -579,6 +587,8 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
t = mandoc_malloc(sizeof(struct tag));
t->tag = tag;
t->next = h->tag;
+ t->refcnt = 0;
+ t->closed = 0;
h->tag = t;
} else
t = NULL;
@@ -711,33 +721,32 @@ print_ctag(struct html *h, struct tag *tag)
{
int tflags;
- /*
- * Remember to close out and nullify the current
- * meta-font and table, if applicable.
- */
- if (tag == h->metaf)
- h->metaf = NULL;
- if (tag == h->tblt)
- h->tblt = NULL;
-
- tflags = htmltags[tag->tag].flags;
-
- if (tflags & HTML_INDENT)
- h->indent--;
- if (tflags & HTML_NOINDENT)
- h->noindent--;
- if (tflags & HTML_NLEND)
- print_endline(h);
- print_indent(h);
- print_byte(h, '<');
- print_byte(h, '/');
- print_word(h, htmltags[tag->tag].name);
- print_byte(h, '>');
- if (tflags & HTML_NLAFTER)
- print_endline(h);
-
- h->tag = tag->next;
- free(tag);
+ if (tag->closed == 0) {
+ tag->closed = 1;
+ if (tag == h->metaf)
+ h->metaf = NULL;
+ if (tag == h->tblt)
+ h->tblt = NULL;
+
+ tflags = htmltags[tag->tag].flags;
+ if (tflags & HTML_INDENT)
+ h->indent--;
+ if (tflags & HTML_NOINDENT)
+ h->noindent--;
+ if (tflags & HTML_NLEND)
+ print_endline(h);
+ print_indent(h);
+ print_byte(h, '<');
+ print_byte(h, '/');
+ print_word(h, htmltags[tag->tag].name);
+ print_byte(h, '>');
+ if (tflags & HTML_NLAFTER)
+ print_endline(h);
+ }
+ if (tag->refcnt == 0) {
+ h->tag = tag->next;
+ free(tag);
+ }
}
void
@@ -824,12 +833,11 @@ print_text(struct html *h, const char *word)
void
print_tagq(struct html *h, const struct tag *until)
{
- struct tag *tag;
+ struct tag *this, *next;
- while ((tag = h->tag) != NULL) {
- print_ctag(h, tag);
- if (tag == until)
- return;
+ for (this = h->tag; this != NULL; this = next) {
+ next = this == until ? NULL : this->next;
+ print_ctag(h, this);
}
}
@@ -841,14 +849,14 @@ print_tagq(struct html *h, const struct tag *until)
void
print_stagq(struct html *h, const struct tag *suntil)
{
- struct tag *tag;
+ struct tag *this, *next;
- while ((tag = h->tag) != NULL) {
- if (tag == suntil ||
- (tag->next == suntil &&
- (tag->tag == TAG_P || tag->tag == TAG_PRE)))
- return;
- print_ctag(h, tag);
+ for (this = h->tag; this != NULL; this = next) {
+ next = this->next;
+ if (this == suntil || (next == suntil &&
+ (this->tag == TAG_P || this->tag == TAG_PRE)))
+ break;
+ print_ctag(h, this);
}
}
diff --git a/html.h b/html.h
index 546f3582..7bdf698c 100644
--- a/html.h
+++ b/html.h
@@ -1,4 +1,4 @@
-/* $Id: html.h,v 1.100 2019/01/07 07:26:29 schwarze Exp $ */
+/* $Id: html.h,v 1.101 2019/01/18 14:36:21 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2017, 2018, 2019 Ingo Schwarze <schwarze@openbsd.org>
@@ -79,6 +79,8 @@ enum htmlfont {
struct tag {
struct tag *next;
+ int refcnt;
+ int closed;
enum htmltag tag;
};
diff --git a/man_html.c b/man_html.c
index 90aad393..187383e2 100644
--- a/man_html.c
+++ b/man_html.c
@@ -1,4 +1,4 @@
-/* $Id: man_html.c,v 1.169 2019/01/11 16:36:19 schwarze Exp $ */
+/* $Id: man_html.c,v 1.170 2019/01/18 14:36:21 schwarze Exp $ */
/*
* Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
@@ -168,10 +168,6 @@ print_man_node(MAN_ARGS)
html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi);
child = 1;
- t = h->tag;
- if (t->tag == TAG_P || t->tag == TAG_PRE)
- t = t->next;
-
switch (n->type) {
case ROFFT_TEXT:
if (*n->string == '\0') {
@@ -183,9 +179,13 @@ print_man_node(MAN_ARGS)
print_endline(h);
else if (n->flags & NODE_DELIMC)
h->flags |= HTML_NOSPACE;
+ t = h->tag;
+ t->refcnt++;
print_text(h, n->string);
break;
case ROFFT_EQN:
+ t = h->tag;
+ t->refcnt++;
print_eqn(h, n->eqn);
break;
case ROFFT_TBL:
@@ -211,12 +211,13 @@ print_man_node(MAN_ARGS)
* the "meta" table state. This will be reopened on the
* next table element.
*/
- if (h->tblt != NULL) {
+ if (h->tblt != NULL)
print_tblclose(h);
- t = h->tag;
- }
+ t = h->tag;
+ t->refcnt++;
if (n->tok < ROFF_MAX) {
roff_html_pre(h, n);
+ t->refcnt--;
print_stagq(h, t);
return;
}
@@ -231,6 +232,7 @@ print_man_node(MAN_ARGS)
print_man_nodelist(man, n->child, h);
/* This will automatically close out any font scope. */
+ t->refcnt--;
print_stagq(h, t);
if (n->flags & NODE_NOFILL && n->tok != MAN_YS &&
diff --git a/mdoc_html.c b/mdoc_html.c
index c7f452ec..ea750cdb 100644
--- a/mdoc_html.c
+++ b/mdoc_html.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_html.c,v 1.326 2019/01/11 16:36:19 schwarze Exp $ */
+/* $Id: mdoc_html.c,v 1.327 2019/01/18 14:36:21 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2014-2019 Ingo Schwarze <schwarze@openbsd.org>
@@ -354,13 +354,12 @@ print_mdoc_node(MDOC_ARGS)
html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi);
child = 1;
- t = h->tag;
- if (t->tag == TAG_P || t->tag == TAG_PRE)
- t = t->next;
-
n->flags &= ~NODE_ENDED;
switch (n->type) {
case ROFFT_TEXT:
+ t = h->tag;
+ t->refcnt++;
+
/* No tables in this mode... */
assert(NULL == h->tblt);
@@ -379,6 +378,8 @@ print_mdoc_node(MDOC_ARGS)
h->flags |= HTML_NOSPACE;
break;
case ROFFT_EQN:
+ t = h->tag;
+ t->refcnt++;
print_eqn(h, n->eqn);
break;
case ROFFT_TBL:
@@ -395,13 +396,14 @@ print_mdoc_node(MDOC_ARGS)
* the "meta" table state. This will be reopened on the
* next table element.
*/
- if (h->tblt != NULL) {
+ if (h->tblt != NULL)
print_tblclose(h);
- t = h->tag;
- }
assert(h->tblt == NULL);
+ t = h->tag;
+ t->refcnt++;
if (n->tok < ROFF_MAX) {
roff_html_pre(h, n);
+ t->refcnt--;
print_stagq(h, t);
return;
}
@@ -421,6 +423,7 @@ print_mdoc_node(MDOC_ARGS)
if (child && n->child != NULL)
print_mdoc_nodelist(meta, n->child, h);
+ t->refcnt--;
print_stagq(h, t);
switch (n->type) {