In the HTML formatter, assert(3) that no HTML nesting violation occurs.
author Ingo Schwarze <schwarze@openbsd.org>
Thu, 29 Aug 2019 17:57:29 +0000 (17:57 +0000)
committer Ingo Schwarze <schwarze@openbsd.org>
Thu, 29 Aug 2019 17:57:29 +0000 (17:57 +0000)
Tested on the complete manual page trees of Version 7 AT&T UNIX,
4.4BSD-Lite2, POSIX-2013, OpenBSD 2.2 to 6.5 and -current,
FreeBSD 10.0 to 12.0, NetBSD 6.1.5 to 8.1, DragonFly 3.8.2 to 5.6.1,
and Linux 4.05 to 5.02.

html.c
html.h

diff --git a/html.c b/html.c
index fc6d2533f8830f6aea457ef21374c7a32573e86a..4b9c08760cba4d671c544cfcbb552c08a2143e3c 100644 (file)
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/*     $Id: html.c,v 1.256 2019/08/02 17:06:04 schwarze Exp $ */
+/*     $Id: html.c,v 1.257 2019/08/29 17:57:29 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
 struct htmldata {
        const char       *name;
        int               flags;
-#define        HTML_NOSTACK     (1 << 0)
-#define        HTML_AUTOCLOSE   (1 << 1)
-#define        HTML_NLBEFORE    (1 << 2)
-#define        HTML_NLBEGIN     (1 << 3)
-#define        HTML_NLEND       (1 << 4)
-#define        HTML_NLAFTER     (1 << 5)
+#define        HTML_INPHRASE    (1 << 0)  /* Can appear in phrasing context. */
+#define        HTML_TOPHRASE    (1 << 1)  /* Establishes phrasing context. */
+#define        HTML_NOSTACK     (1 << 2)  /* Does not have an end tag. */
+#define        HTML_NLBEFORE    (1 << 3)  /* Output line break before opening. */
+#define        HTML_NLBEGIN     (1 << 4)  /* Output line break after opening. */
+#define        HTML_NLEND       (1 << 5)  /* Output line break before closing. */
+#define        HTML_NLAFTER     (1 << 6)  /* Output line break after closing. */
 #define        HTML_NLAROUND    (HTML_NLBEFORE | HTML_NLAFTER)
 #define        HTML_NLINSIDE    (HTML_NLBEGIN | HTML_NLEND)
 #define        HTML_NLALL       (HTML_NLAROUND | HTML_NLINSIDE)
-#define        HTML_INDENT      (1 << 6)
-#define        HTML_NOINDENT    (1 << 7)
+#define        HTML_INDENT      (1 << 7)  /* Indent content by two spaces. */
+#define        HTML_NOINDENT    (1 << 8)  /* Exception: never indent content. */
 };
 
 static const struct htmldata htmltags[TAG_MAX] = {
        {"html",        HTML_NLALL},
        {"head",        HTML_NLALL | HTML_INDENT},
-       {"body",        HTML_NLALL},
-       {"meta",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
+       {"meta",        HTML_NOSTACK | HTML_NLALL},
+       {"link",        HTML_NOSTACK | HTML_NLALL},
+       {"style",       HTML_NLALL | HTML_INDENT},
        {"title",       HTML_NLAROUND},
+       {"body",        HTML_NLALL},
        {"div",         HTML_NLAROUND},
        {"div",         0},
        {"section",     HTML_NLALL},
-       {"h1",          HTML_NLAROUND},
-       {"h2",          HTML_NLAROUND},
-       {"span",        0},
-       {"link",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
-       {"br",          HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
-       {"a",           0},
        {"table",       HTML_NLALL | HTML_INDENT},
        {"tr",          HTML_NLALL | HTML_INDENT},
        {"td",          HTML_NLAROUND},
@@ -79,16 +76,20 @@ static      const struct htmldata htmltags[TAG_MAX] = {
        {"dl",          HTML_NLALL | HTML_INDENT},
        {"dt",          HTML_NLAROUND},
        {"dd",          HTML_NLAROUND | HTML_INDENT},
-       {"p",           HTML_NLAROUND | HTML_INDENT},
-       {"pre",         HTML_NLALL | HTML_NOINDENT},
-       {"var",         0},
-       {"cite",        0},
-       {"b",           0},
-       {"i",           0},
-       {"code",        0},
-       {"small",       0},
-       {"style",       HTML_NLALL | HTML_INDENT},
-       {"math",        HTML_NLALL | HTML_INDENT},
+       {"h1",          HTML_TOPHRASE | HTML_NLAROUND},
+       {"h2",          HTML_TOPHRASE | HTML_NLAROUND},
+       {"p",           HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
+       {"pre",         HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
+       {"a",           HTML_INPHRASE | HTML_TOPHRASE},
+       {"b",           HTML_INPHRASE | HTML_TOPHRASE},
+       {"cite",        HTML_INPHRASE | HTML_TOPHRASE},
+       {"code",        HTML_INPHRASE | HTML_TOPHRASE},
+       {"i",           HTML_INPHRASE | HTML_TOPHRASE},
+       {"small",       HTML_INPHRASE | HTML_TOPHRASE},
+       {"span",        HTML_INPHRASE | HTML_TOPHRASE},
+       {"var",         HTML_INPHRASE | HTML_TOPHRASE},
+       {"br",          HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
+       {"math",        HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
        {"mrow",        0},
        {"mi",          0},
        {"mn",          0},
@@ -584,6 +585,17 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
 
        tflags = htmltags[tag].flags;
 
+       /* Flow content is not allowed in phrasing context. */
+
+       if ((tflags & HTML_INPHRASE) == 0) {
+               for (t = h->tag; t != NULL; t = t->next) {
+                       if (t->closed)
+                               continue;
+                       assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
+                       break;
+               }
+       }
+
        /* Push this tag onto the stack of open scopes. */
 
        if ((tflags & HTML_NOSTACK) == 0) {
@@ -701,7 +713,7 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
 
        /* Accommodate for "well-formed" singleton escaping. */
 
-       if (HTML_AUTOCLOSE & htmltags[tag].flags)
+       if (htmltags[tag].flags & HTML_NOSTACK)
                print_byte(h, '/');
 
        print_byte(h, '>');
diff --git a/html.h b/html.h
index 242a63a8d62446650ed31e64877b657732d132f8..3f9b0231f952ff8ff1b663d6671812aafcc8359e 100644 (file)
--- a/html.h
+++ b/html.h
@@ -1,4 +1,4 @@
-/*     $Id: html.h,v 1.103 2019/04/30 15:53:00 schwarze Exp $ */
+/*     $Id: html.h,v 1.104 2019/08/29 17:57:29 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2017, 2018, 2019 Ingo Schwarze <schwarze@openbsd.org>
 enum   htmltag {
        TAG_HTML,
        TAG_HEAD,
-       TAG_BODY,
        TAG_META,
+       TAG_LINK,
+       TAG_STYLE,
        TAG_TITLE,
+       TAG_BODY,
        TAG_DIV,
        TAG_IDIV,
        TAG_SECTION,
-       TAG_H1,
-       TAG_H2,
-       TAG_SPAN,
-       TAG_LINK,
-       TAG_BR,
-       TAG_A,
        TAG_TABLE,
        TAG_TR,
        TAG_TD,
@@ -40,15 +36,19 @@ enum        htmltag {
        TAG_DL,
        TAG_DT,
        TAG_DD,
+       TAG_H1,
+       TAG_H2,
        TAG_P,
        TAG_PRE,
-       TAG_VAR,
-       TAG_CITE,
+       TAG_A,
        TAG_B,
-       TAG_I,
+       TAG_CITE,
        TAG_CODE,
+       TAG_I,
        TAG_SMALL,
-       TAG_STYLE,
+       TAG_SPAN,
+       TAG_VAR,
+       TAG_BR,
        TAG_MATH,
        TAG_MROW,
        TAG_MI,