git.cameronkatri.com Git - mandoc.git/blobdiff - html.c
stricter parsing of Unicode escape names
[mandoc.git] / html.c
diff --git a/html.c b/html.c
index 5a29eb0ef343209ff738b1bae64bfafd168e6324..20b9b4430488256ac99068d746672f3448589049 100644 (file)
--- a/html.c
+++ b/html.c
@@ -1,6 +1,6 @@
-/*     $Id: html.c,v 1.171 2014/09/27 10:56:18 kristaps Exp $ */
+/*     $Id: html.c,v 1.179 2014/10/27 16:29:06 schwarze Exp $ */
 /*
- * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -74,6 +74,22 @@ static       const struct htmldata htmltags[TAG_MAX] = {
        {"code",        0 }, /* TAG_CODE */
        {"small",       0 }, /* TAG_SMALL */
        {"style",       HTML_CLRLINE}, /* TAG_STYLE */
+       {"math",        HTML_CLRLINE}, /* TAG_MATH */
+       {"mrow",        0}, /* TAG_MROW */
+       {"mi",          0}, /* TAG_MI */
+       {"mo",          0}, /* TAG_MO */
+       {"msup",        0}, /* TAG_MSUP */
+       {"msub",        0}, /* TAG_MSUB */
+       {"msubsup",     0}, /* TAG_MSUBSUP */
+       {"mfrac",       0}, /* TAG_MFRAC */
+       {"msqrt",       0}, /* TAG_MSQRT */
+       {"mfenced",     0}, /* TAG_MFENCED */
+       {"mtable",      0}, /* TAG_MTABLE */
+       {"mtr",         0}, /* TAG_MTR */
+       {"mtd",         0}, /* TAG_MTD */
+       {"munderover",  0}, /* TAG_MUNDEROVER */
+       {"munder",      0}, /* TAG_MUNDER*/
+       {"mover",       0}, /* TAG_MOVER*/
 };
 
 static const char      *const htmlattrs[ATTR_MAX] = {
@@ -87,6 +103,9 @@ static       const char      *const htmlattrs[ATTR_MAX] = {
        "id", /* ATTR_ID */
        "colspan", /* ATTR_COLSPAN */
        "charset", /* ATTR_CHARSET */
+       "open", /* ATTR_OPEN */
+       "close", /* ATTR_CLOSE */
+       "mathvariant", /* ATTR_MATHVARIANT */
 };
 
 static const char      *const roffscales[SCALE_MAX] = {
@@ -108,11 +127,11 @@ static    int      print_escape(char);
 static int      print_encode(struct html *, const char *, int);
 static void     print_metaf(struct html *, enum mandoc_esc);
 static void     print_attr(struct html *, const char *, const char *);
-static void     *ml_alloc(char *, enum htmltype);
+static void     *ml_alloc(char *);
 
 
 static void *
-ml_alloc(char *outopts, enum htmltype type)
+ml_alloc(char *outopts)
 {
        struct html     *h;
        const char      *toks[5];
@@ -126,7 +145,6 @@ ml_alloc(char *outopts, enum htmltype type)
 
        h = mandoc_calloc(1, sizeof(struct html));
 
-       h->type = type;
        h->tags.head = NULL;
        h->symtab = mchars_alloc();
 
@@ -155,14 +173,14 @@ void *
 html_alloc(char *outopts)
 {
 
-       return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
+       return(ml_alloc(outopts));
 }
 
 void *
 xhtml_alloc(char *outopts)
 {
 
-       return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
+       return(ml_alloc(outopts));
 }
 
 void
@@ -419,29 +437,28 @@ print_encode(struct html *h, const char *p, int norecurse)
                case ESCAPE_UNICODE:
                        /* Skip past "u" header. */
                        c = mchars_num2uc(seq + 1, len - 1);
-                       if ('\0' != c)
-                               printf("&#x%x;", c);
                        break;
                case ESCAPE_NUMBERED:
                        c = mchars_num2char(seq, len);
-                       if ( ! ('\0' == c || print_escape(c)))
-                               putchar(c);
                        break;
                case ESCAPE_SPECIAL:
                        c = mchars_spec2cp(h->symtab, seq, len);
-                       if (c > 0)
-                               printf("&#%d;", c);
-                       else if (-1 == c && 1 == len &&
-                           !print_escape(*seq))
-                               putchar((int)*seq);
                        break;
                case ESCAPE_NOSPACE:
                        if ('\0' == *p)
                                nospace = 1;
-                       break;
+                       continue;
                default:
-                       break;
+                       continue;
                }
+               if (c <= 0)
+                       continue;
+               if (c < 0x20 || (c > 0x7E && c < 0xA0))
+                       c = 0xFFFD;
+               if (c > 0x7E)
+                       printf("&#%d;", c);
+               else if ( ! print_escape(c))
+                       putchar(c);
        }
 
        return(nospace);
@@ -494,16 +511,10 @@ print_otag(struct html *h, enum htmltag tag,
        for (i = 0; i < sz; i++)
                print_attr(h, htmlattrs[p[i].key], p[i].val);
 
-       /* Accommodate for XML "well-formed" singleton escaping. */
+       /* Accommodate for "well-formed" singleton escaping. */
 
        if (HTML_AUTOCLOSE & htmltags[tag].flags)
-               switch (h->type) {
-               case HTML_XHTML_1_0_STRICT:
-                       putchar('/');
-                       break;
-               default:
-                       break;
-               }
+               putchar('/');
 
        putchar('>');