git.cameronkatri.com Git - mandoc.git/blobdiff - html.c
Reduce memory and time consumption on certain malformed input files
[mandoc.git] / html.c
diff --git a/html.c b/html.c
index 8d8d1130c5c06802e673f1c398504e70580faa0b..da9808579a36347313540777410ac6949d5bc221 100644 (file)
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/*     $Id: html.c,v 1.177 2014/10/26 17:12:03 schwarze Exp $ */
+/*     $Id: html.c,v 1.184 2014/12/20 00:20:11 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -30,7 +30,6 @@
 
 #include "mandoc.h"
 #include "mandoc_aux.h"
-#include "libmandoc.h"
 #include "out.h"
 #include "html.h"
 #include "main.h"
@@ -122,16 +121,15 @@ static    const char      *const roffscales[SCALE_MAX] = {
 };
 
 static void     bufncat(struct html *, const char *, size_t);
-static void     print_ctag(struct html *, enum htmltag);
+static void     print_ctag(struct html *, struct tag *);
 static int      print_escape(char);
 static int      print_encode(struct html *, const char *, int);
 static void     print_metaf(struct html *, enum mandoc_esc);
 static void     print_attr(struct html *, const char *, const char *);
-static void     *ml_alloc(char *);
 
 
-static void *
-ml_alloc(char *outopts)
+void *
+html_alloc(const struct mchars *mchars, char *outopts)
 {
        struct html     *h;
        const char      *toks[5];
@@ -146,7 +144,7 @@ ml_alloc(char *outopts)
        h = mandoc_calloc(1, sizeof(struct html));
 
        h->tags.head = NULL;
-       h->symtab = mchars_alloc();
+       h->symtab = mchars;
 
        while (outopts && *outopts)
                switch (getsubopt(&outopts, UNCONST(toks), &v)) {
@@ -169,20 +167,6 @@ ml_alloc(char *outopts)
        return(h);
 }
 
-void *
-html_alloc(char *outopts)
-{
-
-       return(ml_alloc(outopts));
-}
-
-void *
-xhtml_alloc(char *outopts)
-{
-
-       return(ml_alloc(outopts));
-}
-
 void
 html_free(void *p)
 {
@@ -196,9 +180,6 @@ html_free(void *p)
                free(tag);
        }
 
-       if (h->symtab)
-               mchars_free(h->symtab);
-
        free(h);
 }
 
@@ -437,39 +418,31 @@ print_encode(struct html *h, const char *p, int norecurse)
                case ESCAPE_UNICODE:
                        /* Skip past "u" header. */
                        c = mchars_num2uc(seq + 1, len - 1);
-
-                       /*
-                        * XXX Security warning:
-                        * For now, forbid Unicode obfuscation of ASCII
-                        * characters.  An audit of the callers is
-                        * required before this can be removed.
-                        */
-
-                       if (c < 0x80)
-                               c = 0xFFFD;
-
-                       printf("&#x%x;", c);
                        break;
                case ESCAPE_NUMBERED:
                        c = mchars_num2char(seq, len);
-                       if ( ! ('\0' == c || print_escape(c)))
-                               putchar(c);
+                       if (c < 0)
+                               continue;
                        break;
                case ESCAPE_SPECIAL:
                        c = mchars_spec2cp(h->symtab, seq, len);
-                       if (c > 0)
-                               printf("&#%d;", c);
-                       else if (-1 == c && 1 == len &&
-                           !print_escape(*seq))
-                               putchar((int)*seq);
+                       if (c <= 0)
+                               continue;
                        break;
                case ESCAPE_NOSPACE:
                        if ('\0' == *p)
                                nospace = 1;
-                       break;
+                       continue;
                default:
-                       break;
+                       continue;
                }
+               if ((c < 0x20 && c != 0x09) ||
+                   (c > 0x7E && c < 0xA0))
+                       c = 0xFFFD;
+               if (c > 0x7E)
+                       printf("&#%d;", c);
+               else if ( ! print_escape(c))
+                       putchar(c);
        }
 
        return(nospace);
@@ -538,14 +511,26 @@ print_otag(struct html *h, enum htmltag tag,
 }
 
 static void
-print_ctag(struct html *h, enum htmltag tag)
+print_ctag(struct html *h, struct tag *tag)
 {
 
-       printf("</%s>", htmltags[tag].name);
-       if (HTML_CLRLINE & htmltags[tag].flags) {
+       /*
+        * Remember to close out and nullify the current
+        * meta-font and table, if applicable.
+        */
+       if (tag == h->metaf)
+               h->metaf = NULL;
+       if (tag == h->tblt)
+               h->tblt = NULL;
+
+       printf("</%s>", htmltags[tag->tag].name);
+       if (HTML_CLRLINE & htmltags[tag->tag].flags) {
                h->flags |= HTML_NOSPACE;
                putchar('\n');
        }
+
+       h->tags.head = tag->next;
+       free(tag);
 }
 
 void
@@ -589,8 +574,9 @@ print_text(struct html *h, const char *word)
        if ( ! print_encode(h, word, 0)) {
                if ( ! (h->flags & HTML_NONOSPACE))
                        h->flags &= ~HTML_NOSPACE;
+               h->flags &= ~HTML_NONEWLINE;
        } else
-               h->flags |= HTML_NOSPACE;
+               h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
 
        if (h->metaf) {
                print_tagq(h, h->metaf);
@@ -606,17 +592,7 @@ print_tagq(struct html *h, const struct tag *until)
        struct tag      *tag;
 
        while ((tag = h->tags.head) != NULL) {
-               /*
-                * Remember to close out and nullify the current
-                * meta-font and table, if applicable.
-                */
-               if (tag == h->metaf)
-                       h->metaf = NULL;
-               if (tag == h->tblt)
-                       h->tblt = NULL;
-               print_ctag(h, tag->tag);
-               h->tags.head = tag->next;
-               free(tag);
+               print_ctag(h, tag);
                if (until && tag == until)
                        return;
        }
@@ -630,17 +606,7 @@ print_stagq(struct html *h, const struct tag *suntil)
        while ((tag = h->tags.head) != NULL) {
                if (suntil && tag == suntil)
                        return;
-               /*
-                * Remember to close out and nullify the current
-                * meta-font and table, if applicable.
-                */
-               if (tag == h->metaf)
-                       h->metaf = NULL;
-               if (tag == h->tblt)
-                       h->tblt = NULL;
-               print_ctag(h, tag->tag);
-               h->tags.head = tag->next;
-               free(tag);
+               print_ctag(h, tag);
        }
 }