git.cameronkatri.com Git - mandoc.git/commitdiff
Moved charset recognition into the filter.
author Kristaps Dzonsons <kristaps@bsd.lv>
Thu, 4 Dec 2008 19:31:57 +0000 (19:31 +0000)
committer Kristaps Dzonsons <kristaps@bsd.lv>
Thu, 4 Dec 2008 19:31:57 +0000 (19:31 +0000)
Makefile
index.7
ml.c
mlg.c
private.h
roff.c
tokens.c

diff --git a/Makefile b/Makefile
index cd9100fc8f6af7ef87319cd11c1f444ff436afcf..db7297b7ec58a03b1a068e0a335b22b8fd81326d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ FAIL  = test.0 test.1 test.2 test.3 test.4 test.5 test.6 \
 SUCCEED        = test.7 test.8 test.9 test.10 test.11 test.12 test.13 \
          test.14 test.16 test.17 test.18 test.19 test.21 test.23 \
          test.25 test.28 test.29 test.31 test.32 test.33 test.34 \
-         test.35 test.37 test.38 test.39
+         test.35 test.38 test.39
 
 
 all: mdocml
diff --git a/index.7 b/index.7
index fdf6eede1d09612d46a5b3d866e76db38ec5a1f9..f78e2553ee81e83794975297e7d9cd54d565d5a6 100644 (file)
--- a/index.7
+++ b/index.7
@@ -47,9 +47,9 @@ respectively),
 correctly-ordered document prelude,
 .It
 sane argument values (such as those for 
-.Sq \& Dt
+.Sq \&.Dt
 or
-.Sq \& Sm ) ,
+.Sq \&.Sm ) ,
 .It
 and so on.
 .El
diff --git a/ml.c b/ml.c
index 317fe01144ace6a104416c0913bf9b233290da9a..7d2f63ed4589da216022485578f5f0adfab5236d 100644 (file)
--- a/ml.c
+++ b/ml.c
@@ -1,4 +1,4 @@
-/* $Id: ml.c,v 1.4 2008/12/04 16:19:52 kristaps Exp $ */
+/* $Id: ml.c,v 1.5 2008/12/04 19:31:57 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -16,6 +16,7 @@
  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  * PERFORMANCE OF THIS SOFTWARE.
  */
+#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -33,13 +34,147 @@ int
 ml_nputstring(struct md_mbuf *p, 
                const char *buf, size_t sz, size_t *pos)
 {
-       int              i;
+       int              i, v;
        const char      *seq;
        size_t           ssz;
 
        for (i = 0; i < (int)sz; i++) {
                switch (buf[i]) {
 
+               /* Escaped value. */
+               case ('\\'):
+                       if (-1 == (v = rofftok_scan(buf, &i))) {
+                               /* TODO: error. */
+                               return(0);
+                       }
+
+                       switch (v) {
+                       case (ROFFTok_Sp_A):
+                               seq = "\\a";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_B):
+                               seq = "\\b";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_F):
+                               seq = "\\f";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_N):
+                               seq = "\\n";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_R):
+                               seq = "\\r";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_T):
+                               seq = "\\t";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_V):
+                               seq = "\\v";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Sp_0):
+                               seq = "\\0";
+                               ssz = 2;
+                               break;
+                       case (ROFFTok_Space):
+                               seq = "&nbsp;";
+                               ssz = 6;
+                               break;
+                       case (ROFFTok_Hyphen):
+                               seq = "&#8208;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Em):
+                               seq = "&#8212;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_En):
+                               seq = "&#8211;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Ge):
+                               seq = "&#8805;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Le):
+                               seq = "&#8804;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Rquote):
+                               seq = "&#8221;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Lquote):
+                               seq = "&#8220;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Uparrow):
+                               seq = "&#8593;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Acute):
+                               seq = "&#180;";
+                               ssz = 6;
+                               break;
+                       case (ROFFTok_Grave):
+                               seq = "&#96;";
+                               ssz = 5;
+                               break;
+                       case (ROFFTok_Pi):
+                               seq = "&#960;";
+                               ssz = 6;
+                               break;
+                       case (ROFFTok_Ne):
+                               seq = "&#8800;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Lt):
+                               seq = "&lt;";
+                               ssz = 4;
+                               break;
+                       case (ROFFTok_Gt):
+                               seq = "&gt;";
+                               ssz = 4;
+                               break;
+                       case (ROFFTok_Plusmin):
+                               seq = "&#177;";
+                               ssz = 6;
+                               break;
+                       case (ROFFTok_Infty):
+                               seq = "&#8734;";
+                               ssz = 7;
+                               break;
+                       case (ROFFTok_Bar):
+                               seq = "&#124;";
+                               ssz = 6;
+                               break;
+                       case (ROFFTok_Nan):
+                               seq = "Nan";
+                               ssz = 3;
+                               break;
+                       case (ROFFTok_Quote):
+                               seq = "&quot;";
+                               ssz = 6;
+                               break;
+                       case (ROFFTok_Slash):
+                               seq = "\\";
+                               ssz = 1;
+                               break;
+                       case (ROFFTok_Null):
+                               seq = "";
+                               ssz = 0;
+                               break;
+                       default:
+                               /* TODO: print error. */
+                               return(-1);
+                       }
+                       break;
+
                /* Ampersand ml-escape. */
                case ('&'):
                        seq = "&amp;";
@@ -70,7 +205,7 @@ ml_nputstring(struct md_mbuf *p,
                        break;
                }
 
-               if ( ! ml_nputs(p, seq, ssz, pos))
+               if (ssz > 0 && ! ml_nputs(p, seq, ssz, pos))
                        return(-1);
        }
        return(1);
diff --git a/mlg.c b/mlg.c
index b8356839d17b7a0b0df00e422f1705662ec4ea2c..f34b5359fb9e8bfc51cfadbfa3ce164b2b1bf437 100644 (file)
--- a/mlg.c
+++ b/mlg.c
@@ -1,4 +1,4 @@
-/* $Id: mlg.c,v 1.6 2008/12/04 16:34:59 kristaps Exp $ */
+/* $Id: mlg.c,v 1.7 2008/12/04 19:31:57 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -70,7 +70,6 @@ static        int              mlg_rofftail(void *);
 static int              mlg_roffin(void *, int, int *, char **);
 static int              mlg_roffdata(void *, int, 
                                const char *, char *);
-static int              mlg_rofftoken(void *, int, int);
 static int              mlg_roffout(void *, int);
 static int              mlg_roffblkin(void *, int, int *, char **);
 static int              mlg_roffblkout(void *, int);
@@ -357,7 +356,6 @@ mlg_alloc(const struct md_args *args,
        cb.roffspecial = mlg_roffspecial;
        cb.roffmsg = mlg_roffmsg;
        cb.roffdata = mlg_roffdata;
-       cb.rofftoken = mlg_rofftoken;
 
        if (NULL == (p = calloc(1, sizeof(struct md_mlg))))
                err(1, "calloc");
@@ -551,142 +549,6 @@ mlg_roffmsg(void *arg, enum roffmsg lvl,
 }
 
 
-static int
-mlg_rofftoken(void *arg, int space, int value)
-{
-       struct md_mlg   *p;
-       const char      *seq;
-       size_t           sz, res;
-
-       assert(arg);
-       p = (struct md_mlg *)arg;
-
-       switch (value) {
-       case (ROFFTok_Sp_A):
-               seq = "\\a";
-               sz = 2;
-               break;
-       case (ROFFTok_Sp_B):
-               seq = "\\b";
-               sz = 2;
-               break;
-       case (ROFFTok_Sp_F):
-               seq = "\\f";
-               sz = 2;
-               break;
-       case (ROFFTok_Sp_N):
-               seq = "\\n";
-               sz = 2;
-               break;
-       case (ROFFTok_Sp_R):
-               seq = "\\r";
-               sz = 2;
-               break;
-       case (ROFFTok_Sp_T):
-               seq = "\\t";
-               sz = 2;
-               break;
-       case (ROFFTok_Sp_V):
-               seq = "\\v";
-               sz = 2;
-               break;
-       case (ROFFTok_Space):
-               seq = "&nbsp;";
-               sz = 6;
-               break;
-       case (ROFFTok_Hyphen):
-               seq = "&#8208;";
-               sz = 7;
-               break;
-       case (ROFFTok_Em):
-               seq = "&#8212;";
-               sz = 7;
-               break;
-       case (ROFFTok_En):
-               seq = "&#8211;";
-               sz = 7;
-               break;
-       case (ROFFTok_Ge):
-               seq = "&#8805;";
-               sz = 7;
-               break;
-       case (ROFFTok_Le):
-               seq = "&#8804;";
-               sz = 7;
-               break;
-       case (ROFFTok_Rquote):
-               seq = "&#8221;";
-               sz = 7;
-               break;
-       case (ROFFTok_Lquote):
-               seq = "&#8220;";
-               sz = 7;
-               break;
-       case (ROFFTok_Uparrow):
-               seq = "&#8593;";
-               sz = 7;
-               break;
-       case (ROFFTok_Acute):
-               seq = "&#180;";
-               sz = 6;
-               break;
-       case (ROFFTok_Grave):
-               seq = "&#96;";
-               sz = 5;
-               break;
-       case (ROFFTok_Pi):
-               seq = "&#960;";
-               sz = 6;
-               break;
-       case (ROFFTok_Ne):
-               seq = "&#8800;";
-               sz = 7;
-               break;
-       case (ROFFTok_Lt):
-               seq = "&lt;";
-               sz = 4;
-               break;
-       case (ROFFTok_Gt):
-               seq = "&gt;";
-               sz = 4;
-               break;
-       case (ROFFTok_Plusmin):
-               seq = "&#177;";
-               sz = 6;
-               break;
-       case (ROFFTok_Infty):
-               seq = "&#8734;";
-               sz = 7;
-               break;
-       case (ROFFTok_Bar):
-               seq = "&#124;";
-               sz = 6;
-               break;
-       case (ROFFTok_Nan):
-               seq = "Nan";
-               sz = 3;
-               break;
-       case (ROFFTok_Quote):
-               seq = "&quot;";
-               sz = 6;
-               break;
-       default:
-               /* TODO: print error. */
-               return(0);
-       }
-
-       if (space && ! ml_nputs(p->mbuf, " ", 1, &res))
-               return(0);
-       p->pos += res;
-
-       if ( ! ml_nputs(p->mbuf, seq, sz, &res))
-               return(0);
-       p->pos += res;
-
-       return(1);
-}
-
-
 static int
 mlg_roffdata(void *arg, int space, const char *start, char *buf)
 {
diff --git a/private.h b/private.h
index 20b94d570c9ea7d725945411e2ed755bface4873..141ca9958ea7e992cfc1920542a9a0d00126af55 100644 (file)
--- a/private.h
+++ b/private.h
@@ -1,4 +1,4 @@
-/* $Id: private.h,v 1.28 2008/12/04 16:34:59 kristaps Exp $ */
+/* $Id: private.h,v 1.29 2008/12/04 19:31:57 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -65,7 +65,9 @@ struct        md_mbuf {
 #define        ROFFTok_Bar      25
 #define        ROFFTok_Nan      26
 #define        ROFFTok_Quote    27
-#define        ROFFTok_MAX      28
+#define        ROFFTok_Sp_0     28
+#define        ROFFTok_Slash    29
+#define        ROFFTok_MAX      30
 
 #define        ROFF___          0
 #define        ROFF_Dd          1
@@ -251,7 +253,6 @@ struct      roffcb {
                        const char *, const char *, const char *);
        int     (*rofftail)(void *);
        int     (*roffdata)(void *, int, const char *, char *);
-       int     (*rofftoken)(void *, int, int);
        int     (*roffin)(void *, int, int *, char **);
        int     (*roffout)(void *, int);
        int     (*roffblkin)(void *, int, int *, char **);
@@ -290,7 +291,7 @@ struct      rofftree *roff_alloc(const struct roffcb *, void *);
 int              roff_engine(struct rofftree *, char *);
 int              roff_free(struct rofftree *, int);
 
-int              rofftok_scan(const char *);
+int              rofftok_scan(const char *, int *);
 
 __END_DECLS
 
diff --git a/roff.c b/roff.c
index f5e3cc1fdaae1c9459bb8a2b6091aa81f0ab8fee..576190e593d10b377497bbfca6382b8650188b80 100644 (file)
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.38 2008/12/04 16:34:59 kristaps Exp $ */
+/* $Id: roff.c,v 1.39 2008/12/04 19:31:57 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -969,23 +969,9 @@ roffparseopts(struct rofftree *tree, int tok,
 static int
 roffdata(struct rofftree *tree, int space, char *buf)
 {
-       int              tok;
 
        if (0 == *buf)
                return(1);
-
-       if (-1 == (tok = rofftok_scan(buf))) {
-               roff_err(tree, buf, "invalid character sequence");
-               return(0);
-       } else if (ROFFTok_MAX != tok) {
-               if (ROFFTok_Null == tok) { /* FIXME */
-                       buf += 2;
-                       return(roffdata(tree, space, buf));
-               }
-               return((*tree->cb.rofftoken)
-                               (tree->arg, space != 0, tok));
-       }
-
        return((*tree->cb.roffdata)(tree->arg, 
                                space != 0, tree->cur, buf));
 }
diff --git a/tokens.c b/tokens.c
index 82b1af7a748e787413c76ae0c6a1b39faad67a30..0ccd3255e14522ee4bd03a27829e9e2afeb1a83e 100644 (file)
--- a/tokens.c
+++ b/tokens.c
@@ -1,4 +1,4 @@
-/* $Id: tokens.c,v 1.2 2008/12/04 16:34:59 kristaps Exp $ */
+/* $Id: tokens.c,v 1.3 2008/12/04 19:31:57 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
 #include "private.h"
 
 
-static int              rofftok_dashes(const char *);
-static int              rofftok_special(const char *);
-static int              rofftok_predef(const char *);
-static int              rofftok_defined(const char *);
+static int              rofftok_dashes(const char *, int *);
+static int              rofftok_special(const char *, int *);
+static int              rofftok_predef(const char *, int *);
+static int              rofftok_defined(const char *, int *);
 
 
 static int
-rofftok_defined(const char *buf)
+rofftok_defined(const char *buf, int *i)
 {
-       if (0 == *buf)
-               return(-1);
-       if (0 == *(buf + 1))
+       const char       *p;
+
+       if (0 == buf[*i])
                return(-1);
-       if (0 != *(buf + 2))
+       if (0 == buf[*i + 1])
                return(-1);
 
-       if (0 == strcmp(buf, ">="))
+       (*i)++;
+       p = &buf[(*i)++];
+
+       if (0 == memcmp(p, ">=", 2))
                return(ROFFTok_Ge);
-       else if (0 == strcmp(buf, "<="))
+       else if (0 == memcmp(p, "<=", 2))
                return(ROFFTok_Le);
-       else if (0 == strcmp(buf, "Rq"))
+       else if (0 == memcmp(p, "Rq", 2))
                return(ROFFTok_Rquote);
-       else if (0 == strcmp(buf, "Lq"))
+       else if (0 == memcmp(p, "Lq", 2))
                return(ROFFTok_Lquote);
-       else if (0 == strcmp(buf, "ua"))
+       else if (0 == memcmp(p, "ua", 2))
                return(ROFFTok_Uparrow);
-       else if (0 == strcmp(buf, "aa"))
+       else if (0 == memcmp(p, "aa", 2))
                return(ROFFTok_Acute);
-       else if (0 == strcmp(buf, "ga"))
+       else if (0 == memcmp(p, "ga", 2))
                return(ROFFTok_Grave);
-       else if (0 == strcmp(buf, "Pi"))
+       else if (0 == memcmp(p, "Pi", 2))
                return(ROFFTok_Pi);
-       else if (0 == strcmp(buf, "Ne"))
+       else if (0 == memcmp(p, "Ne", 2))
                return(ROFFTok_Ne);
-       else if (0 == strcmp(buf, "Le"))
+       else if (0 == memcmp(p, "Le", 2))
                return(ROFFTok_Le);
-       else if (0 == strcmp(buf, "Ge"))
+       else if (0 == memcmp(p, "Ge", 2))
                return(ROFFTok_Ge);
-       else if (0 == strcmp(buf, "Lt"))
+       else if (0 == memcmp(p, "Lt", 2))
                return(ROFFTok_Lt);
-       else if (0 == strcmp(buf, "Gt"))
+       else if (0 == memcmp(p, "Gt", 2))
                return(ROFFTok_Gt);
-       else if (0 == strcmp(buf, "Pm"))
+       else if (0 == memcmp(p, "Pm", 2))
                return(ROFFTok_Plusmin);
-       else if (0 == strcmp(buf, "If"))
+       else if (0 == memcmp(p, "If", 2))
                return(ROFFTok_Infty);
-       else if (0 == strcmp(buf, "Na"))
+       else if (0 == memcmp(p, "Na", 2))
                return(ROFFTok_Nan);
-       else if (0 == strcmp(buf, "Ba"))
+       else if (0 == memcmp(p, "Ba", 2))
                return(ROFFTok_Bar);
 
        return(-1);
@@ -80,15 +83,14 @@ rofftok_defined(const char *buf)
 
 
 static int
-rofftok_predef(const char *buf)
+rofftok_predef(const char *buf, int *i)
 {
-       if (0 == *buf)
+       if (0 == buf[*i])
                return(-1);
+       if ('(' == buf[*i])
+               return(rofftok_defined(buf, i));
 
-       if ('(' == *buf)
-               return(rofftok_defined(++buf));
-
-       switch (*buf) {
+       switch (buf[*i]) {
        case ('q'):
                return(ROFFTok_Quote);
        default:
@@ -100,20 +102,17 @@ rofftok_predef(const char *buf)
 
 
 static int
-rofftok_dashes(const char *buf)
+rofftok_dashes(const char *buf, int *i)
 {
 
-       if (0 == *buf)
+       if (0 == buf[*i])
                return(-1);
-       else if (*buf++ != 'e')
+       else if (buf[(*i)++] != 'e')
                return(-1);
-
-       if (0 == *buf)
-               return(-1);
-       else if (0 != *(buf + 1))
+       if (0 == buf[*i])
                return(-1);
 
-       switch (*buf) {
+       switch (buf[*i]) {
        case ('m'):
                return(ROFFTok_Em);
        case ('n'):
@@ -126,15 +125,13 @@ rofftok_dashes(const char *buf)
 
 
 static int
-rofftok_special(const char *buf)
+rofftok_special(const char *buf, int *i)
 {
 
-       if (0 == *buf)
-               return(-1);
-       else if (0 != *(buf + 1))
-               return(-1);
+       if (0 == buf[*i])
+               return(ROFFTok_Slash);
 
-       switch (*buf) {
+       switch (buf[*i]) {
        case ('a'):
                return(ROFFTok_Sp_A);
        case ('b'):
@@ -149,6 +146,8 @@ rofftok_special(const char *buf)
                return(ROFFTok_Sp_T);
        case ('v'):
                return(ROFFTok_Sp_V);
+       case ('0'):
+               return(ROFFTok_Sp_0);
        default:
                break;
        }
@@ -157,19 +156,22 @@ rofftok_special(const char *buf)
 
 
 int
-rofftok_scan(const char *buf)
+rofftok_scan(const char *buf, int *i)
 {
 
        assert(*buf);
-       if ('\\' != *buf++)
-               return(ROFFTok_MAX);
+       assert(buf[*i] == '\\');
+
+       (*i)++;
 
-       for ( ; *buf; buf++) {
-               switch (*buf) {
+       for ( ; buf[*i]; (*i)++) {
+               switch (buf[*i]) {
                case ('e'):
-                       return(rofftok_special(++buf));
+                       (*i)++;
+                       return(rofftok_special(buf, i));
                case ('('):
-                       return(rofftok_dashes(++buf));
+                       (*i)++;
+                       return(rofftok_dashes(buf, i));
                case (' '):
                        return(ROFFTok_Space);
                case ('&'):
@@ -177,9 +179,10 @@ rofftok_scan(const char *buf)
                case ('-'):
                        return(ROFFTok_Hyphen);
                case ('*'):
-                       return(rofftok_predef(++buf));
+                       (*i)++;
+                       return(rofftok_predef(buf, i));
                case ('\\'):
-                       return(ROFFTok_MAX);
+                       return(ROFFTok_Slash);
                default:
                        break;
                }