Character-escape addition simplified (see README.addescape, also added).

author Kristaps Dzonsons <kristaps@bsd.lv>

Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)

committer Kristaps Dzonsons <kristaps@bsd.lv>

Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
author Kristaps Dzonsons <kristaps@bsd.lv>
Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
committer Kristaps Dzonsons <kristaps@bsd.lv>
Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
diff --git a/README.addescape b/README.addescape

new file mode 100644 (file)

index 0000000..da52ee0
--- /dev/null
+++ b/README.addescape
@@ -0,0 +1,17 @@
+$Id: README.addescape,v 1.1 2009/03/03 21:07:01 kristaps Exp $
+
+This documents adding a new character escape to mdocterm(1).  Character
+escapes are only syntax-validated in the back-end.
+
+Character escapes may take the form \*x, \*(xx, \x, \(xx, \[n] and so
+on.  All of these are recognised according to their byte
+length.
+
+(1) If the symbol is NOT already listed in enum tsym in term.h, add it.
+
+(2) Modify/create static struct termenc termencN, where N is the number
+of characters in the encoding.  This is in mdocterm.c.
+
+(3) Possibly modify nescape() to recognise a new termencN.
+
+Everything else is automatic.
diff --git a/mdocterm.1 b/mdocterm.1

index 49250d59a80b57edac045359c80783432bd5a269..e104a51dd734f4554f79255e6477326524625682 100644 (file)
--- a/mdocterm.1
+++ b/mdocterm.1
@@ -1,4 +1,4 @@
-.\" $Id: mdocterm.1,v 1.9 2009/03/02 17:29:16 kristaps Exp $
+.\" $Id: mdocterm.1,v 1.10 2009/03/03 21:07:01 kristaps Exp $
  .\"
  .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
  .\"
  .\"
  .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
  .\"
@@ -16,7 +16,7 @@
  .\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  .\" PERFORMANCE OF THIS SOFTWARE.
  .\"
  .\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  .\" PERFORMANCE OF THIS SOFTWARE.
  .\"
-.Dd $Mdocdate: March 2 2009 $
+.Dd $Mdocdate: March 3 2009 $
  .Dt mdocmterm 1
  .Os
  .\" SECTION
  .Dt mdocmterm 1
  .Os
  .\" SECTION
@@ -93,11 +93,11 @@ is
  .Ss Character Escapes
  This section documents the character-escapes accepted by
  .Xr mdocterm 1 .
  .Ss Character Escapes
  This section documents the character-escapes accepted by
  .Xr mdocterm 1 .
-Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx
-and \\*x forms described in
+Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx,
+\\*[n] and \\*x forms described in
  .Xr mdoc.samples 7
  .Xr mdoc.samples 7
-are deprecated, but still correctly rendered.  For all two-character
-sequences, \\(xx is equivalent to the n-character \\[xx].
+are deprecated, but still rendered.  All one- and two-character
+sequences may be used in the n-character sequence \\[n].
  .Pp
  Note that the
  .Em Output
  .Pp
  Note that the
  .Em Output
@@ -106,6 +106,22 @@ column will render differently whether executed with
  or another output filter.
  .\" PARAGRAPH
  .Pp
  or another output filter.
  .\" PARAGRAPH
  .Pp
+Grammatical:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.It \(em
+\\(em (em-dash)
+.It \(en
+\\(en (en-dash)
+.It \-
+\\- (hyphen)
+.It \\
+\\ (back-slash)
+.El
+.\" PARAGRAPH
+.Pp
  Enclosures:
  .Pp
  .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
  Enclosures:
  .Pp
  .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
@@ -118,9 +134,9 @@ Enclosures:
  .It \(lq
  \\(lq (left double-quote)
  .It \(rq
  .It \(lq
  \\(lq (left double-quote)
  .It \(rq
-\\(rq (right double-quote)
+\\(rq, \\' (right double-quote)
  .It \(oq
  .It \(oq
-\\(lq (left single-quote)
+\\(oq, \\` (left single-quote)
  .It \(aq
  \\(aq (right single-quote, apostrophe)
  .El
  .It \(aq
  \\(aq (right single-quote, apostrophe)
  .El
@@ -161,13 +177,11 @@ Mathematical:
  \\(na (NaN)*
  .It \(+-
  \\(+- (plus-minus)
  \\(na (NaN)*
  .It \(+-
  \\(+- (plus-minus)
+.It \(**
+\\(** (asterisk)
  .El
  .\" PARAGRAPH
  .Pp
  .El
  .\" PARAGRAPH
  .Pp
-*This is a deviation from the standard, as NaN is usually rendered as
-\\*(Na, which is a deprecated form.  We introduce \\(na, which follows
-the more general syntax.
-.Pp
  Diacritics:
  .Pp
  .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
  Diacritics:
  .Pp
  .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
@@ -189,7 +203,13 @@ Special symbols:
  \\(bu (bullet)
  .It \(ba
  \\(ba (bar)
  \\(bu (bullet)
  .It \(ba
  \\(ba (bar)
+.It \(co
+\\(co (copyright)
  .El 
  .El 
+.Pp
+*This is a deviation from the standard, as NaN is usually rendered as
+\\*(Na, which is a deprecated form.  We introduce \\(na, which follows
+the more general syntax.
  .\" SECTION
  .Sh EXAMPLES
  To display this manual page:
  .\" SECTION
  .Sh EXAMPLES
  To display this manual page:
diff --git a/mdocterm.c b/mdocterm.c

index f2f05dfa9edd92bbaf3bf2c98537c1042b4ca5c2..00a1308503a2185b5f7909785afc8686788beb56 100644 (file)
--- a/mdocterm.c
+++ b/mdocterm.c
@@ -1,4 +1,4 @@
-/* $Id: mdocterm.c,v 1.26 2009/03/02 17:29:16 kristaps Exp $ */
+/* $Id: mdocterm.c,v 1.27 2009/03/03 21:07:01 kristaps Exp $ */
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
@@ -32,6 +32,11 @@
  #include "mmain.h"
  #include "term.h"
  
  #include "mmain.h"
  #include "term.h"
  
+struct termenc {
+       const char       *enc;
+       int               sym;
+};
+
  static void              body(struct termp *,
                                 struct termpair *,
                                 const struct mdoc_meta *,
  static void              body(struct termp *,
                                 struct termpair *,
                                 const struct mdoc_meta *,
@@ -57,6 +62,67 @@ extern       size_t            strlcat(char *, const char *, size_t);
  extern size_t            strlcpy(char *, const char *, size_t);
  #endif
  
  extern size_t            strlcpy(char *, const char *, size_t);
  #endif
  
+static struct termenc    termenc1[] = {
+       { "\\",           TERMSYM_SLASH },
+       { "\'",           TERMSYM_RSQUOTE },
+       { "`",            TERMSYM_LSQUOTE },
+       { "-",            TERMSYM_HYPHEN },
+       { " ",            TERMSYM_SPACE },
+       { ".",            TERMSYM_PERIOD },
+       { "&",            TERMSYM_BREAK },
+       { "e",            TERMSYM_SLASH },
+       { "q",            TERMSYM_DQUOTE },
+       { NULL,           0 }
+};
+
+static struct termenc    termenc2[] = {
+       { "rB",           TERMSYM_RBRACK },
+       { "lB",           TERMSYM_LBRACK },
+       { "Lq",           TERMSYM_LDQUOTE },
+       { "lq",           TERMSYM_LDQUOTE },
+       { "Rq",           TERMSYM_RDQUOTE },
+       { "rq",           TERMSYM_RDQUOTE },
+       { "oq",           TERMSYM_LSQUOTE },
+       { "aq",           TERMSYM_RSQUOTE },
+
+       { "<-",           TERMSYM_LARROW },
+       { "->",           TERMSYM_RARROW },
+       { "ua",           TERMSYM_UARROW },
+       { "da",           TERMSYM_DARROW },
+
+       { "bu",           TERMSYM_BULLET },
+       { "Ba",           TERMSYM_BAR },
+       { "ba",           TERMSYM_BAR },
+       { "co",           TERMSYM_COPY },
+       { "Am",           TERMSYM_AMP },
+
+       { "Le",           TERMSYM_LE },
+       { "<=",           TERMSYM_LE },
+       { "Ge",           TERMSYM_GE },
+       { "=>",           TERMSYM_GE },
+       { "==",           TERMSYM_EQ },
+       { "Ne",           TERMSYM_NEQ },
+       { "!=",           TERMSYM_NEQ },
+       { "Pm",           TERMSYM_PLUSMINUS },
+       { "+-",           TERMSYM_PLUSMINUS },
+       { "If",           TERMSYM_INF2 },
+       { "if",           TERMSYM_INF },
+       { "Na",           TERMSYM_NAN },
+       { "na",           TERMSYM_NAN },
+       { "**",           TERMSYM_ASTERISK },
+       { "Gt",           TERMSYM_GT },
+       { "Lt",           TERMSYM_LT },
+
+       { "aa",           TERMSYM_ACUTE },
+       { "ga",           TERMSYM_GRAVE },
+
+       { "en",           TERMSYM_EN },
+       { "em",           TERMSYM_EM },
+
+       { "Pi",           TERMSYM_PI },
+       { NULL,           0 }
+};
+
  static struct termsym    termsym_ansi[] = {
         { "]", 1 },             /* TERMSYM_RBRACK */
         { "[", 1 },             /* TERMSYM_LBRACK */
  static struct termsym    termsym_ansi[] = {
         { "]", 1 },             /* TERMSYM_RBRACK */
         { "[", 1 },             /* TERMSYM_LBRACK */
@@ -85,8 +151,16 @@ static      struct termsym    termsym_ansi[] = {
         { "NaN", 3 },           /* TERMSYM_NAN */
         { "|", 1 },             /* TERMSYM_BAR */
         { "o", 1 },             /* TERMSYM_BULLET */
         { "NaN", 3 },           /* TERMSYM_NAN */
         { "|", 1 },             /* TERMSYM_BAR */
         { "o", 1 },             /* TERMSYM_BULLET */
-       { "&", 1 },             /* TERMSYM_AND */
-       { "|", 1 },             /* TERMSYM_OR */
+       { "&", 1 },             /* TERMSYM_AMP */
+       { "--", 2 },            /* TERMSYM_EM */
+       { "-", 1 },             /* TERMSYM_EN */
+       { "(C)", 3 },           /* TERMSYM_COPY */
+       { "*", 1 },             /* TERMSYM_ASTERISK */
+       { "\\", 1 },            /* TERMSYM_SLASH */
+       { "-", 1 },             /* TERMSYM_HYPHEN */
+       { " ", 1 },             /* TERMSYM_SPACE */
+       { ".", 1 },             /* TERMSYM_PERIOD */
+       { "", 0 },              /* TERMSYM_BREAK */
  };
  
  static const char        ansi_clear[]  = { 27, '[', '0', 'm' };
  };
  
  static const char        ansi_clear[]  = { 27, '[', '0', 'm' };
@@ -614,118 +688,27 @@ header(struct termp *p, const struct mdoc_meta *meta)
  static void
  nescape(struct termp *p, const char *word, size_t len)
  {
  static void
  nescape(struct termp *p, const char *word, size_t len)
  {
+       struct termenc  *enc;
  
         switch (len) {
         case (1):
  
         switch (len) {
         case (1):
-               switch (word[0]) {
-               case ('\\'):
-                       /* FALLTHROUGH */
-               case ('\''):
-                       /* FALLTHROUGH */
-               case ('`'):
-                       /* FALLTHROUGH */
-               case ('-'):
-                       /* FALLTHROUGH */
-               case (' '):
-                       /* FALLTHROUGH */
-               case ('.'):
-                       chara(p, word[0]); /* FIXME */
-                       break;
-               case ('&'):
-                       break;
-               case ('e'):
-                       chara(p, '\\'); /* FIXME */
-                       break;
-               case ('q'):
-                       symbola(p, TERMSYM_DQUOTE);
-                       break;
-               default:
-                       warnx("escape sequence not supported: %c",
-                                       word[0]);
-                       break;
-               }
+               enc = termenc1;
                 break;
                 break;
-
         case (2):
         case (2):
-               if ('r' == word[0] && 'B' == word[1])
-                       symbola(p, TERMSYM_RBRACK);
-               else if ('l' == word[0] && 'B' == word[1])
-                       symbola(p, TERMSYM_LBRACK);
-               else if ('l' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LDQUOTE);
-               else if ('r' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RDQUOTE);
-               else if ('o' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LSQUOTE);
-               else if ('a' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RSQUOTE);
-               else if ('<' == word[0] && '-' == word[1])
-                       symbola(p, TERMSYM_LARROW);
-               else if ('-' == word[0] && '>' == word[1])
-                       symbola(p, TERMSYM_RARROW);
-               else if ('b' == word[0] && 'u' == word[1])
-                       symbola(p, TERMSYM_BULLET);
-               else if ('<' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_LE);
-               else if ('>' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_GE);
-               else if ('=' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_EQ);
-               else if ('+' == word[0] && '-' == word[1])
-                       symbola(p, TERMSYM_PLUSMINUS);
-               else if ('u' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_UARROW);
-               else if ('d' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_DARROW);
-               else if ('a' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_ACUTE);
-               else if ('g' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_GRAVE);
-               else if ('!' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_NEQ);
-               else if ('i' == word[0] && 'f' == word[1])
-                       symbola(p, TERMSYM_INF);
-               else if ('n' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_NAN);
-               else if ('b' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_BAR);
-
-               /* Deprecated forms. */
-               else if ('A' == word[0] && 'm' == word[1])
-                       symbola(p, TERMSYM_AMP);
-               else if ('B' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_BAR);
-               else if ('I' == word[0] && 'f' == word[1])
-                       symbola(p, TERMSYM_INF2);
-               else if ('G' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_GE);
-               else if ('G' == word[0] && 't' == word[1])
-                       symbola(p, TERMSYM_GT);
-               else if ('L' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_LE);
-               else if ('L' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LDQUOTE);
-               else if ('L' == word[0] && 't' == word[1])
-                       symbola(p, TERMSYM_LT);
-               else if ('N' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_NAN);
-               else if ('N' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_NEQ);
-               else if ('P' == word[0] && 'i' == word[1])
-                       symbola(p, TERMSYM_PI);
-               else if ('P' == word[0] && 'm' == word[1])
-                       symbola(p, TERMSYM_PLUSMINUS);
-               else if ('R' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RDQUOTE);
-               else
-                       warnx("escape sequence not supported: %c%c",
-                                       word[0], word[1]);
+               enc = termenc2;
                 break;
                 break;
-
         default:
         default:
-               warnx("escape sequence not supported");
-               break;
+               warnx("unsupported %zu-byte escape sequence", len);
+               return;
         }
         }
+
+       for ( ; enc->enc; enc++) 
+               if (0 == memcmp(enc->enc, word, len)) {
+                       symbola(p, enc->sym);
+                       return;
+               }
+
+       warnx("unsupported %zu-byte escape sequence", len);
  }
  
  
  }
  
  
@@ -856,6 +839,9 @@ stringa(struct termp *p, const char *c, size_t sz)
  {
         size_t           s;
  
  {
         size_t           s;
  
+       if (0 == sz)
+               return;
+
         s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
         
         assert(c);
         s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
         
         assert(c);
diff --git a/term.h b/term.h

index df7dd545595993b7d414774271419472430c3ea4..846910ed56fc50f14586d9d4987bae2c91e94b89 100644 (file)
--- a/term.h
+++ b/term.h
@@ -1,4 +1,4 @@
-/* $Id: term.h,v 1.14 2009/03/02 17:14:46 kristaps Exp $ */
+/* $Id: term.h,v 1.15 2009/03/03 21:07:01 kristaps Exp $ */
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
@@ -54,6 +54,15 @@ enum tsym {
         TERMSYM_BAR =           25,
         TERMSYM_BULLET =        26,
         TERMSYM_AMP =           27,
         TERMSYM_BAR =           25,
         TERMSYM_BULLET =        26,
         TERMSYM_AMP =           27,
+       TERMSYM_EM =            28,
+       TERMSYM_EN =            29,
+       TERMSYM_COPY =          30,
+       TERMSYM_ASTERISK =      31,
+       TERMSYM_SLASH =         32,
+       TERMSYM_HYPHEN =        33,
+       TERMSYM_SPACE =         34,
+       TERMSYM_PERIOD =        35,
+       TERMSYM_BREAK =         36
  };
  
  
  };
author	Kristaps Dzonsons <kristaps@bsd.lv>
	Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
committer	Kristaps Dzonsons <kristaps@bsd.lv>
	Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
README.addescape	[new file with mode: 0644]	patch \| blob
mdocterm.1		patch \| blob \| history
mdocterm.c		patch \| blob \| history
term.h		patch \| blob \| history