Character-escape addition simplified (see README.addescape, also added).

author Kristaps Dzonsons <kristaps@bsd.lv>

Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)

committer Kristaps Dzonsons <kristaps@bsd.lv>

Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
author Kristaps Dzonsons <kristaps@bsd.lv>
Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
committer Kristaps Dzonsons <kristaps@bsd.lv>
Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
diff --git a/README.addescape b/README.addescape

new file mode 100644 (file)

index 0000000..da52ee0
--- /dev/null
+++ b/README.addescape
@@ -0,0 +1,17 @@
+$Id: README.addescape,v 1.1 2009/03/03 21:07:01 kristaps Exp $
+
+This documents adding a new character escape to mdocterm(1).  Character
+escapes are only syntax-validated in the back-end.
+
+Character escapes may take the forms \*x, \*(xx, \x, \(xx, \[n] and so
+on.  All of these are recognised according to their byte
+length.
+
+(1) If the escape is NOT recognised in enum tsym in term.h, add it.
+
+(2) Modify/create static struct termenc termencN, where N is the number
+of characters in the encoding.  This is in mdocterm.c.
+
+(3) Possibly modify nescape() to recognise a new termencN.
+
+Everything else is automatic.
diff --git a/mdocterm.1 b/mdocterm.1

index 49250d59a80b57edac045359c80783432bd5a269..e104a51dd734f4554f79255e6477326524625682 100644 (file)
--- a/mdocterm.1
+++ b/mdocterm.1
@@ -1,4 +1,4 @@
-.\" $Id: mdocterm.1,v 1.9 2009/03/02 17:29:16 kristaps Exp $
+.\" $Id: mdocterm.1,v 1.10 2009/03/03 21:07:01 kristaps Exp $
  .\"
  .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
  .\"
@@ -16,7 +16,7 @@
  .\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  .\" PERFORMANCE OF THIS SOFTWARE.
  .\"
-.Dd $Mdocdate: March 2 2009 $
+.Dd $Mdocdate: March 3 2009 $
  .Dt mdocmterm 1
  .Os
  .\" SECTION
@@ -93,11 +93,11 @@ is
  .Ss Character Escapes
  This section documents the character-escapes accepted by
  .Xr mdocterm 1 .
-Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx
-and \\*x forms described in
+Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx,
+\\*[n] and \\*x forms described in
  .Xr mdoc.samples 7
-are deprecated, but still correctly rendered.  For all two-character
-sequences, \\(xx is equivalent to the n-character \\[xx].
+are deprecated, but still rendered.  All one- and two-character
+sequences may be used in the n-character sequence \\[n].
  .Pp
  Note that the
  .Em Output
@@ -106,6 +106,22 @@ column will render differently whether executed with
  or another output filter.
  .\" PARAGRAPH
  .Pp
+Grammatical:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.It \(em
+\\(em (em-dash)
+.It \(en
+\\(en (en-dash)
+.It \-
+\\- (hyphen)
+.It \\
+\\ (back-slash)
+.El
+.\" PARAGRAPH
+.Pp
  Enclosures:
  .Pp
  .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
@@ -118,9 +134,9 @@ Enclosures:
  .It \(lq
  \\(lq (left double-quote)
  .It \(rq
-\\(rq (right double-quote)
+\\(rq, \\' (right double-quote)
  .It \(oq
-\\(lq (left single-quote)
+\\(oq, \\` (left single-quote)
  .It \(aq
  \\(aq (right single-quote, apostrophe)
  .El
@@ -161,13 +177,11 @@ Mathematical:
  \\(na (NaN)*
  .It \(+-
  \\(+- (plus-minus)
+.It \(**
+\\(** (asterisk)
  .El
  .\" PARAGRAPH
  .Pp
-*This is a deviation from the standard, as NaN is usually rendered as
-\\*(Na, which is a deprecated form.  We introduce \\(na, which follows
-the more general syntax.
-.Pp
  Diacritics:
  .Pp
  .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
@@ -189,7 +203,13 @@ Special symbols:
  \\(bu (bullet)
  .It \(ba
  \\(ba (bar)
+.It \(co
+\\(co (copyright)
  .El 
+.Pp
+*This is a deviation from the standard, as NaN is usually rendered as
+\\*(Na, which is a deprecated form.  We introduce \\(na, which follows
+the more general syntax.
  .\" SECTION
  .Sh EXAMPLES
  To display this manual page:
diff --git a/mdocterm.c b/mdocterm.c

index f2f05dfa9edd92bbaf3bf2c98537c1042b4ca5c2..00a1308503a2185b5f7909785afc8686788beb56 100644 (file)
--- a/mdocterm.c
+++ b/mdocterm.c
@@ -1,4 +1,4 @@
-/* $Id: mdocterm.c,v 1.26 2009/03/02 17:29:16 kristaps Exp $ */
+/* $Id: mdocterm.c,v 1.27 2009/03/03 21:07:01 kristaps Exp $ */
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
@@ -32,6 +32,11 @@
  #include "mmain.h"
  #include "term.h"
  
+struct termenc {
+       const char       *enc; /* escape name bytes matched against the input word */
+       int               sym; /* symbol value handed to symbola() on a match */
+};
+
  static void              body(struct termp *,
                                 struct termpair *,
                                 const struct mdoc_meta *,
@@ -57,6 +62,67 @@ extern       size_t            strlcat(char *, const char *, size_t);
  extern size_t            strlcpy(char *, const char *, size_t);
  #endif
  
+static struct termenc    termenc1[] = { /* one-byte escape names */
+       { "\\",           TERMSYM_SLASH },
+       { "\'",           TERMSYM_RSQUOTE },
+       { "`",            TERMSYM_LSQUOTE },
+       { "-",            TERMSYM_HYPHEN },
+       { " ",            TERMSYM_SPACE },
+       { ".",            TERMSYM_PERIOD },
+       { "&",            TERMSYM_BREAK }, /* termsym_ansi renders this as "" */
+       { "e",            TERMSYM_SLASH }, /* same symbol as "\\" above */
+       { "q",            TERMSYM_DQUOTE },
+       { NULL,           0 } /* sentinel: nescape() stops at a NULL enc */
+};
+
+static struct termenc    termenc2[] = { /* two-byte names, e.g. \(lq */
+       { "rB",           TERMSYM_RBRACK },
+       { "lB",           TERMSYM_LBRACK },
+       { "Lq",           TERMSYM_LDQUOTE },
+       { "lq",           TERMSYM_LDQUOTE },
+       { "Rq",           TERMSYM_RDQUOTE },
+       { "rq",           TERMSYM_RDQUOTE },
+       { "oq",           TERMSYM_LSQUOTE },
+       { "aq",           TERMSYM_RSQUOTE },
+       /* Arrows. */
+       { "<-",           TERMSYM_LARROW },
+       { "->",           TERMSYM_RARROW },
+       { "ua",           TERMSYM_UARROW },
+       { "da",           TERMSYM_DARROW },
+       /* Special symbols. */
+       { "bu",           TERMSYM_BULLET },
+       { "Ba",           TERMSYM_BAR },
+       { "ba",           TERMSYM_BAR },
+       { "co",           TERMSYM_COPY },
+       { "Am",           TERMSYM_AMP },
+       /* Mathematical. */
+       { "Le",           TERMSYM_LE },
+       { "<=",           TERMSYM_LE },
+       { "Ge",           TERMSYM_GE },
+       { ">=",           TERMSYM_GE },
+       { "==",           TERMSYM_EQ },
+       { "Ne",           TERMSYM_NEQ },
+       { "!=",           TERMSYM_NEQ },
+       { "Pm",           TERMSYM_PLUSMINUS },
+       { "+-",           TERMSYM_PLUSMINUS },
+       { "If",           TERMSYM_INF2 },
+       { "if",           TERMSYM_INF },
+       { "Na",           TERMSYM_NAN },
+       { "na",           TERMSYM_NAN },
+       { "**",           TERMSYM_ASTERISK },
+       { "Gt",           TERMSYM_GT },
+       { "Lt",           TERMSYM_LT },
+       /* Diacritics. */
+       { "aa",           TERMSYM_ACUTE },
+       { "ga",           TERMSYM_GRAVE },
+       /* Dashes. */
+       { "en",           TERMSYM_EN },
+       { "em",           TERMSYM_EM },
+
+       { "Pi",           TERMSYM_PI },
+       { NULL,           0 } /* sentinel: nescape() stops at a NULL enc */
+};
+
  static struct termsym    termsym_ansi[] = {
         { "]", 1 },             /* TERMSYM_RBRACK */
         { "[", 1 },             /* TERMSYM_LBRACK */
@@ -85,8 +151,16 @@ static      struct termsym    termsym_ansi[] = {
         { "NaN", 3 },           /* TERMSYM_NAN */
         { "|", 1 },             /* TERMSYM_BAR */
         { "o", 1 },             /* TERMSYM_BULLET */
-       { "&", 1 },             /* TERMSYM_AND */
-       { "|", 1 },             /* TERMSYM_OR */
+       { "&", 1 },             /* TERMSYM_AMP */
+       { "--", 2 },            /* TERMSYM_EM */
+       { "-", 1 },             /* TERMSYM_EN */
+       { "(C)", 3 },           /* TERMSYM_COPY */
+       { "*", 1 },             /* TERMSYM_ASTERISK */
+       { "\\", 1 },            /* TERMSYM_SLASH */
+       { "-", 1 },             /* TERMSYM_HYPHEN */
+       { " ", 1 },             /* TERMSYM_SPACE */
+       { ".", 1 },             /* TERMSYM_PERIOD */
+       { "", 0 },              /* TERMSYM_BREAK */
  };
  
  static const char        ansi_clear[]  = { 27, '[', '0', 'm' };
@@ -614,118 +688,27 @@ header(struct termp *p, const struct mdoc_meta *meta)
  static void
  nescape(struct termp *p, const char *word, size_t len)
  {
+       struct termenc  *enc;
  
         switch (len) {
         case (1):
-               switch (word[0]) {
-               case ('\\'):
-                       /* FALLTHROUGH */
-               case ('\''):
-                       /* FALLTHROUGH */
-               case ('`'):
-                       /* FALLTHROUGH */
-               case ('-'):
-                       /* FALLTHROUGH */
-               case (' '):
-                       /* FALLTHROUGH */
-               case ('.'):
-                       chara(p, word[0]); /* FIXME */
-                       break;
-               case ('&'):
-                       break;
-               case ('e'):
-                       chara(p, '\\'); /* FIXME */
-                       break;
-               case ('q'):
-                       symbola(p, TERMSYM_DQUOTE);
-                       break;
-               default:
-                       warnx("escape sequence not supported: %c",
-                                       word[0]);
-                       break;
-               }
+               enc = termenc1;
                 break;
-
         case (2):
-               if ('r' == word[0] && 'B' == word[1])
-                       symbola(p, TERMSYM_RBRACK);
-               else if ('l' == word[0] && 'B' == word[1])
-                       symbola(p, TERMSYM_LBRACK);
-               else if ('l' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LDQUOTE);
-               else if ('r' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RDQUOTE);
-               else if ('o' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LSQUOTE);
-               else if ('a' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RSQUOTE);
-               else if ('<' == word[0] && '-' == word[1])
-                       symbola(p, TERMSYM_LARROW);
-               else if ('-' == word[0] && '>' == word[1])
-                       symbola(p, TERMSYM_RARROW);
-               else if ('b' == word[0] && 'u' == word[1])
-                       symbola(p, TERMSYM_BULLET);
-               else if ('<' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_LE);
-               else if ('>' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_GE);
-               else if ('=' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_EQ);
-               else if ('+' == word[0] && '-' == word[1])
-                       symbola(p, TERMSYM_PLUSMINUS);
-               else if ('u' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_UARROW);
-               else if ('d' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_DARROW);
-               else if ('a' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_ACUTE);
-               else if ('g' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_GRAVE);
-               else if ('!' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_NEQ);
-               else if ('i' == word[0] && 'f' == word[1])
-                       symbola(p, TERMSYM_INF);
-               else if ('n' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_NAN);
-               else if ('b' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_BAR);
-
-               /* Deprecated forms. */
-               else if ('A' == word[0] && 'm' == word[1])
-                       symbola(p, TERMSYM_AMP);
-               else if ('B' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_BAR);
-               else if ('I' == word[0] && 'f' == word[1])
-                       symbola(p, TERMSYM_INF2);
-               else if ('G' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_GE);
-               else if ('G' == word[0] && 't' == word[1])
-                       symbola(p, TERMSYM_GT);
-               else if ('L' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_LE);
-               else if ('L' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LDQUOTE);
-               else if ('L' == word[0] && 't' == word[1])
-                       symbola(p, TERMSYM_LT);
-               else if ('N' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_NAN);
-               else if ('N' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_NEQ);
-               else if ('P' == word[0] && 'i' == word[1])
-                       symbola(p, TERMSYM_PI);
-               else if ('P' == word[0] && 'm' == word[1])
-                       symbola(p, TERMSYM_PLUSMINUS);
-               else if ('R' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RDQUOTE);
-               else
-                       warnx("escape sequence not supported: %c%c",
-                                       word[0], word[1]);
+               enc = termenc2;
                 break;
-
         default:
-               warnx("escape sequence not supported");
-               break;
+               warnx("unsupported %zu-byte escape sequence", len);
+               return;
         }
+
+       for ( ; enc->enc; enc++) 
+               if (0 == memcmp(enc->enc, word, len)) {
+                       symbola(p, enc->sym);
+                       return;
+               }
+
+       warnx("unsupported %zu-byte escape sequence", len);
  }
  
  
@@ -856,6 +839,9 @@ stringa(struct termp *p, const char *c, size_t sz)
  {
         size_t           s;
  
+       if (0 == sz)
+               return;
+
         s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
         
         assert(c);
diff --git a/term.h b/term.h

index df7dd545595993b7d414774271419472430c3ea4..846910ed56fc50f14586d9d4987bae2c91e94b89 100644 (file)
--- a/term.h
+++ b/term.h
@@ -1,4 +1,4 @@
-/* $Id: term.h,v 1.14 2009/03/02 17:14:46 kristaps Exp $ */
+/* $Id: term.h,v 1.15 2009/03/03 21:07:01 kristaps Exp $ */
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
@@ -54,6 +54,15 @@ enum tsym {
         TERMSYM_BAR =           25,
         TERMSYM_BULLET =        26,
         TERMSYM_AMP =           27,
+       TERMSYM_EM =            28,
+       TERMSYM_EN =            29,
+       TERMSYM_COPY =          30,
+       TERMSYM_ASTERISK =      31,
+       TERMSYM_SLASH =         32,
+       TERMSYM_HYPHEN =        33,
+       TERMSYM_SPACE =         34,
+       TERMSYM_PERIOD =        35,
+       TERMSYM_BREAK =         36
  };
author	Kristaps Dzonsons <kristaps@bsd.lv>
	Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
committer	Kristaps Dzonsons <kristaps@bsd.lv>
	Tue, 3 Mar 2009 21:07:01 +0000 (21:07 +0000)
README.addescape	[new file with mode: 0644]	patch \| blob
mdocterm.1		patch \| blob \| history
mdocterm.c		patch \| blob \| history
term.h		patch \| blob \| history