Cleaned up ctype functions (netbsd).

[mandoc.git] / mdocterm.c
diff --git a/mdocterm.c b/mdocterm.c

index 0e36fb308d6fff12cdc44f656ba10bec31cc719d..278d224e840a53f1f8d441977a583835d1b2692a 100644 (file)
--- a/mdocterm.c
+++ b/mdocterm.c
@@ -1,4 +1,4 @@
-/* $Id: mdocterm.c,v 1.25 2009/03/02 17:14:46 kristaps Exp $ */
+/* $Id: mdocterm.c,v 1.33 2009/03/05 13:12:12 kristaps Exp $ */
  /*
   * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   *
@@ -32,6 +32,11 @@
  #include "mmain.h"
  #include "term.h"
  
+struct termenc {
+       const char       *enc;
+       int               sym;
+};
+
  static void              body(struct termp *,
                                 struct termpair *,
                                 const struct mdoc_meta *,
@@ -44,19 +49,82 @@ static      void              footer(struct termp *,
  static void              pword(struct termp *, const char *, size_t);
  static void              pescape(struct termp *, const char *, 
                                 size_t *, size_t);
-static void              style(struct termp *, enum tstyle);
  static void              nescape(struct termp *,
                                 const char *, size_t);
  static void              chara(struct termp *, char);
  static void              stringa(struct termp *, 
                                 const char *, size_t);
  static void              symbola(struct termp *, enum tsym);
+static void              stylea(struct termp *, enum tstyle);
  
  #ifdef __linux__
  extern size_t            strlcat(char *, const char *, size_t);
  extern size_t            strlcpy(char *, const char *, size_t);
  #endif
  
+static struct termenc    termenc1[] = {
+       { "\\",           TERMSYM_SLASH },
+       { "\'",           TERMSYM_RSQUOTE },
+       { "`",            TERMSYM_LSQUOTE },
+       { "-",            TERMSYM_HYPHEN },
+       { " ",            TERMSYM_SPACE },
+       { ".",            TERMSYM_PERIOD },
+       { "&",            TERMSYM_BREAK },
+       { "e",            TERMSYM_SLASH },
+       { "q",            TERMSYM_DQUOTE },
+       { NULL,           0 }
+};
+
+static struct termenc    termenc2[] = {
+       { "rB",           TERMSYM_RBRACK },
+       { "lB",           TERMSYM_LBRACK },
+       { "ra",           TERMSYM_RANGLE },
+       { "la",           TERMSYM_LANGLE },
+       { "Lq",           TERMSYM_LDQUOTE },
+       { "lq",           TERMSYM_LDQUOTE },
+       { "Rq",           TERMSYM_RDQUOTE },
+       { "rq",           TERMSYM_RDQUOTE },
+       { "oq",           TERMSYM_LSQUOTE },
+       { "aq",           TERMSYM_RSQUOTE },
+
+       { "<-",           TERMSYM_LARROW },
+       { "->",           TERMSYM_RARROW },
+       { "ua",           TERMSYM_UARROW },
+       { "da",           TERMSYM_DARROW },
+
+       { "bu",           TERMSYM_BULLET },
+       { "Ba",           TERMSYM_BAR },
+       { "ba",           TERMSYM_BAR },
+       { "co",           TERMSYM_COPY },
+       { "Am",           TERMSYM_AMP },
+
+       { "Le",           TERMSYM_LE },
+       { "<=",           TERMSYM_LE },
+       { "Ge",           TERMSYM_GE },
+       { ">=",           TERMSYM_GE },
+       { "==",           TERMSYM_EQ },
+       { "Ne",           TERMSYM_NEQ },
+       { "!=",           TERMSYM_NEQ },
+       { "Pm",           TERMSYM_PLUSMINUS },
+       { "+-",           TERMSYM_PLUSMINUS },
+       { "If",           TERMSYM_INF2 },
+       { "if",           TERMSYM_INF },
+       { "Na",           TERMSYM_NAN },
+       { "na",           TERMSYM_NAN },
+       { "**",           TERMSYM_ASTERISK },
+       { "Gt",           TERMSYM_GT },
+       { "Lt",           TERMSYM_LT },
+
+       { "aa",           TERMSYM_ACUTE },
+       { "ga",           TERMSYM_GRAVE },
+
+       { "en",           TERMSYM_EN },
+       { "em",           TERMSYM_EM },
+
+       { "Pi",           TERMSYM_PI },
+       { NULL,           0 }
+};
+
  static struct termsym    termsym_ansi[] = {
         { "]", 1 },             /* TERMSYM_RBRACK */
         { "[", 1 },             /* TERMSYM_LBRACK */
@@ -85,8 +153,18 @@ static      struct termsym    termsym_ansi[] = {
         { "NaN", 3 },           /* TERMSYM_NAN */
         { "|", 1 },             /* TERMSYM_BAR */
         { "o", 1 },             /* TERMSYM_BULLET */
-       { "&", 1 },             /* TERMSYM_AND */
-       { "|", 1 },             /* TERMSYM_OR */
+       { "&", 1 },             /* TERMSYM_AMP */
+       { "--", 2 },            /* TERMSYM_EM */
+       { "-", 1 },             /* TERMSYM_EN */
+       { "(C)", 3 },           /* TERMSYM_COPY */
+       { "*", 1 },             /* TERMSYM_ASTERISK */
+       { "\\", 1 },            /* TERMSYM_SLASH */
+       { "-", 1 },             /* TERMSYM_HYPHEN */
+       { " ", 1 },             /* TERMSYM_SPACE */
+       { ".", 1 },             /* TERMSYM_PERIOD */
+       { "", 0 },              /* TERMSYM_BREAK */
+       { "<", 1 },             /* TERMSYM_LANGLE */
+       { ">", 1 },             /* TERMSYM_RANGLE */
  };
  
  static const char        ansi_clear[]  = { 27, '[', '0', 'm' };
@@ -115,8 +193,7 @@ main(int argc, char *argv[])
         if (NULL == (mdoc = mmain_mdoc(p)))
                 mmain_exit(p, 1);
  
-       termp.maxrmargin = 78; /* XXX */
-       termp.rmargin = termp.maxrmargin;
+       termp.maxrmargin = termp.rmargin = 78; /* XXX */
         termp.maxcols = 1024;
         termp.offset = termp.col = 0;
         termp.flags = TERMP_NOSPACE;
@@ -153,9 +230,6 @@ main(int argc, char *argv[])
   *    offset value.  This is useful when doing columnar lists where the
   *    prior column has right-padded.
   *
- *  - TERMP_LITERAL: don't break apart words.  Note that a long literal
- *    word will violate the right margin.
- *
   *  - TERMP_NOBREAK: this is the most important and is used when making
   *    columns.  In short: don't print a newline and instead pad to the
   *    right margin.  Used in conjunction with TERMP_NOLPAD.
@@ -213,7 +287,7 @@ flushln(struct termp *p)
  
                 /* LINTED */
                 for (j = i, vsz = 0; j < p->col; j++) {
-                       if (isspace((int)p->buf[j]))
+                       if (isspace((u_char)p->buf[j]))
                                 break;
                         else if (27 == p->buf[j]) {
                                 assert(j + 4 <= p->col);
@@ -246,7 +320,7 @@ flushln(struct termp *p)
                                 putchar('\n');
                                 for (j = 0; j < p->rmargin; j++)
                                         putchar(' ');
-                               vis = p->rmargin;
+                               vis = p->rmargin - p->offset;
                         } else if (vis + vsz > bp) 
                                 warnx("word breaks right margin");
  
@@ -260,7 +334,7 @@ flushln(struct termp *p)
                  */
  
                 for ( ; i < p->col; i++) {
-                       if (isspace((int)p->buf[i]))
+                       if (isspace((u_char)p->buf[i]))
                                 break;
                         putchar(p->buf[i]);
                 }
@@ -277,9 +351,11 @@ flushln(struct termp *p)
          */
  
         if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
-               putchar('\n');
-               for (i = 0; i < p->rmargin; i++)
-                       putchar(' ');
+               if ( ! (TERMP_NONOBREAK & p->flags)) {
+                       putchar('\n');
+                       for (i = 0; i < p->rmargin; i++)
+                               putchar(' ');
+               }
                 p->col = 0;
                 return;
         }
@@ -290,8 +366,9 @@ flushln(struct termp *p)
          */
  
         if (p->flags & TERMP_NOBREAK) {
-               for ( ; vis < maxvis; vis++)
-                       putchar(' ');
+               if ( ! (TERMP_NONOBREAK & p->flags))
+                       for ( ; vis < maxvis; vis++)
+                               putchar(' ');
         } else
                 putchar('\n');
  
@@ -360,13 +437,13 @@ word(struct termp *p, const char *word)
  
         /* LINTED */
         for (j = i = 0; i < len; i++) {
-               if ( ! isspace((int)word[i])) {
+               if ( ! isspace((u_char)word[i])) {
                         j++;
                         continue;
                 } 
                 
                 /* Escaped spaces don't delimit... */
-               if (i > 0 && isspace((int)word[i]) && 
+               if (i > 0 && isspace((u_char)word[i]) && 
                                 '\\' == word[i - 1]) {
                         j++;
                         continue;
@@ -572,7 +649,7 @@ header(struct termp *p, const struct mdoc_meta *meta)
                         meta->title, pp ? pp : "");
  
         for (bufp = title; *bufp; bufp++)
-               *bufp = toupper(*bufp);
+               *bufp = toupper((u_char)*bufp);
         
         p->offset = 0;
         p->rmargin = (p->maxrmargin - strlen(buf)) / 2;
@@ -614,149 +691,27 @@ header(struct termp *p, const struct mdoc_meta *meta)
  static void
  nescape(struct termp *p, const char *word, size_t len)
  {
+       struct termenc  *enc;
  
         switch (len) {
         case (1):
-               switch (word[0]) {
-               case ('\\'):
-                       /* FALLTHROUGH */
-               case ('\''):
-                       /* FALLTHROUGH */
-               case ('`'):
-                       /* FALLTHROUGH */
-               case ('-'):
-                       /* FALLTHROUGH */
-               case (' '):
-                       /* FALLTHROUGH */
-               case ('.'):
-                       chara(p, word[0]); /* FIXME */
-                       break;
-               case ('&'):
-                       break;
-               case ('e'):
-                       chara(p, '\\'); /* FIXME */
-                       break;
-               case ('q'):
-                       symbola(p, TERMSYM_DQUOTE);
-                       break;
-               default:
-                       warnx("escape sequence not supported: %c",
-                                       word[0]);
-                       break;
-               }
+               enc = termenc1;
                 break;
-
         case (2):
-               if ('r' == word[0] && 'B' == word[1])
-                       symbola(p, TERMSYM_RBRACK);
-               else if ('l' == word[0] && 'B' == word[1])
-                       symbola(p, TERMSYM_LBRACK);
-               else if ('l' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LDQUOTE);
-               else if ('r' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RDQUOTE);
-               else if ('o' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LSQUOTE);
-               else if ('a' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RSQUOTE);
-               else if ('<' == word[0] && '-' == word[1])
-                       symbola(p, TERMSYM_LARROW);
-               else if ('-' == word[0] && '>' == word[1])
-                       symbola(p, TERMSYM_RARROW);
-               else if ('b' == word[0] && 'u' == word[1])
-                       symbola(p, TERMSYM_BULLET);
-               else if ('<' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_LE);
-               else if ('>' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_GE);
-               else if ('=' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_EQ);
-               else if ('+' == word[0] && '-' == word[1])
-                       symbola(p, TERMSYM_PLUSMINUS);
-               else if ('u' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_UARROW);
-               else if ('d' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_DARROW);
-               else if ('a' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_ACUTE);
-               else if ('g' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_GRAVE);
-               else if ('!' == word[0] && '=' == word[1])
-                       symbola(p, TERMSYM_NEQ);
-               else if ('i' == word[0] && 'f' == word[1])
-                       symbola(p, TERMSYM_INF);
-               else if ('n' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_NAN);
-               else if ('b' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_BAR);
-
-               /* Deprecated forms. */
-               else if ('A' == word[0] && 'm' == word[1])
-                       symbola(p, TERMSYM_AMP);
-               else if ('B' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_BAR);
-               else if ('I' == word[0] && 'f' == word[1])
-                       symbola(p, TERMSYM_INF2);
-               else if ('G' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_GE);
-               else if ('G' == word[0] && 't' == word[1])
-                       symbola(p, TERMSYM_GT);
-               else if ('L' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_LE);
-               else if ('L' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_LDQUOTE);
-               else if ('L' == word[0] && 't' == word[1])
-                       symbola(p, TERMSYM_LT);
-               else if ('N' == word[0] && 'a' == word[1])
-                       symbola(p, TERMSYM_NAN);
-               else if ('N' == word[0] && 'e' == word[1])
-                       symbola(p, TERMSYM_NEQ);
-               else if ('P' == word[0] && 'i' == word[1])
-                       symbola(p, TERMSYM_PI);
-               else if ('P' == word[0] && 'm' == word[1])
-                       symbola(p, TERMSYM_PLUSMINUS);
-               else if ('R' == word[0] && 'q' == word[1])
-                       symbola(p, TERMSYM_RDQUOTE);
-               else
-                       warnx("escape sequence not supported: %c%c",
-                                       word[0], word[1]);
+               enc = termenc2;
                 break;
-
         default:
-               warnx("escape sequence not supported");
-               break;
+               warnx("unsupported %zu-byte escape sequence", len);
+               return;
         }
-}
  
+       for ( ; enc->enc; enc++) 
+               if (0 == memcmp(enc->enc, word, len)) {
+                       symbola(p, enc->sym);
+                       return;
+               }
  
-/*
- * Apply a style to the output buffer.  This is looked up by means of
- * the styletab.
- */
-static void
-style(struct termp *p, enum tstyle esc)
-{
-
-       if (p->col + 4 >= p->maxcols)
-               errx(1, "line overrun");
-
-       p->buf[(p->col)++] = 27;
-       p->buf[(p->col)++] = '[';
-       switch (esc) {
-       case (TERMSTYLE_CLEAR):
-               p->buf[(p->col)++] = '0';
-               break;
-       case (TERMSTYLE_BOLD):
-               p->buf[(p->col)++] = '1';
-               break;
-       case (TERMSTYLE_UNDER):
-               p->buf[(p->col)++] = '4';
-               break;
-       default:
-               abort();
-               /* NOTREACHED */
-       }
-       p->buf[(p->col)++] = 'm';
+       warnx("unsupported %zu-byte escape sequence", len);
  }
  
  
@@ -835,9 +790,9 @@ pword(struct termp *p, const char *word, size_t len)
          */
  
         if (p->flags & TERMP_BOLD)
-               style(p, TERMSTYLE_BOLD);
+               stylea(p, TERMSTYLE_BOLD);
         if (p->flags & TERMP_UNDERLINE)
-               style(p, TERMSTYLE_UNDER);
+               stylea(p, TERMSTYLE_UNDER);
  
         for (i = 0; i < len; i++) {
                 if ('\\' == word[i]) {
@@ -849,7 +804,7 @@ pword(struct termp *p, const char *word, size_t len)
  
         if (p->flags & TERMP_BOLD ||
                         p->flags & TERMP_UNDERLINE)
-               style(p, TERMSTYLE_CLEAR);
+               stylea(p, TERMSTYLE_CLEAR);
  }
  
  
@@ -865,6 +820,18 @@ symbola(struct termp *p, enum tsym sym)
  }
  
  
+/*
+ * Add a style to the output line buffer.
+ */
+static void
+stylea(struct termp *p, enum tstyle style)
+{
+
+       assert(p->styletab[style].sym);
+       stringa(p, p->styletab[style].sym, p->styletab[style].sz);
+}
+
+
  /*
   * Like chara() but for arbitrary-length buffers.  Resize the buffer by
   * a factor of two (if the buffer is less than that) or the buffer's
@@ -875,6 +842,9 @@ stringa(struct termp *p, const char *c, size_t sz)
  {
         size_t           s;
  
+       if (0 == sz)
+               return;
+
         s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
         
         assert(c);