about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/mandoc.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2012-05-31 22:38:16 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2012-05-31 22:38:16 +0000
commit: 42700b3481f9153f53b1fbee9b83a8a8009bebaf (patch)
tree: fc9accc94e50cb0ecb22ca66f4d6dd40c90c5ab0 /mandoc.c
parent: 35112543624e30cafdc3d59c920fadd21cfa099f (diff)
download: mandoc-42700b3481f9153f53b1fbee9b83a8a8009bebaf.tar.gz
mandoc-42700b3481f9153f53b1fbee9b83a8a8009bebaf.tar.zst
mandoc-42700b3481f9153f53b1fbee9b83a8a8009bebaf.zip
While I already have my fingers dirty on mandoc_escape(),
take the opportunity to pull out some spaghetti, that is, three confusing variables and fourteen pointless assignments among them; instead, always operate on the official pointers **start, **end, and *sz, each of which conveys an obvious meaning. No functional change intended, and the new tests confirm that everything still (err...) "works", as far as that word can be applied to the kind of roff(7) mock-up code I'm polishing here. "just commit" kristaps@
Diffstat (limited to 'mandoc.c')
-rw-r--r-- mandoc.c 133
1 files changed, 65 insertions, 68 deletions
diff --git a/mandoc.c b/mandoc.c
index 20a6abf1..3ecf56bb 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,4 +1,4 @@
-/* $Id: mandoc.c,v 1.64 2012/05/31 22:34:06 schwarze Exp $ */
+/* $Id: mandoc.c,v 1.65 2012/05/31 22:38:16 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
@@ -42,20 +42,33 @@ static char *time2a(time_t);
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
- char c, term;
- int i, rlim;
- const char *cp, *rstart;
+ const char *local_start;
+ int local_sz;
+ char term;
enum mandoc_esc gly;
- cp = *end;
- rstart = cp;
- if (start)
- *start = rstart;
- i = rlim = 0;
+ /*
+ * When the caller doesn't provide return storage,
+ * use local storage.
+ */
+
+ if (NULL == start)
+ start = &local_start;
+ if (NULL == sz)
+ sz = &local_sz;
+
+ /*
+ * Beyond the backslash, at least one input character
+ * is part of the escape sequence. With one exception
+ * (see below), that character won't be returned.
+ */
+
gly = ESCAPE_ERROR;
+ *start = ++*end;
+ *sz = 0;
term = '\0';
- switch ((c = cp[i++])) {
+ switch ((*start)[-1]) {
/*
* First the glyphs. There are several different forms of
* these, but each eventually returns a substring of the glyph
@@ -63,7 +76,7 @@ mandoc_escape(const char **end, const char **start, int *sz)
*/
case ('('):
gly = ESCAPE_SPECIAL;
- rlim = 2;
+ *sz = 2;
break;
case ('['):
gly = ESCAPE_SPECIAL;
@@ -73,14 +86,15 @@ mandoc_escape(const char **end, const char **start, int *sz)
* Unicode codepoint. Here, however, only check whether
* it's not a zero-width escape.
*/
- if ('u' == cp[i] && ']' != cp[i + 1])
+ if ('u' == (*start)[0] && ']' != (*start)[1])
gly = ESCAPE_UNICODE;
term = ']';
break;
case ('C'):
- if ('\'' != cp[i])
+ if ('\'' != **start)
return(ESCAPE_ERROR);
gly = ESCAPE_SPECIAL;
+ *start = ++*end;
term = '\'';
break;
@@ -91,7 +105,6 @@ mandoc_escape(const char **end, const char **start, int *sz)
* let us just skip the next character.
*/
case ('z'):
- (*end)++;
return(ESCAPE_SKIPCHAR);
/*
@@ -118,21 +131,17 @@ mandoc_escape(const char **end, const char **start, int *sz)
case ('f'):
if (ESCAPE_ERROR == gly)
gly = ESCAPE_FONT;
-
- rstart= &cp[i];
- if (start)
- *start = rstart;
-
- switch (cp[i++]) {
+ switch (**start) {
case ('('):
- rlim = 2;
+ *start = ++*end;
+ *sz = 2;
break;
case ('['):
+ *start = ++*end;
term = ']';
break;
default:
- rlim = 1;
- i--;
+ *sz = 1;
break;
}
break;
@@ -154,9 +163,10 @@ mandoc_escape(const char **end, const char **start, int *sz)
case ('X'):
/* FALLTHROUGH */
case ('Z'):
- if ('\'' != cp[i++])
+ if ('\'' != **start)
return(ESCAPE_ERROR);
gly = ESCAPE_IGNORE;
+ *start = ++*end;
term = '\'';
break;
@@ -182,10 +192,11 @@ mandoc_escape(const char **end, const char **start, int *sz)
case ('w'):
/* FALLTHROUGH */
case ('x'):
+ if ('\'' != **start)
+ return(ESCAPE_ERROR);
if (ESCAPE_ERROR == gly)
gly = ESCAPE_IGNORE;
- if ('\'' != cp[i++])
- return(ESCAPE_ERROR);
+ *start = ++*end;
term = '\'';
break;
@@ -194,17 +205,17 @@ mandoc_escape(const char **end, const char **start, int *sz)
* XXX Do any other escapes need similar handling?
*/
case ('N'):
- if ('\0' == cp[i])
+ if ('\0' == **start)
return(ESCAPE_ERROR);
- *end = &cp[++i];
- if (isdigit((unsigned char)cp[i-1]))
+ (*end)++;
+ if (isdigit((unsigned char)**start)) {
+ *sz = 1;
return(ESCAPE_IGNORE);
+ }
+ (*start)++;
while (isdigit((unsigned char)**end))
(*end)++;
- if (start)
- *start = &cp[i];
- if (sz)
- *sz = *end - &cp[i];
+ *sz = *end - *start;
if ('\0' != **end)
(*end)++;
return(ESCAPE_NUMBERED);
@@ -215,54 +226,43 @@ mandoc_escape(const char **end, const char **start, int *sz)
case ('s'):
gly = ESCAPE_IGNORE;
- rstart = &cp[i];
- if (start)
- *start = rstart;
-
/* See +/- counts as a sign. */
- c = cp[i];
- if ('+' == c || '-' == c || ASCII_HYPH == c)
- ++i;
+ if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
+ (*end)++;
- switch (cp[i++]) {
+ switch (**end) {
case ('('):
- rlim = 2;
+ *start = ++*end;
+ *sz = 2;
break;
case ('['):
+ *start = ++*end;
term = ']';
break;
case ('\''):
+ *start = ++*end;
term = '\'';
break;
default:
- rlim = 1;
- i--;
+ *sz = 1;
break;
}
- /* See +/- counts as a sign. */
- c = cp[i];
- if ('+' == c || '-' == c || ASCII_HYPH == c)
- ++i;
-
break;
/*
* Anything else is assumed to be a glyph.
+ * In this case, pass back the character after the backslash.
*/
default:
gly = ESCAPE_SPECIAL;
- rlim = 1;
- i--;
+ *start = --*end;
+ *sz = 1;
break;
}
assert(ESCAPE_ERROR != gly);
- *end = rstart = &cp[i];
- if (start)
- *start = rstart;
-
/*
* Read up to the terminating character,
* paying attention to nested escapes.
@@ -284,15 +284,13 @@ mandoc_escape(const char **end, const char **start, int *sz)
break;
}
}
- rlim = (*end)++ - rstart;
+ *sz = (*end)++ - *start;
} else {
- assert(rlim > 0);
- if ((size_t)rlim > strlen(rstart))
+ assert(*sz > 0);
+ if ((size_t)*sz > strlen(*start))
return(ESCAPE_ERROR);
- *end += rlim;
+ *end += *sz;
}
- if (sz)
- *sz = rlim;
/* Run post-processors. */
@@ -302,12 +300,13 @@ mandoc_escape(const char **end, const char **start, int *sz)
* Pretend that the constant-width font modes are the
* same as the regular font modes.
*/
- if (2 == rlim && 'C' == *rstart)
- rstart++;
- else if (1 != rlim)
+ if (2 == *sz && 'C' == **start) {
+ (*start)++;
+ (*sz)--;
+ } else if (1 != *sz)
break;
- switch (*rstart) {
+ switch (**start) {
case ('3'):
/* FALLTHROUGH */
case ('B'):
@@ -329,9 +328,7 @@ mandoc_escape(const char **end, const char **start, int *sz)
}
break;
case (ESCAPE_SPECIAL):
- if (1 != rlim)
- break;
- if ('c' == *rstart)
+ if (1 == *sz && 'c' == **start)
gly = ESCAPE_NOSPACE;
break;
default: