-/* $Id: mandoc.c,v 1.46 2011/04/09 15:35:30 kristaps Exp $ */
+/* $Id: mandoc.c,v 1.53 2011/05/24 21:31:23 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
#include <assert.h>
#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
return(++i);
}
-/*
- * Handle an escaped sequeence. This should be called with any
- * string subsequent a `\'. Pass a pointer to this substring as "end";
- * it will be set to the supremum of the parsed escape sequence. If
- * this returns ESCAPE_ERROR, the string is bogus and should be thrown
- * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the
- * first relevant character of the substring (font, glyph, whatever) of
- * length sz. Both "start" and "sz" may be NULL.
- */
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
break;
case ('['):
gly = ESCAPE_SPECIAL;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX] to
+ * \[u10FFFF], where the contained value must be a valid
+ * Unicode codepoint. Here, however, only check whether
+ * it's not a zero-width escape.
+ */
+ if ('u' == cp[i] && ']' != cp[i + 1])
+ gly = ESCAPE_UNICODE;
term = ']';
break;
case ('C'):
if (ESCAPE_ERROR == gly)
gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('*'):
- if (ESCAPE_ERROR == gly)
- gly = ESCAPE_PREDEF;
- /* FALLTHROUGH */
case ('f'):
if (ESCAPE_ERROR == gly)
gly = ESCAPE_FONT;
/* Quoting can only start with a new word. */
start = *cpp;
+ quoted = 0;
if ('"' == *start) {
quoted = 1;
start++;
- } else
- quoted = 0;
+ }
pairs = 0;
white = 0;
/*
* End-of-sentence recognition must include situations where
* some symbols, such as `)', allow prior EOS punctuation to
- * propogate outward.
+ * propagate outward.
*/
found = 0;
*ppos = pos;
return(1);
}
+
+/*
+ * Convert the first "sz" bytes of "p" to a non-negative int, parsing
+ * them as a number in the given "base" with strtol(3).
+ *
+ * Only "sz" bytes of "p" are read, so "p" does not have to be
+ * NUL-terminated; the bytes are copied into a local buffer and
+ * terminated there before parsing.
+ *
+ * Returns the parsed value on success.  Returns -1 if:
+ *  - "sz" is larger than 31 (the input would not fit the local buffer),
+ *  - the input is empty or is not consumed entirely by strtol(3),
+ *  - strtol(3) reports an out-of-range value, or
+ *  - the value is negative or greater than INT_MAX.
+ *
+ * Note that errno is reset to zero as a side effect.
+ */
+int
+mandoc_strntou(const char *p, size_t sz, int base)
+{
+ char buf[32];
+ char *ep;
+ long v;
+
+ if (sz > 31) /* leave room for the terminating NUL in buf[32] */
+ return(-1);
+
+ memcpy(buf, p, sz);
+ buf[(int)sz] = '\0';
+
+ errno = 0; /* so that ERANGE below can only come from this strtol() */
+ v = strtol(buf, &ep, base);
+
+ if (buf[0] == '\0' || *ep != '\0') /* empty input or trailing garbage */
+ return(-1);
+
+ if ((errno == ERANGE &&
+ (v == LONG_MAX || v == LONG_MIN)) ||
+ (v > INT_MAX || v < 0)) /* must fit a non-negative int */
+ return(-1);
+
+ return((int)v);
+}
+