-/* $Id: mandoc.c,v 1.47 2011/04/17 09:08:19 kristaps Exp $ */
+/* $Id: mandoc.c,v 1.53 2011/05/24 21:31:23 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
#include <assert.h>
#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
return(++i);
}
-/*
- * Handle an escaped sequeence. This should be called with any
- * string subsequent a `\'. Pass a pointer to this substring as "end";
- * it will be set to the supremum of the parsed escape sequence. If
- * this returns ESCAPE_ERROR, the string is bogus and should be thrown
- * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the
- * first relevant character of the substring (font, glyph, whatever) of
- * length sz. Both "start" and "sz" may be NULL.
- */
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
break;
case ('['):
gly = ESCAPE_SPECIAL;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX] to
+ * \[u10FFFF], where the contained value must be a valid
+ * Unicode codepoint. Here, however, only check whether
+ * it's not a zero-width escape.
+ */
+ if ('u' == cp[i] && ']' != cp[i + 1])
+ gly = ESCAPE_UNICODE;
term = ']';
break;
case ('C'):
if (ESCAPE_ERROR == gly)
gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('*'):
- if (ESCAPE_ERROR == gly)
- gly = ESCAPE_PREDEF;
- /* FALLTHROUGH */
case ('f'):
if (ESCAPE_ERROR == gly)
gly = ESCAPE_FONT;
* or to the null byte terminating the argument line.
*/
char *
-mandoc_getarg(struct mparse *parse,
- char **cpp, int ln, int dowarn, int *pos)
+mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
{
char *start, *cp;
int quoted, pairs, white;
}
/* Quoted argument without a closing quote. */
- if (dowarn && 1 == quoted)
+ if (1 == quoted)
mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
/* Null-terminate this argument and move to the next one. */
*pos += (int)(cp - start) + (quoted ? 1 : 0);
*cpp = cp;
- if (dowarn && '\0' == *cp && (white || ' ' == cp[-1]))
+ if ('\0' == *cp && (white || ' ' == cp[-1]))
mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
return(start);
/*
* End-of-sentence recognition must include situations where
* some symbols, such as `)', allow prior EOS punctuation to
- * propogate outward.
+ * propagate outward.
*/
found = 0;
*ppos = pos;
return(1);
}
+
+/*
+ * Parse the first sz bytes of p as a non-negative integer in the
+ * given base and return it as an int.
+ * The bytes are copied into a local NUL-terminated buffer and handed
+ * to strtol(3); p itself need not be NUL-terminated at p[sz].
+ * Returns -1 if sz is greater than 31, if the copied string is empty
+ * or is not consumed entirely by the conversion, if strtol(3) reports
+ * a range error, or if the value is less than 0 or greater than
+ * INT_MAX.
+ */
+int
+mandoc_strntou(const char *p, size_t sz, int base)
+{
+ char buf[32];
+ char *ep;
+ long v;
+
+ if (sz > 31) /* buf must keep one byte for the terminating NUL */
+ return(-1);
+
+ memcpy(buf, p, sz);
+ buf[(int)sz] = '\0';
+
+ errno = 0; /* cleared so that ERANGE below can only come from strtol */
+ v = strtol(buf, &ep, base);
+
+ if (buf[0] == '\0' || *ep != '\0') /* empty, or trailing unparsed bytes */
+ return(-1);
+
+ if ((errno == ERANGE &&
+ (v == LONG_MAX || v == LONG_MIN)) || /* overflowed long */
+ (v > INT_MAX || v < 0)) /* does not fit a non-negative int */
+ return(-1);
+
+ return((int)v);
+}
+