X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/0880ade932d57aac1fa88eaaf4668570bff7cf1d..b93b7d11befe80f204689861fdaa729f36298ebb:/mandoc.c?ds=sidebyside diff --git a/mandoc.c b/mandoc.c index ca73afd7..465965a4 100644 --- a/mandoc.c +++ b/mandoc.c @@ -1,4 +1,4 @@ -/* $Id: mandoc.c,v 1.47 2011/04/17 09:08:19 kristaps Exp $ */ +/* $Id: mandoc.c,v 1.53 2011/05/24 21:31:23 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -23,6 +23,8 @@ #include #include +#include +#include #include #include #include @@ -95,15 +97,6 @@ numescape(const char *start) return(++i); } -/* - * Handle an escaped sequeence. This should be called with any - * string subsequent a `\'. Pass a pointer to this substring as "end"; - * it will be set to the supremum of the parsed escape sequence. If - * this returns ESCAPE_ERROR, the string is bogus and should be thrown - * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the - * first relevant character of the substring (font, glyph, whatever) of - * length sz. Both "start" and "sz" may be NULL. - */ enum mandoc_esc mandoc_escape(const char **end, const char **start, int *sz) { @@ -132,6 +125,14 @@ mandoc_escape(const char **end, const char **start, int *sz) break; case ('['): gly = ESCAPE_SPECIAL; + /* + * Unicode escapes are defined in groff as \[uXXXX] to + * \[u10FFFF], where the contained value must be a valid + * Unicode codepoint. Here, however, only check whether + * it's not a zero-width escape. 
+ */ + if ('u' == cp[i] && ']' != cp[i + 1]) + gly = ESCAPE_UNICODE; term = ']'; break; case ('C'): @@ -163,10 +164,6 @@ mandoc_escape(const char **end, const char **start, int *sz) if (ESCAPE_ERROR == gly) gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('*'): - if (ESCAPE_ERROR == gly) - gly = ESCAPE_PREDEF; - /* FALLTHROUGH */ case ('f'): if (ESCAPE_ERROR == gly) gly = ESCAPE_FONT; @@ -460,8 +457,7 @@ mandoc_strdup(const char *ptr) * or to the null byte terminating the argument line. */ char * -mandoc_getarg(struct mparse *parse, - char **cpp, int ln, int dowarn, int *pos) +mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) { char *start, *cp; int quoted, pairs, white; @@ -508,7 +504,7 @@ mandoc_getarg(struct mparse *parse, } /* Quoted argument without a closing quote. */ - if (dowarn && 1 == quoted) + if (1 == quoted) mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); /* Null-terminate this argument and move to the next one. */ @@ -522,7 +518,7 @@ mandoc_getarg(struct mparse *parse, *pos += (int)(cp - start) + (quoted ? 1 : 0); *cpp = cp; - if (dowarn && '\0' == *cp && (white || ' ' == cp[-1])) + if ('\0' == *cp && (white || ' ' == cp[-1])) mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); return(start); @@ -613,7 +609,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed) /* * End-of-sentence recognition must include situations where * some symbols, such as `)', allow prior EOS punctuation to - * propogate outward. + * propagate outward. */ found = 0; @@ -696,3 +692,35 @@ mandoc_getcontrol(const char *cp, int *ppos) *ppos = pos; return(1); } + +/* + * Convert a string to a long that may not be <0. + * If the string is invalid, or is less than 0, return -1. 
/*
 * Parse the first sz bytes of p as a non-negative integer in the given
 * base and return it as an int.
 *
 * p need not be NUL-terminated: the bytes are copied into a local,
 * NUL-terminated buffer before conversion.  The conversion itself is
 * done by strtol(3), so base must be 0 or in the range 2..36.
 *
 * Returns -1 if sz is zero or too large for the local buffer, if base
 * is not one strtol(3) supports, if no number could be parsed or it is
 * followed by other characters, or if the value is negative or larger
 * than INT_MAX.
 */
int
mandoc_strntou(const char *p, size_t sz, int base)
{
	char	 buf[32];
	char	*ep;
	long	 v;

	/*
	 * strtol() may fail with EINVAL for an unsupported base without
	 * storing through its end pointer; reject such bases up front so
	 * that ep is never read uninitialised.
	 */
	if (base < 0 || base == 1 || base > 36)
		return(-1);

	/* Nothing to parse; also avoids memcpy() from a possibly NULL p. */
	if (sz == 0)
		return(-1);

	/* Leave room for the terminating NUL. */
	if (sz >= sizeof(buf))
		return(-1);

	memcpy(buf, p, sz);
	buf[sz] = '\0';

	errno = 0;
	v = strtol(buf, &ep, base);

	/* No characters consumed, or junk after the number. */
	if (ep == buf || *ep != '\0')
		return(-1);

	/* Out of range for long, negative, or too large for an int. */
	if (errno == ERANGE || v < 0 || v > INT_MAX)
		return(-1);

	return((int)v);
}