-/* $Id: roff.c,v 1.196 2014/03/07 18:30:11 schwarze Exp $ */
+/* $Id: roff.c,v 1.205 2014/04/07 21:00:08 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
#include <string.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libroff.h"
#include "libmandoc.h"
ROFF_ns,
ROFF_ps,
ROFF_rm,
+ ROFF_rr,
ROFF_so,
ROFF_ta,
ROFF_tr,
ROFF_MAX
};
-enum roffrule {
- ROFFRULE_DENY,
- ROFFRULE_ALLOW
-};
-
/*
* An incredibly-simple string buffer.
*/
};
struct roff {
- enum mparset parsetype; /* requested parse type */
struct mparse *parse; /* parse point */
- int quick; /* skip standard macro deletion */
+ int options; /* parse options */
struct roffnode *last; /* leaf of stack */
- enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
+ int rstack[RSTACK_MAX]; /* stack of !`ie' rules */
char control; /* control character */
int rstackpos; /* position in rstack */
struct roffreg *regtab; /* number registers */
char *name; /* node name, e.g. macro name */
char *end; /* end-rules: custom token */
int endspan; /* end-rules: next-line or infty */
- enum roffrule rule; /* current evaluation rule */
+ int rule; /* current evaluation rule */
};
#define ROFF_ARGS struct roff *r, /* parse ctx */ \
static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
-static enum roffrule roff_evalcond(const char *, int *);
+static int roff_evalcond(const char *, int *);
+static int roff_evalnum(const char *, int *, int *, int);
+static int roff_evalpar(const char *, int *, int *);
+static int roff_evalstrcond(const char *, int *);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static enum rofferr roff_res(struct roff *,
char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
+static enum rofferr roff_rr(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *,
{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
{ "rm", roff_rm, NULL, NULL, 0, NULL },
+ { "rr", roff_rr, NULL, NULL, 0, NULL },
{ "so", roff_so, NULL, NULL, 0, NULL },
{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
{ "tr", roff_tr, NULL, NULL, 0, NULL },
{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
+/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const char *const __mdoc_reserved[] = {
"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
- "Ds", "Dt", "Dv", "Dx", "D1",
- "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
- "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
+ "Dt", "Dv", "Dx", "D1",
+ "Ec", "Ed", "Ef", "Ek", "El", "Em",
+ "En", "Eo", "Er", "Es", "Ev", "Ex",
"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
- "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
- "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
+ "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
+ "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
"Oc", "Oo", "Op", "Os", "Ot", "Ox",
- "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
- "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
- "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
+ "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
+ "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
+ "Sc", "Sh", "Sm", "So", "Sq",
"Ss", "St", "Sx", "Sy",
"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
- "%A", "%B", "%D", "%I", "%J", "%N", "%O",
+ "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
"%P", "%Q", "%R", "%T", "%U", "%V",
NULL
};
+/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const char *const __man_reserved[] = {
- "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
- "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
- "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
- "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
- "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
+ "AT", "B", "BI", "BR", "DT",
+ "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
+ "LP", "OP", "P", "PD", "PP",
+ "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
+ "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
NULL
};
p->parent = r->last;
p->line = line;
p->col = col;
- p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
+ p->rule = p->parent ? p->parent->rule : 0;
r->last = p;
}
struct roff *
-roff_alloc(enum mparset type, struct mparse *parse, int quick)
+roff_alloc(struct mparse *parse, int options)
{
struct roff *r;
r = mandoc_calloc(1, sizeof(struct roff));
- r->parsetype = type;
r->parse = parse;
- r->quick = quick;
+ r->options = options;
r->rstackpos = -1;
roffhash_init();
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
char ubuf[12]; /* buffer to print the number */
+ const char *start; /* start of the string to process */
const char *stesc; /* start of an escape sequence ('\\') */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
const char *res; /* the string to be substituted */
char *nbuf; /* new buffer to copy bufp to */
- size_t nsz; /* size of the new buffer */
size_t maxl; /* expected length of the escape name */
size_t naml; /* actual length of the escape name */
+ size_t ressz; /* size of the replacement string */
int expand_count; /* to avoid infinite loops */
expand_count = 0;
+ start = *bufp + pos;
+ stesc = strchr(start, '\0') - 1;
+ while (stesc-- > start) {
-again:
- cp = *bufp + pos;
- while (NULL != (cp = strchr(cp, '\\'))) {
- stesc = cp++;
+ /* Search backwards for the next backslash. */
+
+ if ('\\' != *stesc)
+ continue;
+
+ /* If it is escaped, skip it. */
+
+ for (cp = stesc - 1; cp >= start; cp--)
+ if ('\\' != *cp)
+ break;
+
+ if (0 == (stesc - cp) % 2) {
+ stesc = cp;
+ continue;
+ }
/*
- * The second character must be an asterisk or an n.
- * If it isn't, skip it anyway: It is escaped,
- * so it can't start another escape sequence.
+ * Everything except user-defined strings and number
+ * registers is only checked, not expanded.
*/
- if ('\0' == *cp)
- return(ROFF_CONT);
-
+ cp = stesc + 1;
switch (*cp) {
case ('*'):
res = NULL;
res = ubuf;
break;
default:
- if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
- continue;
- mandoc_msg
- (MANDOCERR_BADESCAPE, r->parse,
- ln, (int)(stesc - *bufp), NULL);
- return(ROFF_CONT);
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(stesc - *bufp), NULL);
+ continue;
}
- cp++;
+ if (EXPAND_LIMIT < ++expand_count) {
+ mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
+ ln, (int)(stesc - *bufp), NULL);
+ return(ROFF_IGN);
+ }
/*
* The third character decides the length
* Save a pointer to the name.
*/
- switch (*cp) {
+ switch (*++cp) {
case ('\0'):
- return(ROFF_CONT);
+ continue;
case ('('):
cp++;
maxl = 2;
(MANDOCERR_BADESCAPE,
r->parse, ln,
(int)(stesc - *bufp), NULL);
- return(ROFF_CONT);
+ continue;
}
if (0 == maxl && ']' == *cp)
break;
ln, (int)(stesc - *bufp), NULL);
res = "";
}
+ ressz = strlen(res);
/* Replace the escape sequence by the string. */
- pos = stesc - *bufp;
-
- nsz = *szp + strlen(res) + 1;
- nbuf = mandoc_malloc(nsz);
+ *szp += ressz + 1;
+ nbuf = mandoc_malloc(*szp);
strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
- strlcat(nbuf, res, nsz);
- strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
+ strlcat(nbuf, res, *szp);
+ strlcat(nbuf, cp + (maxl ? 0 : 1), *szp);
- free(*bufp);
+ /* Prepare for the next replacement. */
+ start = nbuf + pos;
+ stesc = nbuf + (stesc - *bufp) + ressz;
+ free(*bufp);
*bufp = nbuf;
- *szp = nsz;
-
- if (EXPAND_LIMIT >= ++expand_count)
- goto again;
-
- /* Just leave the string unexpanded. */
- mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
- return(ROFF_IGN);
}
return(ROFF_CONT);
}
/* Spring the input line trap. */
if (1 == roffit_lines) {
- isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
- if (-1 == isz) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
free(*bufp);
*bufp = p;
*szp = isz + 1;
roff_cond_sub(ROFF_ARGS)
{
enum rofft t;
- enum roffrule rr;
char *ep;
+ int rr;
rr = r->last->rule;
roffnode_cleanscope(r);
*/
if ((ROFF_MAX != t) &&
- (ROFFRULE_ALLOW == rr ||
- ROFFMAC_STRUCT & roffs[t].flags)) {
+ (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
assert(roffs[t].proc);
return((*roffs[t].proc)(r, t, bufp, szp,
ln, ppos, pos, offs));
ep = *bufp + pos;
if ('\\' == ep[0] && '}' == ep[1])
- rr = ROFFRULE_DENY;
+ rr = 0;
/* Always check for the closing delimiter `\}'. */
while (NULL != (ep = strchr(ep, '\\'))) {
- if ('}' != *(++ep))
- continue;
- *ep = '&';
- roff_ccond(r, ln, pos);
+ if ('}' == *(++ep)) {
+ *ep = '&';
+ roff_ccond(r, ln, ep - *bufp - 1);
+ }
+ ++ep;
}
- return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+ return(rr ? ROFF_CONT : ROFF_IGN);
}
/* ARGSUSED */
roff_cond_text(ROFF_ARGS)
{
char *ep;
- enum roffrule rr;
+ int rr;
rr = r->last->rule;
roffnode_cleanscope(r);
- ep = &(*bufp)[pos];
- for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
- ep++;
- if ('}' != *ep)
- continue;
- *ep = '&';
- roff_ccond(r, ln, pos);
+ ep = *bufp + pos;
+ while (NULL != (ep = strchr(ep, '\\'))) {
+ if ('}' == *(++ep)) {
+ *ep = '&';
+ roff_ccond(r, ln, ep - *bufp - 1);
+ }
+ ++ep;
}
- return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+ return(rr ? ROFF_CONT : ROFF_IGN);
}
+/*
+ * Parse a single signed integer number. Stop at the first non-digit.
+ * If there is at least one digit, return success and advance the
+ * parse point, else return failure and leave the parse point unchanged.
+ * Ignore overflows, treat them just like the C language.
+ */
static int
roff_getnum(const char *v, int *pos, int *res)
{
p++;
for (*res = 0; isdigit((unsigned char)v[p]); p++)
- *res += 10 * *res + v[p] - '0';
+ *res = 10 * *res + v[p] - '0';
if (p == *pos + n)
return 0;
return 1;
}
+/*
+ * Evaluate a string comparison condition.
+ * The first character is the delimiter.
+ * Succeed if the string up to its second occurrence
+ * matches the string up to its third occurrence.
+ * Advance the cursor after the third occurrence
+ * or lacking that, to the end of the line.
+ * The cursor is never moved beyond the terminating NUL;
+ * an empty condition fails without moving the cursor.
+ * Return 1 if the two strings match, 0 otherwise.
+ */
static int
-roff_getop(const char *v, int *pos, char *res)
+roff_evalstrcond(const char *v, int *pos)
{
- int e;
+	const char	*s1, *s2, *s3;
+	int		 match;
- *res = v[*pos];
- e = v[*pos + 1] == '=';
+	match = 0;
+	s1 = v + *pos;		/* initial delimiter */
+
+	/* Without a delimiter, s1 + 1 would lie beyond the string. */
+	if ('\0' == *s1)
+		return(match);
+
+	s2 = s1 + 1;		/* for scanning the first string */
+	s3 = strchr(s2, *s1);	/* for scanning the second string */
- switch (*res) {
- case '=':
- break;
- case '>':
- if (e)
- *res = 'g';
- break;
- case '<':
- if (e)
- *res = 'l';
- break;
- default:
- return(0);
- }
+	if (NULL == s3)		/* found no middle delimiter */
+		goto out;
- *pos += 1 + e;
+	while ('\0' != *++s3) {
+		if (*s2 != *s3) {	/* mismatch */
+			s3 = strchr(s3, *s1);
+			break;
+		}
+		if (*s3 == *s1) {	/* found the final delimiter */
+			match = 1;
+			break;
+		}
+		s2++;
+	}
- return(*res);
+out:
+	if (NULL == s3)
+		s3 = strchr(s2, '\0');
+	else if ('\0' != *s3)	/* stay on the NUL if the line ended early */
+		s3++;
+	*pos = s3 - v;
+	return(match);
}
-static enum roffrule
+/*
+ * Evaluate an optionally negated single character, numerical,
+ * or string condition.
+ * The condition is read from v starting at offset *pos, and *pos
+ * is advanced as the condition is consumed.
+ * Return non-zero if the condition holds, zero otherwise.
+ */
+static int
roff_evalcond(const char *v, int *pos)
{
- int not, lh, rh;
- char op;
+ int wanttrue, number;
+
+ /* A leading `!' inverts the sense of whatever follows. */
+ if ('!' == v[*pos]) {
+ wanttrue = 0;
+ (*pos)++;
+ } else
+ wanttrue = 1;
switch (v[*pos]) {
case ('n'):
+ /* FALLTHROUGH */
+ case ('o'):
+ /* The letters `n' and `o' count as true before negation. */
(*pos)++;
- return(ROFFRULE_ALLOW);
+ return(wanttrue);
+ case ('c'):
+ /* FALLTHROUGH */
+ case ('d'):
+ /* FALLTHROUGH */
case ('e'):
/* FALLTHROUGH */
- case ('o'):
+ case ('r'):
/* FALLTHROUGH */
case ('t'):
+ /* These letters count as false before negation. */
(*pos)++;
- return(ROFFRULE_DENY);
- case ('!'):
- (*pos)++;
- not = 1;
- break;
+ return(!wanttrue);
default:
- not = 0;
break;
}
- if (!roff_getnum(v, pos, &lh))
- return ROFFRULE_DENY;
- if (!roff_getop(v, pos, &op)) {
- if (lh < 0)
- lh = 0;
- goto out;
- }
- if (!roff_getnum(v, pos, &rh))
- return ROFFRULE_DENY;
- switch (op) {
- case 'g':
- lh = lh >= rh;
- break;
- case 'l':
- lh = lh <= rh;
- break;
- case '=':
- lh = lh == rh;
- break;
- case '>':
- lh = lh > rh;
- break;
- case '<':
- lh = lh < rh;
- break;
- default:
- return ROFFRULE_DENY;
- }
-out:
- if (not)
- lh = !lh;
- return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
+ /*
+ * Otherwise try a numeric expression, which is true if its
+ * value is positive.  If roff_evalnum() reports failure,
+ * fall back to a string comparison.
+ */
+ if (roff_evalnum(v, pos, &number, 0))
+ return((number > 0) == wanttrue);
+ else
+ return(roff_evalstrcond(v, pos) == wanttrue);
}
/* ARGSUSED */
*/
r->last->rule = ROFF_el == tok ?
- (r->rstackpos < 0 ?
- ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
+ (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
roff_evalcond(*bufp, &pos);
/*
r->parse, ln, ppos, NULL);
return(ROFF_ERR);
}
- r->rstack[++r->rstackpos] =
- ROFFRULE_DENY == r->last->rule ?
- ROFFRULE_ALLOW : ROFFRULE_DENY;
+ r->rstack[++r->rstackpos] = !r->last->rule;
}
/* If the parent has false as its rule, then so do we. */
- if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
- r->last->rule = ROFFRULE_DENY;
+ if (r->last->parent && !r->last->parent->rule)
+ r->last->rule = 0;
/*
* Determine scope.
return(ROFF_IGN);
}
+/*
+ * Recognize a single operator of one or two characters at v[*pos].
+ * On success, store a one-character code for the operator in *res,
+ * move the parse point past the operator, and return that code,
+ * which is never zero.  The two-character operators are encoded
+ * as follows: "<=" as 'l', ">=" as 'g', "<>" as '!', "<?" as 'i',
+ * ">?" as 'a', and "==" as '='.
+ * On failure, return 0 and leave the parse point unchanged;
+ * *res then holds the unrecognized character.
+ */
+static int
+roff_getop(const char *v, int *pos, char *res)
+{
+	char	 next;	/* character following '<', '>' or '=' */
+	char	 two;	/* code of a two-character operator, or NUL */
+
+	two = '\0';
+	*res = v[*pos];
+
+	if ('<' == *res) {
+		next = v[*pos + 1];
+		if ('=' == next)
+			two = 'l';
+		else if ('>' == next)
+			two = '!';
+		else if ('?' == next)
+			two = 'i';
+	} else if ('>' == *res) {
+		next = v[*pos + 1];
+		if ('=' == next)
+			two = 'g';
+		else if ('?' == next)
+			two = 'a';
+	} else if ('=' == *res) {
+		if ('=' == v[*pos + 1])
+			two = '=';
+	} else if ('\0' == *res || NULL == strchr("+-*/%&:", *res))
+		return(0);
+
+	if ('\0' == two)
+		*pos += 1;
+	else {
+		*res = two;
+		*pos += 2;
+	}
+
+	return(*res);
+}
+
+/*
+ * Evaluate one operand of a numeric expression at v[*pos]:
+ * either a parenthesized subexpression or a single signed integer.
+ * Return 1 with the value in *res on success, 0 on failure.
+ */
+static int
+roff_evalpar(const char *v, int *pos, int *res)
+{
+
+	if ('(' == v[*pos]) {
+		/* Step over the opening parenthesis. */
+		(*pos)++;
+		if (0 == roff_evalnum(v, pos, res, 1))
+			return(0);
+
+		/* A missing closing parenthesis is tolerated. */
+		if (')' == v[*pos])
+			(*pos)++;
+
+		return(1);
+	}
+
+	return(roff_getnum(v, pos, res));
+}
+
+/*
+ * Evaluate a complete numeric expression.
+ * Proceed left to right, there is no concept of precedence.
+ *
+ * The expression is read from v starting at offset *pos; if pos
+ * is NULL, parsing starts at offset 0 and the final offset is
+ * discarded.  If skipwhite is non-zero, white space around
+ * operands and operators is skipped.
+ *
+ * Return 1 with the value in *res on success.  Return 0 if an
+ * operand cannot be parsed; in that case, *pos may already have
+ * been advanced and *res is not meaningful.
+ * Division and modulus by zero yield 0 rather than invoking
+ * undefined behavior.
+ */
+static int
+roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
+{
+	int		 mypos, operand2;
+	char		 operator;
+
+	if (NULL == pos) {
+		mypos = 0;
+		pos = &mypos;
+	}
+
+	if (skipwhite)
+		while (isspace((unsigned char)v[*pos]))
+			(*pos)++;
+
+	if ( ! roff_evalpar(v, pos, res))
+		return(0);
+
+	while (1) {
+		if (skipwhite)
+			while (isspace((unsigned char)v[*pos]))
+				(*pos)++;
+
+		/* The expression ends where no operator follows. */
+		if ( ! roff_getop(v, pos, &operator))
+			break;
+
+		if (skipwhite)
+			while (isspace((unsigned char)v[*pos]))
+				(*pos)++;
+
+		if ( ! roff_evalpar(v, pos, &operand2))
+			return(0);
+
+		if (skipwhite)
+			while (isspace((unsigned char)v[*pos]))
+				(*pos)++;
+
+		switch (operator) {
+		case ('+'):
+			*res += operand2;
+			break;
+		case ('-'):
+			*res -= operand2;
+			break;
+		case ('*'):
+			*res *= operand2;
+			break;
+		case ('/'):
+			/* Dividing by zero is undefined in C. */
+			if (0 == operand2) {
+				*res = 0;
+				break;
+			}
+			*res /= operand2;
+			break;
+		case ('%'):
+			/* So is the remainder of a division by zero. */
+			if (0 == operand2) {
+				*res = 0;
+				break;
+			}
+			*res %= operand2;
+			break;
+		case ('<'):
+			*res = *res < operand2;
+			break;
+		case ('>'):
+			*res = *res > operand2;
+			break;
+		case ('l'):
+			*res = *res <= operand2;
+			break;
+		case ('g'):
+			*res = *res >= operand2;
+			break;
+		case ('='):
+			*res = *res == operand2;
+			break;
+		case ('!'):
+			*res = *res != operand2;
+			break;
+		case ('&'):
+			*res = *res && operand2;
+			break;
+		case (':'):
+			*res = *res || operand2;
+			break;
+		case ('i'):
+			/* minimum of both operands */
+			if (operand2 < *res)
+				*res = operand2;
+			break;
+		case ('a'):
+			/* maximum of both operands */
+			if (operand2 > *res)
+				*res = operand2;
+			break;
+		default:
+			/* roff_getop() returns no other codes. */
+			abort();
+		}
+	}
+	return(1);
+}
+
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
}
}
-/* ARGSUSED */
static enum rofferr
roff_nr(ROFF_ARGS)
{
const char *key;
char *val;
- size_t sz;
int iv;
char sign;
if ('+' == sign || '-' == sign)
val++;
- sz = strspn(val, "0123456789");
- iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
+ if (roff_evalnum(val, NULL, &iv, 0))
+ roff_setreg(r, key, iv, sign);
- roff_setreg(r, key, iv, sign);
+ return(ROFF_IGN);
+}
+
+/*
+ * Handle the `rr' request: remove one entry from the list of
+ * number registers.  The register name is obtained from the
+ * request line by roff_getname(); if no register with that key
+ * exists, nothing is changed.  Always returns ROFF_IGN.
+ * NOTE(review): presumably roff_getname() never returns NULL,
+ * since the name is passed to strcmp() unchecked - confirm.
+ */
+static enum rofferr
+roff_rr(ROFF_ARGS)
+{
+	struct roffreg	*reg, **prev;
+	const char	*name;
+	char		*cp;
+
+	cp = *bufp + pos;
+	name = roff_getname(r, &cp, ln, pos);
+
+	/*
+	 * Walk the list while remembering the link that points
+	 * to the current entry, such that it can be unlinked.
+	 */
+	prev = &r->regtab;
+	while (1) {
+		reg = *prev;
+		if (NULL == reg || !strcmp(name, reg->key.p))
+			break;
+		prev = &reg->next;
+	}
+	if (NULL != reg) {
+		*prev = reg->next;
+		free(reg->key.p);
+		free(reg);
+	}
return(ROFF_IGN);
}
{
const char *const *cp;
- if (0 == r->quick && MPARSE_MDOC != r->parsetype)
+ if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
for (cp = __mdoc_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
{
const char *const *cp;
- if (0 == r->quick && MPARSE_MDOC != r->parsetype)
+ if (0 == (MPARSE_QUICK & r->options))
for (cp = __man_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);