X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/569c276460dfd948ab28a199826274e234cf815c..ca00a3c56f21bcf2b86b3afd6aae2ce699736a63:/argv.c

diff --git a/argv.c b/argv.c
index 9e884777..8ad5d662 100644
--- a/argv.c
+++ b/argv.c
@@ -1,4 +1,4 @@
-/* $Id: argv.c,v 1.11 2009/01/12 10:31:53 kristaps Exp $ */
+/* $Id: argv.c,v 1.34 2009/02/28 12:16:02 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -25,39 +25,280 @@
 
 #include "private.h"
 
+/*
+ * Routines to parse arguments of macros.  Arguments follow the syntax
+ * of `-arg [val [valN...]]'.  Arguments come in all types:  quoted
+ * arguments, multiple values per argument, no-value arguments, etc.
+ */
 
-static	int		 lookup(int, const char *);
-static	int		 parse(struct mdoc *, int, int,
+#define	ARGS_QUOTED	(1 << 0)
+#define	ARGS_DELIM	(1 << 1)
+#define	ARGS_TABSEP	(1 << 2)
+
+static	int		 argv_a2arg(int, const char *);
+static	int		 args(struct mdoc *, int, int *, 
+				char *, int, char **);
+static	int		 argv(struct mdoc *, int,
+				struct mdoc_arg *, int *, char *);
+static	int		 argv_single(struct mdoc *, int, 
 				struct mdoc_arg *, int *, char *);
-static	int		 postparse(struct mdoc *, int, 
-				const struct mdoc_arg *, int);
+static	int		 argv_multi(struct mdoc *, int, 
+				struct mdoc_arg *, int *, char *);
+static	int		 pwarn(struct mdoc *, int, int, int);
+static	int		 perr(struct mdoc *, int, int, int);
+
+/* Warning messages. */
+
+#define	WQUOTPARM	(0)
+#define	WARGVPARM	(1)
+#define	WCOLEMPTY	(2)
+#define	WTAILWS		(3)
+
+/* Error messages. */
+
+#define	EQUOTTERM	(0)
+#define	EARGVAL		(1)
+#define	EARGMANY	(2)
+
+static	int mdoc_argflags[MDOC_MAX] = {
+	0, /* \" */
+	0, /* Dd */
+	0, /* Dt */
+	0, /* Os */
+	0, /* Sh */
+	0, /* Ss */ 
+	ARGS_DELIM, /* Pp */ 
+	ARGS_DELIM, /* D1 */
+	ARGS_DELIM, /* Dl */
+	0, /* Bd */
+	0, /* Ed */
+	0, /* Bl */
+	0, /* El */
+	0, /* It */
+	ARGS_DELIM, /* Ad */ 
+	ARGS_DELIM, /* An */
+	ARGS_DELIM, /* Ar */
+	ARGS_QUOTED, /* Cd */
+	ARGS_DELIM, /* Cm */
+	ARGS_DELIM, /* Dv */ 
+	ARGS_DELIM, /* Er */ 
+	ARGS_DELIM, /* Ev */ 
+	0, /* Ex */
+	ARGS_DELIM | ARGS_QUOTED, /* Fa */ 
+	0, /* Fd */ 
+	ARGS_DELIM, /* Fl */
+	ARGS_DELIM | ARGS_QUOTED, /* Fn */ 
+	ARGS_DELIM | ARGS_QUOTED, /* Ft */ 
+	ARGS_DELIM, /* Ic */ 
+	0, /* In */ 
+	ARGS_DELIM, /* Li */
+	0, /* Nd */ 
+	ARGS_DELIM, /* Nm */ 
+	ARGS_DELIM, /* Op */
+	0, /* Ot */
+	ARGS_DELIM, /* Pa */
+	0, /* Rv */
+	ARGS_DELIM, /* St */ 
+	ARGS_DELIM, /* Va */
+	ARGS_DELIM, /* Vt */ 
+	ARGS_DELIM, /* Xr */
+	ARGS_QUOTED, /* %A */
+	ARGS_QUOTED, /* %B */
+	ARGS_QUOTED, /* %D */
+	ARGS_QUOTED, /* %I */
+	ARGS_QUOTED, /* %J */
+	ARGS_QUOTED, /* %N */
+	ARGS_QUOTED, /* %O */
+	ARGS_QUOTED, /* %P */
+	ARGS_QUOTED, /* %R */
+	ARGS_QUOTED, /* %T */
+	ARGS_QUOTED, /* %V */
+	ARGS_DELIM, /* Ac */
+	0, /* Ao */
+	ARGS_DELIM, /* Aq */
+	ARGS_DELIM, /* At */
+	ARGS_DELIM, /* Bc */
+	0, /* Bf */ 
+	0, /* Bo */
+	ARGS_DELIM, /* Bq */
+	ARGS_DELIM, /* Bsx */
+	ARGS_DELIM, /* Bx */
+	0, /* Db */
+	ARGS_DELIM, /* Dc */
+	0, /* Do */
+	ARGS_DELIM, /* Dq */
+	ARGS_DELIM, /* Ec */
+	0, /* Ef */
+	ARGS_DELIM, /* Em */ 
+	0, /* Eo */
+	ARGS_DELIM, /* Fx */
+	ARGS_DELIM, /* Ms */
+	ARGS_DELIM, /* No */
+	ARGS_DELIM, /* Ns */
+	ARGS_DELIM, /* Nx */
+	ARGS_DELIM, /* Ox */
+	ARGS_DELIM, /* Pc */
+	ARGS_DELIM, /* Pf */
+	0, /* Po */
+	ARGS_DELIM, /* Pq */
+	ARGS_DELIM, /* Qc */
+	ARGS_DELIM, /* Ql */
+	0, /* Qo */
+	ARGS_DELIM, /* Qq */
+	0, /* Re */
+	0, /* Rs */
+	ARGS_DELIM, /* Sc */
+	0, /* So */
+	ARGS_DELIM, /* Sq */
+	0, /* Sm */
+	ARGS_DELIM, /* Sx */
+	ARGS_DELIM, /* Sy */
+	ARGS_DELIM, /* Tn */
+	ARGS_DELIM, /* Ux */
+	ARGS_DELIM, /* Xc */
+	0, /* Xo */
+	0, /* Fo */ 
+	0, /* Fc */ 
+	0, /* Oo */
+	ARGS_DELIM, /* Oc */
+	0, /* Bk */
+	0, /* Ek */
+	0, /* Bt */
+	0, /* Hf */
+	0, /* Fr */
+	0, /* Ud */
+};
+
+
+static int
+perr(struct mdoc *mdoc, int line, int pos, int code)
+{
+	int		 c;
+
+	switch (code) {
+	case (EQUOTTERM):
+		c = mdoc_perr(mdoc, line, pos, 
+				"unterminated quoted parameter");
+		break;
+	case (EARGVAL):
+		c = mdoc_perr(mdoc, line, pos, 
+				"argument requires a value");
+		break;
+	case (EARGMANY):
+		c = mdoc_perr(mdoc, line, pos, 
+				"too many values for argument");
+		break;
+	default:
+		abort();
+		/* NOTREACHED */
+	}
+	return(c);
+}
+
+
+static int
+pwarn(struct mdoc *mdoc, int line, int pos, int code)
+{
+	int		 c;
+
+	switch (code) {
+	case (WQUOTPARM):
+		c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, 
+				"unexpected quoted parameter");
+		break;
+	case (WARGVPARM):
+		c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, 
+				"argument-like parameter");
+		break;
+	case (WCOLEMPTY):
+		c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, 
+				"last list column is empty");
+		break;
+	case (WTAILWS):
+		c = mdoc_pwarn(mdoc, line, pos, WARN_COMPAT, 
+				"trailing whitespace");
+		break;
+	default:
+		abort();
+		/* NOTREACHED */
+	}
+	return(c);
+}
 
 
 int
-mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v)
+mdoc_args(struct mdoc *mdoc, int line, 
+		int *pos, char *buf, int tok, char **v)
 {
-	int		 i;
+	int		  fl, c, i;
+	struct mdoc_node *n;
+
+	fl = (0 == tok) ? 0 : mdoc_argflags[tok];
+
+	/* 
+	 * First see if we should use TABSEP (Bl -column).  This
+	 * invalidates the use of ARGS_DELIM.
+	 */
+
+	if (MDOC_It == tok) {
+		for (n = mdoc->last; n; n = n->parent)
+			if (MDOC_BLOCK == n->type)
+				if (MDOC_Bl == n->tok)
+					break;
+		assert(n);
+		c = (int)n->data.block.argc;
+		assert(c > 0);
+
+		/* LINTED */
+		for (i = 0; i < c; i++) {
+			if (MDOC_Column != n->data.block.argv[i].arg)
+				continue;
+			fl |= ARGS_TABSEP;
+			fl &= ~ARGS_DELIM;
+			break;
+		}
+	}
+
+	return(args(mdoc, line, pos, buf, fl, v));
+}
+
+
+static int
+args(struct mdoc *mdoc, int line, 
+		int *pos, char *buf, int fl, char **v)
+{
+	int		  i;
+	char		 *p, *pp;
+
+	assert(*pos > 0);
 
 	if (0 == buf[*pos])
 		return(ARGS_EOLN);
 
 	if ('\"' == buf[*pos] && ! (fl & ARGS_QUOTED))
-		if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX_QUOTED))
+		if ( ! pwarn(mdoc, line, *pos, WQUOTPARM))
 			return(ARGS_ERROR);
 
 	if ('-' == buf[*pos]) 
-		if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX_ARGLIKE))
+		if ( ! pwarn(mdoc, line, *pos, WARGVPARM))
 			return(ARGS_ERROR);
 
+	/* 
+	 * If the first character is a delimiter and we're to look for
+	 * delimited strings, then pass down the buffer seeing if it
+	 * follows the pattern of [[::delim::][ ]+]+.
+	 */
+
 	if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos])) {
 		for (i = *pos; buf[i]; ) {
 			if ( ! mdoc_iscdelim(buf[i]))
 				break;
 			i++;
-			if (0 == buf[i] || ! isspace(buf[i]))
+			/* There must be at least one space... */
+			if (0 == buf[i] || ! isspace((int)buf[i]))
 				break;
 			i++;
-			while (buf[i] && isspace(buf[i]))
+			while (buf[i] && isspace((int)buf[i]))
 				i++;
 		}
 		if (0 == buf[i]) {
@@ -66,30 +307,127 @@ mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v)
 		}
 	}
 
-	/*
-	 * Parse routine for non-quoted string.  
-	 */
+	/* First parse non-quoted strings. */
 
-	if ('\"' != buf[*pos]) {
+	if ('\"' != buf[*pos] || ! (ARGS_QUOTED & fl)) {
 		*v = &buf[*pos];
 
-		while (buf[*pos] && ! isspace(buf[*pos]))
-			(*pos)++;
+		/* 
+		 * Thar be dragons here!  If we're tab-separated, search
+		 * ahead for either a tab or the `Ta' macro.  If a tab
+		 * is detected, it mustn't be escaped; if a `Ta' is
+		 * detected, it must be space-buffered before and after.
+		 * If either of these holds true, then prune out the
+		 * extra spaces and call it an argument.
+		 */
+
+		if (ARGS_TABSEP & fl) {
+			/* Scan ahead to unescaped tab. */
+
+			for (p = *v; ; p++) {
+				if (NULL == (p = strchr(p, '\t')))
+					break;
+				if (p == *v)
+					break;
+				if ('\\' != *(p - 1))
+					break;
+			}
+
+			/* Scan ahead to unescaped `Ta'. */
+
+			for (pp = *v; ; pp++) {
+				if (NULL == (pp = strstr(pp, "Ta")))
+					break;
+				if (pp > *v && ' ' != *(pp - 1))
+					continue;
+				if (' ' == *(pp + 2) || 0 == *(pp + 2))
+					break;
+			}
+
+			/* Choose delimiter tab/Ta. */
+
+			if (p && pp)
+				p = (p < pp ? p : pp);
+			else if ( ! p && pp)
+				p = pp;
+
+			/* Strip delimiter's preceding whitespace. */
+
+			if (p && p > *v) {
+				pp = p - 1;
+				while (pp > *v && ' ' == *pp)
+					pp--;
+				if (pp == *v && ' ' == *pp) 
+					*pp = 0;
+				else if (' ' == *pp)
+					*(pp + 1) = 0;
+			}
+
+			/* ...then the delimiter itself and following whitespace. */
+
+			if (p && ('\t' != *p)) {
+				*p++ = 0;
+				*p++ = 0;
+			} else if (p)
+				*p++ = 0;
+
+			if (p) {
+				while (' ' == *p)
+					p++;
+				if (0 != *p)
+					*(p - 1) = 0;
+				*pos += (int)(p - *v);
+			} 
+
+			if (p && 0 == *p)
+				if ( ! pwarn(mdoc, line, *pos, WCOLEMPTY))
+					return(0);
+			if (p && 0 == *p && p > *v && ' ' == *(p - 1))
+				if ( ! pwarn(mdoc, line, *pos, WTAILWS))
+					return(0);
+
+			if (p)
+				return(ARGS_WORD);
+
+			/* Configure the eoln case, too. */
+
+			p = strchr(*v, 0);
+			assert(p);
+
+			if (p > *v && ' ' == *(p - 1))
+				if ( ! pwarn(mdoc, line, *pos, WTAILWS))
+					return(0);
+			*pos += (int)(p - *v);
+
+			return(ARGS_WORD);
+		} 
+
+		/* Do non-tabsep look-ahead here. */
+		
+		if ( ! (ARGS_TABSEP & fl))
+			while (buf[*pos]) {
+				if (isspace((int)buf[*pos]))
+					if ('\\' != buf[*pos - 1])
+						break;
+				(*pos)++;
+			}
 
 		if (0 == buf[*pos])
 			return(ARGS_WORD);
 
 		buf[(*pos)++] = 0;
+
 		if (0 == buf[*pos])
 			return(ARGS_WORD);
 
-		while (buf[*pos] && isspace(buf[*pos]))
-			(*pos)++;
+		if ( ! (ARGS_TABSEP & fl))
+			while (buf[*pos] && isspace((int)buf[*pos]))
+				(*pos)++;
 
 		if (buf[*pos])
 			return(ARGS_WORD);
 
-		if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX_WS_EOLN))
+		if ( ! pwarn(mdoc, line, *pos, WTAILWS))
 			return(ARGS_ERROR);
 
 		return(ARGS_WORD);
@@ -107,29 +445,29 @@ mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v)
 		(*pos)++;
 
 	if (0 == buf[*pos]) {
-		(void)mdoc_perr(mdoc, line, *pos, ERR_SYNTAX_UNQUOTE);
+		(void)perr(mdoc, line, *pos, EQUOTTERM);
 		return(ARGS_ERROR);
 	}
 
 	buf[(*pos)++] = 0;
 	if (0 == buf[*pos])
-		return(ARGS_WORD);
+		return(ARGS_QWORD);
 
-	while (buf[*pos] && isspace(buf[*pos]))
+	while (buf[*pos] && isspace((int)buf[*pos]))
 		(*pos)++;
 
 	if (buf[*pos])
-		return(ARGS_WORD);
+		return(ARGS_QWORD);
 
-	if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX_WS_EOLN))
+	if ( ! pwarn(mdoc, line, *pos, WTAILWS))
 		return(ARGS_ERROR);
 
-	return(ARGS_WORD);
+	return(ARGS_QWORD);
 }
 
 
 static int
-lookup(int tok, const char *argv)
+argv_a2arg(int tok, const char *argv)
 {
 
 	switch (tok) {
@@ -145,6 +483,8 @@ lookup(int tok, const char *argv)
 			return(MDOC_Ragged);
 		else if (xstrcmp(argv, "unfilled"))
 			return(MDOC_Unfilled);
+		else if (xstrcmp(argv, "filled"))
+			return(MDOC_Filled);
 		else if (xstrcmp(argv, "literal"))
 			return(MDOC_Literal);
 		else if (xstrcmp(argv, "file"))
@@ -291,92 +631,80 @@ lookup(int tok, const char *argv)
 
 
 static int
-postparse(struct mdoc *mdoc, int line, const struct mdoc_arg *v, int pos)
+argv_multi(struct mdoc *mdoc, int line, 
+		struct mdoc_arg *v, int *pos, char *buf)
 {
+	int		 c, ppos;
+	char		*p;
 
-	switch (v->arg) {
-	case (MDOC_Offset):
-		assert(v->value);
-		assert(v->value[0]);
-		if (xstrcmp(v->value[0], "left"))
-			break;
-		if (xstrcmp(v->value[0], "right"))
-			break;
-		if (xstrcmp(v->value[0], "center"))
-			break;
-		if (xstrcmp(v->value[0], "indent"))
+	v->sz = 0;
+	v->value = xcalloc(MDOC_LINEARG_MAX, sizeof(char *));
+
+	ppos = *pos;
+
+	for (v->sz = 0; v->sz < MDOC_LINEARG_MAX; v->sz++) {
+		if ('-' == buf[*pos])
 			break;
-		if (xstrcmp(v->value[0], "indent-two"))
+		c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p);
+		if (ARGS_ERROR == c) {
+			free(v->value);
+			return(0);
+		} else if (ARGS_EOLN == c)
 			break;
-		return(mdoc_perr(mdoc, line, pos, ERR_SYNTAX_ARGBAD));
-	default:
-		break;
+		v->value[(int)v->sz] = p;
 	}
 
-	return(1);
+	if (0 < v->sz && v->sz < MDOC_LINEARG_MAX)
+		return(1);
+
+	free(v->value);
+	if (0 == v->sz) 
+		return(perr(mdoc, line, ppos, EARGVAL));
+
+	return(perr(mdoc, line, ppos, EARGMANY));
 }
 
 
 static int
-parse(struct mdoc *mdoc, int line, int tok, 
+argv_single(struct mdoc *mdoc, int line, 
 		struct mdoc_arg *v, int *pos, char *buf)
 {
+	int		 c, ppos;
 	char		*p;
-	int		 c, ppos, i;
 
 	ppos = *pos;
 
+	c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p);
+	if (ARGS_ERROR == c)
+		return(0);
+	if (ARGS_EOLN == c)
+		return(perr(mdoc, line, ppos,  EARGVAL));
+
+	v->sz = 1;
+	v->value = xcalloc(1, sizeof(char *));
+	v->value[0] = p;
+	return(1);
+}
+
+
+static int
+argv(struct mdoc *mdoc, int line, 
+		struct mdoc_arg *v, int *pos, char *buf)
+{
+
+	v->sz = 0;
+	v->value = NULL;
+
 	switch (v->arg) {
 	case(MDOC_Std):
 		/* FALLTHROUGH */
 	case(MDOC_Width):
 		/* FALLTHROUGH */
 	case(MDOC_Offset):
-		/*
-		 * This has a single value for an argument.
-		 */
-		c = mdoc_args(mdoc, line, pos, buf, ARGS_QUOTED, &p);
-		if (ARGS_ERROR == c)
-			return(0);
-		else if (ARGS_EOLN != c) {
-			v->sz = 1;
-			v->value = xcalloc(1, sizeof(char *));
-			v->value[0] = p;
-			break;
-		}
-		return(mdoc_perr(mdoc, line, ppos, ERR_SYNTAX_ARGVAL));
-
+		return(argv_single(mdoc, line, v, pos, buf));
 	case(MDOC_Column):
-		/*
-		 * This has several value for a single argument.  We
-		 * pre-allocate a pointer array and don't let it exceed
-		 * this size.
-		 */
-		v->sz = 0;
-		v->value = xcalloc(MDOC_LINEARG_MAX, sizeof(char *));
-		for (i = 0; i < MDOC_LINEARG_MAX; i++) {
-			c = mdoc_args(mdoc, line, pos, buf, ARGS_QUOTED, &p);
-			if (ARGS_ERROR == c) {
-				free(v->value);
-				return(0);
-			} else if (ARGS_EOLN == c)
-				break;
-			v->value[i] = p;
-		}
-		if (0 == i) {
-			free(v->value);
-			return(mdoc_perr(mdoc, line, ppos, 
-						ERR_SYNTAX_ARGVAL));
-		} else if (MDOC_LINEARG_MAX == i)
-			return(mdoc_perr(mdoc, line, ppos, 
-						ERR_SYNTAX_ARGMANY));
-
-		v->sz = i;
-		break;
-
+		return(argv_multi(mdoc, line, v, pos, buf));
 	default:
-		v->sz = 0;
-		v->value = NULL;
 		break;
 	}
 
@@ -388,45 +716,50 @@ int
 mdoc_argv(struct mdoc *mdoc, int line, int tok,
 		struct mdoc_arg *v, int *pos, char *buf)
 {
-	int		 i, ppos;
-	char		*argv;
+	int		 i;
+	char		*p;
 
 	(void)memset(v, 0, sizeof(struct mdoc_arg));
 
 	if (0 == buf[*pos])
 		return(ARGV_EOLN);
 
-	assert( ! isspace(buf[*pos]));
+	assert( ! isspace((int)buf[*pos]));
 
 	if ('-' != buf[*pos])
 		return(ARGV_WORD);
 
 	i = *pos;
-	argv = &buf[++(*pos)];
+	p = &buf[++(*pos)];
 
 	v->line = line;
 	v->pos = *pos;
 
-	while (buf[*pos] && ! isspace(buf[*pos]))
+	assert(*pos > 0);
+
+	/* LINTED */
+	while (buf[*pos]) {
+		if (isspace((int)buf[*pos])) 
+			if ('\\' != buf[*pos - 1])
+				break;
 		(*pos)++;
+	}
 
 	if (buf[*pos])
 		buf[(*pos)++] = 0;
 
-	if (MDOC_ARG_MAX == (v->arg = lookup(tok, argv))) {
-		(void)mdoc_pwarn(mdoc, line, i, WARN_SYNTAX_ARGLIKE);
+	if (MDOC_ARG_MAX == (v->arg = argv_a2arg(tok, p))) {
+		if ( ! pwarn(mdoc, line, i, WARGVPARM))
+			return(ARGV_ERROR);
 		return(ARGV_WORD);
 	}
 
-	while (buf[*pos] && isspace(buf[*pos]))
+	while (buf[*pos] && isspace((int)buf[*pos]))
 		(*pos)++;
 
 	/* FIXME: whitespace if no value. */
 
-	ppos = *pos;
-	if ( ! parse(mdoc, line, tok, v, pos, buf))
-		return(ARGV_ERROR);
-	if ( ! postparse(mdoc, line, v, ppos))
+	if ( ! argv(mdoc, line, v, pos, buf))
 		return(ARGV_ERROR);
 
 	return(ARGV_ARG);