From ae7348914a6e3613b79a674a717da0186553c7e7 Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Fri, 14 May 2010 12:55:22 +0000 Subject: Proper handling of quoted tab-separated column lists. --- libmdoc.h | 10 ++++++---- mdoc_argv.c | 60 ++++++++++++++++++++++++++++++++++++++++-------------------- mdoc_macro.c | 19 +++++++------------ 3 files changed, 53 insertions(+), 36 deletions(-) diff --git a/libmdoc.h b/libmdoc.h index 0d3bad59..095eb5ed 100644 --- a/libmdoc.h +++ b/libmdoc.h @@ -1,4 +1,4 @@ -/* $Id: libmdoc.h,v 1.42 2010/05/13 06:22:11 kristaps Exp $ */ +/* $Id: libmdoc.h,v 1.43 2010/05/14 12:55:22 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -32,6 +32,7 @@ struct mdoc { #define MDOC_LITERAL (1 << 1) /* in a literal scope */ #define MDOC_PBODY (1 << 2) /* in the document body */ #define MDOC_NEWLINE (1 << 3) /* first macro/text in a line */ +#define MDOC_PHRASELIT (1 << 4) /* in a literal within a phrase */ int pflags; enum mdoc_next next; struct mdoc_node *last; @@ -186,9 +187,10 @@ enum margserr mdoc_args(struct mdoc *, int, int *, char *, enum mdoct, char **); enum margserr mdoc_zargs(struct mdoc *, int, int *, char *, int, char **); -#define ARGS_DELIM (1 << 1) /* See args(). */ -#define ARGS_TABSEP (1 << 2) /* See args(). */ -#define ARGS_NOWARN (1 << 3) /* See args(). */ +#define ARGS_DELIM (1 << 1) +#define ARGS_TABSEP (1 << 2) +#define ARGS_NOWARN (1 << 3) +#define ARGS_PPHRASED (1 << 4) int mdoc_macroend(struct mdoc *); diff --git a/mdoc_argv.c b/mdoc_argv.c index 96ccef17..44b72071 100644 --- a/mdoc_argv.c +++ b/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_argv.c,v 1.45 2010/05/09 21:06:50 kristaps Exp $ */ +/* $Id: mdoc_argv.c,v 1.46 2010/05/14 12:55:22 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -398,8 +398,21 @@ args(struct mdoc *m, int line, int *pos, assert(*pos); assert(' ' != buf[*pos]); - if ('\0' == buf[*pos]) + if ('\0' == buf[*pos]) { + if (ARGS_PPHRASED & fl) + return(ARGS_EOLN); + /* + * If we're not in a partial phrase and the flag for + * being a phrase literal is still set, the punctuation + * is unterminated. + */ + if (MDOC_PHRASELIT & m->flags) + if ( ! mdoc_pwarn(m, line, *pos, EQUOTTERM)) + return(ARGS_ERROR); + + m->flags &= ~MDOC_PHRASELIT; return(ARGS_EOLN); + } /* * If the first character is a closing delimiter and we're to @@ -410,7 +423,7 @@ args(struct mdoc *m, int line, int *pos, if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos]) > 1) { for (i = *pos; buf[i]; ) { - if ( mdoc_iscdelim(buf[i]) < 2) + if (mdoc_iscdelim(buf[i]) < 2) break; i++; if ('\0' == buf[i] || ' ' != buf[i]) @@ -444,16 +457,18 @@ args(struct mdoc *m, int line, int *pos, if (ARGS_TABSEP & fl) { /* Scan ahead to tab (can't be escaped). */ p = strchr(*v, '\t'); + pp = NULL; /* Scan ahead to unescaped `Ta'. */ - for (pp = *v; ; pp++) { - if (NULL == (pp = strstr(pp, "Ta"))) - break; - if (pp > *v && ' ' != *(pp - 1)) - continue; - if (' ' == *(pp + 2) || 0 == *(pp + 2)) - break; - } + if ( ! (MDOC_PHRASELIT & m->flags)) + for (pp = *v; ; pp++) { + if (NULL == (pp = strstr(pp, "Ta"))) + break; + if (pp > *v && ' ' != *(pp - 1)) + continue; + if (' ' == *(pp + 2) || 0 == *(pp + 2)) + break; + } /* By default, assume a phrase. */ rc = ARGS_PHRASE; @@ -506,8 +521,12 @@ args(struct mdoc *m, int line, int *pos, * Whitespace is NOT involved in literal termination. */ - if ('\"' == buf[*pos]) { - *v = &buf[++(*pos)]; + if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) { + if ( ! (MDOC_PHRASELIT & m->flags)) + *v = &buf[++(*pos)]; + + if (ARGS_PPHRASED & fl) + m->flags |= MDOC_PHRASELIT; for ( ; buf[*pos]; (*pos)++) { if ('\"' != buf[*pos]) @@ -517,17 +536,18 @@ args(struct mdoc *m, int line, int *pos, (*pos)++; } - if (0 == buf[*pos]) { - if (ARGS_NOWARN & fl) + if ('\0' == buf[*pos]) { + if (ARGS_NOWARN & fl || ARGS_PPHRASED & fl) return(ARGS_QWORD); if ( ! mdoc_pwarn(m, line, *pos, EQUOTTERM)) return(ARGS_ERROR); return(ARGS_QWORD); } - buf[(*pos)++] = 0; + m->flags &= ~MDOC_PHRASELIT; + buf[(*pos)++] = '\0'; - if (0 == buf[*pos]) + if ('\0' == buf[*pos]) return(ARGS_QWORD); while (' ' == buf[*pos]) @@ -549,15 +569,15 @@ args(struct mdoc *m, int line, int *pos, if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) break; - if (0 == buf[*pos]) + if ('\0' == buf[*pos]) return(ARGS_WORD); - buf[(*pos)++] = 0; + buf[(*pos)++] = '\0'; while (' ' == buf[*pos]) (*pos)++; - if (0 == buf[*pos] && ! (ARGS_NOWARN & fl)) + if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl)) if ( ! mdoc_pwarn(m, line, *pos, ETAILWS)) return(ARGS_ERROR); diff --git a/mdoc_macro.c b/mdoc_macro.c index d9cf11da..8f1b9bf5 100644 --- a/mdoc_macro.c +++ b/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_macro.c,v 1.61 2010/05/13 11:34:45 kristaps Exp $ */ +/* $Id: mdoc_macro.c,v 1.62 2010/05/14 12:55:22 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -48,7 +48,7 @@ static int append_delims(struct mdoc *, static enum mdoct lookup(enum mdoct, const char *); static enum mdoct lookup_raw(const char *); static int phrase(struct mdoc *, int, int, - char *, enum margserr, int); + char *, enum margserr); static enum mdoct rew_alt(enum mdoct); static int rew_dobreak(enum mdoct, const struct mdoc_node *); @@ -888,7 +888,7 @@ in_line(MACRO_PROT_ARGS) static int blk_full(MACRO_PROT_ARGS) { - int la, pcnt; + int la; struct mdoc_arg *arg; struct mdoc_node *head; /* save of head macro */ struct mdoc_node *body; /* save of body macro */ @@ -959,7 +959,7 @@ blk_full(MACRO_PROT_ARGS) ac = ARGS_ERROR; - for (pcnt = 0; ; ) { + for ( ; ; ) { la = *pos; lac = ac; ac = mdoc_args(m, line, pos, buf, tok, &p); @@ -1007,7 +1007,7 @@ blk_full(MACRO_PROT_ARGS) else if (ARGS_PEND == ac && ARGS_PHRASE == lac) ac = ARGS_PHRASE; - if ( ! phrase(m, line, la, buf, ac, pcnt++)) + if ( ! phrase(m, line, la, buf, ac)) return(0); if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) return(0); @@ -1536,8 +1536,7 @@ obsolete(MACRO_PROT_ARGS) * macro is encountered. */ static int -phrase(struct mdoc *m, int line, int ppos, char *buf, - enum margserr ac, int count) +phrase(struct mdoc *m, int line, int ppos, char *buf, enum margserr ac) { int la, pos; enum margserr aac; @@ -1548,14 +1547,10 @@ phrase(struct mdoc *m, int line, int ppos, char *buf, ARGS_PEND == ac || ARGS_PPHRASE == ac); - if (count && ARGS_PPHRASE == ac) - return(mdoc_word_alloc(m, line, ppos, &buf[ppos])); - for (pos = ppos; ; ) { la = pos; - /* Note: no calling context! */ - aac = mdoc_zargs(m, line, &pos, buf, 0, &p); + aac = mdoc_zargs(m, line, &pos, buf, ARGS_PPHRASED, &p); if (ARGS_ERROR == aac) return(0); -- cgit v1.2.3-56-ge451