From: Kristaps Dzonsons Date: Fri, 7 May 2010 15:49:36 +0000 (+0000) Subject: De-chunking of text removed from parsers. This is a significant change (and I don... X-Git-Tag: VERSION_1_9_24~13 X-Git-Url: https://git.cameronkatri.com/mandoc.git/commitdiff_plain/98c1a46e2586de979ce0fc696b15d91fb80166c5?hp=20276f6161ced22d83924afefed6d39cb45df6f3 De-chunking of text removed from parsers. This is a significant change (and I don't really like it), but it's what groff does. Distinction of ARGS_PHRASE and ARGS_PPHRASE in backend (not yet used). --- diff --git a/man.7 b/man.7 index 37fbbc94..76c63a38 100644 --- a/man.7 +++ b/man.7 @@ -1,4 +1,4 @@ -.\" $Id: man.7,v 1.62 2010/04/13 05:26:49 kristaps Exp $ +.\" $Id: man.7,v 1.63 2010/05/07 15:49:36 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: April 13 2010 $ +.Dd $Mdocdate: May 7 2010 $ .Dt MAN 7 .Os .Sh NAME @@ -887,11 +887,6 @@ In quoted literals, GNU troff allowed pair-wise double-quotes to produce a standalone double-quote in formatted output. It is not known whether this behaviour is exhibited by other formatters. .It -Blocks of whitespace are stripped from macro and free-form text lines -(except when in literal mode) in mandoc. This is not the case for GNU -troff: for maximum portability, whitespace sensitive blocks should be -enclosed in literal contexts. -.It The .Sx \&sp macro does not accept negative values in mandoc. In GNU troff, this diff --git a/man.c b/man.c index 44cc06b1..f7b2bba3 100644 --- a/man.c +++ b/man.c @@ -1,4 +1,4 @@ -/* $Id: man.c,v 1.60 2010/04/08 07:53:01 kristaps Exp $ */ +/* $Id: man.c,v 1.61 2010/05/07 15:49:36 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -83,8 +83,6 @@ static int man_ptext(struct man *, int, char *); static int man_pmacro(struct man *, int, char *); static void man_free1(struct man *); static void man_alloc1(struct man *); -static int pstring(struct man *, int, int, - const char *, size_t); static int macrowarn(struct man *, int, const char *); @@ -318,35 +316,29 @@ man_block_alloc(struct man *m, int line, int pos, enum mant tok) } -static int -pstring(struct man *m, int line, int pos, - const char *p, size_t len) +int +man_word_alloc(struct man *m, int line, int pos, const char *word) { struct man_node *n; - size_t sv; + size_t sv, len; + + len = strlen(word); n = man_node_alloc(line, pos, MAN_TEXT, MAN_MAX); n->string = mandoc_malloc(len + 1); - sv = strlcpy(n->string, p, len + 1); + sv = strlcpy(n->string, word, len + 1); /* Prohibit truncation. */ assert(sv < len + 1); if ( ! man_node_append(m, n)) return(0); + m->next = MAN_NEXT_SIBLING; return(1); } -int -man_word_alloc(struct man *m, int line, int pos, const char *word) -{ - - return(pstring(m, line, pos, word, strlen(word))); -} - - /* * Free all of the resources held by a node. This does NOT unlink a * node from its context; for that, see man_node_unlink(). @@ -376,8 +368,7 @@ man_node_delete(struct man *m, struct man_node *p) static int man_ptext(struct man *m, int line, char *buf) { - int i, j; - char sv; + int i; /* Ignore bogus comments. */ @@ -392,61 +383,32 @@ man_ptext(struct man *m, int line, char *buf) goto descope; } - /* First de-chunk and allocate words. */ + /* Pump blank lines directly into the backend. */ for (i = 0; ' ' == buf[i]; i++) /* Skip leading whitespace. */ ; if ('\0' == buf[i]) { - /* Trailing whitespace? */ - if (i && ' ' == buf[i - 1]) - if ( ! man_pwarn(m, line, i - 1, WTSPACE)) - return(0); - if ( ! pstring(m, line, 0, &buf[i], 0)) + /* Allocate a blank entry. */ + if ( ! man_word_alloc(m, line, 0, "")) return(0); goto descope; } - for (j = i; buf[i]; i++) { - if (' ' != buf[i]) - continue; + /* Warn if the last un-escaped character is whitespace. */ - /* Escaped whitespace. */ - if (i && ' ' == buf[i] && '\\' == buf[i - 1]) - continue; + i = (int)strlen(buf); + assert(i); - sv = buf[i]; - buf[i++] = '\0'; - - if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) - return(0); - - /* Trailing whitespace? Check at overwritten byte. */ - - if (' ' == sv && '\0' == buf[i]) + if (' ' == buf[i - 1] || '\t' == buf[i - 1]) + if (1 == i || ('\\' != buf[i - 2])) if ( ! man_pwarn(m, line, i - 1, WTSPACE)) return(0); - for ( ; ' ' == buf[i]; i++) - /* Skip trailing whitespace. */ ; - - j = i; - - /* Trailing whitespace? */ - - if (' ' == buf[i - 1] && '\0' == buf[i]) - if ( ! man_pwarn(m, line, i - 1, WTSPACE)) - return(0); - - if ('\0' == buf[i]) - break; - } - - if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) + if ( ! man_word_alloc(m, line, 0, buf)) return(0); descope: - /* * Co-ordinate what happens with having a next-line scope open: * first close out the element scope (if applicable), then close diff --git a/mdoc.7 b/mdoc.7 index 42969fb7..e2eaa5e2 100644 --- a/mdoc.7 +++ b/mdoc.7 @@ -1,4 +1,4 @@ -.\" $Id: mdoc.7,v 1.94 2010/04/13 05:26:49 kristaps Exp $ +.\" $Id: mdoc.7,v 1.95 2010/05/07 15:49:36 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: April 13 2010 $ +.Dd $Mdocdate: May 7 2010 $ .Dt MDOC 7 .Os .Sh NAME @@ -1824,10 +1824,6 @@ are aliases, as are and .Fl unfilled . .It -In mandoc, blocks of whitespace are stripped from both macro and -free-form text lines (except when in literal mode); groff would retain -whitespace in free-form text lines. -.It Historic groff has many un-callable macros. Most of these (excluding some block-level macros) are now callable. .It diff --git a/mdoc.c b/mdoc.c index db962970..e3741cdd 100644 --- a/mdoc.c +++ b/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.123 2010/04/08 07:53:01 kristaps Exp $ */ +/* $Id: mdoc.c,v 1.124 2010/05/07 15:49:36 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -154,8 +154,7 @@ static int node_append(struct mdoc *, static int mdoc_ptext(struct mdoc *, int, char *); static int mdoc_pmacro(struct mdoc *, int, char *); static int macrowarn(struct mdoc *, int, const char *); -static int pstring(struct mdoc *, int, int, - const char *, size_t); + const struct mdoc_node * mdoc_node(const struct mdoc *m) @@ -535,11 +534,13 @@ mdoc_elem_alloc(struct mdoc *m, int line, int pos, } -static int -pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) +int +mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) { struct mdoc_node *n; - size_t sv; + size_t sv, len; + + len = strlen(p); n = node_alloc(m, line, pos, -1, MDOC_TEXT); n->string = mandoc_malloc(len + 1); @@ -550,19 +551,12 @@ pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) if ( ! node_append(m, n)) return(0); + m->next = MDOC_NEXT_SIBLING; return(1); } -int -mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) -{ - - return(pstring(m, line, pos, p, strlen(p))); -} - - void mdoc_node_free(struct mdoc_node *p) { @@ -633,88 +627,57 @@ mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) static int mdoc_ptext(struct mdoc *m, int line, char *buf) { - int i, j; - char sv; + int i; /* Ignore bogus comments. */ if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) return(mdoc_pwarn(m, line, 0, EBADCOMMENT)); + /* No text before an initial macro. */ + if (SEC_NONE == m->lastnamed) return(mdoc_perr(m, line, 0, ETEXTPROL)); - - /* - * If in literal mode, then pass the buffer directly to the - * back-end, as it should be preserved as a single term. - */ + /* Literal just gets pulled in as-is. */ + if (MDOC_LITERAL & m->flags) return(mdoc_word_alloc(m, line, 0, buf)); - /* Disallow blank/white-space lines in non-literal mode. */ + /* Check for a blank line, which may also consist of spaces. */ for (i = 0; ' ' == buf[i]; i++) - /* Skip leading whitespace. */ ; + /* Skip to first non-space. */ ; if ('\0' == buf[i]) { if ( ! mdoc_pwarn(m, line, 0, ENOBLANK)) return(0); + /* - * Assume that a `Pp' should be inserted in the case of - * a blank line. Technically, blank lines aren't - * allowed, but enough manuals assume this behaviour - * that we want to work around it. + * Insert a `Pp' in the case of a blank line. Technically, + * blank lines aren't allowed, but enough manuals assume this + * behaviour that we want to work around it. */ if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL)) return(0); - } - - /* - * Break apart a free-form line into tokens. Spaces are - * stripped out of the input. - */ - for (j = i; buf[i]; i++) { - if (' ' != buf[i]) - continue; - - /* Escaped whitespace. */ - if (i && ' ' == buf[i] && '\\' == buf[i - 1]) - continue; - - sv = buf[i]; - buf[i++] = '\0'; - - if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) - return(0); - - /* Trailing whitespace? Check at overwritten byte. */ - - if (' ' == sv && '\0' == buf[i]) - if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) - return(0); - - for ( ; ' ' == buf[i]; i++) - /* Skip trailing whitespace. */ ; + m->next = MDOC_NEXT_SIBLING; + return(1); + } - j = i; + /* Warn if the last un-escaped character is whitespace. */ - /* Trailing whitespace? */ + i = (int)strlen(buf); + assert(i); - if (' ' == buf[i - 1] && '\0' == buf[i]) + if (' ' == buf[i - 1] || '\t' == buf[i - 1]) + if (1 == i || ('\\' != buf[i - 2])) if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) return(0); - if ('\0' == buf[i]) - break; - } - - if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) - return(0); + /* Allocate the whole word. */ - m->next = MDOC_NEXT_SIBLING; - return(1); + return(mdoc_word_alloc(m, line, 0, buf)); } diff --git a/mdoc_argv.c b/mdoc_argv.c index b68d2c74..c1a5ee31 100644 --- a/mdoc_argv.c +++ b/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_argv.c,v 1.41 2010/05/07 06:05:38 kristaps Exp $ */ +/* $Id: mdoc_argv.c,v 1.42 2010/05/07 15:49:36 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -398,7 +398,7 @@ args(struct mdoc *m, int line, int *pos, assert(*pos); assert(' ' != buf[*pos]); - if (0 == buf[*pos]) + if ('\0' == buf[*pos]) return(ARGS_EOLN); /* @@ -441,7 +441,6 @@ args(struct mdoc *m, int line, int *pos, */ if (ARGS_TABSEP & fl) { - rc = ARGS_PHRASE; /* Scan ahead to tab (can't be escaped). */ p = strchr(*v, '\t'); @@ -455,14 +454,19 @@ args(struct mdoc *m, int line, int *pos, break; } + /* By default, assume a phrase. */ + rc = ARGS_PHRASE; + /* * Adjust new-buffer position to be beyond delimiter * mark (e.g., Ta -> end + 2). */ if (p && pp) { *pos += pp < p ? 2 : 1; + rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; p = pp < p ? pp : p; } else if (p && ! pp) { + rc = ARGS_PPHRASE; *pos += 1; } else if (pp && ! p) { p = pp;