From 632038c8c98f1c408bb368edf910b849f186dc8a Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Tue, 16 Jun 2009 19:13:28 +0000 Subject: Modernised comment handling: text following \" is thrown away before either parser is invoked. Single-dot lines correctly handled. This conforms with both new- and old-groff. "Comment" subsection added to mdoc.7 and man.7. --- main.c | 65 +++++++++++++++++++++++++++++++++++++++-------------------------- man.7 | 14 ++++++++++++-- man.c | 6 +----- mdoc.7 | 16 +++++++++++++--- mdoc.c | 8 ++------ 5 files changed, 67 insertions(+), 42 deletions(-) diff --git a/main.c b/main.c index ce6d4a91..286b5bd6 100644 --- a/main.c +++ b/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.28 2009/06/15 10:36:01 kristaps Exp $ */ +/* $Id: main.c,v 1.29 2009/06/16 19:13:28 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -74,12 +74,12 @@ struct curparse { #define NO_IGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */ #define NO_IGN_MACRO (1 << 2) /* Don't ignore bad macros. */ #define NO_IGN_CHARS (1 << 3) /* Don't ignore bad chars. */ - enum intt inttype; /* Input parsers. */ + enum intt inttype; /* Input parsers... */ struct man *man; struct man *lastman; struct mdoc *mdoc; struct mdoc *lastmdoc; - enum outt outtype; /* Output devices. */ + enum outt outtype; /* Output devices... */ out_mdoc outmdoc; out_man outman; out_free outfree; @@ -325,7 +325,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) size_t sz; ssize_t ssz; struct stat st; - int j, i, pos, lnn; + int j, i, pos, lnn, comment; struct man *man; struct mdoc *mdoc; @@ -355,7 +355,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) /* Fill buf with file blocksize. 
*/ - for (lnn = 0, pos = 0; ; ) { + for (lnn = pos = comment = 0; ; ) { if (-1 == (ssz = read(curp->fd, blk->buf, sz))) { warn("%s", curp->file); return(0); @@ -375,17 +375,34 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) } if ('\n' != blk->buf[i]) { + if (comment) + continue; ln->buf[pos++] = blk->buf[i]; + + /* Handle in-line `\"' comments. */ + + if (1 == pos || '\"' != ln->buf[pos - 1]) + continue; + + for (j = pos - 2; j >= 0; j--) + if ('\\' != ln->buf[j]) + break; + + if ( ! ((pos - 2 - j) % 2)) + continue; + + comment = 1; + pos -= 2; continue; - } + } - /* Check for CPP-escaped newline. */ + /* Handle escaped `\\n' newlines. */ - if (pos > 0 && '\\' == ln->buf[pos - 1]) { + if (pos > 0 && 0 == comment && + '\\' == ln->buf[pos - 1]) { for (j = pos - 1; j >= 0; j--) if ('\\' != ln->buf[j]) break; - if ( ! ((pos - j) % 2)) { pos--; lnn++; @@ -395,19 +412,14 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) ln->buf[pos] = 0; lnn++; - - /* - * If no manual parser has been assigned, then - * try to assign one in pset(), which may do - * nothing at all. After this, parse the manual - * line accordingly. - */ + + /* If unset, assign parser in pset(). */ if ( ! (man || mdoc) && ! pset(ln->buf, pos, curp, &man, &mdoc)) return(0); - pos = 0; + pos = comment = 0; if (man && ! man_parseln(man, lnn, ln->buf)) return(0); @@ -416,7 +428,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) } } - /* Note that a parser may not have been assigned, yet. */ + /* NOTE a parser may not have been assigned, yet. */ if ( ! (man || mdoc)) { warnx("%s: not a manual", curp->file); @@ -428,12 +440,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) if (man && ! man_endparse(man)) return(0); - /* - * If an output device hasn't been allocated, see if we should - * do so now. Note that not all outtypes have functions, so - * this switch statement may be superfluous, but it's - * low-overhead enough not to matter very much. 
- */ + /* If unset, allocate output dev now (if applicable). */ if ( ! (curp->outman && curp->outmdoc)) { switch (curp->outtype) { @@ -469,6 +476,7 @@ static int pset(const char *buf, int pos, struct curparse *curp, struct man **man, struct mdoc **mdoc) { + int i; /* * Try to intuit which kind of manual parser should be used. If @@ -478,8 +486,13 @@ pset(const char *buf, int pos, struct curparse *curp, * default to -man, which is more lenient. */ - if (pos >= 3 && 0 == memcmp(buf, ".\\\"", 3)) - return(1); + if (buf[0] == '.') { + for (i = 1; buf[i]; i++) + if (' ' != buf[i] && '\t' != buf[i]) + break; + if (0 == buf[i]) + return(1); + } switch (curp->inttype) { case (INTT_MDOC): diff --git a/man.7 b/man.7 index e9dd35d9..fa473867 100644 --- a/man.7 +++ b/man.7 @@ -1,4 +1,4 @@ -.\" $Id: man.7,v 1.12 2009/06/11 07:26:35 kristaps Exp $ +.\" $Id: man.7,v 1.13 2009/06/16 19:13:28 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: June 11 2009 $ +.Dd $Mdocdate: June 16 2009 $ .Dt MAN 7 .Os .\" SECTION @@ -70,6 +70,16 @@ escape is common in historical documents; if encountered at the end of a word, it ensures that the subsequent word isn't off-set by whitespace. .\" SUB-SECTION +.Ss Comments +Anything following a +.Sq \e" +delimiter is considered a comment (unless the +.Sq \e +itself has been escaped) and is ignored to the end of line. +Furthermore, a macro line with only a control character +.Sq \. , +optionally followed by whitespace, is ignored. 
+.\" SUB-SECTION .Ss Special Characters Special character sequences begin with the escape character .Sq \e diff --git a/man.c b/man.c index 56b0721c..5b83d958 100644 --- a/man.c +++ b/man.c @@ -1,4 +1,4 @@ -/* $Id: man.c,v 1.19 2009/06/10 20:18:43 kristaps Exp $ */ +/* $Id: man.c,v 1.20 2009/06/16 19:13:28 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -324,10 +324,6 @@ man_pmacro(struct man *m, int ln, char *buf) ppos = i; - if (buf[i] && '\\' == buf[i]) - if (buf[i + 1] && '\"' == buf[i + 1]) - goto out; - /* Copy the first word into a nil-terminated buffer. */ for (j = 0; j < 4; j++, i++) { diff --git a/mdoc.7 b/mdoc.7 index 23626653..35d1fb38 100644 --- a/mdoc.7 +++ b/mdoc.7 @@ -1,4 +1,4 @@ -.\" $Id: mdoc.7,v 1.28 2009/06/12 12:40:44 kristaps Exp $ +.\" $Id: mdoc.7,v 1.29 2009/06/16 19:13:28 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: June 12 2009 $ +.Dd $Mdocdate: June 16 2009 $ .Dt MDOC 7 .Os .\" SECTION @@ -78,9 +78,19 @@ or .Sq \&.Bd \-unfilled contexts. .\" SUB-SECTION +.Ss Comments +Anything following a +.Sq \e" +delimiter is considered a comment (unless the +.Sq \e +itself has been escaped) and is ignored to the end of line. +Furthermore, a macro line with only a control character +.Sq \. , +optionally followed by whitespace, is ignored. +.\" SUB-SECTION .Ss Reserved Characters Within a macro line, the following characters are reserved: -.Bl -tag -width 12n -offset XXXX -compact +.Bl -tag -width Ds -offset XXXX -compact .It \&. 
.Pq period .It \&, diff --git a/mdoc.c b/mdoc.c index c1d07afa..3808f144 100644 --- a/mdoc.c +++ b/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.80 2009/06/15 10:36:01 kristaps Exp $ */ +/* $Id: mdoc.c,v 1.81 2009/06/16 19:13:28 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -657,7 +657,7 @@ parsemacro(struct mdoc *m, int ln, char *buf) int i, c; char mac[5]; - /* Comments and empties are quickly ignored. */ + /* Empty lines are ignored. */ if (0 == buf[1]) return(1); @@ -671,10 +671,6 @@ parsemacro(struct mdoc *m, int ln, char *buf) return(perr(m, ln, 1, ESPACE)); } - if (buf[1] && '\\' == buf[1]) - if (buf[2] && '\"' == buf[2]) - return(1); - /* Copy the first word into a nil-terminated buffer. */ for (i = 1; i < 5; i++) { -- cgit v1.2.3-56-ge451