Modernised comment handling: text following \" is thrown away before either parser is invoked. Single-dot lines are now handled correctly. This conforms with both new- and old-groff. A "Comments" subsection has been added to mdoc.7 and man.7.
author: Kristaps Dzonsons <kristaps@bsd.lv> 2009-06-16 19:13:28 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2009-06-16 19:13:28 +0000
commit: 632038c8c98f1c408bb368edf910b849f186dc8a (patch)
tree: 000479224756a6b4a7749394e9208b7308c220ab
parent: b06a75d0798f3ff62d04b3ade52cf34a4ff94613 (diff)
download: mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.tar.gz
mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.tar.zst
mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.zip
5 files changed, 67 insertions, 42 deletions
diff --git a/main.c b/main.c
index ce6d4a91..286b5bd6 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,4 @@
-/*	$Id: main.c,v 1.28 2009/06/15 10:36:01 kristaps Exp $ */
+/*	$Id: main.c,v 1.29 2009/06/16 19:13:28 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -74,12 +74,12 @@ struct	curparse {
 #define	NO_IGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
 #define	NO_IGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
 #define	NO_IGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
-	enum intt	  inttype;	/* Input parsers. */
+	enum intt	  inttype;	/* Input parsers... */
 	struct man	 *man;
 	struct man	 *lastman;
 	struct mdoc	 *mdoc;
 	struct mdoc	 *lastmdoc;
-	enum outt	  outtype;	/* Output devices. */
+	enum outt	  outtype;	/* Output devices... */
 	out_mdoc	  outmdoc;
 	out_man	  	  outman;
 	out_free	  outfree;
@@ -325,7 +325,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
 	size_t		 sz;
 	ssize_t		 ssz;
 	struct stat	 st;
-	int		 j, i, pos, lnn;
+	int		 j, i, pos, lnn, comment;
 	struct man	*man;
 	struct mdoc	*mdoc;
 
@@ -355,7 +355,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
 
 	/* Fill buf with file blocksize. */
 
-	for (lnn = 0, pos = 0; ; ) {
+	for (lnn = pos = comment = 0; ; ) {
 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
 			warn("%s", curp->file);
 			return(0);
@@ -375,17 +375,34 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
 			}
 
 			if ('\n' != blk->buf[i]) {
+				if (comment)
+					continue;
 				ln->buf[pos++] = blk->buf[i];
+
+				/* Handle in-line `\"' comments. */
+
+				if (1 == pos || '\"' != ln->buf[pos - 1])
+					continue;
+
+				for (j = pos - 2; j >= 0; j--)
+					if ('\\' != ln->buf[j])
+						break;
+
+				if ( ! ((pos - 2 - j) % 2))
+					continue;
+
+				comment = 1;
+				pos -= 2;
 				continue;
-			}
+			} 
 
-			/* Check for CPP-escaped newline. */
+			/* Handle escaped `\\n' newlines. */
 
-			if (pos > 0 && '\\' == ln->buf[pos - 1]) {
+			if (pos > 0 && 0 == comment && 
+					'\\' == ln->buf[pos - 1]) {
 				for (j = pos - 1; j >= 0; j--)
 					if ('\\' != ln->buf[j])
 						break;
-
 				if ( ! ((pos - j) % 2)) {
 					pos--;
 					lnn++;
@@ -395,19 +412,14 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
 
 			ln->buf[pos] = 0;
 			lnn++;
-			
-			/*
-			 * If no manual parser has been assigned, then
-			 * try to assign one in pset(), which may do
-			 * nothing at all.  After this, parse the manual
-			 * line accordingly.
-			 */
+
+			/* If unset, assign parser in pset(). */
 
 			if ( ! (man || mdoc) && ! pset(ln->buf, 
 						pos, curp, &man, &mdoc))
 				return(0);
 
-			pos = 0;
+			pos = comment = 0;
 
 			if (man && ! man_parseln(man, lnn, ln->buf))
 				return(0);
@@ -416,7 +428,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
 		}
 	}
 
-	/* Note that a parser may not have been assigned, yet. */
+	/* NOTE a parser may not have been assigned, yet. */
 
 	if ( ! (man || mdoc)) {
 		warnx("%s: not a manual", curp->file);
@@ -428,12 +440,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
 	if (man && ! man_endparse(man))
 		return(0);
 
-	/*
-	 * If an output device hasn't been allocated, see if we should
-	 * do so now.  Note that not all outtypes have functions, so
-	 * this switch statement may be superfluous, but it's
-	 * low-overhead enough not to matter very much.
-	 */
+	/* If unset, allocate output dev now (if applicable). */
 
 	if ( ! (curp->outman && curp->outmdoc)) {
 		switch (curp->outtype) {
@@ -469,6 +476,7 @@ static int
 pset(const char *buf, int pos, struct curparse *curp,
 		struct man **man, struct mdoc **mdoc)
 {
+	int		 i;
 
 	/*
 	 * Try to intuit which kind of manual parser should be used.  If
@@ -478,8 +486,13 @@ pset(const char *buf, int pos, struct curparse *curp,
 	 * default to -man, which is more lenient.
 	 */
 
-	if (pos >= 3 && 0 == memcmp(buf, ".\\\"", 3))
-		return(1);
+	if (buf[0] == '.') {
+		for (i = 1; buf[i]; i++)
+			if (' ' != buf[i] && '\t' != buf[i])
+				break;
+		if (0 == buf[i])
+			return(1);
+	}
 
 	switch (curp->inttype) {
 	case (INTT_MDOC):
diff --git a/man.7 b/man.7
index e9dd35d9..fa473867 100644
--- a/man.7
+++ b/man.7
@@ -1,4 +1,4 @@
-.\"	$Id: man.7,v 1.12 2009/06/11 07:26:35 kristaps Exp $
+.\"	$Id: man.7,v 1.13 2009/06/16 19:13:28 kristaps Exp $
 .\"
 .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: June 11 2009 $
+.Dd $Mdocdate: June 16 2009 $
 .Dt MAN 7
 .Os
 .\" SECTION
@@ -70,6 +70,16 @@ escape is common in historical
 documents; if encountered at the end of a word, it ensures that the
 subsequent word isn't off-set by whitespace.
 .\" SUB-SECTION
+.Ss Comments
+Anything following a
+.Sq \e" 
+delimiter is considered a comment (unless the 
+.Sq \e
+itself has been escaped) and is ignored to the end of line.
+Furthermore, a macro line with only a control character
+.Sq \. ,
+optionally followed by whitespace, is ignored.
+.\" SUB-SECTION
 .Ss Special Characters
 Special character sequences begin with the escape character
 .Sq \e
diff --git a/man.c b/man.c
index 56b0721c..5b83d958 100644
--- a/man.c
+++ b/man.c
@@ -1,4 +1,4 @@
-/*	$Id: man.c,v 1.19 2009/06/10 20:18:43 kristaps Exp $ */
+/*	$Id: man.c,v 1.20 2009/06/16 19:13:28 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -324,10 +324,6 @@ man_pmacro(struct man *m, int ln, char *buf)
 
 	ppos = i;
 
-	if (buf[i] && '\\' == buf[i])
-		if (buf[i + 1] && '\"' == buf[i + 1])
-			goto out;
-
 	/* Copy the first word into a nil-terminated buffer. */
 
 	for (j = 0; j < 4; j++, i++) {
diff --git a/mdoc.7 b/mdoc.7
index 23626653..35d1fb38 100644
--- a/mdoc.7
+++ b/mdoc.7
@@ -1,4 +1,4 @@
-.\"	$Id: mdoc.7,v 1.28 2009/06/12 12:40:44 kristaps Exp $
+.\"	$Id: mdoc.7,v 1.29 2009/06/16 19:13:28 kristaps Exp $
 .\"
 .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\" 
-.Dd $Mdocdate: June 12 2009 $
+.Dd $Mdocdate: June 16 2009 $
 .Dt MDOC 7
 .Os
 .\" SECTION
@@ -78,9 +78,19 @@ or
 .Sq \&.Bd \-unfilled
 contexts.
 .\" SUB-SECTION
+.Ss Comments
+Anything following a
+.Sq \e" 
+delimiter is considered a comment (unless the 
+.Sq \e
+itself has been escaped) and is ignored to the end of line.
+Furthermore, a macro line with only a control character
+.Sq \. ,
+optionally followed by whitespace, is ignored.
+.\" SUB-SECTION
 .Ss Reserved Characters
 Within a macro line, the following characters are reserved:
-.Bl -tag -width 12n -offset XXXX -compact
+.Bl -tag -width Ds -offset XXXX -compact
 .It \&.
 .Pq period
 .It \&,
diff --git a/mdoc.c b/mdoc.c
index c1d07afa..3808f144 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -1,4 +1,4 @@
-/*	$Id: mdoc.c,v 1.80 2009/06/15 10:36:01 kristaps Exp $ */
+/*	$Id: mdoc.c,v 1.81 2009/06/16 19:13:28 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -657,7 +657,7 @@ parsemacro(struct mdoc *m, int ln, char *buf)
 	int		  i, c;
 	char		  mac[5];
 
-	/* Comments and empties are quickly ignored. */
+	/* Empty lines are ignored. */
 
 	if (0 == buf[1])
 		return(1);
@@ -671,10 +671,6 @@ parsemacro(struct mdoc *m, int ln, char *buf)
 		return(perr(m, ln, 1, ESPACE));
 	}
 
-	if (buf[1] && '\\' == buf[1])
-		if (buf[2] && '\"' == buf[2])
-			return(1);
-
 	/* Copy the first word into a nil-terminated buffer. */
 
 	for (i = 1; i < 5; i++) {
author	Kristaps Dzonsons <kristaps@bsd.lv>	2009-06-16 19:13:28 +0000
committer	Kristaps Dzonsons <kristaps@bsd.lv>	2009-06-16 19:13:28 +0000
commit	632038c8c98f1c408bb368edf910b849f186dc8a (patch)
tree	000479224756a6b4a7749394e9208b7308c220ab
parent	b06a75d0798f3ff62d04b3ade52cf34a4ff94613 (diff)
download	mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.tar.gz mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.tar.zst mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.zip