summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2009-06-16 19:13:28 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2009-06-16 19:13:28 +0000
commit632038c8c98f1c408bb368edf910b849f186dc8a (patch)
tree000479224756a6b4a7749394e9208b7308c220ab
parentb06a75d0798f3ff62d04b3ade52cf34a4ff94613 (diff)
downloadmandoc-632038c8c98f1c408bb368edf910b849f186dc8a.tar.gz
mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.tar.zst
mandoc-632038c8c98f1c408bb368edf910b849f186dc8a.zip
Modernised comment handling: text following \" is thrown away before
either parser is invoked. Single-dot lines correctly handled. This conforms with both new- and old-groff. "Comment" subsection added to mdoc.7 and man.7.
-rw-r--r--main.c65
-rw-r--r--man.714
-rw-r--r--man.c6
-rw-r--r--mdoc.716
-rw-r--r--mdoc.c8
5 files changed, 67 insertions, 42 deletions
diff --git a/main.c b/main.c
index ce6d4a91..286b5bd6 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.28 2009/06/15 10:36:01 kristaps Exp $ */
+/* $Id: main.c,v 1.29 2009/06/16 19:13:28 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -74,12 +74,12 @@ struct curparse {
#define NO_IGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
#define NO_IGN_MACRO (1 << 2) /* Don't ignore bad macros. */
#define NO_IGN_CHARS (1 << 3) /* Don't ignore bad chars. */
- enum intt inttype; /* Input parsers. */
+ enum intt inttype; /* Input parsers... */
struct man *man;
struct man *lastman;
struct mdoc *mdoc;
struct mdoc *lastmdoc;
- enum outt outtype; /* Output devices. */
+ enum outt outtype; /* Output devices... */
out_mdoc outmdoc;
out_man outman;
out_free outfree;
@@ -325,7 +325,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
size_t sz;
ssize_t ssz;
struct stat st;
- int j, i, pos, lnn;
+ int j, i, pos, lnn, comment;
struct man *man;
struct mdoc *mdoc;
@@ -355,7 +355,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
/* Fill buf with file blocksize. */
- for (lnn = 0, pos = 0; ; ) {
+ for (lnn = pos = comment = 0; ; ) {
if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
warn("%s", curp->file);
return(0);
@@ -375,17 +375,34 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
}
if ('\n' != blk->buf[i]) {
+ if (comment)
+ continue;
ln->buf[pos++] = blk->buf[i];
+
+ /* Handle in-line `\"' comments. */
+
+ if (1 == pos || '\"' != ln->buf[pos - 1])
+ continue;
+
+ for (j = pos - 2; j >= 0; j--)
+ if ('\\' != ln->buf[j])
+ break;
+
+ if ( ! ((pos - 2 - j) % 2))
+ continue;
+
+ comment = 1;
+ pos -= 2;
continue;
- }
+ }
- /* Check for CPP-escaped newline. */
+ /* Handle escaped `\\n' newlines. */
- if (pos > 0 && '\\' == ln->buf[pos - 1]) {
+ if (pos > 0 && 0 == comment &&
+ '\\' == ln->buf[pos - 1]) {
for (j = pos - 1; j >= 0; j--)
if ('\\' != ln->buf[j])
break;
-
if ( ! ((pos - j) % 2)) {
pos--;
lnn++;
@@ -395,19 +412,14 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
ln->buf[pos] = 0;
lnn++;
-
- /*
- * If no manual parser has been assigned, then
- * try to assign one in pset(), which may do
- * nothing at all. After this, parse the manual
- * line accordingly.
- */
+
+ /* If unset, assign parser in pset(). */
if ( ! (man || mdoc) && ! pset(ln->buf,
pos, curp, &man, &mdoc))
return(0);
- pos = 0;
+ pos = comment = 0;
if (man && ! man_parseln(man, lnn, ln->buf))
return(0);
@@ -416,7 +428,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
}
}
- /* Note that a parser may not have been assigned, yet. */
+ /* NOTE a parser may not have been assigned, yet. */
if ( ! (man || mdoc)) {
warnx("%s: not a manual", curp->file);
@@ -428,12 +440,7 @@ fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
if (man && ! man_endparse(man))
return(0);
- /*
- * If an output device hasn't been allocated, see if we should
- * do so now. Note that not all outtypes have functions, so
- * this switch statement may be superfluous, but it's
- * low-overhead enough not to matter very much.
- */
+ /* If unset, allocate output dev now (if applicable). */
if ( ! (curp->outman && curp->outmdoc)) {
switch (curp->outtype) {
@@ -469,6 +476,7 @@ static int
pset(const char *buf, int pos, struct curparse *curp,
struct man **man, struct mdoc **mdoc)
{
+ int i;
/*
* Try to intuit which kind of manual parser should be used. If
@@ -478,8 +486,13 @@ pset(const char *buf, int pos, struct curparse *curp,
* default to -man, which is more lenient.
*/
- if (pos >= 3 && 0 == memcmp(buf, ".\\\"", 3))
- return(1);
+ if (buf[0] == '.') {
+ for (i = 1; buf[i]; i++)
+ if (' ' != buf[i] && '\t' != buf[i])
+ break;
+ if (0 == buf[i])
+ return(1);
+ }
switch (curp->inttype) {
case (INTT_MDOC):
diff --git a/man.7 b/man.7
index e9dd35d9..fa473867 100644
--- a/man.7
+++ b/man.7
@@ -1,4 +1,4 @@
-.\" $Id: man.7,v 1.12 2009/06/11 07:26:35 kristaps Exp $
+.\" $Id: man.7,v 1.13 2009/06/16 19:13:28 kristaps Exp $
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: June 11 2009 $
+.Dd $Mdocdate: June 16 2009 $
.Dt MAN 7
.Os
.\" SECTION
@@ -70,6 +70,16 @@ escape is common in historical
documents; if encountered at the end of a word, it ensures that the
subsequent word isn't off-set by whitespace.
.\" SUB-SECTION
+.Ss Comments
+Anything following a
+.Sq \e"
+delimiter is considered a comment (unless the
+.Sq \e
+itself has been escaped) and is ignored to the end of line.
+Furthermore, a macro line with only a control character
+.Sq \. ,
+optionally followed by whitespace, is ignored.
+.\" SUB-SECTION
.Ss Special Characters
Special character sequences begin with the escape character
.Sq \e
diff --git a/man.c b/man.c
index 56b0721c..5b83d958 100644
--- a/man.c
+++ b/man.c
@@ -1,4 +1,4 @@
-/* $Id: man.c,v 1.19 2009/06/10 20:18:43 kristaps Exp $ */
+/* $Id: man.c,v 1.20 2009/06/16 19:13:28 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -324,10 +324,6 @@ man_pmacro(struct man *m, int ln, char *buf)
ppos = i;
- if (buf[i] && '\\' == buf[i])
- if (buf[i + 1] && '\"' == buf[i + 1])
- goto out;
-
/* Copy the first word into a nil-terminated buffer. */
for (j = 0; j < 4; j++, i++) {
diff --git a/mdoc.7 b/mdoc.7
index 23626653..35d1fb38 100644
--- a/mdoc.7
+++ b/mdoc.7
@@ -1,4 +1,4 @@
-.\" $Id: mdoc.7,v 1.28 2009/06/12 12:40:44 kristaps Exp $
+.\" $Id: mdoc.7,v 1.29 2009/06/16 19:13:28 kristaps Exp $
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: June 12 2009 $
+.Dd $Mdocdate: June 16 2009 $
.Dt MDOC 7
.Os
.\" SECTION
@@ -78,9 +78,19 @@ or
.Sq \&.Bd \-unfilled
contexts.
.\" SUB-SECTION
+.Ss Comments
+Anything following a
+.Sq \e"
+delimiter is considered a comment (unless the
+.Sq \e
+itself has been escaped) and is ignored to the end of line.
+Furthermore, a macro line with only a control character
+.Sq \. ,
+optionally followed by whitespace, is ignored.
+.\" SUB-SECTION
.Ss Reserved Characters
Within a macro line, the following characters are reserved:
-.Bl -tag -width 12n -offset XXXX -compact
+.Bl -tag -width Ds -offset XXXX -compact
.It \&.
.Pq period
.It \&,
diff --git a/mdoc.c b/mdoc.c
index c1d07afa..3808f144 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc.c,v 1.80 2009/06/15 10:36:01 kristaps Exp $ */
+/* $Id: mdoc.c,v 1.81 2009/06/16 19:13:28 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -657,7 +657,7 @@ parsemacro(struct mdoc *m, int ln, char *buf)
int i, c;
char mac[5];
- /* Comments and empties are quickly ignored. */
+ /* Empty lines are ignored. */
if (0 == buf[1])
return(1);
@@ -671,10 +671,6 @@ parsemacro(struct mdoc *m, int ln, char *buf)
return(perr(m, ln, 1, ESPACE));
}
- if (buf[1] && '\\' == buf[1])
- if (buf[2] && '\"' == buf[2])
- return(1);
-
/* Copy the first word into a nil-terminated buffer. */
for (i = 1; i < 5; i++) {