about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2022-05-01 16:22:06 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2022-05-01 16:22:06 +0000
commit fc560c515dc97bd36329ee99e655bd3e30c705da (patch)
tree 9b4867f531f1d576b379113305aa84f97715244d
parent b592e2d9ee8dd845f058dac8ae07a679a4bd10e2 (diff)
download mandoc-fc560c515dc97bd36329ee99e655bd3e30c705da.tar.gz
mandoc-fc560c515dc97bd36329ee99e655bd3e30c705da.tar.zst
mandoc-fc560c515dc97bd36329ee99e655bd3e30c705da.zip
Split a new function roff_parse_comment() out of roff_expand() because this
functionality is not needed when called from roff_getarg(). This makes the long and complicated function roff_expand() significantly shorter, and also simpler in so far as it no longer needs to return ROFF_APPEND. No functional change intended.
-rw-r--r-- regress/roff/esc/Makefile 6
-rw-r--r-- regress/roff/esc/comment.in 25
-rw-r--r-- regress/roff/esc/comment.out_ascii 19
-rw-r--r-- regress/roff/esc/comment.out_lint 1
-rw-r--r-- roff.c 204
5 files changed, 155 insertions, 100 deletions
diff --git a/regress/roff/esc/Makefile b/regress/roff/esc/Makefile
index f00aa4d3..111e59bc 100644
--- a/regress/roff/esc/Makefile
+++ b/regress/roff/esc/Makefile
@@ -1,10 +1,10 @@
-# $OpenBSD: Makefile,v 1.19 2022/04/27 13:30:19 schwarze Exp $
+# $OpenBSD: Makefile,v 1.20 2022/05/01 16:18:59 schwarze Exp $
-REGRESS_TARGETS = one two multi
+REGRESS_TARGETS = one two multi comment
REGRESS_TARGETS += B bs_man bs_mdoc c c_man E1 e f h hneg l O1 o p w z
REGRESS_TARGETS += ignore invalid unsupp
HTML_TARGETS = f
-LINT_TARGETS = B h l O1 w ignore invalid unsupp
+LINT_TARGETS = comment B h l O1 w ignore invalid unsupp
# mandoc defect:
# - \h with a negative argument replaces output characters
diff --git a/regress/roff/esc/comment.in b/regress/roff/esc/comment.in
new file mode 100644
index 00000000..692cbf00
--- /dev/null
+++ b/regress/roff/esc/comment.in
@@ -0,0 +1,25 @@
+.\" $OpenBSD: comment.in,v 1.1 2022/05/01 16:18:59 schwarze Exp $
+.Dd $Mdocdate: May 1 2022 $
+.Dt ROFF-ESC-COMMENT 1
+.Os
+.Sh NAME
+.Nm roff-esc-comment
+.Nd roff(7) comments
+.Sh DESCRIPTION
+text line cont\
+inuation
+.Pp
+macro line continuation:
+.Op Fl f A\
+r file
+.Pp
+whitespace \&
+at the end of an input line
+.Pp
+text line with \"not printed\
+comment
+.Pp
+continuation \#not printed
+requested by a comment
+.Pp
+Surpisingly, the sequence \\" does not start a comment.
diff --git a/regress/roff/esc/comment.out_ascii b/regress/roff/esc/comment.out_ascii
new file mode 100644
index 00000000..a8698686
--- /dev/null
+++ b/regress/roff/esc/comment.out_ascii
@@ -0,0 +1,19 @@
+ROFF-ESC-COMMENT(1) General Commands Manual ROFF-ESC-COMMENT(1)
+
+NNAAMMEE
+ rrooffff--eesscc--ccoommmmeenntt - roff(7) comments
+
+DDEESSCCRRIIPPTTIIOONN
+ text line continuation
+
+ macro line continuation: [--ff _f_i_l_e]
+
+ whitespace at the end of an input line
+
+ text line with comment
+
+ continuation requested by a comment
+
+ Surpisingly, the sequence \" does not start a comment.
+
+OpenBSD May 1, 2022 OpenBSD
diff --git a/regress/roff/esc/comment.out_lint b/regress/roff/esc/comment.out_lint
new file mode 100644
index 00000000..2844ea8c
--- /dev/null
+++ b/regress/roff/esc/comment.out_lint
@@ -0,0 +1 @@
+mandoc: comment.in:22:29: STYLE: whitespace at end of input line
diff --git a/roff.c b/roff.c
index 323122ba..567e7b02 100644
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.386 2022/04/30 18:51:36 schwarze Exp $ */
+/* $Id: roff.c,v 1.387 2022/05/01 16:22:06 schwarze Exp $ */
/*
* Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -234,6 +234,8 @@ static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static enum roff_tok roff_parse(struct roff *, char *, int *,
int, int);
+static int roff_parse_comment(struct roff *, struct buf *,
+ int, int, char);
static int roff_parsetext(struct roff *, struct buf *,
int, int *);
static int roff_renamed(ROFF_ARGS);
@@ -1231,6 +1233,98 @@ deroff(char **dest, const struct roff_node *n)
/* --- main functions of the roff parser ---------------------------------- */
+static int
+roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos,
+ char newesc)
+{
+ struct roff_node *n; /* used for header comments */
+ const char *start; /* start of the string to process */
+ const char *cp; /* for RCS id parsing */
+ char *stesc; /* start of an escape sequence ('\\') */
+ char *ep; /* end of comment string */
+ int rcsid; /* kind of RCS id seen */
+
+ for (start = stesc = buf->buf + pos;; stesc++) {
+ /* The line ends without continuation or comment. */
+ if (stesc[0] == '\0')
+ return ROFF_CONT;
+
+ /* Unescaped byte: skip it. */
+ if (stesc[0] != newesc)
+ continue;
+
+ /* Backslash at end of line requests line continuation. */
+ if (stesc[1] == '\0') {
+ stesc[0] = '\0';
+ return ROFF_IGN | ROFF_APPEND;
+ }
+
+ /* Found a comment: process it. */
+ if (stesc[1] == '"' || stesc[1] == '#')
+ break;
+
+ /* Escaped escape character: skip them both. */
+ if (stesc[1] == newesc)
+ stesc++;
+ }
+
+ /* Look for an RCS id in the comment. */
+
+ rcsid = 0;
+ if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
+ rcsid = 1 << MANDOC_OS_OPENBSD;
+ cp += 8;
+ } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
+ rcsid = 1 << MANDOC_OS_NETBSD;
+ cp += 7;
+ }
+ if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
+ strchr(cp, '$') != NULL) {
+ if (r->man->meta.rcsids & rcsid)
+ mandoc_msg(MANDOCERR_RCS_REP, ln,
+ (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
+ r->man->meta.rcsids |= rcsid;
+ }
+
+ /* Warn about trailing whitespace at the end of the comment. */
+
+ ep = strchr(stesc + 2, '\0') - 1;
+ if (*ep == '\n')
+ *ep-- = '\0';
+ if (*ep == ' ' || *ep == '\t')
+ mandoc_msg(MANDOCERR_SPACE_EOL,
+ ln, (int)(ep - buf->buf), NULL);
+
+ /* Save comments preceding the title macro in the syntax tree. */
+
+ if (r->options & MPARSE_COMMENT) {
+ while (*ep == ' ' || *ep == '\t')
+ ep--;
+ ep[1] = '\0';
+ n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
+ ROFFT_COMMENT, TOKEN_NONE);
+ n->string = mandoc_strdup(stesc + 2);
+ roff_node_append(r->man, n);
+ n->flags |= NODE_VALID | NODE_ENDED;
+ r->man->next = ROFF_NEXT_SIBLING;
+ }
+
+ /* The comment requests line continuation. */
+
+ if (stesc[1] == '#') {
+ *stesc = '\0';
+ return ROFF_IGN | ROFF_APPEND;
+ }
+
+ /* Discard the comment including preceding whitespace. */
+
+ while (stesc > start && stesc[-1] == ' ' &&
+ (stesc == start + 1 || stesc[-2] != '\\'))
+ stesc--;
+ *stesc = '\0';
+ return ROFF_CONT;
+}
+
/*
* In the current line, expand escape sequences that produce parsable
* input text. Also check the syntax of the remaining escape sequences,
@@ -1241,11 +1335,9 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
struct mctx *ctx; /* current macro call context */
char ubuf[24]; /* buffer to print the number */
- struct roff_node *n; /* used for header comments */
const char *start; /* start of the string to process */
char *stesc; /* start of an escape sequence ('\\') */
const char *esct; /* type of esccape sequence */
- char *ep; /* end of comment string */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
const char *res; /* the string to be substituted */
@@ -1259,98 +1351,15 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
int npos; /* position in numeric expression */
int arg_complete; /* argument not interrupted by eol */
int quote_args; /* true for \\$@, false for \\$* */
- int done; /* no more input available */
int deftype; /* type of definition to paste */
- int rcsid; /* kind of RCS id seen */
enum mandocerr err; /* for escape sequence problems */
char sign; /* increment number register */
char term; /* character terminating the escape */
- /* Search forward for comments. */
-
- done = 0;
start = buf->buf + pos;
- for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
- if (stesc[0] != newesc || stesc[1] == '\0')
- continue;
- stesc++;
- if (*stesc != '"' && *stesc != '#')
- continue;
-
- /* Comment found, look for RCS id. */
-
- rcsid = 0;
- if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
- rcsid = 1 << MANDOC_OS_OPENBSD;
- cp += 8;
- } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
- rcsid = 1 << MANDOC_OS_NETBSD;
- cp += 7;
- }
- if (cp != NULL &&
- isalnum((unsigned char)*cp) == 0 &&
- strchr(cp, '$') != NULL) {
- if (r->man->meta.rcsids & rcsid)
- mandoc_msg(MANDOCERR_RCS_REP, ln,
- (int)(stesc - buf->buf) + 1,
- "%s", stesc + 1);
- r->man->meta.rcsids |= rcsid;
- }
-
- /* Handle trailing whitespace. */
-
- ep = strchr(stesc--, '\0') - 1;
- if (*ep == '\n') {
- done = 1;
- ep--;
- }
- if (*ep == ' ' || *ep == '\t')
- mandoc_msg(MANDOCERR_SPACE_EOL,
- ln, (int)(ep - buf->buf), NULL);
-
- /*
- * Save comments preceding the title macro
- * in the syntax tree.
- */
-
- if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
- while (*ep == ' ' || *ep == '\t')
- ep--;
- ep[1] = '\0';
- n = roff_node_alloc(r->man,
- ln, stesc + 1 - buf->buf,
- ROFFT_COMMENT, TOKEN_NONE);
- n->string = mandoc_strdup(stesc + 2);
- roff_node_append(r->man, n);
- n->flags |= NODE_VALID | NODE_ENDED;
- r->man->next = ROFF_NEXT_SIBLING;
- }
-
- /* Line continuation with comment. */
-
- if (stesc[1] == '#') {
- *stesc = '\0';
- return ROFF_IGN | ROFF_APPEND;
- }
-
- /* Discard normal comments. */
-
- while (stesc > start && stesc[-1] == ' ' &&
- (stesc == start + 1 || stesc[-2] != '\\'))
- stesc--;
- *stesc = '\0';
- break;
- }
- if (stesc == start)
- return ROFF_CONT;
- stesc--;
-
- /* Notice the end of the input. */
-
- if (*stesc == '\n') {
+ stesc = strchr(start, '\0') - 1;
+ if (stesc >= start && *stesc == '\n')
*stesc-- = '\0';
- done = 1;
- }
expand_count = 0;
while (stesc >= start) {
@@ -1389,15 +1398,11 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
while (stesc > cp)
*stesc-- = '\\';
continue;
- } else if (stesc[1] != '\0') {
- *stesc = '\\';
- } else {
+ } else if (stesc[1] == '\0') {
*stesc-- = '\0';
- if (done)
- continue;
- else
- return ROFF_IGN | ROFF_APPEND;
- }
+ continue;
+ } else
+ *stesc = '\\';
/* Decide whether to expand or to check only. */
@@ -1856,7 +1861,12 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
assert(e == ROFF_CONT);
}
- /* Expand some escape sequences. */
+ /* Handle comments and escape sequences. */
+
+ e = roff_parse_comment(r, buf, ln, pos, r->escape);
+ if ((e & ROFF_MASK) == ROFF_IGN)
+ return e;
+ assert(e == ROFF_CONT);
e = roff_expand(r, buf, ln, pos, r->escape);
if ((e & ROFF_MASK) == ROFF_IGN)