about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Kristaps Dzonsons <kristaps@bsd.lv> 2011-09-01 22:09:50 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2011-09-01 22:09:50 +0000
commit: acfe4f96d50c12ef7200840ede40f289416d481c (patch)
tree: 91ff95d49440557e65f536913e6d6848f6291eea
parent: c94f8a44e39bddd5b62e05dd047ac8698c1dbce2 (diff)
download: mandoc-acfe4f96d50c12ef7200840ede40f289416d481c.tar.gz
mandoc-acfe4f96d50c12ef7200840ede40f289416d481c.tar.zst
mandoc-acfe4f96d50c12ef7200840ede40f289416d481c.zip
Make `-w' mode work much better. This is INCREDIBLY poorly specified in
any other deroff manual, and as I don't think anybody actually uses deroff, I don't feel compelled to research its behaviour too much and can just do what's logical.
-rw-r--r--  demandoc.1  12
-rw-r--r--  demandoc.c  59
2 files changed, 64 insertions, 7 deletions
diff --git a/demandoc.1 b/demandoc.1
index 30f6d2e7..9415fe70 100644
--- a/demandoc.1
+++ b/demandoc.1
@@ -1,4 +1,4 @@
-.\" $Id: demandoc.1,v 1.2 2011/09/01 20:55:50 kristaps Exp $
+.\" $Id: demandoc.1,v 1.3 2011/09/01 22:09:50 kristaps Exp $
.\"
.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
@@ -39,9 +39,10 @@ Its arguments are as follows:
Output a word list.
This outputs each word of text on its own line.
A
-.Qq word
-starts with at least two letters and consists of at least three letters
-total.
+.Qq word ,
+in this case, refers to whitespace-delimited terms beginning with at
+least two letters after opening punctuation and not consisting of any
+escape sequences.
.It Ar
The input files.
.El
@@ -51,12 +52,13 @@ If
is not provided,
.Nm
accepts standard input.
+If a document is not well-formed, it is skipped.
.Pp
By default,
.Nm
parses its input and outputs only text nodes, preserving line column
position.
-If a document is not well-formed, it is skipped.
+Escape sequences are omitted from the output.
.Pp
The
.Fl i ,
diff --git a/demandoc.c b/demandoc.c
index 63c7b7bb..76548de0 100644
--- a/demandoc.c
+++ b/demandoc.c
@@ -1,4 +1,4 @@
-/* $Id: demandoc.c,v 1.4 2011/09/01 20:55:50 kristaps Exp $ */
+/* $Id: demandoc.c,v 1.5 2011/09/01 22:09:50 kristaps Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -121,7 +121,8 @@ pmandoc(struct mparse *mp, int fd, const char *fn, int list)
else
return;
- putchar('\n');
+ if ( ! list)
+ putchar('\n');
}
/*
@@ -131,12 +132,58 @@ static void
pstring(const char *p, int col, int *colp, int list)
{
enum mandoc_esc esc;
+ const char *start;
+ int emit;
+
+ /*
+ * Print as many column spaces til we achieve parity with the
+ * input document.
+ */
+
+again:
+ if (list && '\0' != *p) {
+ while (isspace((unsigned char)*p))
+ p++;
+
+ while ('\'' == *p || '(' == *p || '"' == *p)
+ p++;
+
+ emit = isalpha((unsigned char)p[0]) &&
+ isalpha((unsigned char)p[1]);
+
+ for (start = p; '\0' != *p; p++)
+ if ('\\' == *p) {
+ p++;
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc)
+ return;
+ emit = 0;
+ } else if (isspace((unsigned char)*p))
+ break;
+
+ if (emit && p - start >= 2) {
+ for ( ; start != p; start++)
+ if (ASCII_HYPH == *start)
+ putchar('-');
+ else
+ putchar((unsigned char)*start);
+ putchar('\n');
+ }
+
+ if (isspace((unsigned char)*p))
+ goto again;
+
+ return;
+ }
while (*colp < col) {
putchar(' ');
(*colp)++;
}
+ /*
+ * Print the input word, skipping any special characters.
+ */
while ('\0' != *p)
if ('\\' == *p) {
p++;
@@ -153,6 +200,14 @@ static void
pline(int line, int *linep, int *col, int list)
{
+ if (list)
+ return;
+
+ /*
+ * Print out as many lines as needed to reach parity with the
+ * original input.
+ */
+
while (*linep < line) {
putchar('\n');
(*linep)++;