summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2009-02-27 08:20:15 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2009-02-27 08:20:15 +0000
commitff1018946028fdff5987992f5ccc7e26a7ba7d55 (patch)
tree6c8f4b804f725d10e8ef48f0c5b3d900f4073785
parent02d0e8c68c70b476d56141862eb8b20addbc233f (diff)
downloadmandoc-ff1018946028fdff5987992f5ccc7e26a7ba7d55.tar.gz
mandoc-ff1018946028fdff5987992f5ccc7e26a7ba7d55.tar.zst
mandoc-ff1018946028fdff5987992f5ccc7e26a7ba7d55.zip
More character-encoding.
-rw-r--r--Makefile7
-rw-r--r--mdoc.345
-rw-r--r--mdocterm.111
-rw-r--r--mdocterm.c170
-rw-r--r--strings.c18
-rw-r--r--term.c9
6 files changed, 221 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index ae89552e..556ca5c5 100644
--- a/Makefile
+++ b/Makefile
@@ -87,7 +87,9 @@ FAIL = regress/test.empty \
regress/test.escape.06 \
regress/test.escape.07 \
regress/test.escape.08 \
- regress/test.escape.09
+ regress/test.escape.09 \
+ regress/test.escape.11 \
+ regress/test.escape.12
SUCCEED = regress/test.prologue.05 \
regress/test.prologue.07 \
@@ -114,7 +116,8 @@ SUCCEED = regress/test.prologue.05 \
regress/test.sh.01 \
regress/test.sh.02 \
regress/test.escape.00 \
- regress/test.escape.05
+ regress/test.escape.05 \
+ regress/test.escape.10
REGRESS = $(FAIL) $(SUCCEED)
diff --git a/mdoc.3 b/mdoc.3
index 5b5f70e8..e7ee3ffe 100644
--- a/mdoc.3
+++ b/mdoc.3
@@ -1,4 +1,4 @@
-.\" $Id: mdoc.3,v 1.11 2009/02/25 17:02:47 kristaps Exp $
+.\" $Id: mdoc.3,v 1.12 2009/02/27 08:20:15 kristaps Exp $
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
.\"
@@ -16,7 +16,7 @@
.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
.\" PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: February 25 2009 $
+.Dd $Mdocdate: February 27 2009 $
.Dt mdoc 3
.Os
.\" SECTION
@@ -184,7 +184,8 @@ The
.Xr mdoc 3
library accepts only printable ASCII characters as defined by
.Xr isprint 3 .
-Non-ASCII character sequences are escaped with an escape character
+Non-ASCII character sequences are delimited in various ways. All are
+preceded by an escape character
.Sq \\
and followed by either an open-parenthesis
.Sq \&(
@@ -192,7 +193,43 @@ for two-character sequences; an open-bracket
.Sq \&[
for n-character sequences (terminated at a close-bracket
.Sq \&] ) ;
-or one of a small set of single characters for other escapes.
+an asterisk and open-parenthesis
+.Sq \&*(
+for two-character sequences;
+an asterisk and non-open-parenthesis
+.Sq \&*
+for single-character sequences; or one of a small set of standalone
+single characters for other escapes.
+.Pp
+Examples:
+.Pp
+.Bl -tag -width "XXXXXXXX" -offset "XXXX" -compact
+.\" LIST-ITEM
+.It \\*(<=
+prints
+.Dq \*(<=
+.Pq less-equal
+.\" LIST-ITEM
+.It \\(<-
+prints
+.Dq \(<-
+.Pq left-arrow
+.\" LIST-ITEM
+.It \\[<-]
+also prints
+.Dq \(<-
+.Pq left-arrow
+.\" LIST-ITEM
+.It \\*(Ba
+prints
+.Dq \*(Ba
+.Pq bar
+.\" LIST-ITEM
+.It \\*q
+prints
+.Dq \*q
+.Pq double-quote
+.El
.\" SUBSECTION
.Ss Abstract Syntax Tree
The
diff --git a/mdocterm.1 b/mdocterm.1
index c7033d41..6c0c5737 100644
--- a/mdocterm.1
+++ b/mdocterm.1
@@ -1,4 +1,4 @@
-.\" $Id: mdocterm.1,v 1.5 2009/02/25 15:12:26 kristaps Exp $
+.\" $Id: mdocterm.1,v 1.6 2009/02/27 08:20:15 kristaps Exp $
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
.\"
@@ -16,7 +16,7 @@
.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
.\" PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: February 25 2009 $
+.Dd $Mdocdate: February 27 2009 $
.Dt mdocmterm 1
.Os
.\" SECTION
@@ -119,10 +119,3 @@ See
.Xr mdoc 3
for a list of bugs, caveats, and incomplete macros regarding the
document parse.
-.Pp
-For front-end formatting, the
-.Sq -hang ,
-.Sq -inset
-and
-.Sq -column
-list types aren't yet supported.
diff --git a/mdocterm.c b/mdocterm.c
index 9b08c4e2..5e5e751d 100644
--- a/mdocterm.c
+++ b/mdocterm.c
@@ -1,4 +1,4 @@
-/* $Id: mdocterm.c,v 1.15 2009/02/26 17:11:38 kristaps Exp $ */
+/* $Id: mdocterm.c,v 1.16 2009/02/27 08:20:15 kristaps Exp $ */
/*
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -31,6 +31,32 @@
#include "mmain.h"
#include "term.h"
+#define TERMSYM_RBRACK "]"
+#define TERMSYM_LBRACK "["
+#define TERMSYM_LARROW "<-"
+#define TERMSYM_RARROW "->"
+#define TERMSYM_UARROW "^"
+#define TERMSYM_LSQUOTE "`"
+#define TERMSYM_RSQUOTE "\'"
+#define TERMSYM_SQUOTE "\'"
+#define TERMSYM_LDQUOTE "``"
+#define TERMSYM_RDQUOTE "\'\'"
+#define TERMSYM_DQUOTE "\""
+#define TERMSYM_LT "<"
+#define TERMSYM_GT ">"
+#define TERMSYM_LE "<="
+#define TERMSYM_GE ">="
+#define TERMSYM_EQ "=="
+#define TERMSYM_NEQ "!="
+#define TERMSYM_ACUTE "\'"
+#define TERMSYM_GRAVE "`"
+#define TERMSYM_PI "pi"
+#define TERMSYM_PLUSMINUS "+="
+#define TERMSYM_INFINITY "infinity"
+#define TERMSYM_NAN "NaN"
+#define TERMSYM_BAR "|"
+#define TERMSYM_BULLET "o"
+
#ifdef __NetBSD__
#define xisspace(x) isspace((int)(x))
#else
@@ -133,7 +159,6 @@ flushln(struct termp *p)
* If we're literal, print out verbatim.
*/
if (p->flags & TERMP_LITERAL) {
- /* FIXME: count non-printing chars. */
for (i = 0; i < p->col; i++)
putchar(p->buf[i]);
putchar('\n');
@@ -168,8 +193,9 @@ flushln(struct termp *p)
* the line with TERMP_NOBREAK).
*/
+ /* FIXME: allow selective right-margin breaking. */
+
if (vis && vis + vsz > maxvis) {
- /* FIXME */
if (p->flags & TERMP_NOBREAK)
errx(1, "word breaks right margin");
putchar('\n');
@@ -177,7 +203,6 @@ flushln(struct termp *p)
putchar(' ');
vis = 0;
} else if (vis + vsz > maxvis)
- /* FIXME */
errx(1, "word breaks right margin");
/*
@@ -258,9 +283,16 @@ static void
chara(struct termp *p, char c)
{
- /* TODO: dynamically expand the buffer. */
- if (p->col + 1 >= p->maxcols)
- errx(1, "line overrun");
+ /*
+ * Insert a single character into the line-buffer. If the
+ * buffer's space is exceeded, then allocate more space.
+ */
+ if (p->col + 1 >= p->maxcols) {
+ p->buf = realloc(p->buf, p->maxcols * 2);
+ if (NULL == p->buf)
+ err(1, "malloc");
+ p->maxcols *= 2;
+ }
p->buf[(p->col)++] = c;
}
@@ -297,21 +329,59 @@ nescape(struct termp *p, const char *word, size_t len)
{
switch (len) {
+ case (1):
+ if ('q' == word[0])
+ stringa(p, TERMSYM_DQUOTE);
+ break;
case (2):
if ('r' == word[0] && 'B' == word[1])
- chara(p, ']');
+ stringa(p, TERMSYM_RBRACK);
else if ('l' == word[0] && 'B' == word[1])
- chara(p, '[');
+ stringa(p, TERMSYM_LBRACK);
else if ('<' == word[0] && '-' == word[1])
- stringa(p, "<-");
+ stringa(p, TERMSYM_LARROW);
else if ('-' == word[0] && '>' == word[1])
- stringa(p, "->");
+ stringa(p, TERMSYM_RARROW);
else if ('l' == word[0] && 'q' == word[1])
- chara(p, '\"');
+ stringa(p, TERMSYM_DQUOTE);
else if ('r' == word[0] && 'q' == word[1])
- chara(p, '\"');
+ stringa(p, TERMSYM_DQUOTE);
else if ('b' == word[0] && 'u' == word[1])
- chara(p, 'o');
+ stringa(p, TERMSYM_BULLET);
+ else if ('L' == word[0] && 'e' == word[1])
+ stringa(p, TERMSYM_LE);
+ else if ('<' == word[0] && '=' == word[1])
+ stringa(p, TERMSYM_LE);
+ else if ('G' == word[0] && 'e' == word[1])
+ stringa(p, TERMSYM_GE);
+ else if ('>' == word[0] && '=' == word[1])
+ stringa(p, TERMSYM_GE);
+ else if ('R' == word[0] && 'q' == word[1])
+ stringa(p, TERMSYM_RDQUOTE);
+ else if ('L' == word[0] && 'q' == word[1])
+ stringa(p, TERMSYM_LDQUOTE);
+ else if ('u' == word[0] && 'a' == word[1])
+ stringa(p, TERMSYM_UARROW);
+ else if ('a' == word[0] && 'a' == word[1])
+ stringa(p, TERMSYM_ACUTE);
+ else if ('g' == word[0] && 'a' == word[1])
+ stringa(p, TERMSYM_GRAVE);
+ else if ('P' == word[0] && 'i' == word[1])
+ stringa(p, TERMSYM_PI);
+ else if ('N' == word[0] && 'e' == word[1])
+ stringa(p, TERMSYM_NEQ);
+ else if ('L' == word[0] && 't' == word[1])
+ stringa(p, TERMSYM_LT);
+ else if ('G' == word[0] && 't' == word[1])
+ stringa(p, TERMSYM_GT);
+ else if ('P' == word[0] && 'm' == word[1])
+ stringa(p, TERMSYM_PLUSMINUS);
+ else if ('I' == word[0] && 'f' == word[1])
+ stringa(p, TERMSYM_INFINITY);
+ else if ('N' == word[0] && 'a' == word[1])
+ stringa(p, TERMSYM_NAN);
+ else if ('B' == word[0] && 'a' == word[1])
+ stringa(p, TERMSYM_BAR);
break;
default:
break;
@@ -327,6 +397,11 @@ pescape(struct termp *p, const char *word, size_t *i, size_t len)
(*i)++;
assert(*i < len);
+ /*
+ * Handle an escape sequence. This must manage both groff-style
+ * escapes and mdoc-style escapes.
+ */
+
if ('(' == word[*i]) {
/* Two-character escapes. */
(*i)++;
@@ -335,6 +410,22 @@ pescape(struct termp *p, const char *word, size_t *i, size_t len)
(*i)++;
return;
+ } else if ('*' == word[*i]) {
+ (*i)++;
+ assert(*i < len);
+ switch (word[*i]) {
+ case ('('):
+ (*i)++;
+ assert(*i + 1 < len);
+ nescape(p, &word[*i], 2);
+ (*i)++;
+ return;
+ default:
+ break;
+ }
+ nescape(p, &word[*i], 1);
+ return;
+
} else if ('[' != word[*i]) {
/* One-character escapes. */
switch (word[*i]) {
@@ -371,6 +462,12 @@ pword(struct termp *p, const char *word, size_t len)
/*assert(len > 0);*/ /* Can be, if literal. */
+ /*
+ * Handle pwords, partial words, which may be either a single
+ * word or a phrase that cannot be broken down (such as a
+ * literal string). This handles word styling.
+ */
+
if ( ! (p->flags & TERMP_NOSPACE) &&
! (p->flags & TERMP_LITERAL))
chara(p, ' ');
@@ -378,6 +475,11 @@ pword(struct termp *p, const char *word, size_t len)
if ( ! (p->flags & TERMP_NONOSPACE))
p->flags &= ~TERMP_NOSPACE;
+ /*
+ * XXX - if literal and underlining, this will underline the
+ * spaces between literal words.
+ */
+
if (p->flags & TERMP_BOLD)
style(p, STYLE_BOLD);
if (p->flags & TERMP_UNDERLINE)
@@ -402,6 +504,13 @@ word(struct termp *p, const char *word)
{
size_t i, j, len;
+ /*
+ * Break apart a word into tokens. If we're a literal word,
+ * then don't. This doesn't handle zero-length words (there
+ * should be none) and makes sure that pword doesn't get spaces
+ * or nil words unless literal.
+ */
+
if (p->flags & TERMP_LITERAL) {
pword(p, word, strlen(word));
return;
@@ -443,6 +552,12 @@ body(struct termp *p, struct termpair *ppair,
int dochild;
struct termpair pair;
+ /*
+ * This is the main function for printing out nodes. It's
+ * constituted of PRE and POST functions, which correspond to
+ * prefix and postfix processing.
+ */
+
/* Pre-processing. */
dochild = 1;
@@ -505,6 +620,13 @@ footer(struct termp *p, const struct mdoc_meta *meta)
(void)strlcpy(os, meta->os, p->rmargin);
+ /*
+ * This is /slightly/ different from regular groff output
+ * because we don't have page numbers. Print the following:
+ *
+ * OS MDOCDATE
+ */
+
vspace(p);
p->flags |= TERMP_NOSPACE | TERMP_NOBREAK;
@@ -530,7 +652,7 @@ footer(struct termp *p, const struct mdoc_meta *meta)
static void
header(struct termp *p, const struct mdoc_meta *meta)
{
- char *buf, *title;
+ char *buf, *title, *bufp;
const char *pp;
if (NULL == (buf = malloc(p->rmargin)))
@@ -569,8 +691,21 @@ header(struct termp *p, const struct mdoc_meta *meta)
break;
}
+ /*
+ * The header is strange. It has three components, which are
+ * really two with the first duplicated. It goes like this:
+ *
+ * IDENTIFIER TITLE IDENTIFIER
+ *
+ * The IDENTIFIER is NAME(SECTION), which is the command-name
+ * (if given, or "unknown" if not) followed by the manual page
+ * section. These are given in `Dt'. The TITLE is a free-form
+ * string depending on the manual volume. If not specified, it
+ * switches on the manual section.
+ */
+
if (mdoc_arch2a(meta->arch))
- (void)snprintf(buf, p->rmargin, "%s(%s)",
+ (void)snprintf(buf, p->rmargin, "%s (%s)",
pp, mdoc_arch2a(meta->arch));
else
(void)strlcpy(buf, pp, p->rmargin);
@@ -580,6 +715,9 @@ header(struct termp *p, const struct mdoc_meta *meta)
(void)snprintf(title, p->rmargin, "%s(%s)",
meta->title, pp ? pp : "");
+ for (bufp = title; *bufp; bufp++)
+ *bufp = toupper(*bufp);
+
p->offset = 0;
p->rmargin = (p->maxrmargin - strlen(buf)) / 2;
p->flags |= TERMP_NOBREAK | TERMP_NOSPACE;
diff --git a/strings.c b/strings.c
index 5d143492..87a9b35e 100644
--- a/strings.c
+++ b/strings.c
@@ -1,4 +1,4 @@
-/* $Id: strings.c,v 1.20 2009/02/26 16:08:11 kristaps Exp $ */
+/* $Id: strings.c,v 1.21 2009/02/27 08:20:15 kristaps Exp $ */
/*
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -61,6 +61,18 @@ mdoc_isescape(const char *p)
/* FALLTHROUGH */
case ('e'):
return(2);
+ case ('*'):
+ if (0 == *++p || ! isgraph((int)*p))
+ return(0);
+ switch (*p) {
+ case ('('):
+ if (0 == *++p || ! isgraph((int)*p))
+ return(0);
+ return(4);
+ default:
+ break;
+ }
+ return(3);
case ('('):
if (0 == *++p || ! isgraph((int)*p))
return(0);
@@ -178,9 +190,9 @@ mdoc_atotime(const char *p)
(void)memset(&tm, 0, sizeof(struct tm));
- if (xstrcmp(p, "$Mdocdate: February 26 2009 $"))
+ if (xstrcmp(p, "$Mdocdate: February 27 2009 $"))
return(time(NULL));
- if ((pp = strptime(p, "$Mdocdate: February 26 2009 $", &tm)) && 0 == *pp)
+ if ((pp = strptime(p, "$Mdocdate: February 27 2009 $", &tm)) && 0 == *pp)
return(mktime(&tm));
/* XXX - this matches "June 1999", which is wrong. */
if ((pp = strptime(p, "%b %d %Y", &tm)) && 0 == *pp)
diff --git a/term.c b/term.c
index d3b7d07a..6b8a3fe3 100644
--- a/term.c
+++ b/term.c
@@ -1,4 +1,4 @@
-/* $Id: term.c,v 1.25 2009/02/26 16:08:11 kristaps Exp $ */
+/* $Id: term.c,v 1.26 2009/02/27 08:20:15 kristaps Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -25,7 +25,7 @@
#include "term.h"
-#define INDENT 4
+#define INDENT 6
/*
* Performs actions on nodes of the abstract syntax tree. Both pre- and
@@ -279,7 +279,6 @@ arg_width(const struct mdoc_arg *arg)
{
size_t len, i, v;
- /* TODO */
assert(*arg->value);
if (0 == strcmp(*arg->value, "indent"))
return(INDENT);
@@ -1330,7 +1329,7 @@ termp_bq_pre(DECL_ARGS)
if (MDOC_BODY != node->type)
return(1);
- word(p, "[");
+ word(p, "\\[");
p->flags |= TERMP_NOSPACE;
return(1);
}
@@ -1354,7 +1353,7 @@ termp_pq_pre(DECL_ARGS)
if (MDOC_BODY != node->type)
return(1);
- word(p, "(");
+ word(p, "\\&(");
p->flags |= TERMP_NOSPACE;
return(1);
}