Clarified special chars/predefined chars in mandoc_char.7.

Cleaned up escape section in man.7, mdoc.7. Cleaned up function names in term.c.
author: Kristaps Dzonsons <kristaps@bsd.lv> 2009-07-27 12:35:53 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2009-07-27 12:35:53 +0000
commit: 29761020ae538441c939e621297260904dba1160 (patch)
tree: c981779692d5388acb0c6bcc7c82fdbd290cbd4c
parent: 0441b4f4454e1e8fa3005d8b12b9731e24099509 (diff)
download: mandoc-29761020ae538441c939e621297260904dba1160.tar.gz
mandoc-29761020ae538441c939e621297260904dba1160.tar.zst
mandoc-29761020ae538441c939e621297260904dba1160.zip
4 files changed, 172 insertions, 107 deletions
diff --git a/man.7 b/man.7
index 91f469c7..e5c47164 100644
--- a/man.7
+++ b/man.7
@@ -1,4 +1,4 @@
-.\"	$Id: man.7,v 1.20 2009/07/20 13:45:11 kristaps Exp $
+.\"	$Id: man.7,v 1.21 2009/07/27 12:35:53 kristaps Exp $
 .\"
 .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: July 20 2009 $
+.Dd $Mdocdate: July 27 2009 $
 .Dt MAN 7
 .Os
 .\" SECTION
@@ -70,17 +70,16 @@ documents; if encountered at the end of a word, it ensures that the
 subsequent word isn't off-set by whitespace.
 .\" SUB-SECTION
 .Ss Comments
-Anything following a
-.Sq \e"
-delimiter is considered a comment (unless the
-.Sq \e
-itself has been escaped) and is ignored to the end of line.
-Furthermore, a macro line with only a control character
-.Sq \. ,
-optionally followed by whitespace, is ignored.
+Text following a
+.Sq \e" ,
+whether in a macro or free-form text line, is ignored to the end of
+line.  A macro line with only a control character and comment escape,
+.Sq \&.\e" ,
+is also ignored.
 .\" SUB-SECTION
 .Ss Special Characters
-Special character sequences begin with the escape character
+Special characters may occur in both macro and free-form lines.
+Sequences begin with the escape character
 .Sq \e
 followed by either an open-parenthesis
 .Sq \&(
@@ -88,17 +87,21 @@ for two-character sequences; an open-bracket
 .Sq \&[
 for n-character sequences (terminated at a close-bracket
 .Sq \&] ) ;
-or a single one-character sequence.
-.Pp
-Characters may alternatively be escaped by a slash-asterisk,
-.Sq \e* ,
-with the same combinations as described above.
-.Pp
-Terms may also be text-decorated using the
+or a single one-character sequence.  See
+.Xr mandoc_char 7
+for a complete list.  Examples include
+.Sq \e(em
+.Pq em-dash
+and
+.Sq \ee
+.Pq back-slash .
+.\" SUB-SECTION----------------------
+.Ss Text Decoration
+Terms may be text-decorated using the
 .Sq \ef
-escape followed by a text-decoration letter: B (bold), I, (italic), or P
-and R (Roman, or reset).
-.\" SUB-SECTION
+escape followed by an indicator: B (bold), I (italic), or P and R
+(Roman, or reset).
+.\" SUB-SECTION----------------------
 .Ss Whitespace
 Unless specifically escaped, consecutive blocks of whitespace are pruned
 from input.  These are later re-added, if applicable, by a front-end
diff --git a/mandoc_char.7 b/mandoc_char.7
index f4ce7041..c7199604 100644
--- a/mandoc_char.7
+++ b/mandoc_char.7
@@ -1,4 +1,4 @@
-.\"	$Id: mandoc_char.7,v 1.9 2009/07/27 12:02:49 kristaps Exp $
+.\"	$Id: mandoc_char.7,v 1.10 2009/07/27 12:35:53 kristaps Exp $
 .\"
 .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
 .\"
@@ -23,7 +23,7 @@
 .Nd mandoc special characters
 .\" SECTION
 .Sh DESCRIPTION
-This documents the special characters accepted by 
+This documents the special characters and predefined strings accepted by 
 .Xr mandoc 1
 to format
 .Xr mdoc 7
@@ -34,12 +34,13 @@ documents.  Specific output devices of
 dictated by the
 .Fl T Ns Ar output
 argument, will properly render these sequences.
+.\" PARAGRAPH
 .Pp
 Both
 .Xr mdoc 7
 and
 .Xr man 7
-encode these special characters with 
+encode special characters with 
 .Sq \eX
 .Pq for a one-character escape ,
 .Sq \e(XX
@@ -50,7 +51,30 @@ and
 One may generalise
 .Sq \e(XX
 as
-.Sq \e[XX] .
+.Sq \e[XX] 
+and
+.Sq \eX
+as
+.Sq \e[X] .
+Predefined strings are functionally similar to special characters, using 
+.Sq \e*X
+.Pq for a one-character escape ,
+.Sq \e*(XX
+.Pq two-character ,
+and
+.Sq \e*[N]
+.Pq N-character .
+One may generalise
+.Sq \e*(XX
+as
+.Sq \e*[XX]
+and
+.Sq \e*X
+as
+.Sq \e*[X] .
+.\" SECTION
+.Sh SPECIAL CHARACTERS
+These are the preferred input symbols for producing special characters.
 .\" PARAGRAPH
 .Pp
 Typographic:
@@ -119,8 +143,6 @@ Enclosures:
 .Pq right bracket
 .It \e(lB
 .Pq left bracket
-.It \eq
-.Pq double-quote
 .It \e(lq
 .Pq left double-quote
 .It \e(rq
@@ -230,7 +252,7 @@ Mathematical:
 .It \e(if
 .Pq infinity
 .It \e(na
-.Pq NaN , an extension
+.Pq NaN, an extension
 .It \e(+-
 .Pq plus-minus
 .It \e(**
@@ -459,6 +481,56 @@ Special symbols:
 .Pq escape
 .El 
 .\" SECTION
+.Sh PREDEFINED STRINGS
+These are not recommended for use, as they differ across
+implementations:
+.Pp
+Mathematical:
+.Bl -tag -width Ds -offset indent -compact
+.It \e*(Ne
+.Pq not equal
+.It \e*(Ge
+.Pq greater-than-equal
+.It \e*(Le
+.Pq less-than-equal
+.It \e*(Gt
+.Pq greater-than
+.It \e*(Lt
+.Pq less-than
+.It \e*(Pm
+.Pq plus-minus
+.It \e*(If
+.Pq infinity
+.It \e*(Pi
+.Pq pi
+.It \e*(Na
+.Pq NaN
+.El
+.\" PARAGRAPH
+.Pp
+Special symbols:
+.Bl -tag -width Ds -offset indent -compact
+.It \e*(Ba
+.Pq vertical bar
+.It \e*(Am
+.Pq ampersand
+.El
+.\" PARAGRAPH
+.Pp
+Enclosures:
+.Bl -tag -width Ds -offset indent -compact
+.It \e*q
+.Pq double-quote
+.It \e*(Rq
+.Pq right-double-quote
+.It \e*(Lq
+.Pq left-double-quote
+.It \e*(lp
+.Pq left-parenthesis
+.It \e*(rp
+.Pq right-parenthesis
+.El
+.\" SECTION
 .Sh COMPATIBILITY
 This section documents compatibility of
 .Nm
diff --git a/mdoc.7 b/mdoc.7
index 73f693be..b64f4931 100644
--- a/mdoc.7
+++ b/mdoc.7
@@ -1,4 +1,4 @@
-.\"	$Id: mdoc.7,v 1.53 2009/07/26 19:30:50 kristaps Exp $
+.\"	$Id: mdoc.7,v 1.54 2009/07/27 12:35:54 kristaps Exp $
 .\"
 .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: July 26 2009 $
+.Dd $Mdocdate: July 27 2009 $
 .Dt MDOC 7
 .Os
 .\" SECTION---------------------------------------------
@@ -111,26 +111,46 @@ for two-character sequences; an open-bracket
 for n-character sequences (terminated at a close-bracket
 .Sq \&] ) ;
 or a single one-character sequence.  See
-.Xr mandoc_char 1
+.Xr mandoc_char 7
 for a complete list.  Examples include
 .Sq \e(em
 .Pq em-dash
 and
 .Sq \ee
 .Pq back-slash .
-.\" PARAGRAPH------------
-.Pp
-An alternative escape sequence is
-the slash-asterisk,
-.Sq \e* ,
-but this method is discouraged for compatibility reasons.
-.\" PARAGRAPH------------
-.Pp
-Terms may
-also be text-decorated using the
+.\" SUB-SECTION----------------------
+.Ss Text Decoration
+Terms may be text-decorated using the
 .Sq \ef
 escape followed by an indicator: B (bold), I, (italic), or P and R
-(Roman, or reset).  This form is not recommended.
+(Roman, or reset).  This form is not recommended for 
+.Nm ,
+which encourages semantic, not presentation, annotation.
+.\" SUB-SECTION----------------------
+.Ss Predefined Strings
+Historically, 
+.Xr groff 1
+also defined a set of package-specific 
+.Dq predefined strings ,
+which, like 
+.Sx Special Characters ,
+demark special output characters and strings by way of input codes.
+Predefined strings are escaped with the backslash-asterisk,
+.Sq \e* :
+single-character
+.Sq \e*X ,
+two-character
+.Sq \e*(XX ,
+and N-character
+.Sq \e*[N] .
+See
+.Xr mandoc_char 7
+for a complete list.  Examples include
+.Sq \e*(Am
+.Pq ampersand
+and
+.Sq \e*(Ba
+.Pq vertical bar .
 .\" SUB-SECTION----------------------
 .Ss Whitespace
 In non-literal free-form lines, consecutive blocks of whitespace are
@@ -476,15 +496,6 @@ The
 macro does not accept negative numbers.
 .\" LIST-ITEM
 .It
-Some character sequences in groff are not handled depending on escape
-style, e.g.,
-.Sq \e(ba
-and
-.Sq \e*(Ba
-may not be interchanged.  This is no longer the case: all character
-sequences resolve to the same symbol, regardless the escape style.
-.\" LIST-ITEM
-.It
 Blocks of whitespace are stripped from both macro and free-form text
 lines (except when in literal mode), while groff would retain whitespace
 in free-form text lines.
@@ -525,12 +536,6 @@ incorrectly by following it with a reserved character and expecting the
 delimiter to render.  This is not supported.
 .\" LIST-ITEM
 .It
-If an special-character control character is escaped
-.Sq \e\e ,
-it will obviously not render the subsequent sequence.  Even newer
-versions of groff seem to dither on this.
-.\" LIST-ITEM
-.It
 In groff, the
 .Sq \&Fo
 macro only produces the first parameter.  This is no longer the case.
@@ -615,7 +620,7 @@ There's no way to refer to references in
 blocks.
 .\" LIST-ITEM
 .It
-The \-split and \-nosplit arguments to
+The \-split and \-nosplit dictates via
 .Sq \&An
-are inane.
+are re-set when entering and leaving the AUTHORS section.
 .El
diff --git a/term.c b/term.c
index fa9d8d68..dcd8c519 100644
--- a/term.c
+++ b/term.c
@@ -1,4 +1,4 @@
-/*	$Id: term.c,v 1.94 2009/07/27 12:02:49 kristaps Exp $ */
+/*	$Id: term.c,v 1.95 2009/07/27 12:35:54 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -31,15 +31,16 @@ extern	int		  mdoc_run(struct termp *,
 
 static	struct termp	 *term_alloc(enum termenc);
 static	void		  term_free(struct termp *);
-static	void		  term_pescape(struct termp *, const char **);
-static	void		  term_nescape(struct termp *,
+
+static	void		  do_escaped(struct termp *, const char **);
+static	void		  do_special(struct termp *,
 				const char *, size_t);
-static	void		  term_sescape(struct termp *,
+static	void		  do_reserved(struct termp *,
 				const char *, size_t);
-static	void		  term_chara(struct termp *, char);
-static	void		  term_encodea(struct termp *, char);
-static	int		  term_isopendelim(const char *);
-static	int		  term_isclosedelim(const char *);
+static	void		  buffer(struct termp *, char);
+static	void		  encode(struct termp *, char);
+static	int		  isopendelim(const char *);
+static	int		  isclosedelim(const char *);
 
 
 void *
@@ -112,7 +113,7 @@ term_alloc(enum termenc enc)
 
 
 static int
-term_isclosedelim(const char *p)
+isclosedelim(const char *p)
 {
 
 	if ( ! (*p && 0 == *(p + 1)))
@@ -146,7 +147,7 @@ term_isclosedelim(const char *p)
 
 
 static int
-term_isopendelim(const char *p)
+isopendelim(const char *p)
 {
 
 	if ( ! (*p && 0 == *(p + 1)))
@@ -207,12 +208,6 @@ term_isopendelim(const char *p)
  *  Otherwise, the line will break at the right margin.  Extremely long
  *  lines will cause the system to emit a warning (TODO: hyphenate, if
  *  possible).
- *
- *  FIXME: newline breaks occur (in groff) also occur when a single
- *  space follows a NOBREAK (try `Bl -tag')
- *
- *  FIXME: there's a newline error where a `Bl -diag' will have a
- *  trailing newline if the line is exactly 73 chars long.
  */
 void
 term_flushln(struct termp *p)
@@ -387,13 +382,8 @@ term_vspace(struct termp *p)
 }
 
 
-/*
- * Determine the symbol indicated by an escape sequences, that is, one
- * starting with a backslash.  Once done, we pass this value into the
- * output buffer by way of the symbol table.
- */
 static void
-term_nescape(struct termp *p, const char *word, size_t len)
+do_special(struct termp *p, const char *word, size_t len)
 {
 	const char	*rhs;
 	size_t		 sz;
@@ -404,12 +394,12 @@ term_nescape(struct termp *p, const char *word, size_t len)
 	if (NULL == rhs)
 		return;
 	for (i = 0; i < (int)sz; i++) 
-		term_encodea(p, rhs[i]);
+		encode(p, rhs[i]);
 }
 
 
 static void
-term_sescape(struct termp *p, const char *word, size_t len)
+do_reserved(struct termp *p, const char *word, size_t len)
 {
 	const char	*rhs;
 	size_t		 sz;
@@ -420,7 +410,7 @@ term_sescape(struct termp *p, const char *word, size_t len)
 	if (NULL == rhs)
 		return;
 	for (i = 0; i < (int)sz; i++) 
-		term_encodea(p, rhs[i]);
+		encode(p, rhs[i]);
 }
 
 
@@ -430,7 +420,7 @@ term_sescape(struct termp *p, const char *word, size_t len)
  * the escape sequence (we assert upon badly-formed escape sequences).
  */
 static void
-term_pescape(struct termp *p, const char **word)
+do_escaped(struct termp *p, const char **word)
 {
 	int		 j;
 	const char	*wp;
@@ -449,7 +439,7 @@ term_pescape(struct termp *p, const char **word)
 			return;
 		}
 
-		term_nescape(p, wp, 2);
+		do_special(p, wp, 2);
 		*word = ++wp;
 		return;
 
@@ -467,13 +457,13 @@ term_pescape(struct termp *p, const char **word)
 				return;
 			}
 
-			term_sescape(p, wp, 2);
+			do_reserved(p, wp, 2);
 			*word = ++wp;
 			return;
 		case ('['):
 			break;
 		default:
-			term_sescape(p, wp, 1);
+			do_reserved(p, wp, 1);
 			*word = wp;
 			return;
 		}
@@ -504,7 +494,7 @@ term_pescape(struct termp *p, const char **word)
 		return;
 
 	} else if ('[' != *wp) {
-		term_nescape(p, wp, 1);
+		do_special(p, wp, 1);
 		*word = wp;
 		return;
 	}
@@ -518,7 +508,7 @@ term_pescape(struct termp *p, const char **word)
 		return;
 	}
 
-	term_nescape(p, wp - j, (size_t)j);
+	do_special(p, wp - j, (size_t)j);
 	*word = wp;
 }
 
@@ -533,28 +523,23 @@ term_word(struct termp *p, const char *word)
 {
 	const char	 *sv;
 
-	if (term_isclosedelim(word))
+	if (isclosedelim(word))
 		if ( ! (TERMP_IGNDELIM & p->flags))
 			p->flags |= TERMP_NOSPACE;
 
 	if ( ! (TERMP_NOSPACE & p->flags))
-		term_chara(p, ' ');
+		buffer(p, ' ');
 
 	if ( ! (p->flags & TERMP_NONOSPACE))
 		p->flags &= ~TERMP_NOSPACE;
 
-	/* 
-	 * If ANSI (word-length styling), then apply our style now,
-	 * before the word.
-	 */
-
 	for (sv = word; *word; word++)
 		if ('\\' != *word)
-			term_encodea(p, *word);
+			encode(p, *word);
 		else
-			term_pescape(p, &word);
+			do_escaped(p, &word);
 
-	if (term_isopendelim(sv))
+	if (isopendelim(sv))
 		p->flags |= TERMP_NOSPACE;
 }
 
@@ -565,7 +550,7 @@ term_word(struct termp *p, const char *word)
  * size.
  */
 static void
-term_chara(struct termp *p, char c)
+buffer(struct termp *p, char c)
 {
 	size_t		 s;
 
@@ -583,18 +568,18 @@ term_chara(struct termp *p, char c)
 
 
 static void
-term_encodea(struct termp *p, char c)
+encode(struct termp *p, char c)
 {
 	
 	if (' ' != c && TERMP_STYLE & p->flags) {
 		if (TERMP_BOLD & p->flags) {
-			term_chara(p, c);
-			term_chara(p, 8);
+			buffer(p, c);
+			buffer(p, 8);
 		}
 		if (TERMP_UNDER & p->flags) {
-			term_chara(p, '_');
-			term_chara(p, 8);
+			buffer(p, '_');
+			buffer(p, 8);
 		}
 	}
-	term_chara(p, c);
+	buffer(p, c);
 }
author	Kristaps Dzonsons <kristaps@bsd.lv>	2009-07-27 12:35:53 +0000
committer	Kristaps Dzonsons <kristaps@bsd.lv>	2009-07-27 12:35:53 +0000
commit	29761020ae538441c939e621297260904dba1160 (patch)
tree	c981779692d5388acb0c6bcc7c82fdbd290cbd4c
parent	0441b4f4454e1e8fa3005d8b12b9731e24099509 (diff)
download	mandoc-29761020ae538441c939e621297260904dba1160.tar.gz mandoc-29761020ae538441c939e621297260904dba1160.tar.zst mandoc-29761020ae538441c939e621297260904dba1160.zip