aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--mandoc.c10
-rw-r--r--mandoc.h3
-rw-r--r--mandoc_char.718
3 files changed, 27 insertions, 4 deletions
diff --git a/mandoc.c b/mandoc.c
index 671f059a..c9290214 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,4 +1,4 @@
-/* $Id: mandoc.c,v 1.51 2011/05/14 17:54:42 kristaps Exp $ */
+/* $Id: mandoc.c,v 1.52 2011/05/15 15:30:33 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -125,6 +125,14 @@ mandoc_escape(const char **end, const char **start, int *sz)
break;
case ('['):
gly = ESCAPE_SPECIAL;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX] to
+ * \[u10FFFF], where the contained value must be a valid
+ * Unicode codepoint. Here, however, we only check that the
+ * 'u' is not immediately followed by the closing ']'.
+ */
+ if ('u' == cp[i] && ']' != cp[i + 1])
+ gly = ESCAPE_UNICODE;
term = ']';
break;
case ('C'):
diff --git a/mandoc.h b/mandoc.h
index 55878b62..db7b30bf 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.74 2011/04/30 22:24:31 kristaps Exp $ */
+/* $Id: mandoc.h,v 1.75 2011/05/15 15:30:33 kristaps Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -299,6 +299,7 @@ enum mandoc_esc {
ESCAPE_FONTROMAN, /* roman font mode */
ESCAPE_FONTPREV, /* previous font mode */
ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_UNICODE, /* a unicode codepoint */
ESCAPE_NOSPACE /* suppress space if the last on a line */
};
diff --git a/mandoc_char.7 b/mandoc_char.7
index c52d1e78..d0c5dd7f 100644
--- a/mandoc_char.7
+++ b/mandoc_char.7
@@ -1,4 +1,4 @@
-.\" $Id: mandoc_char.7,v 1.44 2011/05/01 08:45:10 kristaps Exp $
+.\" $Id: mandoc_char.7,v 1.45 2011/05/15 15:30:33 kristaps Exp $
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 1 2011 $
+.Dd $Mdocdate: May 15 2011 $
.Dt MANDOC_CHAR 7
.Os
.Sh NAME
@@ -520,6 +520,20 @@ portable.
.It \e*(Px Ta \*(Px Ta POSIX standard name
.It \e*(Ai Ta \*(Ai Ta ANSI standard name
.El
+.Sh UNICODE CHARACTERS
+The escape sequence
+.Pp
+.Dl \e[uXXXX]
+.Pp
+is interpreted as a Unicode codepoint.
+The codepoint must be greater than U+0080 and less than U+10FFFF.
+For compatibility, codepoints shorter than four hexadecimal digits must be
+zero-padded to four digits; longer codepoints must not be zero-padded.
+Unicode surrogates are not allowed.
+.\" .Pp
+.\" Unicode glyphs attenuate to the
+.\" .Sq \&?
+.\" character if invalid or not rendered by current output media.
.Sh NUMBERED CHARACTERS
For backward compatibility with existing manuals,
.Xr mandoc 1