http://mdocml.bsd.lv/archives/tech/0368.html
For the time being, we just throw it away.
-/* $Id: mandoc.c,v 1.51 2011/05/14 17:54:42 kristaps Exp $ */
+/* $Id: mandoc.c,v 1.52 2011/05/15 15:30:33 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
break;
case ('['):
gly = ESCAPE_SPECIAL;
break;
case ('['):
gly = ESCAPE_SPECIAL;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX] to
+ * \[u10FFFF], where the contained value must be a valid
+ * Unicode codepoint. Here, however, only check that the 'u' is
+ * followed by something other than the closing ']'.
+ */
+ if ('u' == cp[i] && ']' != cp[i + 1])
+ gly = ESCAPE_UNICODE;
term = ']';
break;
case ('C'):
term = ']';
break;
case ('C'):
-/* $Id: mandoc.h,v 1.74 2011/04/30 22:24:31 kristaps Exp $ */
+/* $Id: mandoc.h,v 1.75 2011/05/15 15:30:33 kristaps Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
ESCAPE_FONTROMAN, /* roman font mode */
ESCAPE_FONTPREV, /* previous font mode */
ESCAPE_NUMBERED, /* a numbered glyph */
ESCAPE_FONTROMAN, /* roman font mode */
ESCAPE_FONTPREV, /* previous font mode */
ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_UNICODE, /* a unicode codepoint */
ESCAPE_NOSPACE /* suppress space if the last on a line */
};
ESCAPE_NOSPACE /* suppress space if the last on a line */
};
-.\" $Id: mandoc_char.7,v 1.44 2011/05/01 08:45:10 kristaps Exp $
+.\" $Id: mandoc_char.7,v 1.45 2011/05/15 15:30:33 kristaps Exp $
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
.\"
.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 1 2011 $
+.Dd $Mdocdate: May 15 2011 $
.Dt MANDOC_CHAR 7
.Os
.Sh NAME
.Dt MANDOC_CHAR 7
.Os
.Sh NAME
.It \e*(Px Ta \*(Px Ta POSIX standard name
.It \e*(Ai Ta \*(Ai Ta ANSI standard name
.El
.It \e*(Px Ta \*(Px Ta POSIX standard name
.It \e*(Ai Ta \*(Ai Ta ANSI standard name
.El
+.Sh UNICODE CHARACTERS
+The escape sequence
+.Pp
+.Dl \e[uXXXX]
+.Pp
+is interpreted as a Unicode codepoint.
+The codepoint must be greater than U+0080 and less than U+10FFFF.
+For compatibility, codepoints of fewer than four hexadecimal digits must
+be zero-padded to four digits; codepoints of more than four digits must
+not have leading zeros.
+Unicode surrogates are not allowed.
+.\" .Pp
+.\" Unicode glyphs attenuate to the
+.\" .Sq \&?
+.\" character if invalid or not rendered by current output media.
.Sh NUMBERED CHARACTERS
For backward compatibility with existing manuals,
.Xr mandoc 1
.Sh NUMBERED CHARACTERS
For backward compatibility with existing manuals,
.Xr mandoc 1