summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-05-17 11:50:20 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-05-17 11:50:20 +0000
commit1ee1eeda195d12cccd87b8fdfca9e982035a89e7 (patch)
treedc8c759f44513405842fdbdf491a24a9926a28b9
parent6fcb5ea18d95e1cc79dd37b4e2860f4e8c898346 (diff)
downloadmandoc-1ee1eeda195d12cccd87b8fdfca9e982035a89e7.tar.gz
mandoc-1ee1eeda195d12cccd87b8fdfca9e982035a89e7.tar.zst
mandoc-1ee1eeda195d12cccd87b8fdfca9e982035a89e7.zip
Flip on unicode output (via \[uNNNN]) in -T[x]html. Here we go!
-rw-r--r--chars.c20
-rw-r--r--html.c10
-rw-r--r--mandoc.318
-rw-r--r--mandoc.h3
4 files changed, 42 insertions, 9 deletions
diff --git a/chars.c b/chars.c
index 06a2a923..808d70d7 100644
--- a/chars.c
+++ b/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.43 2011/05/15 22:29:50 kristaps Exp $ */
+/* $Id: chars.c,v 1.44 2011/05/17 11:50:20 kristaps Exp $ */
/*
* Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -138,7 +138,7 @@ mchars_res2cp(struct mchars *arg, const char *p, size_t sz)
}
/*
- * Numbered character to literal character.
+ * Numbered character string to ASCII codepoint.
* This can only be a printable character (i.e., alnum, punct, space) so
* prevent the character from ruining our state (backspace, newline, and
* so on).
@@ -151,10 +151,24 @@ mchars_num2char(const char *p, size_t sz)
if ((i = mandoc_strntou(p, sz, 10)) < 0)
return('\0');
-
return(isprint(i) ? i : '\0');
}
+/*
+ * Hex character string to Unicode codepoint.
+ * If the character is illegal, returns '\0'.
+ */
+int
+mchars_num2uc(const char *p, size_t sz)
+{
+ int i;
+
+ if ((i = mandoc_strntou(p, sz, 16)) < 0)
+ return('\0');
+ /* FIXME: make sure we're not in a bogus range. */
+ return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
+}
+
/*
* Special character to string array.
*/
diff --git a/html.c b/html.c
index a50d2eb0..fd696284 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.143 2011/05/17 11:38:18 kristaps Exp $ */
+/* $Id: html.c,v 1.144 2011/05/17 11:50:20 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -315,6 +315,8 @@ html_strlen(const char *cp)
switch (mandoc_escape(&cp, &seq, &ssz)) {
case (ESCAPE_ERROR):
return(sz);
+ case (ESCAPE_UNICODE):
+ /* FALLTHROUGH */
case (ESCAPE_NUMBERED):
/* FALLTHROUGH */
case (ESCAPE_PREDEF):
@@ -373,6 +375,12 @@ print_encode(struct html *h, const char *p, int norecurse)
break;
switch (esc) {
+ case (ESCAPE_UNICODE):
+ /* Skip past "u" header. */
+ c = mchars_num2uc(seq + 1, len - 1);
+ if ('\0' != c)
+ printf("&#x%x;", c);
+ break;
case (ESCAPE_NUMBERED):
c = mchars_num2char(seq, len);
if ('\0' != c)
diff --git a/mandoc.3 b/mandoc.3
index 66148c4d..0521e391 100644
--- a/mandoc.3
+++ b/mandoc.3
@@ -1,4 +1,4 @@
-.\" $Id: mandoc.3,v 1.6 2011/05/01 10:40:52 kristaps Exp $
+.\" $Id: mandoc.3,v 1.7 2011/05/17 11:50:20 kristaps Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 1 2011 $
+.Dd $Mdocdate: May 17 2011 $
.Dt MANDOC 3
.Os
.Sh NAME
@@ -26,6 +26,7 @@
.Nm mchars_alloc ,
.Nm mchars_free ,
.Nm mchars_num2char ,
+.Nm mchars_num2uc ,
.Nm mchars_res2cp ,
.Nm mchars_res2str ,
.Nm mchars_spec2cp ,
@@ -64,6 +65,8 @@
.Fn mchars_free "struct mchars *p"
.Ft char
.Fn mchars_num2char "const char *cp" "size_t sz"
+.Ft int
+.Fn mchars_num2uc "const char *cp" "size_t sz"
.Ft "const char *"
.Fo mchars_res2str
.Fa "struct mchars *p"
@@ -188,6 +191,9 @@ library also contains routines for translating character strings into glyphs
.Pq see Fn mchars_alloc
and parsing escape sequences from strings
.Pq see Fn mandoc_escape .
+.Pp
+This library is
+.Ud
.Sh REFERENCE
This section documents the functions, types, and variables available
via
@@ -247,8 +253,12 @@ The object must be freed with
Free an object created with
.Fn mchars_alloc .
.It Fn mchars_num2char
-Convert a character index as found in \eN\(aq\(aq into a printable
-character.
+Convert a character index (e.g., the \eN\(aq\(aq escape) into a
+printable ASCII character.
+Returns \e0 (the nil character) if the input sequence is malformed.
+.It Fn mchars_num2uc
+Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
+a Unicode codepoint.
Returns \e0 (the nil character) if the input sequence is malformed.
.It Fn mchars_res2cp
Convert a predefined character into a valid Unicode codepoint.
diff --git a/mandoc.h b/mandoc.h
index db7b30bf..5f01644c 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.75 2011/05/15 15:30:33 kristaps Exp $ */
+/* $Id: mandoc.h,v 1.76 2011/05/17 11:50:20 kristaps Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -330,6 +330,7 @@ enum mandoc_esc mandoc_escape(const char **, const char **, int *);
struct mchars *mchars_alloc(void);
char mchars_num2char(const char *, size_t);
+int mchars_num2uc(const char *, size_t);
const char *mchars_spec2str(struct mchars *, const char *, size_t, size_t *);
int mchars_spec2cp(struct mchars *, const char *, size_t);
const char *mchars_res2str(struct mchars *, const char *, size_t, size_t *);