aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2014-10-29 00:17:43 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2014-10-29 00:17:43 +0000
commit ada69bce757bdb8fff8d778e0660446c440c7613 (patch)
tree c855535ac67e0f62353307e69e90b8952d8ecad3
parent 3e8d538cdcbed1408b260186b980e5fa4eeb81af (diff)
download mandoc-ada69bce757bdb8fff8d778e0660446c440c7613.tar.gz
mandoc-ada69bce757bdb8fff8d778e0660446c440c7613.tar.zst
mandoc-ada69bce757bdb8fff8d778e0660446c440c7613.zip
In terminal output, unify the handling of Unicode and numbered character
escape sequences, just as was done earlier for -Thtml. Do not let control characters other than ASCII 9 (horizontal tab) propagate to the output: groff allows them, but that really doesn't look like a great idea. Let mchars_num2char() return int so that we can distinguish invalid \N syntax from \N'0'. This also reduces the danger of signed char issues popping up.
-rw-r--r-- chars.c 10
-rw-r--r-- html.c 11
-rw-r--r-- mandoc.h 4
-rw-r--r-- term.c 104
4 files changed, 78 insertions, 51 deletions
diff --git a/chars.c b/chars.c
index 237fbe25..fe0b17de 100644
--- a/chars.c
+++ b/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.64 2014/10/28 17:36:19 schwarze Exp $ */
+/* $Id: chars.c,v 1.65 2014/10/29 00:17:43 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -107,15 +107,13 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
return(ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1);
}
-char
+int
mchars_num2char(const char *p, size_t sz)
{
int i;
- if ((i = mandoc_strntoi(p, sz, 10)) < 0)
- return('\0');
-
- return(i > 0 && i < 256 && isprint(i) ? i : '\0');
+ i = mandoc_strntoi(p, sz, 10);
+ return(i >= 0 && i < 256 ? i : -1);
}
int
diff --git a/html.c b/html.c
index 912c006d..050fefe6 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.180 2014/10/28 17:36:19 schwarze Exp $ */
+/* $Id: html.c,v 1.181 2014/10/29 00:17:43 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -422,9 +422,13 @@ print_encode(struct html *h, const char *p, int norecurse)
break;
case ESCAPE_NUMBERED:
c = mchars_num2char(seq, len);
+ if (c < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
c = mchars_spec2cp(h->symtab, seq, len);
+ if (c <= 0)
+ continue;
break;
case ESCAPE_NOSPACE:
if ('\0' == *p)
@@ -433,9 +437,8 @@ print_encode(struct html *h, const char *p, int norecurse)
default:
continue;
}
- if (c <= 0)
- continue;
- if (c < 0x20 || (c > 0x7E && c < 0xA0))
+ if ((c < 0x20 && c != 0x09) ||
+ (c > 0x7E && c < 0xA0))
c = 0xFFFD;
if (c > 0x7E)
printf("&#%d;", c);
diff --git a/mandoc.h b/mandoc.h
index 329b59c0..f3dff946 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.167 2014/10/28 17:36:19 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.168 2014/10/29 00:17:43 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -425,7 +425,7 @@ __BEGIN_DECLS
enum mandoc_esc mandoc_escape(const char **, const char **, int *);
struct mchars *mchars_alloc(void);
void mchars_free(struct mchars *);
-char mchars_num2char(const char *, size_t);
+int mchars_num2char(const char *, size_t);
const char *mchars_uc2str(int);
int mchars_num2uc(const char *, size_t);
int mchars_spec2cp(const struct mchars *,
diff --git a/term.c b/term.c
index 6cac5ab5..d51243b1 100644
--- a/term.c
+++ b/term.c
@@ -1,4 +1,4 @@
-/* $Id: term.c,v 1.232 2014/10/28 18:49:33 schwarze Exp $ */
+/* $Id: term.c,v 1.233 2014/10/29 00:17:43 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -391,7 +391,6 @@ term_word(struct termp *p, const char *word)
{
const char nbrsp[2] = { ASCII_NBRSP, 0 };
const char *seq, *cp;
- char c;
int sz, uc;
size_t ssz;
enum mandoc_esc esc;
@@ -443,16 +442,11 @@ term_word(struct termp *p, const char *word)
switch (esc) {
case ESCAPE_UNICODE:
uc = mchars_num2uc(seq + 1, sz - 1);
- if (p->enc == TERMENC_ASCII) {
- cp = ascii_uc2str(uc);
- encode(p, cp, strlen(cp));
- } else
- encode1(p, uc);
break;
case ESCAPE_NUMBERED:
- c = mchars_num2char(seq, sz);
- if ('\0' != c)
- encode(p, &c, 1);
+ uc = mchars_num2char(seq, sz);
+ if (uc < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
if (p->enc == TERMENC_ASCII) {
@@ -465,35 +459,50 @@ term_word(struct termp *p, const char *word)
if (uc > 0)
encode1(p, uc);
}
- break;
+ continue;
case ESCAPE_FONTBOLD:
term_fontrepl(p, TERMFONT_BOLD);
- break;
+ continue;
case ESCAPE_FONTITALIC:
term_fontrepl(p, TERMFONT_UNDER);
- break;
+ continue;
case ESCAPE_FONTBI:
term_fontrepl(p, TERMFONT_BI);
- break;
+ continue;
case ESCAPE_FONT:
/* FALLTHROUGH */
case ESCAPE_FONTROMAN:
term_fontrepl(p, TERMFONT_NONE);
- break;
+ continue;
case ESCAPE_FONTPREV:
term_fontlast(p);
- break;
+ continue;
case ESCAPE_NOSPACE:
if (TERMP_SKIPCHAR & p->flags)
p->flags &= ~TERMP_SKIPCHAR;
else if ('\0' == *word)
p->flags |= TERMP_NOSPACE;
- break;
+ continue;
case ESCAPE_SKIPCHAR:
p->flags |= TERMP_SKIPCHAR;
- break;
+ continue;
default:
- break;
+ continue;
+ }
+
+ /*
+ * Common handling for Unicode and numbered
+ * character escape sequences.
+ */
+
+ if (p->enc == TERMENC_ASCII) {
+ cp = ascii_uc2str(uc);
+ encode(p, cp, strlen(cp));
+ } else {
+ if ((uc < 0x20 && uc != 0x09) ||
+ (uc > 0x7E && uc < 0xA0))
+ uc = 0xFFFD;
+ encode1(p, uc);
}
}
p->flags &= ~TERMP_NBRWORD;
@@ -645,7 +654,7 @@ size_t
term_strlen(const struct termp *p, const char *cp)
{
size_t sz, rsz, i;
- int ssz, skip, c;
+ int ssz, skip, uc;
const char *seq, *rhs;
enum mandoc_esc esc;
static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
@@ -675,44 +684,61 @@ term_strlen(const struct termp *p, const char *cp)
switch (esc) {
case ESCAPE_UNICODE:
- c = mchars_num2uc(seq + 1, sz - 1);
- if (p->enc == TERMENC_ASCII) {
- rhs = ascii_uc2str(c);
- rsz = strlen(rhs);
- } else
- sz += cond_width(p, c, &skip);
+ uc = mchars_num2uc(seq + 1, sz - 1);
break;
case ESCAPE_NUMBERED:
- c = mchars_num2char(seq, ssz);
- if ('\0' != c)
- sz += cond_width(p, c, &skip);
+ uc = mchars_num2char(seq, ssz);
+ if (uc < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
- if (p->enc == TERMENC_ASCII)
+ if (p->enc == TERMENC_ASCII) {
rhs = mchars_spec2str(p->symtab,
seq, ssz, &rsz);
- else {
- c = mchars_spec2cp(p->symtab,
+ if (rhs != NULL)
+ break;
+ } else {
+ uc = mchars_spec2cp(p->symtab,
seq, ssz);
- if (c > 0)
- sz += cond_width(p, c, &skip);
+ if (uc > 0)
+ sz += cond_width(p, uc, &skip);
}
- break;
+ continue;
case ESCAPE_SKIPCHAR:
skip = 1;
- break;
+ continue;
default:
- break;
+ continue;
}
- if (NULL == rhs)
- break;
+ /*
+ * Common handling for Unicode and numbered
+ * character escape sequences.
+ */
+
+ if (rhs == NULL) {
+ if (p->enc == TERMENC_ASCII) {
+ rhs = ascii_uc2str(uc);
+ rsz = strlen(rhs);
+ } else {
+ if ((uc < 0x20 && uc != 0x09) ||
+ (uc > 0x7E && uc < 0xA0))
+ uc = 0xFFFD;
+ sz += cond_width(p, uc, &skip);
+ continue;
+ }
+ }
if (skip) {
skip = 0;
break;
}
+ /*
+ * Common handling for all escape sequences
+ * printing more than one character.
+ */
+
for (i = 0; i < rsz; i++)
sz += (*p->width)(p, *rhs++);
break;