aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/term_ascii.c
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-05-17 22:32:45 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-05-17 22:32:45 +0000
commit9fbd9ce5cadeb91ed28f18559e80d0bb5a2e1d54 (patch)
tree29d663369951c30cefc0d594ac559ef72b0bf666 /term_ascii.c
parent0587ad80d46d89f36315c37bbd67cf8899708b8d (diff)
downloadmandoc-9fbd9ce5cadeb91ed28f18559e80d0bb5a2e1d54.tar.gz
mandoc-9fbd9ce5cadeb91ed28f18559e80d0bb5a2e1d54.tar.zst
mandoc-9fbd9ce5cadeb91ed28f18559e80d0bb5a2e1d54.zip
Locale support. I'm checking this in to clean up fall-out in-tree, but
it looks pretty good. Basically, the -Tlocale option propagates into term_ascii.c, where we set locale-specific console call-backs IFF (1) setlocale() works; (2) locale support is compiled in (see Makefile for -DUSE_WCHAR); (3) the internal structure of wchar_t maps directly to Unicode codepoints as defined by __STDC_ISO_10646__; and (4) the console supports multi-byte characters. To date, this configuration only supports GNU/Linux. OpenBSD doesn't export __STDC_ISO_10646__ although I'm told by stsp@openbsd.org that it should (it has the correct map). Apparently FreeBSD is the same way. NetBSD? Don't know. Apple also supports this, but doesn't define the macro. Special-casing! Benchmark: -Tlocale incurs less than a 0.2 factor of overhead when run through several thousand manuals when UTF8 output is enabled. Native mode (whether directly -Tascii or through no locale or whatever) is UNCHANGED: the function callbacks are the same as before. Note: if the underlying system does NOT support __STDC_ISO_10646__, there is a "slow" version possible with iconv or other means of flipping from a Unicode codepoint to a wchar_t.
Diffstat (limited to 'term_ascii.c')
-rw-r--r--term_ascii.c83
1 files changed, 70 insertions, 13 deletions
diff --git a/term_ascii.c b/term_ascii.c
index 5b2ee847..7619907e 100644
--- a/term_ascii.c
+++ b/term_ascii.c
@@ -1,4 +1,4 @@
-/* $Id: term_ascii.c,v 1.14 2011/05/17 14:38:34 kristaps Exp $ */
+/* $Id: term_ascii.c,v 1.15 2011/05/17 22:32:45 kristaps Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -21,16 +21,26 @@
#include <sys/types.h>
#include <assert.h>
+#ifdef USE_WCHAR
+# include <locale.h>
+#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#ifdef USE_WCHAR
+# include <wchar.h>
+#endif
#include "mandoc.h"
#include "out.h"
#include "term.h"
#include "main.h"
+#if ! defined(__STDC_ISO_10646__)
+# undef USE_WCHAR
+#endif
+
static struct termp *ascii_init(enum termenc, char *);
static double ascii_hspan(const struct termp *,
const struct roffsu *);
@@ -41,6 +51,13 @@ static void ascii_end(struct termp *);
static void ascii_endline(struct termp *);
static void ascii_letter(struct termp *, int);
+#ifdef USE_WCHAR
+static void locale_advance(struct termp *, size_t);
+static void locale_endline(struct termp *);
+static void locale_letter(struct termp *, int);
+static size_t locale_width(const struct termp *, int);
+#endif
+
static struct termp *
ascii_init(enum termenc enc, char *outopts)
{
@@ -54,15 +71,28 @@ ascii_init(enum termenc enc, char *outopts)
p->tabwidth = 5;
p->defrmargin = 78;
- p->advance = ascii_advance;
p->begin = ascii_begin;
p->end = ascii_end;
- p->endline = ascii_endline;
p->hspan = ascii_hspan;
- p->letter = ascii_letter;
p->type = TERMTYPE_CHAR;
+
+ p->enc = TERMENC_ASCII;
+ p->advance = ascii_advance;
+ p->endline = ascii_endline;
+ p->letter = ascii_letter;
p->width = ascii_width;
+#if defined (USE_WCHAR)
+ if (TERMENC_LOCALE == enc)
+ if (setlocale(LC_ALL, "") && MB_CUR_MAX > 1) {
+ p->enc = enc;
+ p->advance = locale_advance;
+ p->endline = locale_endline;
+ p->letter = locale_letter;
+ p->width = locale_width;
+ }
+#endif
+
toks[0] = "width";
toks[1] = NULL;
@@ -104,7 +134,6 @@ ascii_width(const struct termp *p, int c)
return(1);
}
-
void
ascii_free(void *arg)
{
@@ -112,17 +141,14 @@ ascii_free(void *arg)
term_free((struct termp *)arg);
}
-
/* ARGSUSED */
static void
ascii_letter(struct termp *p, int c)
{
- /* LINTED */
putchar(c);
}
-
static void
ascii_begin(struct termp *p)
{
@@ -130,7 +156,6 @@ ascii_begin(struct termp *p)
(*p->headf)(p, p->argf);
}
-
static void
ascii_end(struct termp *p)
{
@@ -138,7 +163,6 @@ ascii_end(struct termp *p)
(*p->footf)(p, p->argf);
}
-
/* ARGSUSED */
static void
ascii_endline(struct termp *p)
@@ -147,19 +171,16 @@ ascii_endline(struct termp *p)
putchar('\n');
}
-
/* ARGSUSED */
static void
ascii_advance(struct termp *p, size_t len)
{
size_t i;
- /* Just print whitespace on the terminal. */
for (i = 0; i < len; i++)
putchar(' ');
}
-
/* ARGSUSED */
static double
ascii_hspan(const struct termp *p, const struct roffsu *su)
@@ -198,3 +219,39 @@ ascii_hspan(const struct termp *p, const struct roffsu *su)
return(r);
}
+#ifdef USE_WCHAR
+/* ARGSUSED */
+static size_t
+locale_width(const struct termp *p, int c)
+{
+ int rc;
+
+ return((rc = wcwidth(c)) < 0 ? 0 : rc);
+}
+
+/* ARGSUSED */
+static void
+locale_advance(struct termp *p, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ putwchar(L' ');
+}
+
+/* ARGSUSED */
+static void
+locale_endline(struct termp *p)
+{
+
+ putwchar(L'\n');
+}
+
+/* ARGSUSED */
+static void
+locale_letter(struct termp *p, int c)
+{
+
+ putwchar(c);
+}
+#endif