VERSION = 1.11.2
VDATE = 12 May 2011
-CFLAGS += -g -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\""
+# If your system doesn't support multi-byte functions (specifically
+# setlocale(), wcwidth(), putwchar()), then remove -DUSE_WCHAR. You'll
+# still be able to use -Tlocale, but it becomes a synonym for -Tascii.
+CFLAGS += -g -DUSE_WCHAR -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\""
CFLAGS += -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
-.\" $Id: mandoc.1,v 1.86 2011/05/17 12:22:15 kristaps Exp $
+.\" $Id: mandoc.1,v 1.87 2011/05/17 22:32:45 kristaps Exp $
.\"
.\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
.Fl T
arguments, which correspond to output modes:
.Bl -tag -width Ds
+.It Fl T Ns Cm locale
+This option encodes output characters using the current
+.Xr locale 1
+configuration.
+See
+.Sx Locale Output .
.It Fl T Ns Cm ascii
Produce 7-bit ASCII output.
This is the default.
.Pp
If multiple input files are specified, these will be processed by the
corresponding filter in-order.
+.Ss Locale Output
+Locale-dependent output encoding is triggered with
+.Fl T Ns Cm locale .
+This option is not available on all systems: systems without locale
+support, or those whose internal representation is not natively UCS-4,
+will fall back to
+.Fl T Ns Cm ascii .
+See
+.Sx ASCII Output
+for font style specification and available command-line arguments.
.Ss ASCII Output
Output produced by
.Fl T Ns Cm ascii ,
The special characters documented in
.Xr mandoc_char 7
are rendered best-effort in an ASCII equivalent.
+If no equivalent is found,
+.Sq \&?
+is used instead.
.Pp
Output width is limited to 78 visible columns unless literal input lines
exceed this limit.
.Ss ASCII Compatibility
.Bl -bullet -compact
.It
-Unicode codepoints specified with
+Unrenderable Unicode codepoints specified with
.Sq \e[uNNNN]
escapes are printed as
.Sq \&?
-/* $Id: term.c,v 1.193 2011/05/17 14:38:34 kristaps Exp $ */
+/* $Id: term.c,v 1.194 2011/05/17 22:32:45 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
static void adjbuf(struct termp *p, int);
static void bufferc(struct termp *, char);
static void encode(struct termp *, const char *, size_t);
+static void encode1(struct termp *, int);
void
term_free(struct termp *p)
{
const char *seq, *cp;
char c;
- int sz;
+ int sz, uc;
size_t ssz;
enum mandoc_esc esc;
switch (esc) {
case (ESCAPE_UNICODE):
- encode(p, "?", 1);
+ if (TERMENC_ASCII == p->enc) {
+ encode1(p, '?');
+ break;
+ }
+ uc = mchars_num2uc(seq + 1, sz - 1);
+ if ('\0' != uc)
+ encode1(p, uc);
break;
case (ESCAPE_NUMBERED):
if ('\0' != (c = mchars_num2char(seq, sz)))
p->buf[p->col++] = c;
}
+/*
+ * See encode().
+ * Do this for a single (probably unicode) value.
+ * Does not check for non-decorated glyphs.
+ *
+ * With no font set (TERMFONT_NONE) the value is stored once in p->buf.
+ * Otherwise three cells are stored: '_' for TERMFONT_UNDER or the value
+ * itself for any other font, then a backspace (8), then the value.
+ * p->col is advanced past every cell written.
+ */
+static void
+encode1(struct termp *p, int c)
+{
+ enum termfont f;
+
+ if (p->col + 4 >= p->maxcols) /* at most three cells are written below */
+ adjbuf(p, p->col + 4); /* presumably grows p->buf; adjbuf() body not visible here */
+
+ f = term_fonttop(p);
+
+ if (TERMFONT_NONE == f) {
+ p->buf[p->col++] = c; /* undecorated: store the value alone */
+ return;
+ } else if (TERMFONT_UNDER == f) {
+ p->buf[p->col++] = '_'; /* sequence becomes: '_' 8 c */
+ } else
+ p->buf[p->col++] = c; /* sequence becomes: c 8 c */
+
+ p->buf[p->col++] = 8; /* ASCII backspace */
+ p->buf[p->col++] = c;
+}
+
static void
encode(struct termp *p, const char *word, size_t sz)
{
case (ESCAPE_ERROR):
return(sz);
case (ESCAPE_UNICODE):
- c = '?';
- /* FALLTHROUGH */
- case (ESCAPE_NUMBERED):
+ /*
+ * Width must agree with what gets printed: in ASCII mode a
+ * unicode escape is emitted as '?', so measure '?' here.
+ * Any other encoding falls through and measures the decoded
+ * codepoint instead.
+ */
+ if (TERMENC_ASCII == p->enc) {
+ sz += (*p->width)(p, '?');
+ break;
+ }
+ c = mchars_num2uc(seq + 1, ssz - 1);
if ('\0' != c)
- c = mchars_num2char(seq, ssz);
+ sz += (*p->width)(p, c);
+ break;
+ case (ESCAPE_NUMBERED):
+ c = mchars_num2char(seq, ssz);
if ('\0' != c)
sz += (*p->width)(p, c);
break;
-/* $Id: term_ascii.c,v 1.14 2011/05/17 14:38:34 kristaps Exp $ */
+/* $Id: term_ascii.c,v 1.15 2011/05/17 22:32:45 kristaps Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
#include <sys/types.h>
#include <assert.h>
+#ifdef USE_WCHAR
+# include <locale.h>
+#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#ifdef USE_WCHAR
+# include <wchar.h>
+#endif
#include "mandoc.h"
#include "out.h"
#include "term.h"
#include "main.h"
+#if ! defined(__STDC_ISO_10646__)
+# undef USE_WCHAR
+#endif
+
static struct termp *ascii_init(enum termenc, char *);
static double ascii_hspan(const struct termp *,
const struct roffsu *);
static void ascii_endline(struct termp *);
static void ascii_letter(struct termp *, int);
+#ifdef USE_WCHAR
+static void locale_advance(struct termp *, size_t);
+static void locale_endline(struct termp *);
+static void locale_letter(struct termp *, int);
+static size_t locale_width(const struct termp *, int);
+#endif
+
static struct termp *
ascii_init(enum termenc enc, char *outopts)
{
p->tabwidth = 5;
p->defrmargin = 78;
- p->advance = ascii_advance;
p->begin = ascii_begin;
p->end = ascii_end;
- p->endline = ascii_endline;
p->hspan = ascii_hspan;
- p->letter = ascii_letter;
p->type = TERMTYPE_CHAR;
+
+ p->enc = TERMENC_ASCII;
+ p->advance = ascii_advance;
+ p->endline = ascii_endline;
+ p->letter = ascii_letter;
p->width = ascii_width;
+#if defined (USE_WCHAR)
+ if (TERMENC_LOCALE == enc)
+ if (setlocale(LC_ALL, "") && MB_CUR_MAX > 1) {
+ p->enc = enc;
+ p->advance = locale_advance;
+ p->endline = locale_endline;
+ p->letter = locale_letter;
+ p->width = locale_width;
+ }
+#endif
+
toks[0] = "width";
toks[1] = NULL;
return(1);
}
-
/*
 * Release an output handle.
 * arg is interpreted as a struct termp pointer and passed to term_free().
 */
void
ascii_free(void *arg)
{
	struct termp	*p;

	p = arg;
	term_free(p);
}
-
/* ARGSUSED */
static void
ascii_letter(struct termp *p, int c)
{
- /* LINTED */
putchar(c); /* write c to stdout; p is not used */
}
-
/*
 * Call the headf callback stored in the handle, giving it the handle
 * itself and the handle's argf value.
 */
static void
ascii_begin(struct termp *p)
{
	p->headf(p, p->argf);
}
-
/*
 * Call the footf callback stored in the handle, giving it the handle
 * itself and the handle's argf value.
 */
static void
ascii_end(struct termp *p)
{
	p->footf(p, p->argf);
}
-
/* ARGSUSED */
static void
ascii_endline(struct termp *p)
putchar('\n');
}
-
/* ARGSUSED */
static void
ascii_advance(struct termp *p, size_t len)
{
size_t i;
- /* Just print whitespace on the terminal. */
for (i = 0; i < len; i++) /* write len space characters to stdout; p is not used */
putchar(' ');
}
-
/* ARGSUSED */
static double
ascii_hspan(const struct termp *p, const struct roffsu *su)
return(r);
}
+#ifdef USE_WCHAR
+/* ARGSUSED */
+static size_t
+locale_width(const struct termp *p, int c)
+{
+ int rc; /* raw wcwidth() result for c; p is not used */
+
+ return((rc = wcwidth(c)) < 0 ? 0 : rc); /* a negative wcwidth() result is reported as width 0 */
+}
+
+/* ARGSUSED */
+static void
+locale_advance(struct termp *p, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) /* write len wide spaces to stdout; p is not used */
+ putwchar(L' ');
+}
+
+/* ARGSUSED */
+static void
+locale_endline(struct termp *p)
+{
+
+ putwchar(L'\n'); /* end the line with a wide newline on stdout; p is not used */
+}
+
+/* ARGSUSED */
+static void
+locale_letter(struct termp *p, int c)
+{
+
+ putwchar(c); /* write c to stdout as a wide character; p is not used */
+}
+#endif