about summary refs log tree commit diff stats homepage
diff options
context:
space:
mode:
author Kristaps Dzonsons <kristaps@bsd.lv> 2010-05-25 12:37:20 +0000
committer Kristaps Dzonsons <kristaps@bsd.lv> 2010-05-25 12:37:20 +0000
commit 5909cbca3fe375aeac1e2247b3f8edbaba32e56e (patch)
tree 22379e2aa842475f26d517eadfe52749991a4340
parent 2aefdbffeeeeda1b600266dae034d7133292ad46 (diff)
download mandoc-5909cbca3fe375aeac1e2247b3f8edbaba32e56e.tar.gz
mandoc-5909cbca3fe375aeac1e2247b3f8edbaba32e56e.tar.zst
mandoc-5909cbca3fe375aeac1e2247b3f8edbaba32e56e.zip
Modified version of Ingo Schwarze's patch for hyphen-breaking.
Breakable hyphens are cued in the back-ends (with ASCII_HYPH) and acted upon in term.c or ignored in html.c. Also cleaned up XML decl printing (no need for extra vars).
-rw-r--r-- chars.c 3
-rw-r--r-- chars.h 4
-rw-r--r-- html.c 31
-rw-r--r-- libmandoc.h 3
-rw-r--r-- mandoc.c 30
-rw-r--r-- mandoc.h 6
-rw-r--r-- mdoc.c 6
-rw-r--r-- term.c 25
8 files changed, 77 insertions, 31 deletions
diff --git a/chars.c b/chars.c
index 461ac067..3129aae3 100644
--- a/chars.c
+++ b/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.17 2010/03/23 13:25:01 kristaps Exp $ */
+/* $Id: chars.c,v 1.18 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <string.h>
+#include "mandoc.h"
#include "chars.h"
#define PRINT_HI 126
diff --git a/chars.h b/chars.h
index 1205bc76..81097027 100644
--- a/chars.h
+++ b/chars.h
@@ -1,4 +1,4 @@
-/* $Id: chars.h,v 1.2 2010/05/16 01:35:37 schwarze Exp $ */
+/* $Id: chars.h,v 1.3 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -17,8 +17,6 @@
#ifndef CHARS_H
#define CHARS_H
-#define ASCII_NBRSP 31 /* non-breaking space */
-
__BEGIN_DECLS
enum chars {
diff --git a/html.c b/html.c
index 0ad84e4f..d05e4b1d 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.99 2010/04/12 19:45:39 kristaps Exp $ */
+/* $Id: html.c,v 1.100 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -29,6 +29,7 @@
#include <string.h>
#include <unistd.h>
+#include "mandoc.h"
#include "out.h"
#include "chars.h"
#include "html.h"
@@ -296,11 +297,12 @@ print_encode(struct html *h, const char *p, int norecurse)
int len, nospace;
const char *seq;
enum roffdeco deco;
+ static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
nospace = 0;
for (; *p; p++) {
- sz = strcspn(p, "\\<>&");
+ sz = strcspn(p, rejs);
fwrite(p, 1, sz, stdout);
p += /* LINTED */
@@ -315,6 +317,15 @@ print_encode(struct html *h, const char *p, int norecurse)
} else if ('&' == *p) {
printf("&amp;");
continue;
+ } else if (ASCII_HYPH == *p) {
+ /*
+ * Note: "soft hyphens" aren't graphically
+ * displayed when not breaking the text; we want
+ * them to be displayed.
+ */
+ /*printf("&#173;");*/
+ putchar('-');
+ continue;
} else if ('\0' == *p)
break;
@@ -443,21 +454,9 @@ print_gen_decls(struct html *h)
+/*
+ * Print the XML declaration on standard output, but only when the
+ * requested output type (h->type) is HTML_XHTML_1_0_STRICT; for any
+ * other type nothing is printed.
+ */
static void
print_xmltype(struct html *h)
{
- const char *decl;
-
- switch (h->type) {
- case (HTML_XHTML_1_0_STRICT):
- decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
- break;
- default:
- decl = NULL;
- break;
- }
-
- if (NULL == decl)
- return;
- printf("%s\n", decl);
+ /*
+ * Keep the trailing newline: the removed code printed the
+ * declaration with "%s\n", so dropping it would silently glue
+ * the declaration to whatever is printed next.
+ */
+ if (HTML_XHTML_1_0_STRICT == h->type)
+ printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
}
diff --git a/libmandoc.h b/libmandoc.h
index 7d31c84f..bdc119d4 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmandoc.h,v 1.5 2010/05/12 17:08:03 kristaps Exp $ */
+/* $Id: libmandoc.h,v 1.6 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -30,6 +30,7 @@ time_t mandoc_a2time(int, const char *);
#define MTIME_MDOCDATE (1 << 2)
#define MTIME_ISO_8601 (1 << 3)
int mandoc_eos(const char *, size_t);
+int mandoc_hyph(const char *, const char *);
__END_DECLS
diff --git a/mandoc.c b/mandoc.c
index 3ade0787..f1ac7c9a 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,4 +1,4 @@
-/* $Id: mandoc.c,v 1.15 2010/05/15 07:01:51 kristaps Exp $ */
+/* $Id: mandoc.c,v 1.16 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -340,3 +340,31 @@ mandoc_eos(const char *p, size_t sz)
return(0);
}
+
+/*
+ * Decide whether the character at `c' may be treated as a breakable
+ * hyphen.  `start' is the first character of the buffer that `c'
+ * points into; the buffer must be NUL-terminated, since *(c + 1) is
+ * inspected.  *(c - 1) is only read after `c != start' is established.
+ * The function does not itself verify that *c is a '-'; the caller is
+ * expected to have done so.
+ *
+ * Returns 1 if the position qualifies, 0 otherwise.
+ */
+int
+mandoc_hyph(const char *start, const char *c)
+{
+
+ /*
+ * Choose whether to break at a hyphenated character. We only
+ * do this if it's free-standing within a word.
+ */
+
+ /* Skip first/last character of buffer. */
+ if (c == start || '\0' == *(c + 1))
+ return(0);
+ /* Skip first/last character of word. */
+ if ('\t' == *(c + 1) || '\t' == *(c - 1))
+ return(0);
+ if (' ' == *(c + 1) || ' ' == *(c - 1))
+ return(0);
+ /* Skip double invocations. */
+ if ('-' == *(c + 1) || '-' == *(c - 1))
+ return(0);
+ /* Skip escapes. */
+ if ('\\' == *(c - 1))
+ return(0);
+
+ return(1);
+}
diff --git a/mandoc.h b/mandoc.h
index e8c4dd60..f6203c5b 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.6 2010/05/24 01:41:31 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.7 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -17,6 +17,10 @@
#ifndef MANDOC_H
#define MANDOC_H
+#define ASCII_NBRSP 31 /* non-breaking space */
+#define ASCII_HYPH 30 /* breakable hyphen */
+
+
__BEGIN_DECLS
enum mandocerr {
diff --git a/mdoc.c b/mdoc.c
index 19920954..e67462e0 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc.c,v 1.137 2010/05/24 13:39:47 schwarze Exp $ */
+/* $Id: mdoc.c,v 1.138 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -563,6 +563,10 @@ mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
ws = NULL;
for (c = end = buf + offs; *c; c++) {
switch (*c) {
+ case '-':
+ if (mandoc_hyph(buf + offs, c))
+ *c = ASCII_HYPH;
+ break;
case ' ':
if (NULL == ws)
ws = c;
diff --git a/term.c b/term.c
index b4beb633..02f69d9f 100644
--- a/term.c
+++ b/term.c
@@ -1,4 +1,4 @@
-/* $Id: term.c,v 1.139 2010/05/24 21:51:20 schwarze Exp $ */
+/* $Id: term.c,v 1.140 2010/05/25 12:37:20 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -138,6 +138,7 @@ term_flushln(struct termp *p)
size_t vend; /* end of word visual position on output */
size_t bp; /* visual right border position */
int j; /* temporary loop index */
+ int jhy; /* last hyphen before line overflow */
size_t maxvis, mmax;
/*
@@ -190,20 +191,23 @@ term_flushln(struct termp *p)
*/
/* LINTED */
- for ( ; j < (int)p->col; j++) {
+ for (jhy = 0; j < (int)p->col; j++) {
if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
break;
- if (8 == p->buf[j])
- vend--;
- else
+ if (8 != p->buf[j]) {
+ if (vend > vis && vend < bp &&
+ ASCII_HYPH == p->buf[j])
+ jhy = j;
vend++;
+ } else
+ vend--;
}
/*
* Find out whether we would exceed the right margin.
* If so, break to the next line.
*/
- if (vend > bp && vis > 0) {
+ if (vend > bp && 0 == jhy && vis > 0) {
vend -= vis;
putchar('\n');
if (TERMP_NOBREAK & p->flags) {
@@ -231,6 +235,8 @@ term_flushln(struct termp *p)
/* Write out the [remaining] word. */
for ( ; i < (int)p->col; i++) {
+ if (vend > bp && jhy > 0 && i > jhy)
+ break;
if ('\t' == p->buf[i])
break;
if (' ' == p->buf[i]) {
@@ -256,7 +262,12 @@ term_flushln(struct termp *p)
p->viscol += vbl;
vbl = 0;
}
- putchar(p->buf[i]);
+
+ if (ASCII_HYPH == p->buf[i])
+ putchar('-');
+ else
+ putchar(p->buf[i]);
+
p->viscol += 1;
}
vend += vbl;