]>
git.cameronkatri.com Git - mandoc.git/blob - mdocterm.c
1 /* $Id: mdocterm.c,v 1.33 2009/03/05 13:12:12 kristaps Exp $ */
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
19 #include <sys/utsname.h>
40 static void body(struct termp
*,
42 const struct mdoc_meta
*,
43 const struct mdoc_node
*);
44 static void header(struct termp
*,
45 const struct mdoc_meta
*);
46 static void footer(struct termp
*,
47 const struct mdoc_meta
*);
49 static void pword(struct termp
*, const char *, size_t);
50 static void pescape(struct termp
*, const char *,
52 static void nescape(struct termp
*,
53 const char *, size_t);
54 static void chara(struct termp
*, char);
55 static void stringa(struct termp
*,
56 const char *, size_t);
57 static void symbola(struct termp
*, enum tsym
);
58 static void stylea(struct termp
*, enum tstyle
);
61 extern size_t strlcat(char *, const char *, size_t);
62 extern size_t strlcpy(char *, const char *, size_t);
65 static struct termenc termenc1
[] = {
66 { "\\", TERMSYM_SLASH
},
67 { "\'", TERMSYM_RSQUOTE
},
68 { "`", TERMSYM_LSQUOTE
},
69 { "-", TERMSYM_HYPHEN
},
70 { " ", TERMSYM_SPACE
},
71 { ".", TERMSYM_PERIOD
},
72 { "&", TERMSYM_BREAK
},
73 { "e", TERMSYM_SLASH
},
74 { "q", TERMSYM_DQUOTE
},
78 static struct termenc termenc2
[] = {
79 { "rB", TERMSYM_RBRACK
},
80 { "lB", TERMSYM_LBRACK
},
81 { "ra", TERMSYM_RANGLE
},
82 { "la", TERMSYM_LANGLE
},
83 { "Lq", TERMSYM_LDQUOTE
},
84 { "lq", TERMSYM_LDQUOTE
},
85 { "Rq", TERMSYM_RDQUOTE
},
86 { "rq", TERMSYM_RDQUOTE
},
87 { "oq", TERMSYM_LSQUOTE
},
88 { "aq", TERMSYM_RSQUOTE
},
90 { "<-", TERMSYM_LARROW
},
91 { "->", TERMSYM_RARROW
},
92 { "ua", TERMSYM_UARROW
},
93 { "da", TERMSYM_DARROW
},
95 { "bu", TERMSYM_BULLET
},
96 { "Ba", TERMSYM_BAR
},
97 { "ba", TERMSYM_BAR
},
98 { "co", TERMSYM_COPY
},
99 { "Am", TERMSYM_AMP
},
101 { "Le", TERMSYM_LE
},
102 { "<=", TERMSYM_LE
},
103 { "Ge", TERMSYM_GE
},
104 { ">=", TERMSYM_GE
},
105 { "==", TERMSYM_EQ
},
106 { "Ne", TERMSYM_NEQ
},
107 { "!=", TERMSYM_NEQ
},
108 { "Pm", TERMSYM_PLUSMINUS
},
109 { "+-", TERMSYM_PLUSMINUS
},
110 { "If", TERMSYM_INF2
},
111 { "if", TERMSYM_INF
},
112 { "Na", TERMSYM_NAN
},
113 { "na", TERMSYM_NAN
},
114 { "**", TERMSYM_ASTERISK
},
115 { "Gt", TERMSYM_GT
},
116 { "Lt", TERMSYM_LT
},
118 { "aa", TERMSYM_ACUTE
},
119 { "ga", TERMSYM_GRAVE
},
121 { "en", TERMSYM_EN
},
122 { "em", TERMSYM_EM
},
124 { "Pi", TERMSYM_PI
},
128 static struct termsym termsym_ansi
[] = {
129 { "]", 1 }, /* TERMSYM_RBRACK */
130 { "[", 1 }, /* TERMSYM_LBRACK */
131 { "<-", 2 }, /* TERMSYM_LARROW */
132 { "->", 2 }, /* TERMSYM_RARROW */
133 { "^", 1 }, /* TERMSYM_UARROW */
134 { "v", 1 }, /* TERMSYM_DARROW */
135 { "`", 1 }, /* TERMSYM_LSQUOTE */
136 { "\'", 1 }, /* TERMSYM_RSQUOTE */
137 { "\'", 1 }, /* TERMSYM_SQUOTE */
138 { "``", 2 }, /* TERMSYM_LDQUOTE */
139 { "\'\'", 2 }, /* TERMSYM_RDQUOTE */
140 { "\"", 1 }, /* TERMSYM_DQUOTE */
141 { "<", 1 }, /* TERMSYM_LT */
142 { ">", 1 }, /* TERMSYM_GT */
143 { "<=", 2 }, /* TERMSYM_LE */
144 { ">=", 2 }, /* TERMSYM_GE */
145 { "==", 2 }, /* TERMSYM_EQ */
146 { "!=", 2 }, /* TERMSYM_NEQ */
147 { "\'", 1 }, /* TERMSYM_ACUTE */
148 { "`", 1 }, /* TERMSYM_GRAVE */
149 { "pi", 2 }, /* TERMSYM_PI */
150 { "+=", 2 }, /* TERMSYM_PLUSMINUS */
151 { "oo", 2 }, /* TERMSYM_INF */
152 { "infinity", 8 }, /* TERMSYM_INF2 */
153 { "NaN", 3 }, /* TERMSYM_NAN */
154 { "|", 1 }, /* TERMSYM_BAR */
155 { "o", 1 }, /* TERMSYM_BULLET */
156 { "&", 1 }, /* TERMSYM_AMP */
157 { "--", 2 }, /* TERMSYM_EM */
158 { "-", 1 }, /* TERMSYM_EN */
159 { "(C)", 3 }, /* TERMSYM_COPY */
160 { "*", 1 }, /* TERMSYM_ASTERISK */
161 { "\\", 1 }, /* TERMSYM_SLASH */
162 { "-", 1 }, /* TERMSYM_HYPHEN */
163 { " ", 1 }, /* TERMSYM_SPACE */
164 { ".", 1 }, /* TERMSYM_PERIOD */
165 { "", 0 }, /* TERMSYM_BREAK */
166 { "<", 1 }, /* TERMSYM_LANGLE */
167 { ">", 1 }, /* TERMSYM_RANGLE */
170 static const char ansi_clear
[] = { 27, '[', '0', 'm' };
171 static const char ansi_bold
[] = { 27, '[', '1', 'm' };
172 static const char ansi_under
[] = { 27, '[', '4', 'm' };
174 static struct termsym termstyle_ansi
[] = {
182 main(int argc
, char *argv
[])
185 const struct mdoc
*mdoc
;
190 if ( ! mmain_getopt(p
, argc
, argv
, NULL
, NULL
, NULL
, NULL
))
193 if (NULL
== (mdoc
= mmain_mdoc(p
)))
196 termp
.maxrmargin
= termp
.rmargin
= 78; /* XXX */
197 termp
.maxcols
= 1024;
198 termp
.offset
= termp
.col
= 0;
199 termp
.flags
= TERMP_NOSPACE
;
200 termp
.symtab
= termsym_ansi
;
201 termp
.styletab
= termstyle_ansi
;
203 if (NULL
== (termp
.buf
= malloc(termp
.maxcols
)))
206 header(&termp
, mdoc_meta(mdoc
));
207 body(&termp
, NULL
, mdoc_meta(mdoc
), mdoc_node(mdoc
));
208 footer(&termp
, mdoc_meta(mdoc
));
218 * Flush a line of text. A "line" is loosely defined as being something
219 * that should be followed by a newline, regardless of whether it's
220 * broken apart by newlines getting there. A line can also be a
221 * fragment of a columnar list.
223 * Specifically, a line is whatever's in p->buf of length p->col, which
224 * is zeroed after this function returns.
226 * The flags TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
227 * critical importance here. Their behaviour follows:
229 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
230 * offset value. This is useful when doing columnar lists where the
231 * prior column has right-padded.
233 * - TERMP_NOBREAK: this is the most important and is used when making
234 * columns. In short: don't print a newline and instead pad to the
235 * right margin. Used in conjunction with TERMP_NOLPAD.
237 * In-line line breaking:
239 * If TERMP_NOBREAK is specified and the line overruns the right
240 * margin, it will break and pad-right to the right margin after
241 * writing. If maxrmargin is violated, it will break and continue
242 * writing from the right-margin, which will lead to the above
243 * scenario upon exit.
245 * Otherwise, the line will break at the right margin. Extremely long
246 * lines will cause the system to emit a warning (TODO: hyphenate, if
250 flushln(struct termp
*p
)
252 size_t i
, j
, vsz
, vis
, maxvis
, mmax
, bp
;
255 * First, establish the maximum columns of "visible" content.
256 * This is usually the difference between the right-margin and
257 * an indentation, but can be, for tagged lists or columns, a
258 * small set of values.
261 assert(p
->offset
< p
->rmargin
);
262 maxvis
= p
->rmargin
- p
->offset
;
263 mmax
= p
->maxrmargin
- p
->offset
;
264 bp
= TERMP_NOBREAK
& p
->flags
? mmax
: maxvis
;
268 * If in the standard case (left-justified), then begin with our
269 * indentation, otherwise (columns, etc.) just start spitting
273 if ( ! (p
->flags
& TERMP_NOLPAD
))
275 for (j
= 0; j
< p
->offset
; j
++)
278 for (i
= 0; i
< p
->col
; i
++) {
280 * Count up visible word characters. Control sequences
281 * (starting with the CSI) aren't counted. A space
282 * generates a non-printing word, which is valid (the
283 * space is printed according to regular spacing rules).
286 /* FIXME: make non-ANSI friendly. */
289 for (j
= i
, vsz
= 0; j
< p
->col
; j
++) {
290 if (isspace((u_char
)p
->buf
[j
]))
292 else if (27 == p
->buf
[j
]) {
293 assert(j
+ 4 <= p
->col
);
300 * Do line-breaking. If we're greater than our
301 * break-point and already in-line, break to the next
302 * line and start writing. If we're at the line start,
303 * then write out the word (TODO: hyphenate) and break
304 * in a subsequent loop invocation.
307 if ( ! (TERMP_NOBREAK
& p
->flags
)) {
308 if (vis
&& vis
+ vsz
> bp
) {
310 for (j
= 0; j
< p
->offset
; j
++)
313 } else if (vis
+ vsz
> bp
)
314 warnx("word breaks right margin");
316 /* TODO: hyphenate. */
319 if (vis
&& vis
+ vsz
> bp
) {
321 for (j
= 0; j
< p
->rmargin
; j
++)
323 vis
= p
->rmargin
- p
->offset
;
324 } else if (vis
+ vsz
> bp
)
325 warnx("word breaks right margin");
327 /* TODO: hyphenate. */
331 * Write out the word and a trailing space. Omit the
332 * space if we're the last word in the line or beyond
336 for ( ; i
< p
->col
; i
++) {
337 if (isspace((u_char
)p
->buf
[i
]))
342 if (i
< p
->col
&& vis
<= bp
) {
349 * If we've overstepped our maximum visible no-break space, then
350 * cause a newline and offset at the right margin.
353 if ((TERMP_NOBREAK
& p
->flags
) && vis
>= maxvis
) {
354 if ( ! (TERMP_NONOBREAK
& p
->flags
)) {
356 for (i
= 0; i
< p
->rmargin
; i
++)
364 * If we're not to right-marginalise it (newline), then instead
365 * pad to the right margin and stay off.
368 if (p
->flags
& TERMP_NOBREAK
) {
369 if ( ! (TERMP_NONOBREAK
& p
->flags
))
370 for ( ; vis
< maxvis
; vis
++)
380 * A newline only breaks an existing line; it won't assert vertical
381 * space. All data in the output buffer is flushed prior to the newline
385 newln(struct termp
*p
)
388 p
->flags
|= TERMP_NOSPACE
;
390 p
->flags
&= ~TERMP_NOLPAD
;
394 p
->flags
&= ~TERMP_NOLPAD
;
399 * Asserts a vertical space (a full, empty line-break between lines).
400 * Note that if used twice, this will cause two blank lines and so on.
401 * All data in the output buffer is flushed prior to the newline
405 vspace(struct termp
*p
)
414 * Break apart a word into "pwords" (partial-words, usually from
415 * breaking up a phrase into individual words) and, eventually, put them
416 * into the output buffer. If we're a literal word, then don't break up
417 * the word and put it verbatim into the output buffer.
420 word(struct termp
*p
, const char *word
)
424 if (p
->flags
& TERMP_LITERAL
) {
425 pword(p
, word
, strlen(word
));
432 if (mdoc_isdelim(word
)) {
433 if ( ! (p
->flags
& TERMP_IGNDELIM
))
434 p
->flags
|= TERMP_NOSPACE
;
435 p
->flags
&= ~TERMP_IGNDELIM
;
439 for (j
= i
= 0; i
< len
; i
++) {
440 if ( ! isspace((u_char
)word
[i
])) {
445 /* Escaped spaces don't delimit... */
446 if (i
> 0 && isspace((u_char
)word
[i
]) &&
447 '\\' == word
[i
- 1]) {
455 pword(p
, &word
[i
- j
], j
);
460 pword(p
, &word
[i
- j
], j
);
466 * This is the main function for printing out nodes. It's constituted
467 * of PRE and POST functions, which correspond to prefix and postfix
468 * processing. The termpair structure allows data to persist between
469 * prefix and postfix invocations.
472 body(struct termp
*p
, struct termpair
*ppair
,
473 const struct mdoc_meta
*meta
,
474 const struct mdoc_node
*node
)
477 struct termpair pair
;
479 /* Pre-processing. */
484 pair
.offset
= pair
.rmargin
= 0;
488 if (MDOC_TEXT
!= node
->type
) {
489 if (termacts
[node
->tok
].pre
)
490 if ( ! (*termacts
[node
->tok
].pre
)(p
, &pair
, meta
, node
))
492 } else /* MDOC_TEXT == node->type */
493 word(p
, node
->data
.text
.string
);
497 if (TERMPAIR_FLAG
& pair
.type
)
498 p
->flags
|= pair
.flag
;
500 if (dochild
&& node
->child
)
501 body(p
, &pair
, meta
, node
->child
);
503 if (TERMPAIR_FLAG
& pair
.type
)
504 p
->flags
&= ~pair
.flag
;
506 /* Post-processing. */
508 if (MDOC_TEXT
!= node
->type
)
509 if (termacts
[node
->tok
].post
)
510 (*termacts
[node
->tok
].post
)(p
, &pair
, meta
, node
);
515 body(p
, ppair
, meta
, node
->next
);
520 footer(struct termp
*p
, const struct mdoc_meta
*meta
)
525 if (NULL
== (buf
= malloc(p
->rmargin
)))
527 if (NULL
== (os
= malloc(p
->rmargin
)))
530 tm
= localtime(&meta
->date
);
533 if (NULL
== strftime(buf
, p
->rmargin
, "%B %d, %Y", tm
))
535 if (0 == strftime(buf
, p
->rmargin
, "%B %d, %Y", tm
))
539 (void)strlcpy(os
, meta
->os
, p
->rmargin
);
542 * This is /slightly/ different from regular groff output
543 * because we don't have page numbers. Print the following:
550 p
->flags
|= TERMP_NOSPACE
| TERMP_NOBREAK
;
551 p
->rmargin
= p
->maxrmargin
- strlen(buf
);
557 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
558 p
->offset
= p
->rmargin
;
559 p
->rmargin
= p
->maxrmargin
;
560 p
->flags
&= ~TERMP_NOBREAK
;
571 header(struct termp
*p
, const struct mdoc_meta
*meta
)
573 char *buf
, *title
, *bufp
, *vbuf
;
577 p
->rmargin
= p
->maxrmargin
;
580 if (NULL
== (buf
= malloc(p
->rmargin
)))
582 if (NULL
== (title
= malloc(p
->rmargin
)))
584 if (NULL
== (vbuf
= malloc(p
->rmargin
)))
587 if (NULL
== (pp
= mdoc_vol2a(meta
->vol
))) {
588 switch (meta
->msec
) {
594 pp
= mdoc_vol2a(VOL_URM
);
597 pp
= mdoc_vol2a(VOL_SMM
);
606 pp
= mdoc_vol2a(VOL_PRM
);
609 pp
= mdoc_vol2a(VOL_KM
);
618 if (-1 == uname(&uts
))
620 (void)strlcat(vbuf
, uts
.sysname
, p
->rmargin
);
621 (void)strlcat(vbuf
, " ", p
->rmargin
);
622 } else if (NULL
== (pp
= mdoc_msec2a(meta
->msec
)))
623 pp
= mdoc_msec2a(MSEC_local
);
625 (void)strlcat(vbuf
, pp
, p
->rmargin
);
628 * The header is strange. It has three components, which are
629 * really two with the first duplicated. It goes like this:
631 * IDENTIFIER TITLE IDENTIFIER
633 * The IDENTIFIER is NAME(SECTION), which is the command-name
634 * (if given, or "unknown" if not) followed by the manual page
635 * section. These are given in `Dt'. The TITLE is a free-form
636 * string depending on the manual volume. If not specified, it
637 * switches on the manual section.
640 if (mdoc_arch2a(meta
->arch
))
641 (void)snprintf(buf
, p
->rmargin
, "%s (%s)",
642 vbuf
, mdoc_arch2a(meta
->arch
));
644 (void)strlcpy(buf
, vbuf
, p
->rmargin
);
646 pp
= mdoc_msec2a(meta
->msec
);
648 (void)snprintf(title
, p
->rmargin
, "%s(%s)",
649 meta
->title
, pp
? pp
: "");
651 for (bufp
= title
; *bufp
; bufp
++)
652 *bufp
= toupper((u_char
)*bufp
);
655 p
->rmargin
= (p
->maxrmargin
- strlen(buf
)) / 2;
656 p
->flags
|= TERMP_NOBREAK
| TERMP_NOSPACE
;
661 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
662 p
->offset
= p
->rmargin
;
663 p
->rmargin
= p
->maxrmargin
- strlen(title
);
668 p
->offset
= p
->rmargin
;
669 p
->rmargin
= p
->maxrmargin
;
670 p
->flags
&= ~TERMP_NOBREAK
;
671 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
676 p
->rmargin
= p
->maxrmargin
;
678 p
->flags
&= ~TERMP_NOSPACE
;
687 * Determine the symbol indicated by an escape sequence, that is, one
688 * starting with a backslash. Once done, we pass this value into the
689 * output buffer by way of the symbol table.
692 nescape(struct termp
*p
, const char *word
, size_t len
)
704 warnx("unsupported %zu-byte escape sequence", len
);
708 for ( ; enc
->enc
; enc
++)
709 if (0 == memcmp(enc
->enc
, word
, len
)) {
710 symbola(p
, enc
->sym
);
714 warnx("unsupported %zu-byte escape sequence", len
);
719 * Handle an escape sequence: determine its length and pass it to the
720 * escape-symbol lookup table. Note that we assume mdoc(3) has validated
721 * the escape sequence (we assert upon badly-formed escape sequences).
724 pescape(struct termp
*p
, const char *word
, size_t *i
, size_t len
)
731 if ('(' == word
[*i
]) {
733 assert(*i
+ 1 < len
);
734 nescape(p
, &word
[*i
], 2);
738 } else if ('*' == word
[*i
]) {
739 /* XXX - deprecated! */
745 assert(*i
+ 1 < len
);
746 nescape(p
, &word
[*i
], 2);
752 nescape(p
, &word
[*i
], 1);
756 } else if ('[' != word
[*i
]) {
757 nescape(p
, &word
[*i
], 1);
762 for (j
= 0; word
[*i
] && ']' != word
[*i
]; (*i
)++, j
++)
766 nescape(p
, &word
[*i
- j
], j
);
771 * Handle pwords, partial words, which may be either a single word or a
772 * phrase that cannot be broken down (such as a literal string). This
773 * handles word styling.
776 pword(struct termp
*p
, const char *word
, size_t len
)
780 if ( ! (TERMP_NOSPACE
& p
->flags
) &&
781 ! (TERMP_LITERAL
& p
->flags
))
784 if ( ! (p
->flags
& TERMP_NONOSPACE
))
785 p
->flags
&= ~TERMP_NOSPACE
;
788 * XXX - if literal and underlining, this will underline the
789 * spaces between literal words.
792 if (p
->flags
& TERMP_BOLD
)
793 stylea(p
, TERMSTYLE_BOLD
);
794 if (p
->flags
& TERMP_UNDERLINE
)
795 stylea(p
, TERMSTYLE_UNDER
);
797 for (i
= 0; i
< len
; i
++) {
798 if ('\\' == word
[i
]) {
799 pescape(p
, word
, &i
, len
);
805 if (p
->flags
& TERMP_BOLD
||
806 p
->flags
& TERMP_UNDERLINE
)
807 stylea(p
, TERMSTYLE_CLEAR
);
812 * Add a symbol to the output line buffer.
815 symbola(struct termp
*p
, enum tsym sym
)
818 assert(p
->symtab
[sym
].sym
);
819 stringa(p
, p
->symtab
[sym
].sym
, p
->symtab
[sym
].sz
);
824 * Add a style to the output line buffer.
827 stylea(struct termp
*p
, enum tstyle style
)
830 assert(p
->styletab
[style
].sym
);
831 stringa(p
, p
->styletab
[style
].sym
, p
->styletab
[style
].sz
);
836 * Like chara() but for arbitrary-length buffers. Resize the buffer by
837 * a factor of two (if the buffer is less than that) or the buffer's
841 stringa(struct termp
*p
, const char *c
, size_t sz
)
848 s
= sz
> p
->maxcols
* 2 ? sz
: p
->maxcols
* 2;
851 if (p
->col
+ sz
>= p
->maxcols
) {
852 p
->buf
= realloc(p
->buf
, s
);
858 (void)memcpy(&p
->buf
[p
->col
], c
, sz
);
864 * Insert a single character into the line-buffer. If the buffer's
865 * space is exceeded, then allocate more space by doubling the buffer
869 chara(struct termp
*p
, char c
)
872 if (p
->col
+ 1 >= p
->maxcols
) {
873 p
->buf
= realloc(p
->buf
, p
->maxcols
* 2);
878 p
->buf
[(p
->col
)++] = c
;