]>
git.cameronkatri.com Git - mandoc.git/blob - mdocterm.c
f2f05dfa9edd92bbaf3bf2c98537c1042b4ca5c2
1 /* $Id: mdocterm.c,v 1.26 2009/03/02 17:29:16 kristaps Exp $ */
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
19 #include <sys/utsname.h>
35 static void body(struct termp
*,
37 const struct mdoc_meta
*,
38 const struct mdoc_node
*);
39 static void header(struct termp
*,
40 const struct mdoc_meta
*);
41 static void footer(struct termp
*,
42 const struct mdoc_meta
*);
44 static void pword(struct termp
*, const char *, size_t);
45 static void pescape(struct termp
*, const char *,
47 static void nescape(struct termp
*,
48 const char *, size_t);
49 static void chara(struct termp
*, char);
50 static void stringa(struct termp
*,
51 const char *, size_t);
52 static void symbola(struct termp
*, enum tsym
);
53 static void stylea(struct termp
*, enum tstyle
);
56 extern size_t strlcat(char *, const char *, size_t);
57 extern size_t strlcpy(char *, const char *, size_t);
60 static struct termsym termsym_ansi
[] = {
61 { "]", 1 }, /* TERMSYM_RBRACK */
62 { "[", 1 }, /* TERMSYM_LBRACK */
63 { "<-", 2 }, /* TERMSYM_LARROW */
64 { "->", 2 }, /* TERMSYM_RARROW */
65 { "^", 1 }, /* TERMSYM_UARROW */
66 { "v", 1 }, /* TERMSYM_DARROW */
67 { "`", 1 }, /* TERMSYM_LSQUOTE */
68 { "\'", 1 }, /* TERMSYM_RSQUOTE */
69 { "\'", 1 }, /* TERMSYM_SQUOTE */
70 { "``", 2 }, /* TERMSYM_LDQUOTE */
71 { "\'\'", 2 }, /* TERMSYM_RDQUOTE */
72 { "\"", 1 }, /* TERMSYM_DQUOTE */
73 { "<", 1 }, /* TERMSYM_LT */
74 { ">", 1 }, /* TERMSYM_GT */
75 { "<=", 2 }, /* TERMSYM_LE */
76 { ">=", 2 }, /* TERMSYM_GE */
77 { "==", 2 }, /* TERMSYM_EQ */
78 { "!=", 2 }, /* TERMSYM_NEQ */
79 { "\'", 1 }, /* TERMSYM_ACUTE */
80 { "`", 1 }, /* TERMSYM_GRAVE */
81 { "pi", 2 }, /* TERMSYM_PI */
82 { "+=", 2 }, /* TERMSYM_PLUSMINUS */
83 { "oo", 2 }, /* TERMSYM_INF */
84 { "infinity", 8 }, /* TERMSYM_INF2 */
85 { "NaN", 3 }, /* TERMSYM_NAN */
86 { "|", 1 }, /* TERMSYM_BAR */
87 { "o", 1 }, /* TERMSYM_BULLET */
88 { "&", 1 }, /* TERMSYM_AND */
89 { "|", 1 }, /* TERMSYM_OR */
92 static const char ansi_clear
[] = { 27, '[', '0', 'm' };
93 static const char ansi_bold
[] = { 27, '[', '1', 'm' };
94 static const char ansi_under
[] = { 27, '[', '4', 'm' };
96 static struct termsym termstyle_ansi
[] = {
104 main(int argc
, char *argv
[])
107 const struct mdoc
*mdoc
;
112 if ( ! mmain_getopt(p
, argc
, argv
, NULL
, NULL
, NULL
, NULL
))
115 if (NULL
== (mdoc
= mmain_mdoc(p
)))
118 termp
.maxrmargin
= 78; /* XXX */
119 termp
.rmargin
= termp
.maxrmargin
;
120 termp
.maxcols
= 1024;
121 termp
.offset
= termp
.col
= 0;
122 termp
.flags
= TERMP_NOSPACE
;
123 termp
.symtab
= termsym_ansi
;
124 termp
.styletab
= termstyle_ansi
;
126 if (NULL
== (termp
.buf
= malloc(termp
.maxcols
)))
129 header(&termp
, mdoc_meta(mdoc
));
130 body(&termp
, NULL
, mdoc_meta(mdoc
), mdoc_node(mdoc
));
131 footer(&termp
, mdoc_meta(mdoc
));
141 * Flush a line of text. A "line" is loosely defined as being something
142 * that should be followed by a newline, regardless of whether it's
143 * broken apart by newlines getting there. A line can also be a
144 * fragment of a columnar list.
146 * Specifically, a line is whatever's in p->buf of length p->col, which
147 * is zeroed after this function returns.
149 * The variables TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
150 * critical importance here. Their behaviour follows:
152 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
153 * offset value. This is useful when doing columnar lists where the
154 * prior column has right-padded.
156 * - TERMP_LITERAL: don't break apart words. Note that a long literal
157 * word will violate the right margin.
159 * - TERMP_NOBREAK: this is the most important and is used when making
160 * columns. In short: don't print a newline and instead pad to the
161 * right margin. Used in conjunction with TERMP_NOLPAD.
163 * In-line line breaking:
165 * If TERMP_NOBREAK is specified and the line overruns the right
166 * margin, it will break and pad-right to the right margin after
167 * writing. If maxrmargin is violated, it will break and continue
168 * writing from the right-margin, which will lead to the above
169 * scenario upon exit.
171 * Otherwise, the line will break at the right margin. Extremely long
172 * lines will cause the system to emit a warning (TODO: hyphenate, if
176 flushln(struct termp
*p
)
178 size_t i
, j
, vsz
, vis
, maxvis
, mmax
, bp
;
181 * First, establish the maximum columns of "visible" content.
182 * This is usually the difference between the right-margin and
183 * an indentation, but can be, for tagged lists or columns, a
184 * small set of values.
187 assert(p
->offset
< p
->rmargin
);
188 maxvis
= p
->rmargin
- p
->offset
;
189 mmax
= p
->maxrmargin
- p
->offset
;
190 bp
= TERMP_NOBREAK
& p
->flags
? mmax
: maxvis
;
194 * If in the standard case (left-justified), then begin with our
195 * indentation, otherwise (columns, etc.) just start spitting
199 if ( ! (p
->flags
& TERMP_NOLPAD
))
201 for (j
= 0; j
< p
->offset
; j
++)
204 for (i
= 0; i
< p
->col
; i
++) {
206 * Count up visible word characters. Control sequences
207 * (starting with the CSI) aren't counted. A space
208 * generates a non-printing word, which is valid (the
209 * space is printed according to regular spacing rules).
212 /* FIXME: make non-ANSI friendly. */
215 for (j
= i
, vsz
= 0; j
< p
->col
; j
++) {
216 if (isspace((int)p
->buf
[j
]))
218 else if (27 == p
->buf
[j
]) {
219 assert(j
+ 4 <= p
->col
);
226 * Do line-breaking. If we're greater than our
227 * break-point and already in-line, break to the next
228 * line and start writing. If we're at the line start,
229 * then write out the word (TODO: hyphenate) and break
230 * in a subsequent loop invocation.
233 if ( ! (TERMP_NOBREAK
& p
->flags
)) {
234 if (vis
&& vis
+ vsz
> bp
) {
236 for (j
= 0; j
< p
->offset
; j
++)
239 } else if (vis
+ vsz
> bp
)
240 warnx("word breaks right margin");
242 /* TODO: hyphenate. */
245 if (vis
&& vis
+ vsz
> bp
) {
247 for (j
= 0; j
< p
->rmargin
; j
++)
250 } else if (vis
+ vsz
> bp
)
251 warnx("word breaks right margin");
253 /* TODO: hyphenate. */
257 * Write out the word and a trailing space. Omit the
258 * space if we're the last word in the line or beyond
262 for ( ; i
< p
->col
; i
++) {
263 if (isspace((int)p
->buf
[i
]))
268 if (i
< p
->col
&& vis
<= bp
) {
275 * If we've overstepped our maximum visible no-break space, then
276 * cause a newline and offset at the right margin.
279 if ((TERMP_NOBREAK
& p
->flags
) && vis
>= maxvis
) {
281 for (i
= 0; i
< p
->rmargin
; i
++)
288 * If we're not to right-marginalise it (newline), then instead
289 * pad to the right margin and stay off.
292 if (p
->flags
& TERMP_NOBREAK
) {
293 for ( ; vis
< maxvis
; vis
++)
303 * A newline only breaks an existing line; it won't assert vertical
304 * space. All data in the output buffer is flushed prior to the newline
308 newln(struct termp
*p
)
311 p
->flags
|= TERMP_NOSPACE
;
313 p
->flags
&= ~TERMP_NOLPAD
;
317 p
->flags
&= ~TERMP_NOLPAD
;
322 * Asserts a vertical space (a full, empty line-break between lines).
323 * Note that if used twice, this will cause two blank lines, and so on.
324 * All data in the output buffer is flushed prior to the newline
328 vspace(struct termp
*p
)
337 * Break apart a word into "pwords" (partial-words, usually from
338 * breaking up a phrase into individual words) and, eventually, put them
339 * into the output buffer. If we're a literal word, then don't break up
340 * the word and put it verbatim into the output buffer.
343 word(struct termp
*p
, const char *word
)
347 if (p
->flags
& TERMP_LITERAL
) {
348 pword(p
, word
, strlen(word
));
355 if (mdoc_isdelim(word
)) {
356 if ( ! (p
->flags
& TERMP_IGNDELIM
))
357 p
->flags
|= TERMP_NOSPACE
;
358 p
->flags
&= ~TERMP_IGNDELIM
;
362 for (j
= i
= 0; i
< len
; i
++) {
363 if ( ! isspace((int)word
[i
])) {
368 /* Escaped spaces don't delimit... */
369 if (i
> 0 && isspace((int)word
[i
]) &&
370 '\\' == word
[i
- 1]) {
378 pword(p
, &word
[i
- j
], j
);
383 pword(p
, &word
[i
- j
], j
);
389 * This is the main function for printing out nodes. It's constituted
390 * of PRE and POST functions, which correspond to prefix and postfix
391 * processing. The termpair structure allows data to persist between
392 * prefix and postfix invocations.
395 body(struct termp
*p
, struct termpair
*ppair
,
396 const struct mdoc_meta
*meta
,
397 const struct mdoc_node
*node
)
400 struct termpair pair
;
402 /* Pre-processing. */
407 pair
.offset
= pair
.rmargin
= 0;
411 if (MDOC_TEXT
!= node
->type
) {
412 if (termacts
[node
->tok
].pre
)
413 if ( ! (*termacts
[node
->tok
].pre
)(p
, &pair
, meta
, node
))
415 } else /* MDOC_TEXT == node->type */
416 word(p
, node
->data
.text
.string
);
420 if (TERMPAIR_FLAG
& pair
.type
)
421 p
->flags
|= pair
.flag
;
423 if (dochild
&& node
->child
)
424 body(p
, &pair
, meta
, node
->child
);
426 if (TERMPAIR_FLAG
& pair
.type
)
427 p
->flags
&= ~pair
.flag
;
429 /* Post-processing. */
431 if (MDOC_TEXT
!= node
->type
)
432 if (termacts
[node
->tok
].post
)
433 (*termacts
[node
->tok
].post
)(p
, &pair
, meta
, node
);
438 body(p
, ppair
, meta
, node
->next
);
443 footer(struct termp
*p
, const struct mdoc_meta
*meta
)
448 if (NULL
== (buf
= malloc(p
->rmargin
)))
450 if (NULL
== (os
= malloc(p
->rmargin
)))
453 tm
= localtime(&meta
->date
);
456 if (NULL
== strftime(buf
, p
->rmargin
, "%B %d, %Y", tm
))
458 if (0 == strftime(buf
, p
->rmargin
, "%B %d, %Y", tm
))
462 (void)strlcpy(os
, meta
->os
, p
->rmargin
);
465 * This is /slightly/ different from regular groff output
466 * because we don't have page numbers. Print the following:
473 p
->flags
|= TERMP_NOSPACE
| TERMP_NOBREAK
;
474 p
->rmargin
= p
->maxrmargin
- strlen(buf
);
480 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
481 p
->offset
= p
->rmargin
;
482 p
->rmargin
= p
->maxrmargin
;
483 p
->flags
&= ~TERMP_NOBREAK
;
494 header(struct termp
*p
, const struct mdoc_meta
*meta
)
496 char *buf
, *title
, *bufp
, *vbuf
;
500 p
->rmargin
= p
->maxrmargin
;
503 if (NULL
== (buf
= malloc(p
->rmargin
)))
505 if (NULL
== (title
= malloc(p
->rmargin
)))
507 if (NULL
== (vbuf
= malloc(p
->rmargin
)))
510 if (NULL
== (pp
= mdoc_vol2a(meta
->vol
))) {
511 switch (meta
->msec
) {
517 pp
= mdoc_vol2a(VOL_URM
);
520 pp
= mdoc_vol2a(VOL_SMM
);
529 pp
= mdoc_vol2a(VOL_PRM
);
532 pp
= mdoc_vol2a(VOL_KM
);
541 if (-1 == uname(&uts
))
543 (void)strlcat(vbuf
, uts
.sysname
, p
->rmargin
);
544 (void)strlcat(vbuf
, " ", p
->rmargin
);
545 } else if (NULL
== (pp
= mdoc_msec2a(meta
->msec
)))
546 pp
= mdoc_msec2a(MSEC_local
);
548 (void)strlcat(vbuf
, pp
, p
->rmargin
);
551 * The header is strange. It has three components, which are
552 * really two with the first duplicated. It goes like this:
554 * IDENTIFIER TITLE IDENTIFIER
556 * The IDENTIFIER is NAME(SECTION), which is the command-name
557 * (if given, or "unknown" if not) followed by the manual page
558 * section. These are given in `Dt'. The TITLE is a free-form
559 * string depending on the manual volume. If not specified, it
560 * switches on the manual section.
563 if (mdoc_arch2a(meta
->arch
))
564 (void)snprintf(buf
, p
->rmargin
, "%s (%s)",
565 vbuf
, mdoc_arch2a(meta
->arch
));
567 (void)strlcpy(buf
, vbuf
, p
->rmargin
);
569 pp
= mdoc_msec2a(meta
->msec
);
571 (void)snprintf(title
, p
->rmargin
, "%s(%s)",
572 meta
->title
, pp
? pp
: "");
574 for (bufp
= title
; *bufp
; bufp
++)
575 *bufp
= toupper(*bufp
);
578 p
->rmargin
= (p
->maxrmargin
- strlen(buf
)) / 2;
579 p
->flags
|= TERMP_NOBREAK
| TERMP_NOSPACE
;
584 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
585 p
->offset
= p
->rmargin
;
586 p
->rmargin
= p
->maxrmargin
- strlen(title
);
591 p
->offset
= p
->rmargin
;
592 p
->rmargin
= p
->maxrmargin
;
593 p
->flags
&= ~TERMP_NOBREAK
;
594 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
599 p
->rmargin
= p
->maxrmargin
;
601 p
->flags
&= ~TERMP_NOSPACE
;
610 * Determine the symbol indicated by an escape sequence, that is, one
611 * starting with a backslash. Once done, we pass this value into the
612 * output buffer by way of the symbol table.
615 nescape(struct termp
*p
, const char *word
, size_t len
)
632 chara(p
, word
[0]); /* FIXME */
637 chara(p
, '\\'); /* FIXME */
640 symbola(p
, TERMSYM_DQUOTE
);
643 warnx("escape sequence not supported: %c",
650 if ('r' == word
[0] && 'B' == word
[1])
651 symbola(p
, TERMSYM_RBRACK
);
652 else if ('l' == word
[0] && 'B' == word
[1])
653 symbola(p
, TERMSYM_LBRACK
);
654 else if ('l' == word
[0] && 'q' == word
[1])
655 symbola(p
, TERMSYM_LDQUOTE
);
656 else if ('r' == word
[0] && 'q' == word
[1])
657 symbola(p
, TERMSYM_RDQUOTE
);
658 else if ('o' == word
[0] && 'q' == word
[1])
659 symbola(p
, TERMSYM_LSQUOTE
);
660 else if ('a' == word
[0] && 'q' == word
[1])
661 symbola(p
, TERMSYM_RSQUOTE
);
662 else if ('<' == word
[0] && '-' == word
[1])
663 symbola(p
, TERMSYM_LARROW
);
664 else if ('-' == word
[0] && '>' == word
[1])
665 symbola(p
, TERMSYM_RARROW
);
666 else if ('b' == word
[0] && 'u' == word
[1])
667 symbola(p
, TERMSYM_BULLET
);
668 else if ('<' == word
[0] && '=' == word
[1])
669 symbola(p
, TERMSYM_LE
);
670 else if ('>' == word
[0] && '=' == word
[1])
671 symbola(p
, TERMSYM_GE
);
672 else if ('=' == word
[0] && '=' == word
[1])
673 symbola(p
, TERMSYM_EQ
);
674 else if ('+' == word
[0] && '-' == word
[1])
675 symbola(p
, TERMSYM_PLUSMINUS
);
676 else if ('u' == word
[0] && 'a' == word
[1])
677 symbola(p
, TERMSYM_UARROW
);
678 else if ('d' == word
[0] && 'a' == word
[1])
679 symbola(p
, TERMSYM_DARROW
);
680 else if ('a' == word
[0] && 'a' == word
[1])
681 symbola(p
, TERMSYM_ACUTE
);
682 else if ('g' == word
[0] && 'a' == word
[1])
683 symbola(p
, TERMSYM_GRAVE
);
684 else if ('!' == word
[0] && '=' == word
[1])
685 symbola(p
, TERMSYM_NEQ
);
686 else if ('i' == word
[0] && 'f' == word
[1])
687 symbola(p
, TERMSYM_INF
);
688 else if ('n' == word
[0] && 'a' == word
[1])
689 symbola(p
, TERMSYM_NAN
);
690 else if ('b' == word
[0] && 'a' == word
[1])
691 symbola(p
, TERMSYM_BAR
);
693 /* Deprecated forms. */
694 else if ('A' == word
[0] && 'm' == word
[1])
695 symbola(p
, TERMSYM_AMP
);
696 else if ('B' == word
[0] && 'a' == word
[1])
697 symbola(p
, TERMSYM_BAR
);
698 else if ('I' == word
[0] && 'f' == word
[1])
699 symbola(p
, TERMSYM_INF2
);
700 else if ('G' == word
[0] && 'e' == word
[1])
701 symbola(p
, TERMSYM_GE
);
702 else if ('G' == word
[0] && 't' == word
[1])
703 symbola(p
, TERMSYM_GT
);
704 else if ('L' == word
[0] && 'e' == word
[1])
705 symbola(p
, TERMSYM_LE
);
706 else if ('L' == word
[0] && 'q' == word
[1])
707 symbola(p
, TERMSYM_LDQUOTE
);
708 else if ('L' == word
[0] && 't' == word
[1])
709 symbola(p
, TERMSYM_LT
);
710 else if ('N' == word
[0] && 'a' == word
[1])
711 symbola(p
, TERMSYM_NAN
);
712 else if ('N' == word
[0] && 'e' == word
[1])
713 symbola(p
, TERMSYM_NEQ
);
714 else if ('P' == word
[0] && 'i' == word
[1])
715 symbola(p
, TERMSYM_PI
);
716 else if ('P' == word
[0] && 'm' == word
[1])
717 symbola(p
, TERMSYM_PLUSMINUS
);
718 else if ('R' == word
[0] && 'q' == word
[1])
719 symbola(p
, TERMSYM_RDQUOTE
);
721 warnx("escape sequence not supported: %c%c",
726 warnx("escape sequence not supported");
733 * Handle an escape sequence: determine its length and pass it to the
734 * escape-symbol lookup table. Note that we assume mdoc(3) has validated
735 * the escape sequence (we assert upon badly-formed escape sequences).
738 pescape(struct termp
*p
, const char *word
, size_t *i
, size_t len
)
745 if ('(' == word
[*i
]) {
747 assert(*i
+ 1 < len
);
748 nescape(p
, &word
[*i
], 2);
752 } else if ('*' == word
[*i
]) {
753 /* XXX - deprecated! */
759 assert(*i
+ 1 < len
);
760 nescape(p
, &word
[*i
], 2);
766 nescape(p
, &word
[*i
], 1);
770 } else if ('[' != word
[*i
]) {
771 nescape(p
, &word
[*i
], 1);
776 for (j
= 0; word
[*i
] && ']' != word
[*i
]; (*i
)++, j
++)
780 nescape(p
, &word
[*i
- j
], j
);
785 * Handle pwords, partial words, which may be either a single word or a
786 * phrase that cannot be broken down (such as a literal string). This
787 * handles word styling.
790 pword(struct termp
*p
, const char *word
, size_t len
)
794 if ( ! (TERMP_NOSPACE
& p
->flags
) &&
795 ! (TERMP_LITERAL
& p
->flags
))
798 if ( ! (p
->flags
& TERMP_NONOSPACE
))
799 p
->flags
&= ~TERMP_NOSPACE
;
802 * XXX - if literal and underlining, this will underline the
803 * spaces between literal words.
806 if (p
->flags
& TERMP_BOLD
)
807 stylea(p
, TERMSTYLE_BOLD
);
808 if (p
->flags
& TERMP_UNDERLINE
)
809 stylea(p
, TERMSTYLE_UNDER
);
811 for (i
= 0; i
< len
; i
++) {
812 if ('\\' == word
[i
]) {
813 pescape(p
, word
, &i
, len
);
819 if (p
->flags
& TERMP_BOLD
||
820 p
->flags
& TERMP_UNDERLINE
)
821 stylea(p
, TERMSTYLE_CLEAR
);
826 * Add a symbol to the output line buffer.
829 symbola(struct termp
*p
, enum tsym sym
)
832 assert(p
->symtab
[sym
].sym
);
833 stringa(p
, p
->symtab
[sym
].sym
, p
->symtab
[sym
].sz
);
838 * Add a style to the output line buffer.
841 stylea(struct termp
*p
, enum tstyle style
)
844 assert(p
->styletab
[style
].sym
);
845 stringa(p
, p
->styletab
[style
].sym
, p
->styletab
[style
].sz
);
850 * Like chara() but for arbitrary-length buffers. Resize the buffer by
851 * a factor of two (if the buffer is less than that) or the buffer's
855 stringa(struct termp
*p
, const char *c
, size_t sz
)
859 s
= sz
> p
->maxcols
* 2 ? sz
: p
->maxcols
* 2;
862 if (p
->col
+ sz
>= p
->maxcols
) {
863 p
->buf
= realloc(p
->buf
, s
);
869 (void)memcpy(&p
->buf
[p
->col
], c
, sz
);
875 * Insert a single character into the line-buffer. If the buffer's
876 * space is exceeded, then allocate more space by doubling the buffer
880 chara(struct termp
*p
, char c
)
883 if (p
->col
+ 1 >= p
->maxcols
) {
884 p
->buf
= realloc(p
->buf
, p
->maxcols
* 2);
889 p
->buf
[(p
->col
)++] = c
;