]>
git.cameronkatri.com Git - mandoc.git/blob - mdocterm.c
1 /* $Id: mdocterm.c,v 1.40 2009/03/12 06:32:17 kristaps Exp $ */
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
19 #include <sys/types.h>
37 static void body(struct termp
*,
39 const struct mdoc_meta
*,
40 const struct mdoc_node
*);
41 static void header(struct termp
*,
42 const struct mdoc_meta
*);
43 static void footer(struct termp
*,
44 const struct mdoc_meta
*);
46 static void pword(struct termp
*, const char *, size_t);
47 static void pescape(struct termp
*, const char *,
49 static void nescape(struct termp
*,
50 const char *, size_t);
51 static void chara(struct termp
*, char);
52 static void stringa(struct termp
*,
53 const char *, size_t);
54 static void symbola(struct termp
*, enum tsym
);
55 static void sanity(const struct mdoc_node
*);
56 static void stylea(struct termp
*, enum tstyle
);
59 extern size_t strlcat(char *, const char *, size_t);
60 extern size_t strlcpy(char *, const char *, size_t);
63 static struct termenc termenc1
[] = {
64 { "\\", TERMSYM_SLASH
},
65 { "\'", TERMSYM_RSQUOTE
},
66 { "`", TERMSYM_LSQUOTE
},
67 { "-", TERMSYM_HYPHEN
},
68 { " ", TERMSYM_SPACE
},
69 { ".", TERMSYM_PERIOD
},
70 { "&", TERMSYM_BREAK
},
71 { "e", TERMSYM_SLASH
},
72 { "q", TERMSYM_DQUOTE
},
76 static struct termenc termenc2
[] = {
77 { "rC", TERMSYM_RBRACE
},
78 { "lC", TERMSYM_LBRACE
},
79 { "rB", TERMSYM_RBRACK
},
80 { "lB", TERMSYM_LBRACK
},
81 { "ra", TERMSYM_RANGLE
},
82 { "la", TERMSYM_LANGLE
},
83 { "Lq", TERMSYM_LDQUOTE
},
84 { "lq", TERMSYM_LDQUOTE
},
85 { "Rq", TERMSYM_RDQUOTE
},
86 { "rq", TERMSYM_RDQUOTE
},
87 { "oq", TERMSYM_LSQUOTE
},
88 { "aq", TERMSYM_RSQUOTE
},
90 { "<-", TERMSYM_LARROW
},
91 { "->", TERMSYM_RARROW
},
92 { "ua", TERMSYM_UARROW
},
93 { "da", TERMSYM_DARROW
},
95 { "bu", TERMSYM_BULLET
},
96 { "Ba", TERMSYM_BAR
},
97 { "ba", TERMSYM_BAR
},
98 { "co", TERMSYM_COPY
},
99 { "Am", TERMSYM_AMP
},
101 { "Le", TERMSYM_LE
},
102 { "<=", TERMSYM_LE
},
103 { "Ge", TERMSYM_GE
},
104 { ">=", TERMSYM_GE
},
105 { "==", TERMSYM_EQ
},
106 { "Ne", TERMSYM_NEQ
},
107 { "!=", TERMSYM_NEQ
},
108 { "Pm", TERMSYM_PLUSMINUS
},
109 { "+-", TERMSYM_PLUSMINUS
},
110 { "If", TERMSYM_INF2
},
111 { "if", TERMSYM_INF
},
112 { "Na", TERMSYM_NAN
},
113 { "na", TERMSYM_NAN
},
114 { "**", TERMSYM_ASTERISK
},
115 { "Gt", TERMSYM_GT
},
116 { "Lt", TERMSYM_LT
},
118 { "aa", TERMSYM_ACUTE
},
119 { "ga", TERMSYM_GRAVE
},
121 { "en", TERMSYM_EN
},
122 { "em", TERMSYM_EM
},
124 { "Pi", TERMSYM_PI
},
128 static struct termsym termsym_ansi
[] = {
129 { "]", 1 }, /* TERMSYM_RBRACK */
130 { "[", 1 }, /* TERMSYM_LBRACK */
131 { "<-", 2 }, /* TERMSYM_LARROW */
132 { "->", 2 }, /* TERMSYM_RARROW */
133 { "^", 1 }, /* TERMSYM_UARROW */
134 { "v", 1 }, /* TERMSYM_DARROW */
135 { "`", 1 }, /* TERMSYM_LSQUOTE */
136 { "\'", 1 }, /* TERMSYM_RSQUOTE */
137 { "\'", 1 }, /* TERMSYM_SQUOTE */
138 { "``", 2 }, /* TERMSYM_LDQUOTE */
139 { "\'\'", 2 }, /* TERMSYM_RDQUOTE */
140 { "\"", 1 }, /* TERMSYM_DQUOTE */
141 { "<", 1 }, /* TERMSYM_LT */
142 { ">", 1 }, /* TERMSYM_GT */
143 { "<=", 2 }, /* TERMSYM_LE */
144 { ">=", 2 }, /* TERMSYM_GE */
145 { "==", 2 }, /* TERMSYM_EQ */
146 { "!=", 2 }, /* TERMSYM_NEQ */
147 { "\'", 1 }, /* TERMSYM_ACUTE */
148 { "`", 1 }, /* TERMSYM_GRAVE */
149 { "pi", 2 }, /* TERMSYM_PI */
150 { "+=", 2 }, /* TERMSYM_PLUSMINUS */
151 { "oo", 2 }, /* TERMSYM_INF */
152 { "infinity", 8 }, /* TERMSYM_INF2 */
153 { "NaN", 3 }, /* TERMSYM_NAN */
154 { "|", 1 }, /* TERMSYM_BAR */
155 { "o", 1 }, /* TERMSYM_BULLET */
156 { "&", 1 }, /* TERMSYM_AMP */
157 { "--", 2 }, /* TERMSYM_EM */
158 { "-", 1 }, /* TERMSYM_EN */
159 { "(C)", 3 }, /* TERMSYM_COPY */
160 { "*", 1 }, /* TERMSYM_ASTERISK */
161 { "\\", 1 }, /* TERMSYM_SLASH */
162 { "-", 1 }, /* TERMSYM_HYPHEN */
163 { " ", 1 }, /* TERMSYM_SPACE */
164 { ".", 1 }, /* TERMSYM_PERIOD */
165 { "", 0 }, /* TERMSYM_BREAK */
166 { "<", 1 }, /* TERMSYM_LANGLE */
167 { ">", 1 }, /* TERMSYM_RANGLE */
168 { "{", 1 }, /* TERMSYM_LBRACE */
169 { "}", 1 }, /* TERMSYM_RBRACE */
172 static const char ansi_clear
[] = { 27, '[', '0', 'm' };
173 static const char ansi_bold
[] = { 27, '[', '1', 'm' };
174 static const char ansi_under
[] = { 27, '[', '4', 'm' };
176 static struct termsym termstyle_ansi
[] = {
184 main(int argc
, char *argv
[])
188 const struct mdoc
*mdoc
;
193 c
= mmain_getopt(p
, argc
, argv
, NULL
, NULL
, NULL
, NULL
);
195 mmain_exit(p
, -1 == c
? 1 : 0);
197 if (NULL
== (mdoc
= mmain_mdoc(p
)))
200 termp
.maxrmargin
= termp
.rmargin
= 78; /* XXX */
201 termp
.maxcols
= 1024;
202 termp
.offset
= termp
.col
= 0;
203 termp
.flags
= TERMP_NOSPACE
;
204 termp
.symtab
= termsym_ansi
;
205 termp
.styletab
= termstyle_ansi
;
207 if (NULL
== (termp
.buf
= malloc(termp
.maxcols
)))
210 header(&termp
, mdoc_meta(mdoc
));
211 body(&termp
, NULL
, mdoc_meta(mdoc
), mdoc_node(mdoc
));
212 footer(&termp
, mdoc_meta(mdoc
));
222 * Flush a line of text. A "line" is loosely defined as being something
223 * that should be followed by a newline, regardless of whether it's
224 * broken apart by newlines getting there. A line can also be a
225 * fragment of a columnar list.
227 * Specifically, a line is whatever's in p->buf of length p->col, which
228 * is zeroed after this function returns.
230 * The variables TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
231 * critical importance here. Their behaviour follows:
233 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
234 * offset value. This is useful when doing columnar lists where the
235 * prior column has right-padded.
237 * - TERMP_NOBREAK: this is the most important and is used when making
238 * columns. In short: don't print a newline and instead pad to the
239 * right margin. Used in conjunction with TERMP_NOLPAD.
241 * In-line line breaking:
243 * If TERMP_NOBREAK is specified and the line overruns the right
244 * margin, it will break and pad-right to the right margin after
245 * writing. If maxrmargin is violated, it will break and continue
246 * writing from the right-margin, which will lead to the above
247 * scenario upon exit.
249 * Otherwise, the line will break at the right margin. Extremely long
250 * lines will cause the system to emit a warning (TODO: hyphenate, if
254 flushln(struct termp
*p
)
256 size_t i
, j
, vsz
, vis
, maxvis
, mmax
, bp
;
259 * First, establish the maximum columns of "visible" content.
260 * This is usually the difference between the right-margin and
261 * an indentation, but can be, for tagged lists or columns, a
262 * small set of values.
265 assert(p
->offset
< p
->rmargin
);
266 maxvis
= p
->rmargin
- p
->offset
;
267 mmax
= p
->maxrmargin
- p
->offset
;
268 bp
= TERMP_NOBREAK
& p
->flags
? mmax
: maxvis
;
272 * If in the standard case (left-justified), then begin with our
273 * indentation, otherwise (columns, etc.) just start spitting
277 if ( ! (p
->flags
& TERMP_NOLPAD
))
279 for (j
= 0; j
< p
->offset
; j
++)
282 for (i
= 0; i
< p
->col
; i
++) {
284 * Count up visible word characters. Control sequences
285 * (starting with the CSI) aren't counted. A space
286 * generates a non-printing word, which is valid (the
287 * space is printed according to regular spacing rules).
290 /* FIXME: make non-ANSI friendly. */
293 for (j
= i
, vsz
= 0; j
< p
->col
; j
++) {
294 if (isspace((u_char
)p
->buf
[j
]))
296 else if (27 == p
->buf
[j
]) {
297 assert(j
+ 4 <= p
->col
);
304 * Do line-breaking. If we're greater than our
305 * break-point and already in-line, break to the next
306 * line and start writing. If we're at the line start,
307 * then write out the word (TODO: hyphenate) and break
308 * in a subsequent loop invocation.
311 if ( ! (TERMP_NOBREAK
& p
->flags
)) {
312 if (vis
&& vis
+ vsz
> bp
) {
314 for (j
= 0; j
< p
->offset
; j
++)
317 } else if (vis
+ vsz
> bp
)
318 warnx("word breaks right margin");
320 /* TODO: hyphenate. */
323 if (vis
&& vis
+ vsz
> bp
) {
325 for (j
= 0; j
< p
->rmargin
; j
++)
327 vis
= p
->rmargin
- p
->offset
;
328 } else if (vis
+ vsz
> bp
)
329 warnx("word breaks right margin");
331 /* TODO: hyphenate. */
335 * Write out the word and a trailing space. Omit the
336 * space if we're the last word in the line or beyond
340 for ( ; i
< p
->col
; i
++) {
341 if (isspace((u_char
)p
->buf
[i
]))
346 if (i
< p
->col
&& vis
<= bp
) {
353 * If we've overstepped our maximum visible no-break space, then
354 * cause a newline and offset at the right margin.
357 if ((TERMP_NOBREAK
& p
->flags
) && vis
>= maxvis
) {
358 if ( ! (TERMP_NONOBREAK
& p
->flags
)) {
360 for (i
= 0; i
< p
->rmargin
; i
++)
368 * If we're not to right-marginalise it (newline), then instead
369 * pad to the right margin and stay off.
372 if (p
->flags
& TERMP_NOBREAK
) {
373 if ( ! (TERMP_NONOBREAK
& p
->flags
))
374 for ( ; vis
< maxvis
; vis
++)
384 * A newline only breaks an existing line; it won't assert vertical
385 * space. All data in the output buffer is flushed prior to the newline
389 newln(struct termp
*p
)
392 p
->flags
|= TERMP_NOSPACE
;
394 p
->flags
&= ~TERMP_NOLPAD
;
398 p
->flags
&= ~TERMP_NOLPAD
;
403 * Asserts a vertical space (a full, empty line-break between lines).
404 * Note that if used twice, this will cause two blank lines and so on.
405 * All data in the output buffer is flushed prior to the newline
409 vspace(struct termp
*p
)
418 * Break apart a word into "pwords" (partial-words, usually from
419 * breaking up a phrase into individual words) and, eventually, put them
420 * into the output buffer. If we're a literal word, then don't break up
421 * the word and put it verbatim into the output buffer.
424 word(struct termp
*p
, const char *word
)
428 if (p
->flags
& TERMP_LITERAL
) {
429 pword(p
, word
, strlen(word
));
433 if (0 == (len
= strlen(word
)))
434 errx(1, "blank line not in literal context");
436 if (mdoc_isdelim(word
)) {
437 if ( ! (p
->flags
& TERMP_IGNDELIM
))
438 p
->flags
|= TERMP_NOSPACE
;
439 p
->flags
&= ~TERMP_IGNDELIM
;
443 for (j
= i
= 0; i
< len
; i
++) {
444 if ( ! isspace((u_char
)word
[i
])) {
449 /* Escaped spaces don't delimit... */
450 if (i
> 0 && isspace((u_char
)word
[i
]) &&
451 '\\' == word
[i
- 1]) {
459 pword(p
, &word
[i
- j
], j
);
464 pword(p
, &word
[i
- j
], j
);
470 * This is the main function for printing out nodes. It's constituted
471 * of PRE and POST functions, which correspond to prefix and postfix
472 * processing. The termpair structure allows data to persist between
473 * prefix and postfix invocations.
476 body(struct termp
*p
, struct termpair
*ppair
,
477 const struct mdoc_meta
*meta
,
478 const struct mdoc_node
*node
)
481 struct termpair pair
;
483 /* Some quick sanity-checking. */
487 /* Pre-processing. */
492 pair
.offset
= pair
.rmargin
= 0;
496 if (MDOC_TEXT
!= node
->type
) {
497 if (termacts
[node
->tok
].pre
)
498 if ( ! (*termacts
[node
->tok
].pre
)(p
, &pair
, meta
, node
))
500 } else /* MDOC_TEXT == node->type */
501 word(p
, node
->string
);
505 if (TERMPAIR_FLAG
& pair
.type
)
506 p
->flags
|= pair
.flag
;
508 if (dochild
&& node
->child
)
509 body(p
, &pair
, meta
, node
->child
);
511 if (TERMPAIR_FLAG
& pair
.type
)
512 p
->flags
&= ~pair
.flag
;
514 /* Post-processing. */
516 if (MDOC_TEXT
!= node
->type
)
517 if (termacts
[node
->tok
].post
)
518 (*termacts
[node
->tok
].post
)(p
, &pair
, meta
, node
);
523 body(p
, ppair
, meta
, node
->next
);
528 footer(struct termp
*p
, const struct mdoc_meta
*meta
)
533 if (NULL
== (buf
= malloc(p
->rmargin
)))
535 if (NULL
== (os
= malloc(p
->rmargin
)))
538 tm
= localtime(&meta
->date
);
541 if (NULL
== strftime(buf
, p
->rmargin
, "%B %d, %Y", tm
))
543 if (0 == strftime(buf
, p
->rmargin
, "%B %d, %Y", tm
))
547 (void)strlcpy(os
, meta
->os
, p
->rmargin
);
550 * This is /slightly/ different from regular groff output
551 * because we don't have page numbers. Print the following:
558 p
->flags
|= TERMP_NOSPACE
| TERMP_NOBREAK
;
559 p
->rmargin
= p
->maxrmargin
- strlen(buf
);
565 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
566 p
->offset
= p
->rmargin
;
567 p
->rmargin
= p
->maxrmargin
;
568 p
->flags
&= ~TERMP_NOBREAK
;
579 header(struct termp
*p
, const struct mdoc_meta
*meta
)
581 char *buf
, *title
, *bufp
;
583 p
->rmargin
= p
->maxrmargin
;
586 if (NULL
== (buf
= malloc(p
->rmargin
)))
588 if (NULL
== (title
= malloc(p
->rmargin
)))
592 * The header is strange. It has three components, which are
593 * really two with the first duplicated. It goes like this:
595 * IDENTIFIER TITLE IDENTIFIER
597 * The IDENTIFIER is NAME(SECTION), which is the command-name
598 * (if given, or "unknown" if not) followed by the manual page
599 * section. These are given in `Dt'. The TITLE is a free-form
600 * string depending on the manual volume. If not specified, it
601 * switches on the manual section.
605 (void)strlcpy(buf
, meta
->vol
, p
->rmargin
);
608 (void)strlcat(buf
, " (", p
->rmargin
);
609 (void)strlcat(buf
, meta
->arch
, p
->rmargin
);
610 (void)strlcat(buf
, ")", p
->rmargin
);
613 (void)snprintf(title
, p
->rmargin
, "%s(%d)",
614 meta
->title
, meta
->msec
);
616 for (bufp
= title
; *bufp
; bufp
++)
617 *bufp
= toupper((u_char
)*bufp
);
620 p
->rmargin
= (p
->maxrmargin
- strlen(buf
)) / 2;
621 p
->flags
|= TERMP_NOBREAK
| TERMP_NOSPACE
;
626 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
627 p
->offset
= p
->rmargin
;
628 p
->rmargin
= p
->maxrmargin
- strlen(title
);
633 p
->offset
= p
->rmargin
;
634 p
->rmargin
= p
->maxrmargin
;
635 p
->flags
&= ~TERMP_NOBREAK
;
636 p
->flags
|= TERMP_NOLPAD
| TERMP_NOSPACE
;
641 p
->rmargin
= p
->maxrmargin
;
643 p
->flags
&= ~TERMP_NOSPACE
;
651 * Determine the symbol indicated by an escape sequence, that is, one
652 * starting with a backslash. Once done, we pass this value into the
653 * output buffer by way of the symbol table.
656 nescape(struct termp
*p
, const char *word
, size_t len
)
668 warnx("unsupported %zu-byte escape sequence", len
);
672 for ( ; enc
->enc
; enc
++)
673 if (0 == memcmp(enc
->enc
, word
, len
)) {
674 symbola(p
, enc
->sym
);
678 warnx("unsupported %zu-byte escape sequence", len
);
683 * Handle an escape sequence: determine its length and pass it to the
685 * escape-symbol lookup table. Note that we assume mdoc(3) has validated
685 * the escape sequence (we assert upon badly-formed escape sequences).
688 pescape(struct termp
*p
, const char *word
, size_t *i
, size_t len
)
693 warnx("ignoring bad escape sequence");
697 if ('(' == word
[*i
]) {
700 warnx("ignoring bad escape sequence");
703 nescape(p
, &word
[*i
], 2);
707 } else if ('*' == word
[*i
]) {
710 warnx("ignoring bad escape sequence");
717 warnx("ignoring bad escape sequence");
720 nescape(p
, &word
[*i
], 2);
726 nescape(p
, &word
[*i
], 1);
730 } else if ('[' != word
[*i
]) {
731 nescape(p
, &word
[*i
], 1);
736 for (j
= 0; word
[*i
] && ']' != word
[*i
]; (*i
)++, j
++)
740 warnx("ignoring bad escape sequence");
743 nescape(p
, &word
[*i
- j
], j
);
748 * Handle pwords, partial words, which may be either a single word or a
749 * phrase that cannot be broken down (such as a literal string). This
750 * handles word styling.
753 pword(struct termp
*p
, const char *word
, size_t len
)
757 if ( ! (TERMP_NOSPACE
& p
->flags
) &&
758 ! (TERMP_LITERAL
& p
->flags
))
761 if ( ! (p
->flags
& TERMP_NONOSPACE
))
762 p
->flags
&= ~TERMP_NOSPACE
;
765 * XXX - if literal and underlining, this will underline the
766 * spaces between literal words.
769 if (p
->flags
& TERMP_BOLD
)
770 stylea(p
, TERMSTYLE_BOLD
);
771 if (p
->flags
& TERMP_UNDERLINE
)
772 stylea(p
, TERMSTYLE_UNDER
);
774 for (i
= 0; i
< len
; i
++) {
775 if ('\\' == word
[i
]) {
776 pescape(p
, word
, &i
, len
);
782 if (p
->flags
& TERMP_BOLD
||
783 p
->flags
& TERMP_UNDERLINE
)
784 stylea(p
, TERMSTYLE_CLEAR
);
789 * Add a symbol to the output line buffer.
792 symbola(struct termp
*p
, enum tsym sym
)
795 assert(p
->symtab
[sym
].sym
);
796 stringa(p
, p
->symtab
[sym
].sym
, p
->symtab
[sym
].sz
);
801 * Add a style to the output line buffer.
804 stylea(struct termp
*p
, enum tstyle style
)
807 assert(p
->styletab
[style
].sym
);
808 stringa(p
, p
->styletab
[style
].sym
, p
->styletab
[style
].sz
);
813 * Like chara() but for arbitrary-length buffers. Resize the buffer by
814 * a factor of two (if the buffer is less than that) or the buffer's
818 stringa(struct termp
*p
, const char *c
, size_t sz
)
825 s
= sz
> p
->maxcols
* 2 ? sz
: p
->maxcols
* 2;
828 if (p
->col
+ sz
>= p
->maxcols
) {
829 p
->buf
= realloc(p
->buf
, s
);
835 (void)memcpy(&p
->buf
[p
->col
], c
, sz
);
841 * Insert a single character into the line-buffer. If the buffer's
842 * space is exceeded, then allocate more space by doubling the buffer
846 chara(struct termp
*p
, char c
)
849 if (p
->col
+ 1 >= p
->maxcols
) {
850 p
->buf
= realloc(p
->buf
, p
->maxcols
* 2);
855 p
->buf
[(p
->col
)++] = c
;
860 sanity(const struct mdoc_node
*n
)
866 errx(1, "regular form violated (1)");
867 if (NULL
== n
->parent
)
868 errx(1, "regular form violated (2)");
869 if (NULL
== n
->string
)
870 errx(1, "regular form violated (3)");
871 switch (n
->parent
->type
) {
875 errx(1, "regular form violated (4)");
882 if (NULL
== n
->parent
)
883 errx(1, "regular form violated (5)");
884 switch (n
->parent
->type
) {
892 errx(1, "regular form violated (6)");
895 if (n
->child
) switch (n
->child
->type
) {
899 errx(1, "regular form violated (7(");
908 if (NULL
== n
->parent
)
909 errx(1, "regular form violated (8)");
910 if (MDOC_BLOCK
!= n
->parent
->type
)
911 errx(1, "regular form violated (9)");
912 if (n
->child
) switch (n
->child
->type
) {
920 errx(1, "regular form violated (a)");
925 if (NULL
== n
->parent
)
926 errx(1, "regular form violated (b)");
927 if (NULL
== n
->child
)
928 errx(1, "regular form violated (c)");
929 switch (n
->parent
->type
) {
939 errx(1, "regular form violated (d)");
942 switch (n
->child
->type
) {
946 errx(1, "regular form violated (e)");
954 errx(1, "regular form violated (f)");
955 if (NULL
== n
->child
)
956 errx(1, "regular form violated (10)");
957 switch (n
->child
->type
) {
961 errx(1, "regular form violated (11)");