]> git.cameronkatri.com Git - mandoc.git/blob - term.c
Text tokens with leading whitespace (like indented blocks in `Bd -literal') are print...
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.93 2009/07/24 11:54:25 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <err.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "term.h"
24 #include "man.h"
25 #include "mdoc.h"
26
27 extern int man_run(struct termp *,
28 const struct man *);
29 extern int mdoc_run(struct termp *,
30 const struct mdoc *);
31
32 static struct termp *term_alloc(enum termenc);
33 static void term_free(struct termp *);
34 static void term_pescape(struct termp *, const char **);
35 static void term_nescape(struct termp *,
36 const char *, size_t);
37 static void term_chara(struct termp *, char);
38 static void term_encodea(struct termp *, char);
39 static int term_isopendelim(const char *);
40 static int term_isclosedelim(const char *);
41
42
43 void *
44 ascii_alloc(void)
45 {
46
47 return(term_alloc(TERMENC_ASCII));
48 }
49
50
51 int
52 terminal_man(void *arg, const struct man *man)
53 {
54 struct termp *p;
55
56 p = (struct termp *)arg;
57 if (NULL == p->symtab)
58 p->symtab = term_ascii2htab();
59
60 return(man_run(p, man));
61 }
62
63
64 int
65 terminal_mdoc(void *arg, const struct mdoc *mdoc)
66 {
67 struct termp *p;
68
69 p = (struct termp *)arg;
70 if (NULL == p->symtab)
71 p->symtab = term_ascii2htab();
72
73 return(mdoc_run(p, mdoc));
74 }
75
76
/*
 * Release the terminal context passed as the opaque pointer `arg'.
 */
void
terminal_free(void *arg)
{
	struct termp	*term = arg;

	term_free(term);
}
83
84
85 static void
86 term_free(struct termp *p)
87 {
88
89 if (p->buf)
90 free(p->buf);
91 if (TERMENC_ASCII == p->enc && p->symtab)
92 term_asciifree(p->symtab);
93
94 free(p);
95 }
96
97
98 static struct termp *
99 term_alloc(enum termenc enc)
100 {
101 struct termp *p;
102
103 if (NULL == (p = malloc(sizeof(struct termp))))
104 err(1, "malloc");
105 bzero(p, sizeof(struct termp));
106 p->maxrmargin = 78;
107 p->enc = enc;
108 return(p);
109 }
110
111
/*
 * Return 1 if `p' is exactly one character long and that character is
 * one of the closing delimiters . , ; : ? ! ) ] } and 0 otherwise.
 */
static int
term_isclosedelim(const char *p)
{
	static const char	 closers[] = ".,;:?!)]}";

	/* Only single-character strings can be delimiters. */
	if (p[0] == '\0' || p[1] != '\0')
		return(0);

	return(strchr(closers, p[0]) != NULL);
}
144
145
/*
 * Return 1 if `p' is exactly one character long and that character is
 * one of the opening delimiters ( [ { and 0 otherwise.
 */
static int
term_isopendelim(const char *p)
{
	static const char	 openers[] = "([{";

	/* Only single-character strings can be delimiters. */
	if (p[0] == '\0' || p[1] != '\0')
		return(0);

	return(strchr(openers, p[0]) != NULL);
}
166
167
168 /*
169 * Flush a line of text. A "line" is loosely defined as being something
170 * that should be followed by a newline, regardless of whether it's
171 * broken apart by newlines getting there. A line can also be a
172 * fragment of a columnar list.
173 *
174 * Specifically, a line is whatever's in p->buf of length p->col, which
175 * is zeroed after this function returns.
176 *
177 * The usage of termp:flags is as follows:
178 *
179 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
180 * offset value. This is useful when doing columnar lists where the
181 * prior column has right-padded.
182 *
183 * - TERMP_NOBREAK: this is the most important and is used when making
184 * columns. In short: don't print a newline and instead pad to the
185 * right margin. Used in conjunction with TERMP_NOLPAD.
186 *
187 * - TERMP_TWOSPACE: when padding, make sure there are at least two
188 * space characters of padding. Otherwise, rather break the line.
189 *
190 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
191 * the line is overrun, and don't pad-right if it's underrun.
192 *
193 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
194 * overruning, instead save the position and continue at that point
195 * when the next invocation.
196 *
197 * In-line line breaking:
198 *
199 * If TERMP_NOBREAK is specified and the line overruns the right
200 * margin, it will break and pad-right to the right margin after
201 * writing. If maxrmargin is violated, it will break and continue
202 * writing from the right-margin, which will lead to the above
203 * scenario upon exit.
204 *
205 * Otherwise, the line will break at the right margin. Extremely long
206 * lines will cause the system to emit a warning (TODO: hyphenate, if
207 * possible).
208 *
209 * FIXME: newline breaks occur (in groff) also occur when a single
210 * space follows a NOBREAK (try `Bl -tag')
211 *
212 * FIXME: there's a newline error where a `Bl -diag' will have a
213 * trailing newline if the line is exactly 73 chars long.
214 */
215 void
216 term_flushln(struct termp *p)
217 {
218 int i, j;
219 size_t vbl, vsz, vis, maxvis, mmax, bp;
220 static int overstep = 0;
221
222 /*
223 * First, establish the maximum columns of "visible" content.
224 * This is usually the difference between the right-margin and
225 * an indentation, but can be, for tagged lists or columns, a
226 * small set of values.
227 */
228
229 assert(p->offset < p->rmargin);
230 assert((int)(p->rmargin - p->offset) - overstep > 0);
231
232 maxvis = /* LINTED */
233 p->rmargin - p->offset - overstep;
234 mmax = /* LINTED */
235 p->maxrmargin - p->offset - overstep;
236
237 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
238 vis = 0;
239 overstep = 0;
240
241 /*
242 * If in the standard case (left-justified), then begin with our
243 * indentation, otherwise (columns, etc.) just start spitting
244 * out text.
245 */
246
247 if ( ! (p->flags & TERMP_NOLPAD))
248 /* LINTED */
249 for (j = 0; j < (int)p->offset; j++)
250 putchar(' ');
251
252 for (i = 0; i < (int)p->col; i++) {
253 /*
254 * Count up visible word characters. Control sequences
255 * (starting with the CSI) aren't counted. A space
256 * generates a non-printing word, which is valid (the
257 * space is printed according to regular spacing rules).
258 */
259
260 /* LINTED */
261 for (j = i, vsz = 0; j < (int)p->col; j++) {
262 if (j && ' ' == p->buf[j])
263 break;
264 else if (8 == p->buf[j])
265 vsz--;
266 else
267 vsz++;
268 }
269
270 /*
271 * Choose the number of blanks to prepend: no blank at the
272 * beginning of a line, one between words -- but do not
273 * actually write them yet.
274 */
275 vbl = (size_t)(0 == vis ? 0 : 1);
276
277 /*
278 * Find out whether we would exceed the right margin.
279 * If so, break to the next line. (TODO: hyphenate)
280 * Otherwise, write the chosen number of blanks now.
281 */
282 if (vis && vis + vbl + vsz > bp) {
283 putchar('\n');
284 if (TERMP_NOBREAK & p->flags) {
285 for (j = 0; j < (int)p->rmargin; j++)
286 putchar(' ');
287 vis = p->rmargin - p->offset;
288 } else {
289 for (j = 0; j < (int)p->offset; j++)
290 putchar(' ');
291 vis = 0;
292 }
293 } else {
294 for (j = 0; j < (int)vbl; j++)
295 putchar(' ');
296 vis += vbl;
297 }
298
299 /*
300 * Finally, write out the word.
301 */
302 for ( ; i < (int)p->col; i++) {
303 if (' ' == p->buf[i])
304 break;
305 putchar(p->buf[i]);
306 }
307 vis += vsz;
308 }
309 p->col = 0;
310
311 if ( ! (TERMP_NOBREAK & p->flags)) {
312 putchar('\n');
313 return;
314 }
315
316 if (TERMP_HANG & p->flags) {
317 /* We need one blank after the tag. */
318 overstep = /* LINTED */
319 vis - maxvis + 1;
320
321 /*
322 * Behave exactly the same way as groff:
323 * If we have overstepped the margin, temporarily move
324 * it to the right and flag the rest of the line to be
325 * shorter.
326 * If we landed right at the margin, be happy.
327 * If we are one step before the margin, temporarily
328 * move it one step LEFT and flag the rest of the line
329 * to be longer.
330 */
331 if (overstep >= -1) {
332 assert((int)maxvis + overstep >= 0);
333 /* LINTED */
334 maxvis += overstep;
335 } else
336 overstep = 0;
337
338 } else if (TERMP_DANGLE & p->flags)
339 return;
340
341 /* Right-pad. */
342 if (maxvis > vis + /* LINTED */
343 ((TERMP_TWOSPACE & p->flags) ? 1 : 0))
344 for ( ; vis < maxvis; vis++)
345 putchar(' ');
346 else { /* ...or newline break. */
347 putchar('\n');
348 for (i = 0; i < (int)p->rmargin; i++)
349 putchar(' ');
350 }
351 }
352
353
354 /*
355 * A newline only breaks an existing line; it won't assert vertical
356 * space. All data in the output buffer is flushed prior to the newline
357 * assertion.
358 */
359 void
360 term_newln(struct termp *p)
361 {
362
363 p->flags |= TERMP_NOSPACE;
364 if (0 == p->col) {
365 p->flags &= ~TERMP_NOLPAD;
366 return;
367 }
368 term_flushln(p);
369 p->flags &= ~TERMP_NOLPAD;
370 }
371
372
/*
 * Produce vertical space: break the current line via term_newln(),
 * then write one newline character.  Each call writes its own
 * newline, so repeated calls stack up blank lines.
 */
void
term_vspace(struct termp *p)
{

	term_newln(p);
	putc('\n', stdout);
}
386
387
388 /*
389 * Determine the symbol indicated by an escape sequences, that is, one
390 * starting with a backslash. Once done, we pass this value into the
391 * output buffer by way of the symbol table.
392 */
393 static void
394 term_nescape(struct termp *p, const char *word, size_t len)
395 {
396 const char *rhs;
397 size_t sz;
398 int i;
399
400 rhs = term_a2ascii(p->symtab, word, len, &sz);
401
402 if (rhs)
403 for (i = 0; i < (int)sz; i++)
404 term_encodea(p, rhs[i]);
405 }
406
407
408 /*
409 * Handle an escape sequence: determine its length and pass it to the
410 * escape-symbol look table. Note that we assume mdoc(3) has validated
411 * the escape sequence (we assert upon badly-formed escape sequences).
412 */
413 static void
414 term_pescape(struct termp *p, const char **word)
415 {
416 int j;
417 const char *wp;
418
419 wp = *word;
420
421 if (0 == *(++wp)) {
422 *word = wp;
423 return;
424 }
425
426 if ('(' == *wp) {
427 wp++;
428 if (0 == *wp || 0 == *(wp + 1)) {
429 *word = 0 == *wp ? wp : wp + 1;
430 return;
431 }
432
433 term_nescape(p, wp, 2);
434 *word = ++wp;
435 return;
436
437 } else if ('*' == *wp) {
438 if (0 == *(++wp)) {
439 *word = wp;
440 return;
441 }
442
443 switch (*wp) {
444 case ('('):
445 wp++;
446 if (0 == *wp || 0 == *(wp + 1)) {
447 *word = 0 == *wp ? wp : wp + 1;
448 return;
449 }
450
451 term_nescape(p, wp, 2);
452 *word = ++wp;
453 return;
454 case ('['):
455 break;
456 default:
457 term_nescape(p, wp, 1);
458 *word = wp;
459 return;
460 }
461
462 } else if ('f' == *wp) {
463 if (0 == *(++wp)) {
464 *word = wp;
465 return;
466 }
467
468 switch (*wp) {
469 case ('B'):
470 p->flags |= TERMP_BOLD;
471 break;
472 case ('I'):
473 p->flags |= TERMP_UNDER;
474 break;
475 case ('P'):
476 /* FALLTHROUGH */
477 case ('R'):
478 p->flags &= ~TERMP_STYLE;
479 break;
480 default:
481 break;
482 }
483
484 *word = wp;
485 return;
486
487 } else if ('[' != *wp) {
488 term_nescape(p, wp, 1);
489 *word = wp;
490 return;
491 }
492
493 wp++;
494 for (j = 0; *wp && ']' != *wp; wp++, j++)
495 /* Loop... */ ;
496
497 if (0 == *wp) {
498 *word = wp;
499 return;
500 }
501
502 term_nescape(p, wp - j, (size_t)j);
503 *word = wp;
504 }
505
506
507 /*
508 * Handle pwords, partial words, which may be either a single word or a
509 * phrase that cannot be broken down (such as a literal string). This
510 * handles word styling.
511 */
512 void
513 term_word(struct termp *p, const char *word)
514 {
515 const char *sv;
516
517 if (term_isclosedelim(word))
518 if ( ! (TERMP_IGNDELIM & p->flags))
519 p->flags |= TERMP_NOSPACE;
520
521 if ( ! (TERMP_NOSPACE & p->flags))
522 term_chara(p, ' ');
523
524 if ( ! (p->flags & TERMP_NONOSPACE))
525 p->flags &= ~TERMP_NOSPACE;
526
527 /*
528 * If ANSI (word-length styling), then apply our style now,
529 * before the word.
530 */
531
532 for (sv = word; *word; word++)
533 if ('\\' != *word)
534 term_encodea(p, *word);
535 else
536 term_pescape(p, &word);
537
538 if (term_isopendelim(sv))
539 p->flags |= TERMP_NOSPACE;
540 }
541
542
543 /*
544 * Insert a single character into the line-buffer. If the buffer's
545 * space is exceeded, then allocate more space by doubling the buffer
546 * size.
547 */
548 static void
549 term_chara(struct termp *p, char c)
550 {
551 size_t s;
552
553 if (p->col + 1 >= p->maxcols) {
554 if (0 == p->maxcols)
555 p->maxcols = 256;
556 s = p->maxcols * 2;
557 p->buf = realloc(p->buf, s);
558 if (NULL == p->buf)
559 err(1, "realloc");
560 p->maxcols = s;
561 }
562 p->buf[(int)(p->col)++] = c;
563 }
564
565
566 static void
567 term_encodea(struct termp *p, char c)
568 {
569
570 if (' ' != c && TERMP_STYLE & p->flags) {
571 if (TERMP_BOLD & p->flags) {
572 term_chara(p, c);
573 term_chara(p, 8);
574 }
575 if (TERMP_UNDER & p->flags) {
576 term_chara(p, '_');
577 term_chara(p, 8);
578 }
579 }
580 term_chara(p, c);
581 }